In [13]:
# Look up the Gemini API key by the name 'my_gemini_api_key' through Colab's
# `userdata` helper so the key itself never appears in the notebook source.
# The value is consumed later by `genai.configure(...)`.
from google.colab import userdata
my_gemini_api_key = userdata.get('my_gemini_api_key')

In [14]:
import re

import numpy as np
import pandas as pd
import google.generativeai as genai
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# ==========================
# 1. Generate a simulated dataset
# ==========================
# Seed NumPy's legacy global RNG so every run produces the same synthetic table.
np.random.seed(42)

# The draws below must stay in this order (size, bedrooms, location score, noise):
# each call advances the shared RNG state, so reordering changes the data.
n_houses = 100
size = np.random.randint(50, 200, n_houses)           # House size (in m²); upper bound is exclusive
bedrooms = np.random.randint(1, 5, n_houses)          # Number of bedrooms; upper bound is exclusive
location_score = np.random.randint(1, 10, n_houses)   # Location score; upper bound is exclusive
noise = np.random.randint(0, 50000, n_houses)         # Additive integer noise on the price

# Price is a fixed linear combination of the three features plus the noise term.
price = 3000 * size + 10000 * bedrooms + 5000 * location_score + noise

df = pd.DataFrame(
    dict(Size=size, Bedrooms=bedrooms, LocationScore=location_score, Price=price)
)
print("Dataset sample:", df.head(), sep="\n")

# New data point for prediction
new_data = dict(Size=120, Bedrooms=3, LocationScore=8)

# ==========================
# 2. Use Google Gemini to generate prediction logic
# ==========================
# Fail fast on a missing or empty key. Wrapping the key in an f-string would turn
# None into the literal string "None", which only surfaces later as an opaque
# authentication error when the request is sent.
if not my_gemini_api_key:
    raise ValueError(
        "Gemini API key is missing or empty; set the 'my_gemini_api_key' secret."
    )
genai.configure(api_key=my_gemini_api_key)

# The whole DataFrame is serialised into the prompt as JSON records, so the prompt
# grows with the number of rows. `new_data` is interpolated using its dict repr.
prompt = f"""
Here is a housing price prediction dataset (in JSON format):
{df.to_json(orient="records", force_ascii=False)}

Please generate Python code that:
- Uses sklearn's LinearRegression to train a model
- Features: Size, Bedrooms, LocationScore
- Target variable: Price
- Splits the dataset into train and test sets (80/20 split)
- Trains the model and predicts for the following new house: {new_data}
- Finally, print the predicted price
Only output Python code. Do not include any explanation.
"""

model = genai.GenerativeModel("gemini-2.0-flash")
# Single-turn request: one user message whose only part is the prompt text.
response = model.generate_content(
    contents=[{"role": "user", "parts": [prompt]}]
)

# Clean up the code
# Matches a Markdown code fence: opening ``` with an optional info string
# (python, py, ...), a newline, then the body up to the closing ``` or, if the
# reply was cut off before the fence closed, up to the end of the text.
_CODE_FENCE_RE = re.compile(r"```[^\n`]*\n(.*?)(?:```|\Z)", re.DOTALL)


def _extract_python_code(text):
    """Return the source code carried by a model reply.

    If the reply contains one or more multi-line Markdown fences, the bodies of
    those fences are returned (joined by a blank line) and any prose around them
    is discarded. This works even when the reply does not start with a fence or
    uses an info string other than ``python``.

    If no multi-line fence is found, stray fence markers are removed and the
    remaining text is returned stripped, so unfenced replies pass through
    unchanged.

    Args:
        text: Raw reply text from the model.

    Returns:
        The extracted code as a string (possibly empty).
    """
    fenced_bodies = [body.strip() for body in _CODE_FENCE_RE.findall(text)]
    fenced_bodies = [body for body in fenced_bodies if body]
    if fenced_bodies:
        return "\n\n".join(fenced_bodies)
    # Fallback for single-line or malformed fences: drop the markers only.
    return text.replace("```python", "").replace("```", "").strip()


raw_code = _extract_python_code(response.text)

print("\n=== AI-Generated Cleaned Code ===\n")
print(raw_code)

# SECURITY (review): exec() runs model-generated code with full access to this
# kernel (globals, files, network, secrets). Nothing here pauses for a human
# check, so the code is executed in the same cell run that prints it. The exec'd
# code also shares the notebook's globals and can rebind names such as `model`.
# Inspect the printed code before trusting results, or run it in a sandbox.
exec(raw_code)


Dataset sample:
   Size  Bedrooms  LocationScore   Price
0   152         2              4  519328
1   142         3              1  486876
2    64         3              1  248295
3   156         1              4  501987
4   121         3              7  450399

=== AI-Generated Cleaned Code ===

import json
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np

data = [{"Size":152,"Bedrooms":2,"LocationScore":4,"Price":519328},{"Size":142,"Bedrooms":3,"LocationScore":1,"Price":486876},{"Size":64,"Bedrooms":3,"LocationScore":1,"Price":248295},{"Size":156,"Bedrooms":1,"LocationScore":4,"Price":501987},{"Size":121,"Bedrooms":3,"LocationScore":7,"Price":450399},{"Size":70,"Bedrooms":1,"LocationScore":2,"Price":276214},{"Size":152,"Bedrooms":3,"LocationScore":3,"Price":534986},{"Size":171,"Bedrooms":2,"LocationScore":1,"Price":558880},{"Size":124,"Bedrooms":3,"LocationScore":5,"Price":431735},{"Size":137,"Bedrooms":1,"Loca

In [15]:
# api_key