In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_absolute_error

# Configuration
# The California Housing target is the median house value expressed in units
# of $100,000, so model outputs and errors are scaled by this factor before
# being printed as dollar amounts.
USD_PER_TARGET_UNIT = 100_000
# Columns used as model inputs. Kept in one place so that training data and
# any hand-built rows passed to `predict` always share the same names/order.
FEATURE_COLUMNS = ['MedInc', 'HouseAge', 'AveRooms']

# 1. Load the dataset (California Housing)
# NOTE: the first call downloads the data and caches it locally.
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['Price'] = data.target  # This is the "Label" we want to predict

# 2. Select Features (X) and Target (y)
# We'll use: Median Income, House Age, and Average Rooms
X = df[FEATURE_COLUMNS]
y = df['Price']

# 3. Split data (80% for training, 20% for testing)
# A fixed random_state makes the split (and therefore the reported error)
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Initialize and Train the Model
model = LinearRegression()
model.fit(X_train, y_train)

# 5. Test the model
# Mean absolute error on the held-out 20%, in target units (see
# USD_PER_TARGET_UNIT for the conversion to dollars).
predictions = model.predict(X_test)
error = mean_absolute_error(y_test, predictions)

print(f"Average Prediction Error: ${error * USD_PER_TARGET_UNIT:.2f}")

# 6. Make a custom prediction
# Example: Income 5.0, Age 20, Rooms 6
# The model was fitted on a DataFrame, so it remembers the feature names.
# Passing a bare nested list would make scikit-learn emit
# "X does not have valid feature names" and would silently rely on the values
# being in the right positional order. A one-row DataFrame with the same
# columns avoids both problems.
new_house = pd.DataFrame([[5.0, 20, 6]], columns=FEATURE_COLUMNS)
predicted_price = model.predict(new_house)
print(f"Predicted Price for your house: ${predicted_price[0] * USD_PER_TARGET_UNIT:.2f}")

Average Prediction Error: $60332.14
Predicted Price for your house: $240891.12


