In [6]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the California housing dataset; as_frame=True returns pandas objects.
housing = fetch_california_housing(as_frame=True)

# Features and target, then an 80/20 train/test split.
# The fixed random_state makes the split reproducible between runs.
X, y = housing.data, housing.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build and train the regressor in one step (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

print("Training complete!")

# Score the model on the held-out rows it was not trained on.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# One print call, one line per argument; output matches separate prints.
print(
    "\n--- Model Evaluation ---",
    f"Mean Squared Error: {mse:.2f}",
    f"R-Squared Score: {r2:.2f}",
    sep="\n",
)

# Side-by-side view of true vs. predicted targets for the first test rows.
# y_test is a Series, so the frame keeps the original row index.
print("\n--- Sample Predictions ---")
predictions_df = pd.DataFrame(
    {
        "Actual Price": y_test,
        "Predicted Price": y_pred,
    }
)
print(predictions_df.head())

Training complete!

--- Model Evaluation ---
Mean Squared Error: 0.56
R-Squared Score: 0.58

--- Sample Predictions ---
       Actual Price  Predicted Price
20046       0.47700         0.719123
3024        0.45800         1.764017
15663       5.00001         2.709659
20484       2.18600         2.838926
9814        2.78000         2.604657
