In [1]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# --- Configuration ---
# One shared seed for both the train/test split and the forest, so the two
# cannot drift apart when someone changes one of them.
RANDOM_STATE = 42
# Fraction of the rows held out for evaluation.
TEST_SIZE = 0.2

# --- Load data ---
# Fetch the dataset; as_frame=True asks scikit-learn for pandas objects.
housing = fetch_california_housing(as_frame=True)

# Separate the feature table from the regression target
X = housing.data
y = housing.target

# --- Train/test split ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# --- Feature scaling ---
# NOTE(review): tree ensembles are generally insensitive to feature scaling,
# so this step probably does not change the forest's accuracy. It is kept so
# that X_train_scaled / X_test_scaled remain available - confirm before removing.
scaler = StandardScaler()

# Fit the scaler on the TRAINING data only and transform it, so that no
# statistics from the test set leak into the preprocessing step
X_train_scaled = scaler.fit_transform(X_train)

# Transform the TEST data using the same fitted scaler
X_test_scaled = scaler.transform(X_test)

# --- Model training ---
# Instantiate the model and fit it on the scaled training data.
# n_jobs=-1 lets scikit-learn use all available processors.
model = RandomForestRegressor(random_state=RANDOM_STATE, n_jobs=-1)
model.fit(X_train_scaled, y_train)

print("Training complete!")

# --- Evaluation ---
# Make predictions on the unseen scaled test data
y_pred = model.predict(X_test_scaled)

# Score the predictions against the held-out targets
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\n--- Model Evaluation ---")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-Squared Score: {r2:.2f}")

Training complete!

--- Model Evaluation ---
Mean Squared Error: 0.26
R-Squared Score: 0.81
