In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# --- Data ---------------------------------------------------------------
# California housing features as a DataFrame, with the target appended as
# 'Price' and kept in its original units (100k USD).
cali = fetch_california_housing()
df = pd.DataFrame(cali.data, columns=cali.feature_names).assign(Price=cali.target)

# --- Hold-out split -----------------------------------------------------
# 80/20 split with a fixed seed so the evaluation below is repeatable.
X = df.drop(columns='Price')
y = df['Price']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Standardisation ----------------------------------------------------
# The scaler is fitted on the training rows only; the same fitted
# statistics are then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Random forest ------------------------------------------------------
# Note: the model is fitted on the *scaled* array, so every later call that
# feeds data to `rf` must go through `scaler` first.
rf = RandomForestRegressor(
    n_estimators=200,
    min_samples_leaf=5,
    max_features=0.5,
    n_jobs=-1,
    random_state=42,
).fit(X_train_scaled, y_train)
preds = rf.predict(X_test_scaled)


# Hold-out metrics. Prices are in units of $100k, so the error figures are
# multiplied by 100000 to report them in dollars.
mae_usd = mean_absolute_error(y_test, preds) * 100000
rmse_usd = np.sqrt(mean_squared_error(y_test, preds)) * 100000
score = r2_score(y_test, preds)

# Adjusted R²: 1 - (1 - R²)(n - 1)/(n - p - 1), with n test rows and p features.
n_obs, n_feats = len(y_test), X_test.shape[1]
adj_score = 1 - (1 - score) * (n_obs - 1) / (n_obs - n_feats - 1)

print("Random Forest Performance:")
print(f"MAE: ${mae_usd:,.2f}")
print(f"RMSE: ${rmse_usd:,.2f}")
print(f"R²: {score:.4f}")
print(f"Adjusted R²: {adj_score:.4f}")

# Six diagnostic panels on a single 2x3 canvas, each drawn through its own
# Axes handle rather than the implicit "current axes".
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Model Diagnostic Plots', y=1.02, fontsize=16)
(ax_fit, ax_resid, ax_hist), (ax_imp, ax_kde, ax_top) = axes

# Panel 1 - predictions against actual values; the dashed black line is y = x
price_span = [y_test.min(), y_test.max()]
sns.regplot(
    x=y_test,
    y=preds,
    scatter_kws={'alpha': 0.3},
    line_kws={'color': 'red'},
    ax=ax_fit,
)
ax_fit.plot(price_span, price_span, 'k--')
ax_fit.set_xlabel('Actual Price ($100k)')
ax_fit.set_ylabel('Predicted Price ($100k)')
ax_fit.set_title('Actual vs Predicted Prices')

# Panel 2 - residuals (actual minus predicted) against the predictions
residuals = y_test - preds
sns.scatterplot(x=preds, y=residuals, alpha=0.3, ax=ax_resid)
ax_resid.axhline(y=0, color='r', linestyle='--')
ax_resid.set_xlabel('Predicted Values')
ax_resid.set_ylabel('Residuals')
ax_resid.set_title('Residuals vs Predicted')

# Panel 3 - histogram of the residuals with a KDE overlay
sns.histplot(residuals, kde=True, bins=30, ax=ax_hist)
ax_hist.axvline(x=0, color='r', linestyle='--')
ax_hist.set_xlabel('Prediction Error ($100k)')
ax_hist.set_title('Error Distribution')

# Panel 4 - forest feature importances, largest first
importances = pd.DataFrame(
    {'feature': X_train.columns, 'importance': rf.feature_importances_}
).sort_values('importance', ascending=False)
sns.barplot(x='importance', y='feature', data=importances, palette='viridis', ax=ax_imp)
ax_imp.set_title('Feature Importances')

# Panel 5 - density of actual prices overlaid with density of predictions
sns.kdeplot(y_test, label='Actual', color='blue', fill=True, ax=ax_kde)
sns.kdeplot(preds, label='Predicted', color='orange', fill=True, alpha=0.5, ax=ax_kde)
ax_kde.set_xlabel('Price ($100k)')
ax_kde.set_title('Distribution Comparison')
ax_kde.legend()

# Panel 6 - residuals against the raw values of the most important feature
top_feature = importances['feature'].iloc[0]
sns.scatterplot(x=X_test[top_feature], y=residuals, alpha=0.3, ax=ax_top)
ax_top.axhline(y=0, color='r', linestyle='--')
ax_top.set_xlabel(top_feature)
ax_top.set_ylabel('Residual Error')
ax_top.set_title(f'Errors vs {top_feature}')

fig.tight_layout()
plt.show()

from sklearn.inspection import PartialDependenceDisplay

print("\nPartial Dependence Plots:")
fig, ax = plt.subplots(figsize=(12, 4))

# `rf` was fitted on the standardised array (X_train_scaled), so partial
# dependence has to be evaluated on that same representation. Passing the
# unscaled X_train would build the grid from raw feature values and compare
# them against split thresholds learned in standardised units, giving a
# meaningless curve. The feature is therefore addressed by column position
# and the names are supplied separately for labelling.
pdp_feature = importances.iloc[0]['feature']
pdp_feature_idx = X_train.columns.get_loc(pdp_feature)

pdp_display = PartialDependenceDisplay.from_estimator(
    rf,
    X_train_scaled,
    features=[pdp_feature_idx],
    feature_names=X_train.columns.tolist(),
    ax=ax,
)
# Make it explicit that the x-axis is in standardised units, not raw ones.
pdp_display.axes_.ravel()[0].set_xlabel(f"{pdp_feature} (standardized)")
plt.tight_layout()
plt.show()

# Predict the first row of the dataset. The row is passed as a one-row
# DataFrame so its column names match those the scaler was fitted with
# (a bare ndarray triggers scikit-learn's feature-name warning).
rf.predict(scaler.transform(X.iloc[[0]]))

: 

### New Data Prediction

In [None]:
# Bundle everything needed to score new rows later - the fitted forest, the
# fitted scaler and the training column order - into one pickle file.
deployment_assets = dict(
    model=rf,
    scaler=scaler,
    feature_names=list(X.columns),
)

with open("california_housing_rf.pkl", "wb") as pkl_file:
    pickle.dump(deployment_assets, pkl_file)

print("\n✅ Model, scaler, and feature names saved to 'california_housing_rf.pkl'")

In [240]:
# Read the saved bundle back from disk. Despite the variable name, the object
# pickled to this path is the dict of deployment assets (model, scaler,
# feature names), not a bare estimator.
# Only unpickle files you created yourself: pickle.load can run arbitrary code.
with open('california_housing_rf.pkl', 'rb') as pkl_file:
    loaded_model = pickle.load(pkl_file)

In [242]:
# `loaded_model` is the unpickled asset dict (keys 'model', 'scaler',
# 'feature_names'), not an estimator, so calling .predict on it directly
# raises AttributeError on a fresh kernel. Score the first dataset row using
# only the saved assets: name the columns with the stored feature order,
# scale with the stored scaler, then predict with the stored forest.
first_row = pd.DataFrame(cali.data[:1], columns=loaded_model['feature_names'])
loaded_model['model'].predict(loaded_model['scaler'].transform(first_row))



array([4.38917435])