In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [22]:
data=pd.read_csv(r"D:\Users\DELL\Downloads\california_housing.csv")

In [23]:
data.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,MedHouseValue
0,8.32,41,6.98,4.52
1,7.25,52,8.28,3.52
2,5.64,30,5.81,2.85
3,4.12,21,4.5,2.1
4,3.85,35,6.28,3.42


In [24]:
data.isnull().sum()

MedInc           0
HouseAge         0
AveRooms         0
MedHouseValue    0
dtype: int64

In [25]:
data.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,MedHouseValue
count,20.0,20.0,20.0,20.0
mean,5.5965,34.3,6.3185,3.1605
std,1.864102,8.092231,1.403827,0.908425
min,2.5,18.0,4.0,1.6
25%,4.0525,29.75,5.1875,2.425
50%,5.745,34.5,6.39,3.26
75%,7.0625,39.25,7.2875,3.8625
max,8.32,52.0,8.5,4.52


In [26]:
print(data,5)

    MedInc  HouseAge  AveRooms  MedHouseValue
0     8.32        41      6.98           4.52
1     7.25        52      8.28           3.52
2     5.64        30      5.81           2.85
3     4.12        21      4.50           2.10
4     3.85        35      6.28           3.42
5     6.75        28      7.25           3.90
6     2.95        18      4.12           1.80
7     5.25        40      5.50           2.75
8     7.85        33      8.50           4.10
9     3.50        29      5.00           2.20
10    4.95        45      6.20           3.00
11    8.10        39      7.80           4.30
12    2.50        25      4.00           1.60
13    6.10        32      6.90           3.60
14    7.00        36      7.10           3.85
15    3.20        31      4.80           2.00
16    5.85        42      6.50           3.10
17    4.25        38      5.25           2.50
18    6.50        34      7.40           3.70
19    8.00        37      8.20           4.40 5


In [27]:
X = data.drop("MedHouseValue", axis=1) 
y = data["MedHouseValue"]

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
rf = RandomForestRegressor(random_state=42)

In [31]:
param_grid = {
    "n_estimators": [50, 100],
    "max_depth": [5, 10, None],
    "min_samples_split": [2, 5]
}

grid_search = GridSearchCV(rf, param_grid, cv=3, scoring="r2", n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print("\n Best Random Forest Parameters ")
print(grid_search.best_params_)


Fitting 3 folds for each of 12 candidates, totalling 36 fits

 Best Random Forest Parameters 
{'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 50}


In [32]:
best_rf = grid_search.best_estimator_
rf_pred = best_rf.predict(X_test)

In [33]:
lr = LinearRegression()
ridge = Ridge(alpha=1.0, random_state=42)
lasso = Lasso(alpha=0.001, random_state=42, max_iter=10000)

In [34]:
lr.fit(X_train, y_train)
ridge.fit(X_train, y_train)
lasso.fit(X_train, y_train)

In [35]:
lr_pred = lr.predict(X_test)
ridge_pred = ridge.predict(X_test)
lasso_pred = lasso.predict(X_test)

In [36]:
def evaluate_model(name, y_true, y_pred):
    print(f"\n{name} Performance:")
    print(f"MAE: {mean_absolute_error(y_true, y_pred):.4f}")
    print(f"MSE: {mean_squared_error(y_true, y_pred):.4f}")
    print(f"R2 : {r2_score(y_true, y_pred):.4f}")

In [37]:
evaluate_model("Random Forest (Tuned)", y_test, rf_pred)


Random Forest (Tuned) Performance:
MAE: 0.3767
MSE: 0.1677
R2 : 0.8215


In [38]:
evaluate_model("Linear Regression", y_test, lr_pred)
evaluate_model("Ridge Regression", y_test, ridge_pred)
evaluate_model("Lasso Regression", y_test, lasso_pred)


Linear Regression Performance:
MAE: 0.4818
MSE: 0.3742
R2 : 0.6017

Ridge Regression Performance:
MAE: 0.4411
MSE: 0.2983
R2 : 0.6825

Lasso Regression Performance:
MAE: 0.4816
MSE: 0.3732
R2 : 0.6027
