In [1]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [25]:
# Load the California housing dataset as one DataFrame; `as_frame=True` makes
# the returned bunch expose a `frame` entry, which is what we keep here.
california_housing = fetch_california_housing(as_frame=True)["frame"]
# Preview the first five rows as the cell output.
california_housing.head(n=5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [26]:
# Target: the 'MedHouseVal' column; features: every remaining column.
y = california_housing.loc[:, 'MedHouseVal']
X = california_housing.drop('MedHouseVal', axis='columns')

In [27]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [28]:
# Random forest of 100 trees; seeded so that training is repeatable.
rf_regressor = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
)

In [29]:
# Train on the training split, then predict the held-out test rows.
# `fit` returns the fitted estimator itself, so the two steps can be chained.
y_pred = rf_regressor.fit(X_train, y_train).predict(X_test)

In [30]:
# Show only the first five predictions rather than the whole array.
y_pred[0:5]

array([0.5095   , 0.74161  , 4.9232571, 2.52961  , 2.27369  ])

In [31]:
# Score the test-set predictions against the true targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [40]:
# Predict for the first test row only.
# Indexing with a list (`iloc[[0]]`) yields a one-row DataFrame that still
# carries the column names. The model was fitted on a DataFrame, so passing a
# bare NumPy array here (via `.values.reshape(1, -1)`) would make scikit-learn
# emit its "X does not have valid feature names" UserWarning.
single_data = X_test.iloc[[0]]
predicted_value = rf_regressor.predict(single_data)

# The values are scaled by 100000 before being printed with a dollar sign
# (the dataset's target is presumably in units of $100,000 - see the
# scikit-learn dataset description).
print(f"Predicted Value: {predicted_value[0]*100000:.2f} $")
print(f"Actual Value: {y_test.iloc[0]*100000:.2f} $")

Predicted Value: 50950.00 $
Actual Value: 47700.00 $




In [34]:
# Report both evaluation metrics, one per line, rounded to two decimals.
print(
    f"Mean Squared Error: {mse:.2f}",
    f"R-squared Score: {r2:.2f}",
    sep="\n",
)

Mean Squared Error: 0.26
R-squared Score: 0.81
