#importing libraries

In [18]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Load the California housing dataset as pandas objects (as_frame=True)
housing = fetch_california_housing(as_frame=True)

# Feature matrix and regression target
X, y = housing.data, housing.target

# Features and target side by side in a single DataFrame
df = pd.concat([X, y], axis="columns")

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

# Baseline model: plain LinearRegression on the unscaled features
model = LinearRegression()

# Fit on the training split
model.fit(X_train, y_train)

# Predict the target for the held-out test split
y_pred = model.predict(X_test)

# Side-by-side table of the true test targets and the model predictions
comparisons = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})

# Error metrics for the plain linear regression on the test split
print("metrics with linear regression\n")
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
r2_l = r2_score(y_test, y_pred)

print(f"R2 Score: {r2_l}\nMean Absolute Error: {mae}\nMean Squared Error: {mse}\nRoot Mean Squared Error: {rmse}\n\n\n")

# Same linear model, but with features standardized first.
# The scaler lives inside the pipeline, so it is fitted on the training split only.
scaler = StandardScaler()
model_scaled = make_pipeline(scaler, LinearRegression()).fit(X_train, y_train)

# Predictions for the held-out test split
y_pred_scaled = model_scaled.predict(X_test)

# Error metrics for the scaled pipeline on the test split
print("Metrics with Scaling(Linear Regression)\n")
r2_scaled = r2_score(y_test, y_pred_scaled)
mse_scaled = mean_squared_error(y_test, y_pred_scaled)
rmse_scaled = np.sqrt(mse_scaled)  # derived from the MSE above
mae_scaled = mean_absolute_error(y_test, y_pred_scaled)
print(f"R2 Score: {r2_scaled}\nMean Absolute Error: {mae_scaled}\nMean Squared Error: {mse_scaled}\nRoot Mean Squared Error: {rmse_scaled}\n\n\n")


#Using decision trees


metrics with linear regeression

R2 Score: 0.575787706032451
Mean Absolute Error: 0.5332001304956563
Mean Squared Error: 0.5558915986952442
Root Mean Squared Error: 0.7455813830127763



Metrics with Scaling(Linear Regression)

R2 Score: 0.575787706032451
Mean Absolute Error: 0.5332001304956562
Mean Squared Error: 0.5558915986952442
Root Mean Squared Error: 0.7455813830127763



