# Feature Scaling and Normalization

In [15]:
# ● Demonstrate the effect of standardizing features using StandardScaler. 
# ● Compare model performance before and after scaling.

In [16]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

In [17]:
# Fetch the California housing dataset and separate the feature matrix
# from the regression target.
data = fetch_california_housing()
x, y = data.data, data.target

# Show the feature and target column names, one list per line.
print(data.feature_names, data.target_names, sep="\n")

['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
['MedHouseVal']


In [18]:
# Reserve 20% of the rows for evaluation; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [19]:
# Baseline: linear regression fitted on all features without any scaling.
raw_model = LinearRegression().fit(X_train, y_train)
y_pred = raw_model.predict(X_test)

# Predictions first, true targets second, for a quick side-by-side look.
print(y_pred, y_test, sep="\n")

[0.71912284 1.76401657 2.70965883 ... 4.46877017 1.18751119 2.00940251]
[0.477   0.458   5.00001 ... 5.00001 0.723   1.515  ]


In [20]:
# Fit the scaler on the training split only, then reuse those statistics
# to transform the test split so no test-set information leaks in.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same kind of estimator as the unscaled run, trained on standardized features.
model_scaled = LinearRegression().fit(X_train_scaled, y_train)
y_pred_scaled = model_scaled.predict(X_test_scaled)

# Predictions first, true targets second.
print(y_pred_scaled, y_test, sep="\n")

[0.71912284 1.76401657 2.70965883 ... 4.46877017 1.18751119 2.00940251]
[0.477   0.458   5.00001 ... 5.00001 0.723   1.515  ]


In [21]:
def _report_metrics(title, model, X, y_true, y_hat):
    """Print the evaluation summary for one fitted model.

    Parameters
    ----------
    title : str
        Heading printed above the metrics.
    model : fitted estimator
        Must expose ``score(X, y)``.
    X : array-like
        Features passed to ``model.score``.
    y_true : array-like
        Ground-truth targets.
    y_hat : array-like
        Predictions to compare against ``y_true``.
    """
    # Compute MSE once and reuse it for RMSE instead of recomputing it.
    mse = mean_squared_error(y_true, y_hat)
    print(title)
    print("Score : ", model.score(X, y_true))
    print("R² Score:", r2_score(y_true, y_hat))
    print("MSE :", mse)
    print("RMSE:", np.sqrt(mse))


# NOTE: a least-squares linear regression with an intercept produces the same
# predictions after standardizing the features, so the two reports below are
# expected to agree up to floating-point rounding. Scaling matters for
# regularized or distance-based models, not for this estimator.
_report_metrics("Without Scaling", raw_model, X_test, y_test, y_pred)
print()
_report_metrics("With Scaling", model_scaled, X_test_scaled, y_test, y_pred_scaled)

Without Scaling
Score :  0.5757877060324521
R² Score: 0.5757877060324521
MSE : 0.5558915986952425
RMSE: 0.7455813830127751

With Scaling
Score :  0.575787706032451
R² Score: 0.575787706032451
MSE : 0.5558915986952442
RMSE: 0.7455813830127763
