In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

houses_df = pd.read_csv("Houses.csv")

X = houses_df[['Area (Marla)', 'Beds', 'Baths']]
Y = houses_df['Price (PKR:Crore)'] 
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
lr = LinearRegression()
lr.fit(X_train, y_train)

In [8]:
# Multivariate Regression.
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

y_pred = lr.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error: ", mse)

# Without features scaling
# Training a model with different learning rates
learning_rates = [0.001, 0.01, 0.1, 0.5, 1]
for lr in learning_rates:
    sgd = SGDRegressor(max_iter=10000, eta0=lr, random_state=42)
    sgd.fit(X_train, y_train.ravel())
    y_pred = sgd.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("\nLearning Rate: ", lr)
    print("Mean Squared Error: ", mse)


Mean Squared Error:  4.831565660405444

Learning Rate:  0.001
Mean Squared Error:  4.1945878714675885e+23

Learning Rate:  0.01
Mean Squared Error:  5.0047128559581983e+23

Learning Rate:  0.1
Mean Squared Error:  1.66508475394773e+27

Learning Rate:  0.5
Mean Squared Error:  3.8244616152954563e+28

Learning Rate:  1
Mean Squared Error:  1.7158820568654663e+29


In [9]:
# With features scaling
X_mean = np.mean(X_train, axis=0)
X_std = np.std(X_train, axis=0)
X_train_scaled = (X_train - X_mean) / X_std

X_mean = np.mean(X_test, axis=0)
X_std = np.std(X_test, axis=0)
X_test_scaled = (X_test - X_mean) / X_std

# Training a model with different learning rates
for lr in learning_rates:
    sgd = SGDRegressor(max_iter=10000, eta0=lr, random_state=42)
    sgd.fit(X_train_scaled, y_train.ravel())
    y_pred = sgd.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    print("\nLearning Rate: ", lr)
    print("Mean Squared Error: ", mse)


Learning Rate:  0.001
Mean Squared Error:  5.587585095665656

Learning Rate:  0.01
Mean Squared Error:  5.5777547169955195

Learning Rate:  0.1
Mean Squared Error:  5.10727109770817

Learning Rate:  0.5
Mean Squared Error:  41.02136007653824

Learning Rate:  1
Mean Squared Error:  3.831660528847993e+22


In [10]:
# Polynomial Regression:
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

lr_poly = LinearRegression()
lr_poly.fit(X_train_poly, y_train)

y_poly_pred = lr_poly.predict(X_test_poly)

mse_poly = mean_squared_error(y_test, y_poly_pred)
print("Mean Squared Error Pollynomial Regression: ", mse_poly)
# Without feature scaling
# Training a model with different learning rates
for lr in learning_rates:
    sgd = SGDRegressor(max_iter=10000, eta0=lr, random_state=42)
    sgd.fit(X_train_poly, y_train.ravel())
    y_pred = sgd.predict(X_test_poly)
    mse = mean_squared_error(y_test, y_pred)
    print("\nLearning Rate: ", lr)
    print("Mean Squared Error: ", mse)


Mean Squared Error Pollynomial Regression:  4.78528733436451

Learning Rate:  0.001
Mean Squared Error:  4.9165684234088947e+33

Learning Rate:  0.01
Mean Squared Error:  4.2911137288906214e+35

Learning Rate:  0.1
Mean Squared Error:  4.294422769435838e+37

Learning Rate:  0.5
Mean Squared Error:  1.073873836504878e+39

Learning Rate:  1
Mean Squared Error:  4.261118428576438e+39


In [11]:
scaler = StandardScaler()
X_train_poly_scaled = scaler.fit_transform(X_train_poly)
X_test_poly_scaled = scaler.transform(X_test_poly)
# With feature scaling
# Training a model with different learning rates
for lr in learning_rates:
    sgd = SGDRegressor(max_iter=10000, eta0=lr, random_state=42)
    sgd.fit(X_train_poly_scaled, y_train.ravel())
    y_pred = sgd.predict(X_test_poly_scaled)
    mse = mean_squared_error(y_test, y_pred)
    print("\nLearning Rate: ", lr)
    print("Mean Squared Error: ", mse)


Learning Rate:  0.001
Mean Squared Error:  4.901112256067842

Learning Rate:  0.01
Mean Squared Error:  4.785950157912555

Learning Rate:  0.1
Mean Squared Error:  5.0221816695956765

Learning Rate:  0.5
Mean Squared Error:  3.8352821545616684e+22

Learning Rate:  1
Mean Squared Error:  9.579838193932118e+23


In [12]:
# Normal equation
X_b = np.c_[np.ones((len(X_train), 1)), X_train]
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)

# Making predictions on the testing set
X_test_b = np.c_[np.ones((len(X_test), 1)), X_test]
y_pred_norm = X_test_b.dot(theta_best)

print("Score of Normal Equation:", mean_squared_error(y_test, y_pred_norm))


Score of Normal Equation: 4.831565660405443
