In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error


In [10]:
# Read the already-cleaned dataset from the working directory.
df = pd.read_csv("SP_cleaned.csv")

# The right-most column is used as the regression target; all remaining
# columns form the feature matrix.
TARGET = df.columns[-1]
y = df[TARGET].values
X = df.drop([TARGET], axis=1).values

# Quick sanity check on the array dimensions.
print("X shape:", X.shape)
print("y shape:", y.shape)


X shape: (1000, 7)
y shape: (1000,)


In [11]:
# Hold out 20% of the rows for evaluation; the seed is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features (VERY IMPORTANT for GD & Regularization).
# The scaler is fitted on the training rows only and then applied to both
# splits, so no statistics from the test rows are used during fitting.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [12]:
# Prepend a column of ones to each split so that the first learned weight
# plays the role of the intercept.
X_train_gd = np.column_stack((np.ones(X_train.shape[0]), X_train))
X_test_gd = np.column_stack((np.ones(X_test.shape[0]), X_test))

def gradient_descent(X, y, lr=0.01, epochs=1000, log_every=200):
    """Fit linear-regression weights with full-batch gradient descent on the MSE.

    Every epoch computes predictions ``X @ weights``, the residuals against
    ``y``, the MSE gradient ``(2 / m) * X.T @ residuals`` and takes one step of
    size ``lr`` against that gradient. Weights start at zero.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. No intercept is added here; include a column of ones
        in ``X`` if a bias term is wanted.
    y : array-like of shape (m,) or (m, 1)
        Target values. The input is flattened to 1-D, so a column vector does
        not broadcast the residuals into an (m, m) matrix.
    lr : float, default 0.01
        Step size.
    epochs : int, default 1000
        Number of full passes over the data.
    log_every : int, default 200
        Print the MSE every ``log_every`` epochs, starting at epoch 0. The
        printed value is measured before that epoch's weight update.
        Pass ``0`` or ``None`` to disable printing.

    Returns
    -------
    numpy.ndarray of shape (n,)
        The weights after ``epochs`` updates.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, has no rows, or its row count does not match the
        number of target values.
    """
    X = np.asarray(X, dtype=float)
    # Flatten so (m, 1) targets behave exactly like (m,) targets.
    y = np.asarray(y, dtype=float).reshape(-1)

    if X.ndim != 2:
        raise ValueError(f"X must be 2-D, got {X.ndim} dimension(s)")
    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != m:
        raise ValueError(
            f"X has {m} samples but y has {y.shape[0]} values"
        )

    weights = np.zeros(n)

    for epoch in range(epochs):
        predictions = X.dot(weights)
        errors = predictions - y
        gradients = (2 / m) * X.T.dot(errors)
        weights -= lr * gradients

        # `errors` was computed before the update above, so the reported loss
        # belongs to the weights this epoch started with.
        if log_every and epoch % log_every == 0:
            loss = np.mean(errors**2)
            print(f"Epoch {epoch} | MSE: {loss}")

    return weights


In [13]:
# Learn the weight vector (intercept + one coefficient per feature) with the
# hand-written optimiser, using its default learning rate and epoch count.
weights = gradient_descent(X=X_train_gd, y=y_train)

# Score the held-out rows; X_test_gd already carries the column of ones.
y_pred_gd = X_test_gd @ weights

print("Gradient Descent MSE:", mean_squared_error(y_test, y_pred_gd))


Epoch 0 | MSE: 0.985988235398363
Epoch 200 | MSE: 0.05921507412914316
Epoch 400 | MSE: 0.05827034654543789
Epoch 600 | MSE: 0.057846977153291006
Epoch 800 | MSE: 0.057626007197129736
Gradient Descent MSE: 0.07139608527813601


In [14]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on the scaled training features.
lin_reg = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred_lr = lin_reg.predict(X_test)
print("Linear Regression MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred_lr))


Linear Regression MSE: 0.07190996544883094


In [15]:
from sklearn.linear_model import Ridge

# L2-regularised linear model with penalty strength alpha = 1.0.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred_ridge = ridge.predict(X_test)
print("Ridge Regression MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred_ridge))


Ridge Regression MSE: 0.07187799376479187


In [16]:
from sklearn.linear_model import Lasso

# L1-regularised linear model; the iteration cap is raised to 5000.
lasso = Lasso(alpha=0.1, max_iter=5000).fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred_lasso = lasso.predict(X_test)
print("Lasso Regression MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred_lasso))

# Count the coefficients that were driven to exactly zero.
print("Lasso removed features:", (lasso.coef_ == 0).sum())


Lasso Regression MSE: 0.10965579919527584
Lasso removed features: 4


In [17]:
from sklearn.linear_model import ElasticNet

# Mixed L1/L2 penalty: overall strength 0.1, split evenly (l1_ratio = 0.5).
elastic = ElasticNet(alpha=0.1, l1_ratio=0.5).fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred_elastic = elastic.predict(X_test)
print("ElasticNet MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred_elastic))


ElasticNet MSE: 0.08278928536007961


In [18]:
# Test-set predictions of every fitted model, keyed by display name.
predictions_by_model = {
    "Gradient Descent": y_pred_gd,
    "Linear Regression": y_pred_lr,
    "Ridge Regression": y_pred_ridge,
    "Lasso Regression": y_pred_lasso,
    "ElasticNet": y_pred_elastic,
}

# Test MSE per model, in the same order as above.
results = {
    name: mean_squared_error(y_test, y_hat)
    for name, y_hat in predictions_by_model.items()
}

for model, mse in results.items():
    print(f"{model} → MSE: {mse}")


Gradient Descent → MSE: 0.07139608527813601
Linear Regression → MSE: 0.07190996544883094
Ridge Regression → MSE: 0.07187799376479187
Lasso Regression → MSE: 0.10965579919527584
ElasticNet → MSE: 0.08278928536007961
