In [None]:
# Install libraries (if needed in Colab)
!pip install -q scikit-learn pandas matplotlib seaborn

# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.metrics import mean_squared_error

In [None]:
# Read the hours-vs-scores dataset into a DataFrame.
data = pd.read_csv('rounded_hours_student_scores.csv')

# Show the column index, then the first five rows (one item per line).
print(data.columns, data.head(), sep="\n")

Index(['Hours', 'Scores'], dtype='object')
   Hours  Scores
0    1.1      41
1    1.2      40
2    1.4      38
3    1.5      39
4    1.6      36


In [None]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Read the CSV and retain just the two columns used in this analysis.
data = pd.read_csv('rounded_hours_student_scores.csv')[['Hours', 'Scores']]

# Feature matrix (2-D, single 'Hours' column) and target vector ('Scores').
X, y = data[['Hours']], data['Scores']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Prepend a column of ones so the first weight acts as the intercept
# in the hand-written gradient-descent routines.
X_train_scaled = np.c_[np.ones(len(X_train_scaled)), X_train_scaled]
X_test_scaled = np.c_[np.ones(len(X_test_scaled)), X_test_scaled]

print("Data loaded and preprocessed!")

Data loaded and preprocessed!


In [None]:
def gradient_descent_vanilla(X, y, lr=0.1, n_iters=1000):
    """Fit linear-regression weights with plain batch gradient descent.

    Each iteration steps against the least-squares gradient
    ``(1/m) * X.T @ (X @ theta - y)``, starting from all-zero weights.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. No intercept column is added here; include a column
        of ones if a bias term is wanted.
    y : array-like of shape (m,) or (m, 1)
        Targets. Flattened to 1-D so that column vectors and pandas Series
        are treated positionally instead of broadcasting the residual to
        an (m, m) matrix.
    lr : float, default 0.1
        Step size.
    n_iters : int, default 1000
        Number of full-batch updates.

    Returns
    -------
    numpy.ndarray of shape (n,)
        The fitted weight vector.
    """
    # Normalise inputs once so the loop only ever sees plain float arrays.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()

    m, n = X.shape
    theta = np.zeros(n)  # Initialize weights
    for _ in range(n_iters):
        error = X @ theta - y
        gradients = (1 / m) * (X.T @ error)
        theta -= lr * gradients
    return theta

In [None]:
def gradient_descent_l2(X, y, lr=0.1, n_iters=5000, lambda_=0.1):
    """Fit ridge-style (L2-penalised) linear regression by batch gradient descent.

    The update direction is ``(1/m) * (X.T @ (X @ theta - y) + lambda_ * theta_pen)``
    where ``theta_pen`` equals ``theta`` with its first entry replaced by 0,
    so the weight in column 0 (the bias, when column 0 is all ones) is not
    shrunk. Note that the penalty term is divided by ``m`` together with the
    data term.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix whose first column is treated as the unpenalised bias.
    y : array-like of shape (m,) or (m, 1)
        Targets. Flattened to 1-D so that column vectors and pandas Series
        are treated positionally instead of broadcasting the residual to
        an (m, m) matrix.
    lr : float, default 0.1
        Step size.
    n_iters : int, default 5000
        Number of full-batch updates.
    lambda_ : float, default 0.1
        L2 penalty strength.

    Returns
    -------
    numpy.ndarray of shape (n,)
        The fitted weight vector.
    """
    # Normalise inputs once so the loop only ever sees plain float arrays.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()

    m, n = X.shape
    theta = np.zeros(n)
    for _ in range(n_iters):
        error = X @ theta - y
        # Zero in the first slot keeps the bias out of the penalty.
        penalty = lambda_ * np.r_[0, theta[1:]]
        gradients = (1 / m) * (X.T @ error + penalty)
        theta -= lr * gradients
    return theta

In [None]:
def gradient_descent_l1(X, y, lr=0.1, n_iters=1000, lambda_=0.1):
    """Fit lasso-style (L1-penalised) linear regression by subgradient descent.

    The data gradient ``(1/m) * X.T @ (X @ theta - y)`` is computed first and
    ``lambda_ * sign(theta)`` is then added to every entry except the first,
    so the weight in column 0 (the bias, when column 0 is all ones) is not
    penalised. The penalty term is added after the ``1/m`` scaling, i.e. it
    is not divided by ``m``.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix whose first column is treated as the unpenalised bias.
    y : array-like of shape (m,) or (m, 1)
        Targets. Flattened to 1-D so that column vectors and pandas Series
        are treated positionally instead of broadcasting the residual to
        an (m, m) matrix.
    lr : float, default 0.1
        Step size.
    n_iters : int, default 1000
        Number of full-batch updates.
    lambda_ : float, default 0.1
        L1 penalty strength.

    Returns
    -------
    numpy.ndarray of shape (n,)
        The fitted weight vector.
    """
    # Normalise inputs once so the loop only ever sees plain float arrays.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()

    m, n = X.shape
    theta = np.zeros(n)
    for _ in range(n_iters):
        error = X @ theta - y
        gradients = (1 / m) * (X.T @ error)
        # L1 subgradient: sign(theta) (0 where theta is exactly 0), bias excluded.
        gradients[1:] += lambda_ * np.sign(theta[1:])
        theta -= lr * gradients
    return theta

In [None]:
def gradient_descent_elasticnet(X, y, lr=0.1, n_iters=1000, lambda1=0.1, lambda2=0.1):
    """Fit elastic-net-style (L1 + L2) linear regression by subgradient descent.

    The data gradient ``(1/m) * X.T @ (X @ theta - y)`` is computed first and
    ``lambda1 * sign(theta) + lambda2 * theta`` is then added to every entry
    except the first, so the weight in column 0 (the bias, when column 0 is
    all ones) is not penalised. Both penalty terms are added after the
    ``1/m`` scaling, i.e. they are not divided by ``m``.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix whose first column is treated as the unpenalised bias.
    y : array-like of shape (m,) or (m, 1)
        Targets. Flattened to 1-D so that column vectors and pandas Series
        are treated positionally instead of broadcasting the residual to
        an (m, m) matrix.
    lr : float, default 0.1
        Step size.
    n_iters : int, default 1000
        Number of full-batch updates.
    lambda1 : float, default 0.1
        L1 penalty strength.
    lambda2 : float, default 0.1
        L2 penalty strength.

    Returns
    -------
    numpy.ndarray of shape (n,)
        The fitted weight vector.
    """
    # Normalise inputs once so the loop only ever sees plain float arrays.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()

    m, n = X.shape
    theta = np.zeros(n)
    for _ in range(n_iters):
        error = X @ theta - y
        gradients = (1 / m) * (X.T @ error)
        # Combined penalty on the non-bias weights: L1 subgradient plus L2 term.
        weights = theta[1:]
        gradients[1:] += lambda1 * np.sign(weights) + lambda2 * weights
        theta -= lr * gradients
    return theta

In [None]:
def evaluate_model(X_test, y_test, theta, name="Model"):
    """Score a fitted weight vector on held-out data and print a short report.

    Predictions are formed as ``X_test @ theta``. Three metrics are computed:
    mean squared error, the R² score, and the mean absolute percentage error
    (mean of ``|y_test - y_pred| / |y_test|`` times 100).

    Parameters
    ----------
    X_test : matrix of test features, multiplied directly by ``theta``.
    y_test : true target values.
    theta : weight vector to evaluate.
    name : label used in the printed report and in the returned record.

    Returns
    -------
    dict
        Record with keys "Model", "MSE", "R²" and "Error %".
    """
    predictions = X_test @ theta

    summary = {
        "Model": name,
        "MSE": mean_squared_error(y_test, predictions),
        "R²": r2_score(y_test, predictions),
        "Error %": np.mean(np.abs((y_test - predictions) / y_test)) * 100,
    }

    print(f"\n📊 {name}")
    print(f"   ➤ Mean Squared Error (MSE): {summary['MSE']:.4f}")
    print(f"   ➤ R² Score: {summary['R²']:.4f}")
    print(f"   ➤ Mean Absolute Percentage Error: {summary['Error %']:.2f}%")

    return summary

In [None]:
# Fit every variant on the standardized training design matrix first.
theta_vanilla = gradient_descent_vanilla(X_train_scaled, y_train)
theta_l2 = gradient_descent_l2(X_train_scaled, y_train, lambda_=9)
theta_l1 = gradient_descent_l1(X_train_scaled, y_train, lambda_=0.01)
theta_elastic = gradient_descent_elasticnet(X_train_scaled, y_train, lambda1=0.1, lambda2=0.1)

# Pair each fitted weight vector with its report label, in reporting order.
fitted_models = [
    (theta_vanilla, "Vanilla Linear Regression"),
    (theta_l2, "L2 Regularization (Ridge)"),
    (theta_l1, "L1 Regularization (Lasso)"),
    (theta_elastic, "ElasticNet (L1 + L2)"),
]

# Evaluate on the held-out split; each call prints its report and yields one record.
results = [
    evaluate_model(X_test_scaled, y_test, weights, label)
    for weights, label in fitted_models
]


📊 Vanilla Linear Regression
   ➤ Mean Squared Error (MSE): 15.6678
   ➤ R² Score: 0.5810
   ➤ Mean Absolute Percentage Error: 5.63%

📊 L2 Regularization (Ridge)
   ➤ Mean Squared Error (MSE): 16.7879
   ➤ R² Score: 0.5510
   ➤ Mean Absolute Percentage Error: 6.08%

📊 L1 Regularization (Lasso)
   ➤ Mean Squared Error (MSE): 15.6747
   ➤ R² Score: 0.5808
   ➤ Mean Absolute Percentage Error: 5.64%

📊 ElasticNet (L1 + L2)
   ➤ Mean Squared Error (MSE): 16.3267
   ➤ R² Score: 0.5633
   ➤ Mean Absolute Percentage Error: 5.93%
