In [1]:
import kagglehub
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split


2025-09-20 01:23:50.382340: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-20 01:23:50.390926: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758345830.400592  119723 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758345830.404369  119723 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-09-20 01:23:50.415680: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
path = kagglehub.dataset_download("nikhil7280/student-performance-multiple-linear-regression")



In [3]:

files = os.listdir(path)
csv_files = [f for f in files if f.endswith('.csv')]

if csv_files:
    # Load the first CSV file found
    csv_file = csv_files[0]
    csv_path = os.path.join(path, csv_file)
    df = pd.read_csv(csv_path)
    print(f"Loaded {csv_file}")
    print(df.head())

Loaded Student_Performance.csv
   Hours Studied  Previous Scores Extracurricular Activities  Sleep Hours  \
0              7               99                        Yes            9   
1              4               82                         No            4   
2              8               51                        Yes            7   
3              5               52                        Yes            5   
4              7               75                         No            8   

   Sample Question Papers Practiced  Performance Index  
0                                 1               91.0  
1                                 2               65.0  
2                                 2               45.0  
3                                 2               36.0  
4                                 5               66.0  


In [4]:
#  Basic cleaning / column selection
target = "Performance Index"
feature_names = [
    "Hours Studied",
    "Previous Scores",
    "Extracurricular Activities",   # often 0/1
    "Sleep Hours",
    "Sample Question Papers Practiced"
]
X = df[feature_names].copy()
y = df[target].astype(float)

# If Extracurricular Activities is 'Yes'/'No', convert to 1/0:
if X["Extracurricular Activities"].dtype == object:
    X["Extracurricular Activities"] = X["Extracurricular Activities"].str.strip().str.lower().map({"yes":1, "no":0})

In [5]:
# Train/val/test split
X_train, X_temp, y_train, y_temp = train_test_split(X.values, y.values, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test   = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [6]:
normalizer = tf.keras.layers.Normalization()
normalizer.adapt(X_train) # fit on train only

I0000 00:00:1758345832.484051  119723 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5520 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [8]:
def betas_on_original_scale(model, normalizer=None):
    dense = next(l for l in model.layers if isinstance(l, tf.keras.layers.Dense))
    W, b = dense.get_weights()         # W: (n_features,1)
    W = W[:,0]
    if normalizer is None:
        return W, float(b)
    means = normalizer.get_mean().numpy()
    stds  = np.sqrt(normalizer.get_variance().numpy())
    beta  = W / stds
    beta0 = b - np.sum(means * beta)
    return beta, float(beta0)

def build_model(lambda_l2, n_feats, normalizer=None):
    inp = keras.Input(shape=(n_feats,))
    x = inp if normalizer is None else normalizer(inp)
    out = keras.layers.Dense(1, activation=None,
                             kernel_regularizer=keras.regularizers.l2(lambda_l2))(x)
    m = keras.Model(inp, out)
    m.compile(optimizer=keras.optimizers.Adam(1e-2), loss="mse")
    return m

lams = np.concatenate([[0], np.logspace(-6, 1, 19)])  # Start with 0, then exponential spacing
coef_paths = []
bias_paths = []

for lam in lams:
    model_lam = build_model(lam, X_train.shape[1], normalizer=None)
    model_lam.fit(X_train, y_train, epochs=200, batch_size=32,
                  validation_data=(X_val, y_val), verbose=0)
    betas, bias = betas_on_original_scale(model_lam, normalizer=None)
    coef_paths.append(betas)
    bias_paths.append(bias)

coef_paths = np.array(coef_paths)  # shape: (n_lams, n_features)
bias_paths = np.array(bias_paths)  # shape: (n_lams,)

# Create a single plot with all 6 parameters
plt.figure(figsize=(12, 8))

# Define colors for each line
colors = ['blue', 'red', 'green', 'orange', 'purple', 'brown']

# Plot coefficients for each feature
for j, name in enumerate(feature_names):
    plt.plot(lams, coef_paths[:, j], color=colors[j], linewidth=2, 
             label=f'w{j+1}: {name}', marker='o', markersize=3)

# Plot bias term
plt.plot(lams, bias_paths, color=colors[5], linewidth=2, 
         label='Bias (w0)', marker='s', markersize=3)

plt.xscale("log")
plt.xlabel("Lambda (L2 Regularization Strength)", fontsize=12)
plt.ylabel("Parameter Values (Original Scale)", fontsize=12)
plt.title("Ridge Regression Coefficient Paths\n(All Parameters vs Lambda)", fontsize=14)
plt.grid(True, alpha=0.3)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# Optional: Also create a linear scale version to better see the lambda=0 point
plt.figure(figsize=(12, 8))

# Plot the first few lambda values on linear scale to better see the OLS point
lams_subset = lams[:10]  # First 10 lambda values including 0
coef_subset = coef_paths[:10]
bias_subset = bias_paths[:10]

for j, name in enumerate(feature_names):
    plt.plot(lams_subset, coef_subset[:, j], color=colors[j], linewidth=2, 
             label=f'w{j+1}: {name}', marker='o', markersize=4)

plt.plot(lams_subset, bias_subset, color=colors[5], linewidth=2, 
         label='Bias (w0)', marker='s', markersize=4)

plt.xlabel("Lambda (L2 Regularization Strength)", fontsize=12)
plt.ylabel("Parameter Values (Original Scale)", fontsize=12)
plt.title("Ridge Regression Coefficient Paths (Linear Scale - First 10 Lambda Values)", fontsize=14)
plt.grid(True, alpha=0.3)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()


  return W, float(b)


KeyboardInterrupt: 