In [None]:
import pandas as pd
import numpy as np
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will only run where this exact file exists.
csv_path = r"C:\Users\X\Desktop\X\2023\ML\studentperformance.csv"
df = pd.read_csv(csv_path)
# Plain-text dump of the frame (pandas truncates long frames when printing).
print(df)

      Hours Studied  Previous Scores  Extracurricular Activities  Sleep Hours  \
0                 7               99                           1            9   
1                 4               82                           0            4   
2                 8               51                           1            7   
3                 5               52                           1            5   
4                 7               75                           0            8   
...             ...              ...                         ...          ...   
9995              1               49                           1            4   
9996              7               64                           1            8   
9997              6               83                           1            8   
9998              9               97                           1            7   
9999              7               74                           0            8   

      Sample Question Paper

In [2]:
# A notebook cell only echoes its *last* expression, so a bare
# `df.describe()` above `df.shape` is computed and then thrown away.
# Print the summary statistics explicitly so both results are visible.
print(df.describe())
# (rows, columns) of the loaded frame; shown as the cell's output value.
df.shape

(10000, 6)

In [3]:
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
# Every column except the last is used as a feature; the last column is the
# regression target. Both are pulled out as plain NumPy arrays.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

def normalize_features(X):
    """Min-max scale every column of ``X`` using that column's own range.

    Each value is mapped to ``(value - column_min) / (column_max - column_min)``.
    A column whose values are all identical has a zero range; such a column is
    mapped to all zeros instead of producing NaN from a 0/0 division.

    Parameters
    ----------
    X : array-like with ``.min(axis=0)`` / ``.max(axis=0)``
        Feature matrix, one row per sample and one column per feature.

    Returns
    -------
    norm_X : scaled copy of ``X`` (floating point)
    min_vals : per-column minimum of ``X``
    max_vals : per-column maximum of ``X``
    """
    min_vals = X.min(axis=0)
    max_vals = X.max(axis=0)
    span = max_vals - min_vals
    # Replace zero ranges by 1 so constant columns become 0 rather than NaN;
    # the numerator is already 0 for those columns.
    safe_span = np.where(span == 0, 1.0, span)
    norm_X = (X - min_vals) / safe_span
    return norm_X, min_vals, max_vals

def normalize_target(y):
    """Min-max scale the target values using their overall range.

    Each value is mapped to ``(value - y_min) / (y_max - y_min)``. When every
    target value is identical the range is zero; in that case all values are
    mapped to 0 instead of dividing by zero.

    Parameters
    ----------
    y : array-like
        Target values.

    Returns
    -------
    norm_y : scaled target values (floating point)
    y_min : smallest value found in ``y``
    y_max : largest value found in ``y``
    """
    y_min = np.min(y)
    y_max = np.max(y)
    span = y_max - y_min
    # A constant target has span 0; divide by 1 so the (all-zero) numerator
    # yields zeros rather than NaN.
    divisor = span if span != 0 else 1
    norm_y = (y - y_min) / divisor
    return norm_y, y_min, y_max

# Min-max scale the features and the target, keeping the statistics so that
# predictions can later be mapped back to the original target scale.
X_norm, X_min, X_max = normalize_features(X)
y_norm, y_min, y_max = normalize_target(y)

# Prepend a column of ones so the first weight plays the role of the bias.
bias_column = np.ones((X_norm.shape[0], 1))
X_b = np.c_[bias_column, X_norm]

# Report the scaling statistics, one labelled line each.
for label, value in (
    ("X_min (min values for each feature):", X_min),
    ("X_max (max values for each feature):", X_max),
    ("y_min (min Performance Index):", y_min),
    ("y_max (max Performance Index):", y_max),
):
    print(label, value)

class MultiLinearRegressionGD:
    """Linear regression fitted with full-batch gradient descent.

    The model is ``y_hat = X.dot(theta)``. No intercept column is added by
    this class; callers that want a bias term must include a column of ones
    in ``X`` themselves.

    Parameters
    ----------
    learning_rate : float
        Step size applied to the gradient at every epoch.
    max_epochs : int
        Upper bound on the number of gradient steps.
    tolerance : float
        Training stops early once the cost changes by less than this amount
        between two consecutive epochs.
    verbose : bool
        When True (default) the cost is printed every 100 epochs and a
        message is printed when early stopping triggers.

    Attributes
    ----------
    theta : numpy.ndarray or None
        Learned weights, one per column of ``X``; ``None`` until ``fit`` runs.
    loss_history : list of float
        Cost recorded at every epoch of the most recent ``fit`` call.
    """

    def __init__(self, learning_rate=0.01, max_epochs=10000, tolerance=1e-6, verbose=True):
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs
        self.tolerance = tolerance
        self.verbose = verbose
        self.theta = None
        self.loss_history = []

    def fit(self, X, y):
        """Learn ``theta`` by gradient descent on the squared-error cost.

        Parameters
        ----------
        X : array-like of shape (m, n)
            Design matrix (already containing a bias column if one is wanted).
        y : array-like with m elements
            Targets. A flat vector or a single-column / single-row matrix is
            accepted and flattened to shape (m,).

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, has no rows, or ``y`` does not contain
            exactly one value per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (samples x features), got shape {X.shape}")
        m, n = X.shape
        if m == 0:
            raise ValueError("cannot fit on an empty dataset (X has 0 rows)")

        y = np.asarray(y, dtype=float)
        if y.size != m:
            raise ValueError(
                f"y must contain one target per row of X: expected {m} values, got shape {y.shape}"
            )
        # Flatten (m, 1) / (1, m) targets: subtracting a column vector from the
        # (m,) predictions would otherwise broadcast to an (m, m) matrix.
        y = y.reshape(m)

        self.theta = np.zeros(n)
        self.loss_history = []
        prev_mse = float('inf')

        for epoch in range(self.max_epochs):
            y_pred = X.dot(self.theta)
            error = y_pred - y
            gradient = (1/m) * X.T.dot(error)
            self.theta -= self.learning_rate * gradient

            # Note: this is the half mean squared error (1/(2m) * sum of
            # squares) of the predictions made *before* this epoch's update;
            # it is reported under the label "MSE".
            mse = (1/(2*m)) * np.sum(error**2)
            self.loss_history.append(float(mse))

            if self.verbose and epoch % 100 == 0:
                print(f"Epoch {epoch}, MSE: {mse:.6f}")

            if abs(prev_mse - mse) < self.tolerance:
                if self.verbose:
                    print(f"Early stopping at epoch {epoch}")
                break

            prev_mse = mse

    def predict(self, X):
        """Return ``X.dot(theta)`` for a design matrix laid out as in ``fit``.

        Raises
        ------
        AttributeError
            If called before ``fit`` has produced ``theta``.
        """
        if getattr(self, "theta", None) is None:
            raise AttributeError("model is not fitted yet; call fit() before predict()")
        return X.dot(self.theta)

# Train with the class defaults on the bias-augmented, scaled design matrix,
# then predict on that same matrix (predictions are still on the scaled
# target axis at this point).
model = MultiLinearRegressionGD()
model.fit(X=X_b, y=y_norm)
y_pred_norm = model.predict(X=X_b)
def denormalize(y_norm, y_min, y_max):
    """Undo min-max scaling: map ``y_norm`` back onto the ``y_min``..``y_max`` scale."""
    span = y_max - y_min
    return y_min + span * y_norm
# Bring the scaled predictions back to the original target scale before
# scoring them against the raw targets.
y_pred = denormalize(y_pred_norm, y_min, y_max)

# Both metrics are computed on the same rows the model was fitted on.
mse_train, r2_train = mean_squared_error(y, y_pred), r2_score(y, y_pred)

print("Final theta (weights + bias):", model.theta)
print(f"Train MSE: {mse_train:.4f}, R2: {r2_train:.4f}")

X_min (min values for each feature): [ 1 40  0  4  0]
X_max (max values for each feature): [ 9 99  1  9  9]
y_min (min Performance Index): 10
y_max (max Performance Index): 100
Epoch 0, MSE: 0.149035
Epoch 100, MSE: 0.015052
Epoch 200, MSE: 0.011551
Epoch 300, MSE: 0.009673
Epoch 400, MSE: 0.008164
Epoch 500, MSE: 0.006932
Epoch 600, MSE: 0.005918
Epoch 700, MSE: 0.005077
Epoch 800, MSE: 0.004376
Epoch 900, MSE: 0.003788
Epoch 1000, MSE: 0.003293
Epoch 1100, MSE: 0.002874
Epoch 1200, MSE: 0.002519
Epoch 1300, MSE: 0.002217
Epoch 1400, MSE: 0.001960
Epoch 1500, MSE: 0.001740
Epoch 1600, MSE: 0.001551
Epoch 1700, MSE: 0.001389
Epoch 1800, MSE: 0.001249
Epoch 1900, MSE: 0.001128
Early stopping at epoch 1981
Final theta (weights + bias): [ 0.11534573  0.21262401  0.54261867 -0.00097958  0.01696971  0.01257947]
Train MSE: 16.8658, R2: 0.9543


In [4]:
import numpy as np
# Weights as printed on the "Final theta" line of the training output
# (the training cell fitted them on min-max scaled inputs and targets).
theta = np.array([0.11534573, 0.21262401, 0.54261867, -0.00097958, 0.01696971, 0.01257947])
# One sample: a leading 1 followed by five raw (unscaled) feature values.
x = np.array([1, 8, 80, 1, 7, 3])
# Plain dot product of the weights with the raw sample -- no feature scaling
# and no mapping of the result back to the target scale is applied here.
y_pred = theta.dot(x)
print(f"predicted performance index (without normalization): {y_pred:.2f}")

predicted performance index (without normalization): 45.38


In [5]:
import numpy as np

# Weights as printed on the "Final theta" line of the training output.
theta = np.array([0.11534573, 0.21262401, 0.54261867, -0.00097958, 0.01696971, 0.01257947])
# One sample: a leading 1 followed by five raw feature values.
x = np.array([1, 8, 80, 1, 7, 3])

# Scaling statistics, matching the X_min / X_max / y_min / y_max lines of the
# training output.
X_min, X_max = np.array([1, 40, 0, 4, 0]), np.array([9, 99, 1, 9, 9])
y_min, y_max = 10, 100

# Drop the leading 1 and min-max scale the remaining feature values.
x_features = x[1:]
feature_span = X_max - X_min
x_norm = (x_features - X_min) / feature_span

# Put the 1 back in front so the vector lines up with theta.
x_norm_b = np.insert(x_norm, 0, 1)
y_pred_norm = theta.dot(x_norm_b)

# Map the scaled prediction back to the original target scale.
target_span = y_max - y_min
y_pred = y_min + y_pred_norm * target_span
print(f"predicted performance index (with normalization): {y_pred:.2f}")

predicted performance index (with normalization): 71.44
