In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy
import math
import sklearn

In [3]:
# Load the raw student-performance table.
df = pd.read_csv("datasets/Student_Performance.csv")

# Split the target column away from the predictor columns.
target_col = "Performance Index"
features = df.drop(columns=[target_col])

# Encode the Yes/No column as 1/0 so every predictor is numeric.
# (.map leaves NaN for any value other than "Yes" or "No".)
yes_no_codes = {"Yes": 1, "No": 0}
features["Extracurricular Activities"] = features["Extracurricular Activities"].map(yes_no_codes)

# Standardize each predictor column: subtract its mean, divide by its std.
features = (features - features.mean()) / features.std()

# Downstream cells work on plain NumPy arrays.
X = features.values
Y = df[target_col].values

In [4]:
# Inspect the standardized feature matrix built in the previous cell.
X

array([[ 0.77514895,  1.70409044,  1.01040412,  1.45613179, -1.24969145],
       [-0.38346141,  0.72387648, -0.98960404, -1.49221962, -0.9009371 ],
       [ 1.16135241, -1.06357251,  1.01040412,  0.27679123, -0.9009371 ],
       ...,
       [ 0.3889455 ,  0.78153612,  1.01040412,  0.86646151,  0.14532594],
       [ 1.54755586,  1.58877115,  1.01040412,  0.27679123, -1.5984458 ],
       [ 0.77514895,  0.26259932, -0.98960404,  0.86646151, -1.24969145]],
      shape=(10000, 5))

In [5]:
# Inspect the target vector (the "Performance Index" column as a NumPy array).
Y

array([91., 65., 45., ..., 74., 95., 64.], shape=(10000,))

In [6]:
def compute_cost(X, Y, w, b):
    """Return the halved mean squared error of the linear model ``X @ w + b``.

    Computes ``sum((X @ w + b - Y) ** 2) / (2 * m)`` where ``m`` is the number
    of rows of ``X``. The whole batch is evaluated with one matrix-vector
    product instead of a Python loop over rows.

    Parameters
    ----------
    X : 2-D array with one example per row (``m`` rows, ``n`` columns).
    Y : targets, one per row of ``X`` (expected to be 1-D of length ``m``).
    w : weights, one per column of ``X``.
    b : scalar bias.

    Returns
    -------
    The scalar cost.

    Raises
    ------
    ValueError
        If ``X`` has no rows (the mean would be undefined).
    """
    m, _ = X.shape
    if m == 0:
        raise ValueError("compute_cost requires at least one example (X has 0 rows)")

    # Residual of every example at once: prediction minus target.
    errors = X @ w + b - Y

    # dot(errors, errors) is the sum of squared residuals.
    return np.dot(errors, errors) / (2 * m)

def compute_gradient(X, Y, w, b):
    """Return the gradient of the halved-MSE cost with respect to ``w`` and ``b``.

    For residuals ``e = X @ w + b - Y`` over ``m`` examples this computes
    ``dj_dw = (e @ X) / m`` and ``dj_db = sum(e) / m``, evaluated for the whole
    batch at once instead of accumulating row by row in a Python loop.

    Parameters
    ----------
    X : 2-D array with one example per row (``m`` rows, ``n`` columns).
    Y : targets, one per row of ``X`` (expected to be 1-D of length ``m``).
    w : weights, one per column of ``X``.
    b : scalar bias.

    Returns
    -------
    (dj_dw, dj_db) : array of ``n`` partial derivatives for the weights, and
        the scalar partial derivative for the bias.

    Raises
    ------
    ValueError
        If ``X`` has no rows (the averages would be undefined).
    """
    m, _ = X.shape
    if m == 0:
        raise ValueError("compute_gradient requires at least one example (X has 0 rows)")

    # Residual of every example at once: prediction minus target.
    errors = X @ w + b - Y

    # errors @ X sums error_i * X[i] over all rows, giving one value per feature.
    dj_dw = (errors @ X) / m
    dj_db = np.sum(errors) / m

    return dj_dw, dj_db

def gradient_descent(X, Y, w_in, b_in, alpha, num_iters, max_history=100000):
    """Fit ``w`` and ``b`` by batch gradient descent.

    Each iteration calls ``compute_gradient`` on the full data set and moves
    the parameters one step of size ``alpha`` against the gradient. Progress is
    printed roughly ten times over the run.

    Parameters
    ----------
    X, Y : training data, passed unchanged to ``compute_gradient`` and
        ``compute_cost``.
    w_in : initial weights; copied, so the caller's object is not modified.
    b_in : initial bias.
    alpha : learning rate (step size).
    num_iters : number of update steps to run.
    max_history : only the first ``max_history`` iterations have their cost
        appended to the returned history, which bounds its length on very
        long runs. Defaults to 100000.

    Returns
    -------
    (w, b, J_history) : final weights, final bias, and the list of costs
        recorded after each of the first ``max_history`` updates.
    """
    J_history = []
    w = copy.deepcopy(w_in)
    b = copy.deepcopy(b_in)

    # Print interval is loop-invariant: about ten progress lines per run.
    # (Only used inside the loop, so num_iters == 0 never reaches the modulo.)
    log_every = math.ceil(num_iters / 10)

    for i in range(num_iters):

        dj_dw, dj_db = compute_gradient(X, Y, w, b)

        # Update all parameters simultaneously from the same gradient.
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        record = i < max_history
        report = i % log_every == 0

        # Evaluate the cost only when it is stored or printed, and always
        # print the cost of the current parameters rather than the last stored
        # history entry, which goes stale once the history cap is reached.
        if record or report:
            cost = compute_cost(X, Y, w, b)
            if record:
                J_history.append(cost)
            if report:
                print(f"Iteration: {i:4d} | Cost: {cost}")

    return w, b, J_history

In [7]:
# Hyperparameters for batch gradient descent
alpha = 0.001
num_iters = 10000

# Start from the all-zero model: one weight per feature column plus a scalar bias
m, n = X.shape
w_init = np.zeros(n)
b_init = 0.0

# Fit; w and b hold the learned parameters, J_history the recorded costs
w, b, J_history = gradient_descent(X, Y, w_init, b_init, alpha, num_iters)

Iteration:    0 | Cost: 1706.0215084026452
Iteration: 1000 | Cost: 232.81103486754643
Iteration: 2000 | Cost: 33.32614971678544
Iteration: 3000 | Cost: 6.3090414311588034
Iteration: 4000 | Cost: 2.649269335479664
Iteration: 5000 | Cost: 2.1534101982269123
Iteration: 6000 | Cost: 2.0862124614331456
Iteration: 7000 | Cost: 2.077103976968746
Iteration: 8000 | Cost: 2.0758690649764495
Iteration: 9000 | Cost: 2.0757015985606246


In [9]:
# Final cost: the last entry recorded in J_history by the training run above
J_history[-1]

np.float64(2.0756788900564214)

In [10]:
# Learned weights, one per (standardized) feature column of X
w

array([ 7.38680795, 17.66200818,  0.30651024,  0.81498441,  0.55580262])

In [11]:
# Learned bias term
b

np.float64(55.22230531100317)

In [12]:
def mapper(x):
    """Predict the target for one feature row using the fitted globals w and b."""
    return np.dot(w, x) + b

# Predictions for every row of X (the same rows the model was fitted on,
# so this is a training-set score, not a held-out one).
y_pred = np.array([mapper(row) for row in X])

# Coefficient of determination: R^2 = 1 - SS_res / SS_tot
residuals = Y - y_pred
deviations = Y - np.mean(Y)
ss_res = np.sum(residuals**2)    # residual sum of squares
ss_tot = np.sum(deviations**2)   # total sum of squares
r2 = 1 - (ss_res / ss_tot)

print("R^2:", r2)

R^2: 0.9887523130162745
