In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found,
# one per line, in the order os.walk yields them.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import copy, math
import matplotlib.pyplot as plt

In [None]:
# Location of the dataset inside the read-only Kaggle input mount.
filename = "/kaggle/input/student-performance-multiple-linear-regression/Student_Performance.csv"
# header=None keeps the file's first line as an ordinary data row (row 0) instead
# of using it for column labels. The comma separator is pandas' default, so it is
# not spelled out.
df = pd.read_csv(filename, header=None)

In [None]:
# Convert the frame to a NumPy array and split it into features and target.
# Row 0 is skipped (presumably the CSV header line, since the file is read with
# header=None); the first five columns form X and column 5 is y.
data = df.to_numpy()
samples = data[1:]
X, y = samples[:, :5], samples[:, 5]

In [None]:
# Inspect the feature matrix as sliced from the file, before the encoding and
# scaling cells below modify it.
print(X)

In [None]:
# Encode the categorical column (index 2) as 1 for 'Yes' and 0 for anything
# else, then cast features and target to float.
#
# The dtype guard makes this cell safe to re-run: once X is numeric, comparing
# it against 'Yes' would match nothing and silently overwrite the whole column
# with zeros. Working on a copy also leaves `data` (which X is a view of)
# untouched, so re-running the slicing cell still yields the original strings.
if not np.issubdtype(X.dtype, np.number):
    X = X.copy()
    X[:, 2] = np.where(X[:, 2] == 'Yes', 1, 0)
    X = X.astype(float)
y = y.astype(float)
print(y)
print(X[0])

Perform z-score normalization: rescale each feature column to zero mean and unit standard deviation, $x' = (x - \mu) / \sigma$.

In [None]:
# Z-score normalization: rescale every feature column of X to zero mean and
# unit standard deviation.
#
# The per-column statistics are kept in named variables so a new sample can be
# scaled the same way. NOTE: re-running this cell recomputes them from the
# already-scaled X (giving ~0 and ~1); re-run the preprocessing from the
# slicing cell if the original statistics are needed again.
feature_mu = X.mean(axis=0)
feature_sigma = X.std(axis=0)
# A constant column has zero spread; dividing by 1 instead maps it to all
# zeros rather than filling it with NaN/inf that would poison training.
safe_sigma = np.where(feature_sigma == 0, 1.0, feature_sigma)
X = (X - feature_mu) / safe_sigma
print(X)

In [None]:
def compute_cost(X, y, w, b):
    """Return the halved mean squared error of the linear model ``X @ w + b``.

    Args:
        X: feature matrix with one sample per row, shape (m, n).
        y: target values, one per sample (length m).
        w: weight vector, one entry per feature (length n).
        b: scalar bias.

    Returns:
        ``sum((X @ w + b - y) ** 2) / (2 * m)`` as a scalar.

    Raises:
        ValueError: if X contains no samples (the mean would be undefined).
    """
    X = np.asarray(X)
    m = X.shape[0]
    if m == 0:
        raise ValueError("compute_cost needs at least one sample")
    # One matrix-vector product replaces the per-row Python loop.
    # ravel() keeps a column-shaped y from broadcasting into an (m, m) matrix.
    residuals = np.dot(X, w) + b - np.ravel(y)
    return np.dot(residuals, residuals) / (2 * m)

In [None]:
def compute_gradient(X,y,w,b):
    """Return the gradient of the halved-MSE cost for the model ``X @ w + b``.

    Args:
        X: feature matrix with one sample per row, shape (m, n).
        y: target values, one per sample (length m).
        w: weight vector, one entry per feature (length n).
        b: scalar bias.

    Returns:
        A tuple ``(db_term, dw_term)`` — note the order, bias first:
        ``db_term`` is the scalar mean residual and ``dw_term`` is an array of
        shape (n,) holding the mean of ``residual * feature`` per column.

    Raises:
        ValueError: if X contains no samples.
    """
    X = np.asarray(X)
    m, _ = X.shape
    if m == 0:
        raise ValueError("compute_gradient needs at least one sample")

    # Residuals for all samples at once; this replaces the nested Python loops,
    # which cost m * n interpreted steps on every gradient-descent iteration.
    residuals = np.dot(X, w) + b - np.ravel(y)
    dw_term = np.dot(X.T, residuals) / m
    db_term = residuals.sum() / m

    return db_term, dw_term

In [None]:
def gradient_descent(X, y, w_initial, b_initial, cost_function, gradient_function, alpha, iters):
    """Run batch gradient descent and return the fitted parameters.

    Args:
        X: feature matrix passed through to the cost and gradient functions.
        y: target values passed through to the cost and gradient functions.
        w_initial: starting weights; copied, so the caller's object is not modified.
        b_initial: starting bias.
        cost_function: callable ``(X, y, w, b) -> cost``.
        gradient_function: callable ``(X, y, w, b) -> (db, dw)`` (bias gradient first).
        alpha: learning rate.
        iters: number of update steps to perform.

    Returns:
        ``(w, b, J_history)`` where ``J_history`` holds the cost after each of
        the first 100000 steps.
    """
    max_history = 100000  # cap the history length to prevent resource exhaustion
    w = copy.deepcopy(w_initial)
    b = b_initial
    J_history = []
    # Report progress roughly ten times over the run (every step if iters < 10).
    report_every = max(1, math.ceil(iters / 10))

    for i in range(iters):
        db_term, dw_term = gradient_function(X, y, w, b)

        w = w - alpha * dw_term
        b = b - alpha * db_term

        record = i < max_history
        report = i % report_every == 0
        if record or report:
            # Evaluate the cost for the current parameters instead of reading
            # J_history[-1], which stops being updated once the cap is reached
            # and would make later progress lines show a stale value.
            cost = cost_function(X, y, w, b)
            if record:
                J_history.append(cost)
            if report:
                print(f"Iteration {i:4d}: Cost {cost:8.2f}   ")
    return w, b, J_history

In [None]:
# Hyperparameters for the training run.
alpha = 1.0e-3
inters = 6000

# Start from an all-zero model: one weight per feature column and a zero bias.
n_features = X.shape[1]
initial_w = np.zeros(n_features)
initial_b = 0.

w_final, b_final, J_hist = gradient_descent(
    X, y, initial_w, initial_b,
    cost_function=compute_cost,
    gradient_function=compute_gradient,
    alpha=alpha,
    iters=inters,
)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")

In [None]:
# Mean absolute percentage error of the fitted model over every row of X:
# |prediction - target| / target, averaged and expressed in percent.
m = X.shape[0]
total = sum(
    abs(((np.dot(features, w_final) + b_final) - target) / target)
    for features, target in zip(X, y)
)
avg_pe = total / m * 100

In [None]:
# Print the model's prediction next to the true target for each of the m samples.
for features, target in zip(X[:m], y[:m]):
    prediction = np.dot(features, w_final) + b_final
    print(f"prediction: {prediction:0.2f}, target value: {target}")
    

In [None]:
# Report the average percentage error held in avg_pe.
error_summary = f"on average, this MLR model resulted in a +-{avg_pe:.2f}% error"
print(error_summary)

### Converging at a cost of **2.23**, the *multiple linear regression (MLR) model* trained on *z-score-normalized* features achieves an average absolute percentage error of **±3.45%**, measured on the same data it was trained on.

In [None]:
# Left panel: the full cost history. Right panel: the same curve from step 100
# onward (presumably so the flatter late part is readable next to the steep start).
tail_start = 100
tail_costs = J_hist[tail_start:]

fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12, 4))
ax1.plot(J_hist)
ax2.plot(np.arange(tail_start, tail_start + len(tail_costs)), tail_costs)

for axis, title in ((ax1, "Cost vs. iteration"), (ax2, "Cost vs. iteration (tail)")):
    axis.set_title(title)
    axis.set_ylabel('Cost')
    axis.set_xlabel('iteration step')
plt.show()

In [None]:
def predict(X, w, b):
    """Evaluate the linear model: the dot product of X with w, shifted by b.

    X may be a single feature vector or a matrix with one sample per row; the
    result is a scalar or one value per row accordingly.
    """
    weighted_sum = np.dot(X, w)
    return weighted_sum + b

In [None]:
# Let's predict the performance rating of a single student.
# NOTE(review): the model was fit on z-scored features, so these five values are
# presumably already in normalized units (one per feature column) — confirm.
input_features = np.array([0.5, 1.7, 1, 1.4, -1])
predicted_rating = predict(input_features, w_final, b_final)
print(predicted_rating)

In [None]:
# Show the first feature row as it stands after the preprocessing cells above,
# for comparison with the hand-written input_features vector.
print(X[0])