# Multiple Regression from Scratch - Wine Quality

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the wine-quality data from a CSV file located in the working directory.
df = pd.read_csv("winequality.csv")

In [3]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [94]:
# Split the frame into a predictor matrix (every column except the target)
# and the target vector, both as plain NumPy arrays.
X = df.drop(columns=["quality"]).to_numpy()
y = df["quality"].to_numpy()

In [103]:
m = len(y)  # number of training samples

# Prepend a column of ones so that theta[0] acts as the intercept term.
# The design matrix is rebuilt from `df` rather than from the current `X`,
# so re-running this cell never stacks a second ones column onto an
# already-augmented matrix.
features = df.drop("quality", axis=1).values
x0 = np.ones([features.shape[0], 1])
X = np.hstack([x0, features])

theta = np.zeros(X.shape[1])  # initial coefficients: all zeros, one per column of X
alpha = 0.0001  # learning rate (step size) read by gradient_descent

In [104]:
# Sanity check: shape of the design matrix (ones column included) and of the ones column itself.
X.shape, x0.shape

((1599, 12), (1599, 1))

In [105]:
def cost_function(X, y, theta):
    """Return the halved mean squared error of the linear model ``X @ theta``.

    Computes ``J(theta) = sum((X @ theta - y) ** 2) / (2 * n)``, where ``n``
    is the number of entries in ``y``.

    Parameters
    ----------
    X : array-like
        Design matrix, one row per sample (expected shape ``(n, k)``).
    y : array-like
        Target values, one per sample (expected shape ``(n,)``).
    theta : array-like
        Coefficient vector (expected shape ``(k,)``).

    Returns
    -------
    float
        The cost ``J(theta)``.
    """
    # Normalise by the size of the `y` actually passed in instead of the
    # notebook-level `m`, so the function is correct for any dataset
    # (e.g. a validation split) and does not depend on hidden kernel state.
    residuals = np.dot(X, theta) - y
    return np.sum(residuals ** 2) / (2 * len(y))

In [112]:
def gradient_descent(X, y, theta, iterations, learning_rate=None):
    """Fit linear-regression coefficients with batch gradient descent.

    Each iteration applies
    ``theta <- theta - step * X.T @ (X @ theta - y) / n`` and then records
    the halved mean squared error ``sum((X @ theta - y) ** 2) / (2 * n)``
    obtained with the updated coefficients.

    Parameters
    ----------
    X : array-like
        Design matrix, one row per sample (expected shape ``(n, k)``).
    y : array-like
        Target values (expected shape ``(n,)``).
    theta : array-like
        Starting coefficients (expected shape ``(k,)``). The input is copied
        and left untouched.
    iterations : int
        Number of update steps to perform.
    learning_rate : float, optional
        Step size. When omitted, the notebook-level ``alpha`` is used, which
        keeps existing four-argument calls working as before.

    Returns
    -------
    theta : numpy.ndarray
        Coefficients after the final update.
    cost_history : list
        Cost after each update; its length equals ``iterations``.
    """
    step = alpha if learning_rate is None else learning_rate

    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n_samples = len(y)

    # Work on a float copy: the caller's array is not modified (so re-running
    # the training cell restarts from the same initial point), and an
    # integer-typed start vector no longer breaks the in-place update.
    theta = np.array(theta, dtype=float)

    cost_history = []
    residuals = X.dot(theta) - y
    for _ in range(iterations):
        gradient = X.T.dot(residuals) / n_samples
        theta -= step * gradient

        # The residuals for the updated theta give this iteration's cost and
        # are reused for the next gradient, avoiding a second matrix product.
        residuals = X.dot(theta) - y
        cost_history.append(np.sum(residuals ** 2) / (2 * n_samples))

    return theta, cost_history

In [113]:
# Fit the model: 100,000 gradient-descent iterations starting from the initial theta.
theta_new, cost_history = gradient_descent(X, y, theta, 100000)

In [114]:
# Fitted coefficients; index 0 is the intercept because the ones column comes first in X.
theta_new

array([ 0.08540986,  0.08502574, -0.23428835,  0.06340265, -0.00720521,
       -0.00431578,  0.00682569, -0.00255344,  0.08450778,  0.21805464,
        0.23757095,  0.38485442])

In [115]:
# Cost recorded after the final gradient-descent iteration.
cost = cost_history[-1]

In [116]:
# Predictions of the fitted linear model for every row of the design matrix.
y_pred = X @ theta_new

In [117]:
# Model Evaluation -- RMSE

def rmse(y, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Parameters
    ----------
    y : array-like
        True target values (expected to be one-dimensional).
    y_pred : array-like
        Predicted values, same length as ``y``.

    Returns
    -------
    float
        ``sqrt(mean((y - y_pred) ** 2))``.
    """
    # Convert up front so plain lists work, and reduce with NumPy instead of
    # the built-in sum(), which would loop over the array in Python.
    errors = np.asarray(y, dtype=float) - np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean(errors ** 2))

In [118]:
# Model Evaluation -- R2 Score

def r2_score(y, y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Computed as ``1 - ss_res / ss_tot``, where ``ss_res`` is the sum of
    squared residuals and ``ss_tot`` is the sum of squared deviations of
    ``y`` from its mean.

    Parameters
    ----------
    y : array-like
        True target values (expected to be one-dimensional).
    y_pred : array-like
        Predicted values, same length as ``y``.

    Returns
    -------
    float
        The R^2 score. If ``y`` is constant, ``ss_tot`` is zero and the
        result is not finite.
    """
    # Convert up front so plain lists work, and use NumPy reductions instead
    # of the built-in sum(), which would loop over the arrays in Python.
    y = np.asarray(y, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    ss_tot = np.sum((y - y.mean()) ** 2)
    ss_res = np.sum((y - y_pred) ** 2)
    return 1 - ss_res / ss_tot

In [119]:
# Report the training-set RMSE: label on one line, value on the next.
print("RMSE:", rmse(y, y_pred), sep="\n")

RMSE:
0.6806693530476048


In [120]:
# Report the training-set R2 score: label on one line, value on the next.
print("R2_Score:", r2_score(y, y_pred), sep="\n")

R2_Score:
0.289139584457651


In [122]:
# First ten predictions, for a side-by-side look against the true labels.
y_pred[:10]

array([5.12521126, 5.23797612, 5.24172514, 5.63917574, 5.12521126,
       5.13363839, 5.09094886, 5.37710594, 5.2231687 , 5.48594476])

In [123]:
# First ten true quality labels, to compare with the predictions.
y[:10]

array([5, 5, 5, 6, 5, 5, 5, 7, 7, 5], dtype=int64)