# Stochastic Gradient Descent (SGD) for Linear Regression with L2-regularization
<br>
<b>Name:</b> Rohan Pillai<br>
<b>Unity ID:</b> rspillai<br>

### Import Files

In [1]:
import numpy as np

### Prediction Function

In [2]:
def predict(X, coefficients):
    """
    Input:
        X: Data Row (np.Array)
        coefficients: Coefficients Array (np.Array)
        
    Output:
        Predicted Value for the given Data Row. (float)
    """
    
    #Remove any NAN values from the coefficients to avoid errors.
    coefficients = np.nan_to_num(coefficients)
    
    intercept = coefficients[0]
    predictedValue = 0.0
    
    #Note 1: Skip last value of X as it is the actual Y value.
    #Note 2: Skip coefficients[0] because it is intercept value.
    for i in range(len(X)-1):
        predictedValue += X[i]*coefficients[i+1]
        
    return predictedValue+intercept

### Gradient Descent Function

In [3]:
def updateCoefficients(X, learningRate, epochs, l2):
    """
    Input:
        X: Data Set (np.Array)
        learningRate: Learning Rate for the gradient descent (float)
        epochs: Number of iterations (int)
        l2: Regularization Constant Lambda (float)
        
    Output:
        coefficients: Array of coefficients (np.Array)
    """
    
    #Initialize the coefficients array to all 0s
    coefficients = np.array([0.0]*len(X[0]))

    #Run for all iterations.
    for epoch in range(epochs):
        
        #Calculate squareError for all iterations.
        squaredError=0
        
        for i in X:      
            predictedValue = predict(i, coefficients)
            error = predictedValue - i[-1]

            squaredError+=error**2
            
            #Update the coefficients.
            coefficients[0]-= learningRate*error
            for j in range(len(i)-1):
                # Updating the coefficients based on L2 Regularization
                coefficients[j+1]-= learningRate*(error*i[j] + l2*coefficients[j+1])
        
        squaredError/=(X.shape[0])
        rms = np.sqrt(squaredError)
        print("Epoch: {}\tRoot Mean Squared Error: {}".format(epoch, squaredError))
        
    return coefficients

### Main Function

In [4]:
def linearRegression(X_train, learningRate, epochs, l2):
    """
    Input:
        X: Data Set (np.Array)
        learningRate: Learning Rate for the gradient descent (float)
        epochs: Number of iterations (int)
        l2: Regularization Constant Lambda (float)
        
    Output:
        coefficients: Array of coefficients (np.Array)
        
    """
    coefficients = updateCoefficients(X_train, learningRate, epochs, l2)
    return coefficients

## Testing

### Training
Train the Linear Regression Function using Sklearn's Boston Dataset

In [5]:
#Import Files
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

#Load the Boston Dataset
X, Y = load_boston(return_X_y=True)

#Split the dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

#Normalise the dataset
#Necessary, otherwise Coefficients throw NAN/INF error due to large values.
train_min = X_train.min(0)
train_max = X_train.max(0)
X_train = (X_train - train_min)/(train_max - train_min)
X_test = (X_test - train_min)/(train_max - train_min)

#Combine the Training X & Y set
dataset = np.insert(X_train, X_train.shape[1], values=Y_train, axis=1)

#Calling the Fit function
coefficients = linearRegression(dataset, 0.01, 500, 0.01)

print("\nIntercept:{}\nCoefficients: {}".format(coefficients[0],coefficients[1:]))

Epoch: 0	Root Mean Squared Error: 106.17661013833626
Epoch: 1	Root Mean Squared Error: 54.647358530589635
Epoch: 2	Root Mean Squared Error: 46.128949505901026
Epoch: 3	Root Mean Squared Error: 41.53308945049576
Epoch: 4	Root Mean Squared Error: 38.39489111772134
Epoch: 5	Root Mean Squared Error: 36.09979694592487
Epoch: 6	Root Mean Squared Error: 34.36280120683172
Epoch: 7	Root Mean Squared Error: 33.016075655193916
Epoch: 8	Root Mean Squared Error: 31.95174358354047
Epoch: 9	Root Mean Squared Error: 31.09711117469757
Epoch: 10	Root Mean Squared Error: 30.401526072909917
Epoch: 11	Root Mean Squared Error: 29.828736194558253
Epoch: 12	Root Mean Squared Error: 29.35220511892027
Epoch: 13	Root Mean Squared Error: 28.952130564747645
Epoch: 14	Root Mean Squared Error: 28.613488372987803
Epoch: 15	Root Mean Squared Error: 28.32471477581022
Epoch: 16	Root Mean Squared Error: 28.076797233391456
Epoch: 17	Root Mean Squared Error: 27.862633558642532
Epoch: 18	Root Mean Squared Error: 27.67657146

Epoch: 155	Root Mean Squared Error: 25.992055254204967
Epoch: 156	Root Mean Squared Error: 25.992011560437327
Epoch: 157	Root Mean Squared Error: 25.991969560332993
Epoch: 158	Root Mean Squared Error: 25.991929182679918
Epoch: 159	Root Mean Squared Error: 25.991890359544893
Epoch: 160	Root Mean Squared Error: 25.99185302610886
Epoch: 161	Root Mean Squared Error: 25.991817120511122
Epoch: 162	Root Mean Squared Error: 25.99178258370214
Epoch: 163	Root Mean Squared Error: 25.99174935930397
Epoch: 164	Root Mean Squared Error: 25.99171739347813
Epoch: 165	Root Mean Squared Error: 25.99168663480081
Epoch: 166	Root Mean Squared Error: 25.991657034144332
Epoch: 167	Root Mean Squared Error: 25.991628544565252
Epoch: 168	Root Mean Squared Error: 25.991601121197938
Epoch: 169	Root Mean Squared Error: 25.991574721154105
Epoch: 170	Root Mean Squared Error: 25.991549303427604
Epoch: 171	Root Mean Squared Error: 25.99152482880369
Epoch: 172	Root Mean Squared Error: 25.991501259773756
Epoch: 173	Root 

Epoch: 306	Root Mean Squared Error: 25.99086158792411
Epoch: 307	Root Mean Squared Error: 25.990861361844207
Epoch: 308	Root Mean Squared Error: 25.99086114314922
Epoch: 309	Root Mean Squared Error: 25.990860931596906
Epoch: 310	Root Mean Squared Error: 25.99086072695321
Epoch: 311	Root Mean Squared Error: 25.990860528991536
Epoch: 312	Root Mean Squared Error: 25.990860337493068
Epoch: 313	Root Mean Squared Error: 25.990860152245947
Epoch: 314	Root Mean Squared Error: 25.990859973045396
Epoch: 315	Root Mean Squared Error: 25.99085979969337
Epoch: 316	Root Mean Squared Error: 25.990859631998443
Epoch: 317	Root Mean Squared Error: 25.990859469775344
Epoch: 318	Root Mean Squared Error: 25.99085931284493
Epoch: 319	Root Mean Squared Error: 25.99085916103402
Epoch: 320	Root Mean Squared Error: 25.990859014175136
Epoch: 321	Root Mean Squared Error: 25.9908588721062
Epoch: 322	Root Mean Squared Error: 25.99085873467055
Epoch: 323	Root Mean Squared Error: 25.990858601716713
Epoch: 324	Root Mea

Epoch: 457	Root Mean Squared Error: 25.990854697671182
Epoch: 458	Root Mean Squared Error: 25.990854696137436
Epoch: 459	Root Mean Squared Error: 25.990854694653525
Epoch: 460	Root Mean Squared Error: 25.990854693217763
Epoch: 461	Root Mean Squared Error: 25.99085469182865
Epoch: 462	Root Mean Squared Error: 25.99085469048464
Epoch: 463	Root Mean Squared Error: 25.990854689184257
Epoch: 464	Root Mean Squared Error: 25.99085468792614
Epoch: 465	Root Mean Squared Error: 25.990854686708893
Epoch: 466	Root Mean Squared Error: 25.99085468553111
Epoch: 467	Root Mean Squared Error: 25.99085468439159
Epoch: 468	Root Mean Squared Error: 25.990854683289108
Epoch: 469	Root Mean Squared Error: 25.990854682222402
Epoch: 470	Root Mean Squared Error: 25.990854681190374
Epoch: 471	Root Mean Squared Error: 25.990854680191813
Epoch: 472	Root Mean Squared Error: 25.99085467922573
Epoch: 473	Root Mean Squared Error: 25.99085467829098
Epoch: 474	Root Mean Squared Error: 25.990854677386604
Epoch: 475	Root M

### Predicting
Predicting the Boston dataset with the built Linear Regression Model

In [6]:
mseError = 0

#Combining the testing X and Y values
testDataset = np.insert(X_test,X_test.shape[1], values=Y_test, axis=1)

print("Data Sample\tPredicted\tActual\t\tDifference")

#Calling the Prediction function
for i,dataRow in enumerate(testDataset):
    yHat = round(predict(dataRow, coefficients),2)
    print("{}\t\t{}\t\t{}\t\t{}".format(i, yHat, Y_test[i], round(yHat-Y_test[i],2)))
    mseError+= ((yHat-Y_test[i])**2)
    
mseError/=len(X_test)
print("MSE Error: {}".format(mseError))

Data Sample	Predicted	Actual		Difference
0		27.71		23.6		4.11
1		35.22		32.4		2.82
2		16.17		13.6		2.57
3		25.27		22.8		2.47
4		18.56		16.1		2.46
5		22.89		20.0		2.89
6		18.92		17.8		1.12
7		14.66		14.0		0.66
8		21.31		19.6		1.71
9		21.03		16.8		4.23
10		25.74		21.5		4.24
11		21.08		18.9		2.18
12		-0.16		7.0		-7.16
13		22.25		21.2		1.05
14		20.49		18.5		1.99
15		23.96		29.8		-5.84
16		19.23		18.8		0.43
17		8.12		10.2		-2.08
18		37.46		50.0		-12.54
19		17.76		14.1		3.66
20		26.77		25.2		1.57
21		28.56		29.1		-0.54
22		13.7		12.7		1.0
23		23.56		22.4		1.16
24		17.88		14.2		3.68
25		15.85		13.8		2.05
26		23.21		20.3		2.91
27		15.12		14.9		0.22
28		24.01		21.7		2.31
29		20.22		18.3		1.92
30		22.54		23.1		-0.56
31		25.17		23.8		1.37
32		23.87		15.0		8.87
33		18.59		20.8		-2.21
34		16.09		19.1		-3.01
35		19.5		19.4		0.1
36		30.16		34.7		-4.54
37		21.57		19.5		2.07
38		25.46		24.4		1.06
39		24.79		23.4		1.39
40		15.82		19.7		-3.88
41		29.46		28.2		1.26
42		38.43		50.0		-11.57
43		19.82		17.4	