# Learning Implementation for Batch Gradient Descent

In [3]:
import numpy as np
from sklearn.metrics import mean_squared_error
import random
random.seed = 1000

Issues Recognized:
 * Bias Changes Slowly: large bias -> Slow convergence
 * Compares well with sklearn with small no. of features
 * More than 6 features -> Slow convergence
 * large values in X -> more error in y

### Batch Gradient Descent

X: (m, n), \
y: (m), \
parameters: w,b: (m), (1)

In [4]:
def single_update(X, y, y_, W, b, alpha):
    m, n = X.shape
    dJ_dW = np.random.rand(n)
    dJ_db = np.random.rand()
    for i in range(m):
        for j in range(n):
            dJ_dW[j] += (y_[i] - y[i])*X[i][j] 
        dJ_db += (y_[i] - y[i])

    W -= (alpha * dJ_dW)/m
    b -= (alpha * dJ_db)/m
    return (W,b)

In [48]:
def GD_LinearRegression(X, y, iterations = 100, alpha = 0.0001):
    m, n = X.shape
    W = np.zeros(n)
    b = 0
    k = 0
    while k<=iterations:
        y_ = np.matmul(X, W) + b
        if k % (iterations//10) == 0:
            print(f"Interation {k} Weights:{W}",f"Bias: {b}", "Cost: ", mean_squared_error(y, y_))
        k+=1
        W,b = single_update(X,y,y_, W, b,alpha)
    return W,b

### Running Tests

In [59]:
X = np.array([[x] for x in np.arange(500,1000)])
y = np.array([x*100  for x in np.arange(500,1000)])

In [62]:
W,b =  GD_LinearRegression(X,y,500, 0.0000005)

Interation 0 Weights:[0.] Bias: 0 Cost:  5825835000.0
Interation 50 Weights:[99.99983115] Bias: 0.12865073726804216 Cost:  0.0005983637336139651
Interation 100 Weights:[99.99983449] Bias: 0.12865060232834455 Cost:  0.0005918647778524105
Interation 150 Weights:[99.99983449] Bias: 0.12865046123373008 Cost:  0.0005918634782688594
Interation 200 Weights:[99.99983449] Bias: 0.1286503234262163 Cost:  0.0005918622101841862
Interation 250 Weights:[99.99983449] Bias: 0.12865018616422247 Cost:  0.0005918609457392604
Interation 300 Weights:[99.99983449] Bias: 0.1286500463694985 Cost:  0.0005918596600490625
Interation 350 Weights:[99.99983449] Bias: 0.12864990506544974 Cost:  0.0005918583610758914
Interation 400 Weights:[99.99983449] Bias: 0.12864976559414015 Cost:  0.0005918570773541645
Interation 450 Weights:[99.99983449] Bias: 0.12864962729768437 Cost:  0.0005918558073410754
Interation 500 Weights:[99.99983449] Bias: 0.1286494879090824 Cost:  0.0005918545232997878


In [63]:
pred = np.matmul(X,W) + b


### Comparison with LinearRegression from sklearn


In [64]:
from sklearn.linear_model import LinearRegression

In [66]:
m = LinearRegression().fit(X,y)
pred_m = m.predict(X)
mean_squared_error(y, pred_m),mean_squared_error(y,pred)

(7.220955875342911e-23, 0.0005918544976746625)