# Learning Implementation for Batch Gradient Descent

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_regression

### Batch Gradient Descent

In [2]:
def single_update(X, y, y_, W, b, alpha, m):
    """Return updated weights and intercept after one full-batch gradient step.

    Parameters
    ----------
    X: ndarray
        Feature matrix; its transpose is multiplied with the residual vector.
    y: ndarray
        Target values.
    y_: ndarray
        Current predictions, subtracted element-wise from ``y``.
    W: ndarray
        Current weights. The input array is NOT modified; a new array is
        returned.
    b: float
        Current intercept.
    alpha: float
        Learning rate (step size).
    m: int
        Divisor for the weight step. The caller is expected to pass the
        number of samples; the intercept step uses ``np.mean`` instead.

    Returns
    -------
    W: ndarray
        Weights after the step.
    b: float
        Intercept after the step.
    """

    res = y - y_  # Residuals

    # Because the residual is (target - prediction), these quantities are the
    # *negated* gradients of the half-MSE cost (when m is the sample count),
    # which is why they are added rather than subtracted below.
    step_W = np.dot(X.T, res) / m
    step_b = np.mean(res)

    # Out-of-place updates: avoid mutating the caller's array and avoid the
    # casting error an in-place `+=` raises when W has an integer dtype.
    W = W + alpha * step_W
    b = b + alpha * step_b

    return W, b

In [3]:
def GD_LinearRegression(X, y, iterations = 100, alpha = 0.001):
    """Returns W,b after training lr using full batch gradient descent.

    Progress (cost, weights, bias) is printed at iteration 0 and then every
    ``iterations // 10`` iterations (every iteration when ``iterations < 10``).

    Parameter
    ---------
    X: ndarray of shape (m, n)
        Feature matrix with m samples and n features.

    y: ndarray
        Target values. Assumed to be 1-D of length m so that it lines up
        with the predictions ``np.dot(X, W) + b`` -- TODO confirm for
        other shapes.

    iterations: int, default=100
        Number of complete iterations through X, i.e. the number of
        parameter updates applied.

    alpha: float, default=0.001
        Constant Learning Rate.

    Returns
    -------
    W: ndarray
        Optimized weights.
    b: float
        Optimized intercept.

    """

    m, n = X.shape
    W = np.zeros(n) # Zero Initialization
    b = 0

    # iterations // 10 is 0 for iterations < 10, which would make the modulo
    # below raise ZeroDivisionError; fall back to logging every iteration.
    log_every = max(1, iterations // 10)

    for k in range(iterations+1): # +1 so the state after the last update is logged too
        y_ = np.dot(X, W) + b

        if k % log_every == 0:
            print(f"Iteration: {k} > ",
                  f"Cost: {mean_squared_error(y, y_)}",
                  f" Weights:{W}",
                  f"Bias: {b}")

        if k == iterations:
            # The extra pass exists only for logging: stop here so exactly
            # `iterations` updates are applied and the returned parameters
            # match the last reported state.
            break

        W,b = single_update(X,y,y_, W, b,alpha,m)

    return W,b

### Example Usage

In [4]:
# Noise-free synthetic data with a known linear relationship:
#   y = 5*x0 - 4*x1 + 20
# so the fitted weights/bias can be compared against [5, -4] and 20.
# A seeded generator keeps the printed training log reproducible across
# Restart & Run All (the unseeded global RNG gave different data each run).
rng = np.random.default_rng(0)
X = rng.random((1000, 2))  # uniform samples in [0, 1), same distribution as np.random.rand
y = 5*X[:,0] - 4*X[:,1] + 20
with np.printoptions(precision=4):  # shorten the printed weight arrays
    GD_LinearRegression(X,y,500, 0.2)

Iteration: 0 >  Cost: 421.4565116053958  Weights:[0. 0.] Bias: 0
Iteration: 50 >  Cost: 3.017846223001017  Weights:[6.5555 1.8221] Bias: 16.12455906149543
Iteration: 100 >  Cost: 0.9193569760104219  Weights:[ 6.1029 -0.8772] Bias: 17.78204195923754
Iteration: 150 >  Cost: 0.2858890945419459  Weights:[ 5.7239 -2.3081] Bias: 18.732802162255872
Iteration: 200 >  Cost: 0.09011624395480997  Weights:[ 5.4544 -3.0753] Bias: 19.276958842639747
Iteration: 250 >  Cost: 0.028654289954405242  Weights:[ 5.2773 -3.4909] Bias: 19.58786624574116
Iteration: 300 >  Cost: 0.009161335218302391  Weights:[ 5.166 -3.718] Bias: 19.765269686079215
Iteration: 350 >  Cost: 0.0029390837938868116  Weights:[ 5.0981 -3.843 ] Bias: 19.866391954313357
Iteration: 400 >  Cost: 0.0009448936429752958  Weights:[ 5.0574 -3.9123] Bias: 19.923987016778195
Iteration: 450 >  Cost: 0.00030417177344535605  Weights:[ 5.0333 -3.9508] Bias: 19.956770454054663
Iteration: 500 >  Cost: 9.799452056248919e-05  Weights:[ 5.0193 -3.9723] B

In [5]:
# Second demo: a seeded scikit-learn synthetic regression problem with two
# features and a little noise. The targets are offset by 10 before fitting,
# presumably so there is a non-zero intercept to recover.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=0.1,
    random_state=0,
)
with np.printoptions(precision=4):  # shorten the printed weight arrays
    GD_LinearRegression(X, y + 10, iterations=1000, alpha=0.5)

Iteration: 0 >  Cost: 3182.6390476828487  Weights:[0. 0.] Bias: 0
Iteration: 100 >  Cost: 0.0090592787330044  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 200 >  Cost: 0.0090592787330044  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 300 >  Cost: 0.0090592787330044  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 400 >  Cost: 0.0090592787330044  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 500 >  Cost: 0.0090592787330044  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 600 >  Cost: 0.0090592787330044  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 700 >  Cost: 0.0090592787330044  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 800 >  Cost: 0.0090592787330044  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 900 >  Cost: 0.0090592787330044  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 1000 >  Cost: 0.0090592787330044  Weights:[41.0888 40.0554] Bias: 9.995484684