# Learning Implementation for Batch Gradient Descent

Todo:
* Add Testing

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_regression

### Batch Gradient Descent

In [2]:
def single_update(X, y, y_, W, b, alpha, m):
    """Return ``(W, b)`` after one full-batch gradient-descent step.

    The inputs are left untouched: new values are returned instead of
    updating ``W`` in place, so the caller's array is never modified and
    integer-typed weights do not trigger a casting error.

    Parameters
    ----------
    X: ndarray
        Feature matrix; ``X.T`` is multiplied with the residuals.
    y: ndarray
        Target values.
    y_: ndarray
        Current predictions, same shape as ``y``.
    W: ndarray
        Current weights.
    b: float
        Current intercept.
    alpha: float
        Constant learning rate.
    m: int
        Divisor for the weight step (the caller passes the number of
        rows of ``X``).

    Returns
    -------
    W: ndarray
        Updated weights (a new array).
    b: float
        Updated intercept.

    """

    res = y - y_ # Residuals

    # For the cost J = mean(res**2) / 2 these are the *negative* gradients
    # with respect to W and b, which is why they are added below.
    step_W = np.dot(X.T, res) / m
    step_b = np.mean(res)

    # Out-of-place on purpose: `W += ...` would write into the caller's array.
    W = W + alpha * step_W
    b = b + alpha * step_b

    return W, b

In [3]:
def GD_LinearRegression(X, y, iterations = 100, alpha = 0.001):
    """Returns W,b after training lr using full batch gradient descent.

    Weights start at zero and the intercept at 0. Progress (cost, weights,
    bias) is printed about ten times over the run, starting with the
    untrained state at iteration 0.

    Parameter
    ---------
    X: ndarray of shape (m, n)
        Feature matrix.

    y: ndarray
        Targets; expected to align element-wise with ``np.dot(X, W) + b``
        (i.e. 1-D of length m).

    iterations: int, default=100
        Number of complete iterations through X, i.e. number of updates
        applied to W and b.

    alpha: float, default=0.001
        Constant Learning Rate.

    Returns
    -------
    W: ndarray
        Optimized weights.
    b: float
        Optimized intercept.

    """

    m, n = X.shape
    W = np.zeros(n) # Zero Initialization
    b = 0

    # Clamp to 1: for iterations < 10 the integer division yields 0 and the
    # modulo below would raise ZeroDivisionError.
    log_every = max(1, iterations // 10)

    for k in range(iterations+1): # +1 so the state after the last update can be printed
        y_ = np.dot(X, W) + b

        if k % log_every == 0:
            print(f"Iteration: {k} > ",
                  f"Cost: {mean_squared_error(y, y_)}",
                  f" Weights:{W}",
                  f"Bias: {b}")

        if k == iterations:
            # The extra pass exists only for reporting; updating here would
            # apply iterations+1 steps and return a state that was never shown.
            break

        W, b = single_update(X, y, y_, W, b, alpha, m)

    return W, b

### Example Usage

In [4]:
# Noiseless synthetic data: 1000 samples with 2 features drawn by np.random.rand.
# No seed is set in the visible cells, so X and the printed numbers change on every run.
X = np.random.rand(1000, 2)
# Ground truth the fit should approach: weights [5, -4] and intercept 20.
y = 5*X[:,0] - 4*X[:,1] + 20
# precision=4 only shortens how the weight array is printed in the progress log.
# The returned (W, b) is discarded; this cell is run for its printed trace.
with np.printoptions(precision=4):
    GD_LinearRegression(X,y,500, 0.2)

Iteration: 0 >  Cost: 421.13196037178534  Weights:[0. 0.] Bias: 0
Iteration: 50 >  Cost: 3.0129087295959236  Weights:[6.7405 1.6779] Bias: 16.059920135182356
Iteration: 100 >  Cost: 0.9034764806010332  Weights:[ 6.2882 -1.0161] Bias: 17.735110467960556
Iteration: 150 >  Cost: 0.28180352933360386  Weights:[ 5.8615 -2.4034] Bias: 18.698520377685796
Iteration: 200 >  Cost: 0.0900289729282785  Weights:[ 5.5461 -3.1326] Bias: 19.252318920294044
Iteration: 250 >  Cost: 0.029163452313427228  Weights:[ 5.3353 -3.5228] Bias: 19.5705489718718
Iteration: 300 >  Cost: 0.009520876090697478  Weights:[ 5.2017 -3.7348] Bias: 19.753367200700765
Iteration: 350 >  Cost: 0.003121635045466223  Weights:[ 5.1196 -3.8515] Bias: 19.85837371234889
Iteration: 400 >  Cost: 0.0010259090144777566  Weights:[ 5.0703 -3.9163] Bias: 19.918678672291982
Iteration: 450 >  Cost: 0.00033759151999810457  Weights:[ 5.0411 -3.9526] Bias: 19.953308137983683
Iteration: 500 >  Cost: 0.00011116701776645924  Weights:[ 5.0239 -3.973

In [5]:
# Second example on scikit-learn generated data; random_state=0 makes it repeatable.
# Note: this rebinds the X and y names used by the previous example cell.
X, y = make_regression(n_samples=1000, n_features=2, noise=0.1, random_state=0)
# y+10 shifts the targets so the model has a non-zero intercept to learn.
# The returned (W, b) is discarded; only the printed progress is shown.
with np.printoptions(precision=4):
    GD_LinearRegression(X,y+10,1000, 0.5)

Iteration: 0 >  Cost: 3182.6390476828487  Weights:[0. 0.] Bias: 0
Iteration: 100 >  Cost: 0.009059278733004392  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 200 >  Cost: 0.009059278733004392  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 300 >  Cost: 0.009059278733004392  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 400 >  Cost: 0.009059278733004392  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 500 >  Cost: 0.009059278733004392  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 600 >  Cost: 0.009059278733004392  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 700 >  Cost: 0.009059278733004392  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 800 >  Cost: 0.009059278733004392  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 900 >  Cost: 0.009059278733004392  Weights:[41.0888 40.0554] Bias: 9.995484684435944
Iteration: 1000 >  Cost: 0.009059278733004392  Weights:[41.0888 40.055