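# Learning Implementation of Linear Regression with Stochastic Gradient Descent
A from-scratch implementation with L2 regularization, compared against scikit-learn's `SGDRegressor`.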

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error

In [2]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Observations:
* Reacts better to a large bias than batch gradient descent (BatchGD)
* If the features have non-comparable scales, the feature with the larger values gets more weight
* Runs one more pass over the data after the last printed line


# Query:
* Should the bias update be multiplied by the learning rate (lr)?
* What difference does dividing by m make?

## Implementation

In [3]:
def added_cost(Lambda,W,m):
    """Compute the L2 regularization term that is added to the cost.

    The value is ``sum(W**2) * Lambda / (2*m)``.

    Parameters
    ----------
    Lambda: float
        Rate for l2 Regularization
    W: numpy.ndarray
        Weights whose squares are summed.
    m: int
        Divisor of the penalty (the caller passes the number of rows of X).
    """
    squared_weights = W**2
    penalty = np.sum(squared_weights) * Lambda
    return penalty / (2*m)

In [4]:
# Check for alpha and its multiplication with the reg term
def single_update(i,X,y,y_,W,b,alpha,Lambda):
    """Return W,b after a single update on the ith data point.

    Parameters
    ----------
    i: int
        Index of the data point used for this step.
    X: numpy.ndarray
        Feature matrix; ``X[i]`` is the sample and ``X.shape[0]`` scales the
        weight step.
    y: sequence
        Targets; only ``y[i]`` is read.
    y_: sequence
        Current predictions; only ``y_[i]`` is read.
    W: numpy.ndarray
        Current weights. The array is not modified; an updated copy is returned.
    b: scalar
        Current intercept.
    alpha: float
        Learning rate.
    Lambda: float
        Rate for l2 Regularization

    Returns
    -------
    W: numpy.ndarray
        The updated weights (same dtype as the input W).
    b: scalar
        The updated intercept.
    """
    m = X.shape[0]
    res = y[i]-y_[i]

    # Step direction for the weights: residual-scaled sample minus the L2 shrinkage term.
    dJ_dW = np.dot(res,X[i])  - Lambda*W

    # Update a copy so the caller's array is left untouched; the in-place add
    # on the copy keeps W's dtype exactly as an in-place update would.
    W = W.copy()
    W += dJ_dW*alpha/m
    # The bias step uses the raw residual and, unlike the weight step, is not divided by m.
    b = b + res*alpha

    return W,b

In [5]:
def SGD_LinearRegression(X,y, iterations = 100,alpha = 0.000001,Lambda=0.0001, output_limit=10):
    """Returns W,b after training a linear regression using stochastic gradient descent with regularization applied.

    Progress lines (cost, weights, bias) are printed while training. A
    KeyboardInterrupt stops training and returns the parameters reached so far.

    Parameter
    ---------
    X: numpy.ndarray
        Two-dimensional feature matrix, one row per data point.

    y: numpy.ndarray
        Targets, one per row of X.

    iterations: int, default=100
        Number of passes through X. The loop covers ``iterations + 1`` passes,
        so one more pass runs after the last pass that can be printed.

    alpha: float, default=0.000001
        Constant Learning Rate

    Lambda: float, default=0.0001
        Rate for l2 Regularization

    output_limit: int, default=10
        Number of iterations to show. A progress line is printed every
        ``iterations // output_limit`` passes (every pass when that is 0).

    Returns
    -------
    W: numpy.ndarray
        The optimized weights (None if output_limit is not positive).
    b: numpy.longdouble
        The optimized intercept (None if output_limit is not positive).
    """

    if output_limit<=0:
        print("Choose natural output limit!")
        return None, None
    m,n = X.shape
    W = np.zeros(n)
    # np.longdouble is the portable spelling; np.float128 is not defined on every platform.
    b = np.longdouble(0)

    # Passes between two progress lines. Clamped to 1 so that
    # iterations < output_limit does not end in a modulo by zero.
    report_every = max(1, iterations//output_limit)

    try:
        for k in range(iterations+1):
            y_ = np.dot(X,W) +b

            if k % report_every == 0:
                print(f"({k//report_every}/{output_limit}) > Iteration: {k}",
                      f"Cost: {mean_squared_error(y,y_) + added_cost(Lambda,W,m) }",
                      f"   Weights: {W}",
                      f"Bias: {b:.4f}"
                     )

            for i in range(m):
                # single_update only reads y_[i], so refresh just that prediction
                # with the current parameters instead of recomputing all m of them.
                y_[i] = np.dot(X[i],W) + b
                W,b = single_update(i,X,y,y_,W,b,alpha,Lambda)
    except KeyboardInterrupt:
        print(f"\nTerminated! Returned: Weights: {W}, Bias: {b}")
    return W,b

## Running Example

In [6]:
# Fixed seed so that Restart & Run All regenerates the same data and training log.
X = np.random.default_rng(42).random((50,3))
# Targets are an exact linear function of the first two columns plus 5;
# the third column does not enter y.
y = 5*X[:,0] + 11*X[:,1] + 5

In [7]:
# Train with 1000 iterations at learning rate 0.25; arrays in the log are
# printed with 4 digits of precision.
with np.printoptions(precision=4):
    W, b = SGD_LinearRegression(X, y, iterations=1000, alpha=0.25, output_limit=15)

(0/15) > Iteration: 0 Cost: 191.90536503310975    Weights: [0. 0. 0.] Bias: 0.0000
(1/15) > Iteration: 66 Cost: 0.9953610503431713    Weights: [3.1967 7.7181 0.136 ] Bias: 7.4594
(2/15) > Iteration: 132 Cost: 0.10336241976122358    Weights: [ 4.3433 10.0057  0.0448] Bias: 5.7822
(3/15) > Iteration: 198 Cost: 0.011415146594860922    Weights: [4.7609e+00 1.0694e+01 8.1587e-03] Bias: 5.2560
(4/15) > Iteration: 264 Cost: 0.0015311334988053506    Weights: [ 4.9105e+00  1.0900e+01 -7.1853e-04] Bias: 5.0892
(5/15) > Iteration: 330 Cost: 0.0003695470986607692    Weights: [ 4.9634e+00  1.0961e+01 -1.4799e-03] Bias: 5.0361
(6/15) > Iteration: 396 Cost: 0.00020603884385756658    Weights: [ 4.9820e+00  1.0978e+01 -7.8539e-04] Bias: 5.0191
(7/15) > Iteration: 462 Cost: 0.00017594916936634246    Weights: [ 4.9885e+00  1.0983e+01 -1.9530e-04] Bias: 5.0137
(8/15) > Iteration: 528 Cost: 0.00016883708161496378    Weights: [4.9907e+00 1.0985e+01 1.2906e-04] Bias: 5.0120
(9/15) > Iteration: 594 Cost: 0.00

In [None]:
# Using sklearn SGDRegressor
# Reference fit on the same X, y with a constant learning rate and an L2 penalty.
m = make_pipeline(
    SGDRegressor(
        max_iter=1000,
        n_iter_no_change=100,
        penalty='l2',
        verbose=1,
        learning_rate='constant', eta0=0.01,
        tol=0.001,
        # Fixed seed: the regressor shuffles the data between epochs, so
        # without it the fitted coefficients differ from run to run.
        random_state=42,
    )
)

m.fit(X,y)

In [9]:
# Coefficients learned by the first (and only) pipeline step, the SGDRegressor.
m.steps[0][1].coef_

array([ 4.94568172, 10.92544288, -0.01962791])