# Learning Implementation for Stochastic Linear Regression with Regularization

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error



In [2]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Observations:
* Reacts better to a large bias than BatchGD
* Possibly faster
* If the features have non-comparable scales, the feature with the larger values gets more weight
* Runs 1 step after last print  


# Query:
* Should the bias update be multiplied by the learning rate?
* What difference does dividing by m make?

## Implementation

In [3]:
def l2squared(W):
    """Return the squared L2 norm of W, i.e. the sum of its squared entries."""
    squares = W * W
    return np.sum(squares)

In [4]:
def added_cost(Lambda,W,m):
    """Return the L2 penalty added to the reported cost: Lambda * sum(W**2) / (2*m).

    Lambda: regularization rate
    W: weight vector
    m: number of samples the penalty is averaged over
    """
    penalty = Lambda * l2squared(W)
    return penalty / (2 * m)

In [5]:
# Check for alpha and its multiplication with the reg term
def single_update(i,X,y,y_,W,b,alpha,Lambda):
    """Take one stochastic step on W and b using only sample i.

    i: index of the sample to learn from
    X: 2-D array of samples; X[i] is the feature row of sample i
    y: targets
    y_: current predictions; only y_[i] is read
    W: weight vector, updated in place (via +=) and also returned
    b: bias; the updated value is returned
    alpha: learning rate
    Lambda: l2 regularization rate

    The step for weight j is ((y[i]-y_[i])*X[i][j] - Lambda*W[j]) * alpha / m and
    the step for the bias is (y[i]-y_[i]) * alpha / m, where m is the number of
    rows of X.

    Returns the pair (W, b).
    """
    n_samples, n_features = X.shape
    residual = y[i] - y_[i]

    # Per-feature step direction: residual-weighted input minus the l2 shrinkage.
    step_W = np.zeros(n_features)
    for j, x_ij in enumerate(X[i]):
        step_W[j] += residual * x_ij - Lambda * W[j]

    # Multiply by alpha first, then divide by the sample count.
    W += step_W * alpha / n_samples
    b += residual * alpha / n_samples
    return W, b

In [6]:
def SGD_LinearRegression(X,y, iterations = 100,alpha = 0.000001,Lambda=0.0001, output_limit=10):
    """Fit linear regression by per-sample gradient updates with l2 regularization.

    Starts from zero weights and zero bias and makes `iterations` complete
    passes over the rows of X, in their given order, calling `single_update`
    once per row. Progress lines (cost, weights, bias) are printed before the
    first pass and then every `iterations // output_limit` passes (at least
    every pass), so the state printed for iteration k is the state after k
    completed passes.

    X: array of shape (m, n)
        Training samples, one row per sample.

    y: array of length m
        Target values.

    iterations: int, default=100
        Number of complete iterations through X

    alpha: float, default=0.000001
        Constant Learning Rate

    Lambda: float, default=0.0001
        Rate for l2 Regularization

    output_limit: int, default=10
        Number of iterations to show

    Returns (W, b): the weight vector and the bias after `iterations` passes.
    Returns (None, None) if output_limit is not positive. On KeyboardInterrupt
    the values reached so far are printed and returned.
    """
    if output_limit<=0:
        print("Choose natural output limit!")
        return None, None
    m,n = X.shape
    W = np.zeros(n)
    # np.longdouble is the portable spelling of the extended-precision float:
    # np.float128 is only an alias for it and is missing on some platforms.
    b = np.longdouble(0)

    # iterations//output_limit is 0 when iterations < output_limit, which would
    # make the modulo below raise ZeroDivisionError; report at least every pass.
    report_every = max(1, iterations//output_limit)

    try:
        for k in range(iterations+1):
            if k % report_every == 0:
                # The cost needs predictions for every sample with the current W, b.
                y_ = np.matmul(X,W) + b
                print(f"({k//report_every}/{output_limit}) > Iteration: {k}",
                      f"Cost: {mean_squared_error(y,y_) + added_cost(Lambda,W,m) }",
                      f"   Weights: {W}",
                      f"Bias: {b:.4f}"
                     )
            if k == iterations:
                # All requested passes are done; index `iterations` exists only
                # so that the final state can be reported.
                break
            for i in range(m):
                # single_update reads only y_[i], so refresh just that entry
                # instead of re-predicting all m samples after every step.
                y_[i] = np.dot(X[i],W) + b
                W,b = single_update(i,X,y,y_,W,b,alpha,Lambda)
    except KeyboardInterrupt:
        print(f"\nTerminated! Returned: Weights: {W}, Bias: {b}")
        return W,b
    return W,b

## Running Example

In [7]:
# Synthetic data: x is uniform on [0, 10), the features are (x, x^2) and
# y = 5.5*x + 2004.88*x^2 + integer noise drawn from {0, 1, 2, 3, 4}.
# A seeded Generator keeps the data identical across kernel restarts.
rng = np.random.default_rng(0)
x_samples = rng.random(500) * 10
X = np.column_stack((x_samples, x_samples**2))
y = 5.5*X[:, 0] + 2004.88*X[:, 1] + rng.integers(0, 5, size=len(X))

In [8]:
# Train the hand-written SGD with iterations=1000 and learning rate 0.1;
# arrays printed while it runs are shown with 4 decimal places.
with np.printoptions(precision=4):
    W, b = SGD_LinearRegression(X, y, iterations=1000, alpha=0.1, output_limit=20)

(0/20) > Iteration: 0 Cost: 7467993065.090451    Weights: [0. 0.] Bias: 0.0000
(1/20) > Iteration: 50 Cost: 219.55285744323575    Weights: [  23.0028 2003.2854] Bias: -38.4740
(2/20) > Iteration: 100 Cost: 78.76187122152155    Weights: [  15.915  2003.9394] Bias: -22.0461
(3/20) > Iteration: 150 Cost: 29.22126565479466    Weights: [  11.6814 2004.33  ] Bias: -12.2300
(4/20) > Iteration: 200 Cost: 11.962393345499098    Weights: [   9.1519 2004.5633] Bias: -6.3649
(5/20) > Iteration: 250 Cost: 6.056103504055336    Weights: [   7.6405 2004.7027] Bias: -2.8607
(6/20) > Iteration: 300 Cost: 4.099967523397469    Weights: [   6.7375 2004.7861] Bias: -0.7669
(7/20) > Iteration: 350 Cost: 3.4926805838300172    Weights: [   6.1979 2004.8358] Bias: 0.4841
(8/20) > Iteration: 400 Cost: 3.330279348200866    Weights: [   5.8756 2004.8656] Bias: 1.2316
(9/20) > Iteration: 450 Cost: 3.3048044575858078    Weights: [   5.6829 2004.8833] Bias: 1.6782
(10/20) > Iteration: 500 Cost: 3.315129309639066    We

In [9]:
# Reference fit with scikit-learn's SGDRegressor on the same, unscaled features,
# so its coefficients can be compared with the weights found by the
# implementation above.
m = make_pipeline(
    SGDRegressor(
        max_iter=1000,
        n_iter_no_change=100,
        penalty='l2',
        verbose=1,
        learning_rate='constant', eta0=0.0001,
        tol=0.001,
        random_state=0,  # fixed seed so the shuffled fit is the same on every run
    )
)

m.fit(X,y)

-- Epoch 1
Norm: 1987.56, NNZs: 2, Bias: 26.871619, T: 500, Avg. loss: 23808331.908949
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 1985.52, NNZs: 2, Bias: 17.364635, T: 1000, Avg. loss: 59705.282037
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 1985.99, NNZs: 2, Bias: 8.737986, T: 1500, Avg. loss: 50181.794766
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 1991.39, NNZs: 2, Bias: 1.177860, T: 2000, Avg. loss: 40471.488317
Total training time: 0.01 seconds.
-- Epoch 5
Norm: 1993.50, NNZs: 2, Bias: -5.674495, T: 2500, Avg. loss: 32078.222206
Total training time: 0.01 seconds.
-- Epoch 6
Norm: 1989.40, NNZs: 2, Bias: -11.730139, T: 3000, Avg. loss: 24611.183832
Total training time: 0.01 seconds.
-- Epoch 7
Norm: 1994.76, NNZs: 2, Bias: -16.928706, T: 3500, Avg. loss: 19807.526063
Total training time: 0.01 seconds.
-- Epoch 8
Norm: 1993.19, NNZs: 2, Bias: -21.541027, T: 4000, Avg. loss: 15105.164373
Total training time: 0.01 seconds.
-- Epoch 9
Norm: 1993.13, NNZs: 2, Bi

Norm: 2003.87, NNZs: 2, Bias: -26.464088, T: 79000, Avg. loss: 55.964642
Total training time: 0.17 seconds.
-- Epoch 159
Norm: 2003.98, NNZs: 2, Bias: -26.311171, T: 79500, Avg. loss: 54.856612
Total training time: 0.17 seconds.
-- Epoch 160
Norm: 2003.95, NNZs: 2, Bias: -26.161273, T: 80000, Avg. loss: 53.951919
Total training time: 0.17 seconds.
-- Epoch 161
Norm: 2003.92, NNZs: 2, Bias: -26.011579, T: 80500, Avg. loss: 53.447724
Total training time: 0.17 seconds.
-- Epoch 162
Norm: 2003.93, NNZs: 2, Bias: -25.862759, T: 81000, Avg. loss: 53.217761
Total training time: 0.18 seconds.
-- Epoch 163
Norm: 2003.98, NNZs: 2, Bias: -25.717292, T: 81500, Avg. loss: 52.732304
Total training time: 0.18 seconds.
-- Epoch 164
Norm: 2003.94, NNZs: 2, Bias: -25.573091, T: 82000, Avg. loss: 52.569316
Total training time: 0.18 seconds.
-- Epoch 165
Norm: 2003.86, NNZs: 2, Bias: -25.430869, T: 82500, Avg. loss: 51.902919
Total training time: 0.18 seconds.
-- Epoch 166
Norm: 2004.01, NNZs: 2, Bias: -2

-- Epoch 350
Norm: 2004.54, NNZs: 2, Bias: -8.238684, T: 175000, Avg. loss: 8.620471
Total training time: 0.38 seconds.
-- Epoch 351
Norm: 2004.53, NNZs: 2, Bias: -8.184552, T: 175500, Avg. loss: 8.540189
Total training time: 0.38 seconds.
-- Epoch 352
Norm: 2004.53, NNZs: 2, Bias: -8.129029, T: 176000, Avg. loss: 8.492905
Total training time: 0.38 seconds.
-- Epoch 353
Norm: 2004.55, NNZs: 2, Bias: -8.073202, T: 176500, Avg. loss: 8.340800
Total training time: 0.38 seconds.
-- Epoch 354
Norm: 2004.55, NNZs: 2, Bias: -8.019555, T: 177000, Avg. loss: 8.326819
Total training time: 0.38 seconds.
-- Epoch 355
Norm: 2004.53, NNZs: 2, Bias: -7.965601, T: 177500, Avg. loss: 8.181931
Total training time: 0.38 seconds.
-- Epoch 356
Norm: 2004.54, NNZs: 2, Bias: -7.909784, T: 178000, Avg. loss: 7.943551
Total training time: 0.38 seconds.
-- Epoch 357
Norm: 2004.54, NNZs: 2, Bias: -7.857192, T: 178500, Avg. loss: 8.074603
Total training time: 0.38 seconds.
-- Epoch 358
Norm: 2004.54, NNZs: 2, Bia

Norm: 2004.75, NNZs: 2, Bias: -1.204036, T: 279500, Avg. loss: 1.969064
Total training time: 0.58 seconds.
-- Epoch 560
Norm: 2004.77, NNZs: 2, Bias: -1.184208, T: 280000, Avg. loss: 1.917576
Total training time: 0.58 seconds.
-- Epoch 561
Norm: 2004.77, NNZs: 2, Bias: -1.165717, T: 280500, Avg. loss: 1.972345
Total training time: 0.58 seconds.
-- Epoch 562
Norm: 2004.79, NNZs: 2, Bias: -1.146689, T: 281000, Avg. loss: 1.917507
Total training time: 0.58 seconds.
-- Epoch 563
Norm: 2004.79, NNZs: 2, Bias: -1.128243, T: 281500, Avg. loss: 1.907594
Total training time: 0.58 seconds.
-- Epoch 564
Norm: 2004.77, NNZs: 2, Bias: -1.110382, T: 282000, Avg. loss: 1.962328
Total training time: 0.59 seconds.
-- Epoch 565
Norm: 2004.75, NNZs: 2, Bias: -1.091268, T: 282500, Avg. loss: 1.883180
Total training time: 0.59 seconds.
-- Epoch 566
Norm: 2004.77, NNZs: 2, Bias: -1.073368, T: 283000, Avg. loss: 1.959379
Total training time: 0.59 seconds.
-- Epoch 567
Norm: 2004.76, NNZs: 2, Bias: -1.055096,

Norm: 2004.87, NNZs: 2, Bias: 1.060621, T: 376000, Avg. loss: 1.287707
Total training time: 0.78 seconds.
-- Epoch 753
Norm: 2004.85, NNZs: 2, Bias: 1.067248, T: 376500, Avg. loss: 1.243783
Total training time: 0.78 seconds.
-- Epoch 754
Norm: 2004.85, NNZs: 2, Bias: 1.074030, T: 377000, Avg. loss: 1.232833
Total training time: 0.78 seconds.
-- Epoch 755
Norm: 2004.87, NNZs: 2, Bias: 1.081294, T: 377500, Avg. loss: 1.264187
Total training time: 0.78 seconds.
-- Epoch 756
Norm: 2004.85, NNZs: 2, Bias: 1.087407, T: 378000, Avg. loss: 1.242126
Total training time: 0.78 seconds.
-- Epoch 757
Norm: 2004.84, NNZs: 2, Bias: 1.094895, T: 378500, Avg. loss: 1.190232
Total training time: 0.78 seconds.
-- Epoch 758
Norm: 2004.87, NNZs: 2, Bias: 1.102170, T: 379000, Avg. loss: 1.253499
Total training time: 0.79 seconds.
-- Epoch 759
Norm: 2004.82, NNZs: 2, Bias: 1.107727, T: 379500, Avg. loss: 1.247513
Total training time: 0.79 seconds.
-- Epoch 760
Norm: 2004.83, NNZs: 2, Bias: 1.114121, T: 38000

In [10]:
# Coefficients learned by the SGDRegressor, the first (and only) pipeline step.
sgd_model = m[0]
sgd_model.coef_

array([   5.61081179, 2004.88725643])