# Learning Implementation: Linear Regression with Stochastic Gradient Descent

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error



# Observations:
* Handles a large bias (intercept) better than batch gradient descent (BatchGD)
* Possibly faster
* If the features have non-comparable scales, the feature with the larger values gets more weight


## Implementation

In [2]:
def single_update(i,X,y,y_,W,b,alpha):
    """Apply one gradient-descent step computed from sample ``i`` only.

    Parameters
    ----------
    i : int
        Row index of the sample used for this step.
    X : ndarray
        Feature matrix; only row ``X[i]`` is read.
    y : array-like
        Target values; only ``y[i]`` is read.
    y_ : array-like
        Current predictions; only ``y_[i]`` is read.
    W : ndarray
        Current weights, one per feature. The array is not modified.
    b : float
        Current bias.
    alpha : float
        Learning rate (step size).

    Returns
    -------
    tuple
        ``(W, b)`` – the updated weights (a new array) and the updated bias.
    """
    # Prediction error of sample i; both parameter steps are proportional to it.
    residual = y[i] - y_[i]

    # Per-feature step, computed for all features at once instead of a Python loop.
    weight_step = residual * X[i]

    # Out-of-place updates: the caller's W is left untouched, and an
    # integer-typed W no longer fails on the in-place float addition.
    W = W + weight_step * alpha
    b = b + residual * alpha
    return W, b

In [3]:
def SGD_LinearRegression(X,y, iterations = 100,alpha = 0.000001):
    """Fit ``y ~ X.W + b`` with per-sample gradient-descent updates.

    Weights start at zero and the bias at 0. Each pass visits the rows of
    ``X`` in order and calls ``single_update`` once per row.

    Progress (iteration number, mean squared error, weights, bias) is printed
    every ``iterations // 10`` passes, or every pass when ``iterations < 10``.

    Note: the loop runs for ``k = 0 .. iterations`` inclusive, i.e.
    ``iterations + 1`` passes over the data. The last progress line is printed
    *before* the final pass, so the returned values are one pass ahead of it.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Feature matrix, one row per sample.
    y : array-like of length m
        Target values.
    iterations : int
        Controls the number of passes over the data (see note above).
    alpha : float
        Learning rate passed to ``single_update``.

    Returns
    -------
    tuple
        ``(W, b)`` – the fitted weights (length ``n``) and the bias.
    """
    m,n = X.shape
    W = np.zeros(n)
    b = 0

    # iterations // 10 is 0 for iterations < 10, which made the modulo below
    # raise ZeroDivisionError; fall back to logging every pass in that case.
    log_every = max(1, iterations // 10)

    # Prediction buffer. single_update only reads entry i, so inside the sample
    # loop just that entry is refreshed instead of recomputing all m predictions
    # after every single update.
    y_ = np.matmul(X,W) + b

    for k in range(iterations+1):
        if k % log_every == 0:
            # Full predictions are only needed here, to report the cost.
            y_ = np.matmul(X,W) + b
            print(f"Iteration: {k}", f"Cost: {mean_squared_error(y,y_)}", f"Weights: {W}",f"Bias: {b}")
        for i in range(m):
            # Prediction for sample i under the current parameters.
            y_[i] = np.dot(X[i], W) + b
            W,b = single_update(i,X,y,y_,W,b,alpha)
    return W,b

## Running Examples

In [4]:
# One feature taking the integer values 10..99, stored as a column.
X = np.arange(10, 100).reshape(-1, 1)
# Noise-free linear target with slope 5.6 and bias -1015.48.
y = -1015.48 + X[:, 0] * 5.6
W, b = SGD_LinearRegression(X, y, 10000, 0.0002)

Iteration: 0 Cost: 525663.0650666667 Weights: [0.] Bias: 0
Iteration: 1000 Cost: 28351.593950798517 Weights: [2.35935707] Bias: -693.0439172436983
Iteration: 2000 Cost: 2859.6709774316355 Weights: [4.57079835] Bias: -913.0769410247393
Iteration: 3000 Cost: 288.4394476500768 Weights: [5.27313399] Bias: -982.9576272002469
Iteration: 4000 Cost: 29.093317244268164 Weights: [5.49619003] Bias: -1005.1511607728299
Iteration: 5000 Cost: 2.934484569189197 Weights: [5.5670308] Bias: -1012.1996452596633
Iteration: 6000 Cost: 0.2959854874800522 Weights: [5.58952925] Bias: -1014.4381862021597
Iteration: 7000 Cost: 0.029854445212339927 Weights: [5.59667458] Bias: -1015.1491284707672
Iteration: 8000 Cost: 0.0030112554046548092 Weights: [5.59894387] Bias: -1015.3749179123181
Iteration: 9000 Cost: 0.00030372894382764697 Weights: [5.59966458] Bias: -1015.4466267896253
Iteration: 10000 Cost: 3.063548551088833e-05 Weights: [5.59989347] Bias: -1015.4694009417278


In [5]:
print(W,b)
# Predictions of the fitted model on the training inputs.
pred = np.dot(X ,W) + b
# scikit-learn's signature is mean_squared_error(y_true, y_pred); pass the
# targets first, as the training loop's cost report does.
mean_squared_error(y, pred)

[5.5998936] -1015.4694130917545


3.0565288968276583e-05

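Note: these values differ slightly from the last printed line because the function performs one more pass over the data after its final progress output.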

In [6]:
# Seeded generator so that re-running this cell reproduces the same data.
rng = np.random.default_rng(0)
# Two features per sample: a random integer in [1, 50) and its square.
X = np.array([[x, x**2] for x in rng.integers(1, 50, 100)])
# Noise-free target: 5 * (first feature) - 2 * (second feature).
y = 5*X[:, 0] - 2*X[:, 1]
W,b = SGD_LinearRegression(X,y,10000,0.0000005)

Iteration: 0 Cost: 4556927.95 Weights: [0. 0.] Bias: 0
Iteration: 1000 Cost: 5.917599975618974 Weights: [ 4.72036959 -1.99174961] Bias: 0.36172562394409785
Iteration: 2000 Cost: 0.030286528972296573 Weights: [ 4.95905841 -1.99912605] Bias: 0.3782625586360989
Iteration: 3000 Cost: 0.013238641625326316 Weights: [ 4.97108741 -1.99949613] Bias: 0.37748054865035857
Iteration: 4000 Cost: 0.012967020516365448 Weights: [ 4.97180743 -1.99951665] Bias: 0.375841637556848
Iteration: 5000 Cost: 0.012845631084885704 Weights: [ 4.9719627  -1.99951972] Bias: 0.37416711201826414
Iteration: 6000 Cost: 0.012730976690940038 Weights: [ 4.97208926 -1.99952191] Bias: 0.3724979155435511
Iteration: 7000 Cost: 0.012617619697543736 Weights: [ 4.97221385 -1.99952404] Bias: 0.3708360591698918
Iteration: 8000 Cost: 0.01250528563967721 Weights: [ 4.97233782 -1.99952616] Bias: 0.36918161167031477
Iteration: 9000 Cost: 0.012393952362097005 Weights: [ 4.97246123 -1.99952828] Bias: 0.3675345450558697
Iteration: 10000 Co

In [None]:
# Seeded generator so that re-running this cell reproduces the same data.
rng = np.random.default_rng(0)
# One feature per sample: a random integer in [1, 50), stored as a column.
X = rng.integers(1, 50, 100).reshape(-1, 1)
# Noise-free linear target with slope 5.5 and bias 42.
y = 5.5*X[:, 0] + 42
W,b = SGD_LinearRegression(X,y,10000,0.00005)

Iteration: 0 Cost: 35141.7675 Weights: [0.] Bias: 0
Iteration: 1000 Cost: 34.29916708379966 Weights: [5.85342511] Bias: 30.440406820159744
Iteration: 2000 Cost: 2.6244709913565423 Weights: [5.59776345] Bias: 38.80241828648585
Iteration: 3000 Cost: 0.20081677107949367 Weights: [5.52704305] Bias: 41.11549406146569
