# Improved Implementation for Stochastic Linear Regression
Adds regularization.\
Adds efficiency improvements.\
It is recommended to check the cost after convergence.

In [1]:
import numpy as np

In [2]:
from scripts.linear_regression import LinearRegression

In [3]:
# Show the signature and docstring of LinearRegression.fit (IPython `?` help syntax).
?LinearRegression.fit

[0;31mSignature:[0m
[0mLinearRegression[0m[0;34m.[0m[0mfit[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mself[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mX[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0my[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mepochs[0m[0;34m=[0m[0;36m100[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0malpha[0m[0;34m=[0m[0;36m0.01[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mLambda[0m[0;34m=[0m[0;36m0.0001[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0merror_threshold[0m[0;34m=[0m[0;36m0.001[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mvalidation_size[0m[0;34m=[0m[0;36m0.2[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0moutput_limit[0m[0;34m=[0m[0;36m10[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Fit the linear regression model to the given data.

Parameter
---------
epochs: int, default=1000
    Number of complete iterations through X

alpha : float, default=0.01
    Constant Le

## Usage

In [4]:
# Seed NumPy's global RNG so the synthetic data set below is identical on
# every Restart & Run All.
# NOTE(review): fit() may draw random numbers of its own; whether those follow
# this seed depends on its implementation — confirm in scripts/linear_regression.py.
np.random.seed(42)

# Noise-free synthetic target: y = 5.55*x0 + 11.22*x1 + 50, so the fitted
# weights and intercept can be compared directly against these constants.
m = LinearRegression()
X = np.random.rand(1000, 2)
y = 5.55 * X[:, 0] + 11.22 * X[:, 1] + 50

# Last expression of the cell: its return value is displayed as the cell output.
m.fit(X, y, epochs=1000, alpha=0.2, error_threshold=0.00001, output_limit=10)

(0/10) > Epoch: 0 cost: 3290.88951321 vCost: 3292.58096256
(1/10) > Epoch: 100 cost: 0.60022780 vCost: 0.66980475
(2/10) > Epoch: 200 cost: 0.02596417 vCost: 0.02915099
(3/10) > Epoch: 300 cost: 0.00135797 vCost: 0.00153047
       * Epoch: 360 vCost: 0.00034356

Epoch 360 > vCost Converged with threshold 1e-05. Or performance degraded.


(array([ 5.5235, 11.1719]), 50.04508499125764)

In [5]:
from sklearn.datasets import make_regression

In [6]:
# 1000 samples, 20 features of which 19 are informative.
# random_state is fixed so the generated problem is the same on every run.
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    n_informative=19,
    random_state=0,
)
m = LinearRegression()

# Last expression of the cell: its return value is displayed as the cell output.
m.fit(X, y, epochs=100, alpha=0.5, error_threshold=0.01, output_limit=10)

(0/10) > Epoch: 0 cost: 66201.18675244 vCost: 67411.43261039
(1/10) > Epoch: 10 cost: 7.31509916 vCost: 7.76301295
(2/10) > Epoch: 20 cost: 0.00827110 vCost: 0.00844575
       * Epoch: 28 vCost: 0.00515609

Epoch 28 > vCost Converged with threshold 0.01. Or performance degraded.


(array([1.6202e-03, 3.6319e+01, 8.0540e+01, 8.9729e+01, 2.0123e+00,
        9.0274e+01, 7.6121e+01, 5.3553e+01, 1.4814e+01, 8.5510e+01,
        1.0999e+01, 1.8179e+01, 5.9873e+01, 7.0130e+01, 5.1684e+01,
        9.4293e+01, 2.0486e+01, 3.1294e+01, 3.2769e+01, 6.8133e+01]),
 -0.013485729993408191)

In [7]:
from sklearn.datasets import fetch_california_housing

In [8]:
# Load the California housing data as (features, target) and keep only the
# first 1000 rows of each array.
X, y = (part[:1000] for part in fetch_california_housing(return_X_y=True))

In [9]:
# Fresh model for the housing subset; one third of the rows are held out for
# validation via validation_size.
m = LinearRegression()

# Last expression of the cell: its return value is displayed as the cell output.
m.fit(
    X,
    y,
    epochs=1000,
    alpha=0.0001,
    error_threshold=1 / 1000,
    validation_size=1 / 3,
    output_limit=10,
)

(0/10) > Epoch: 0 cost: 26622.88485666 vCost: 72914.10736218
(1/10) > Epoch: 100 cost: 10.06995948 vCost: 13.77101287
(2/10) > Epoch: 200 cost: 3.90297015 vCost: 6.00731887
(3/10) > Epoch: 300 cost: 2.23724824 vCost: 3.59325967
(4/10) > Epoch: 400 cost: 1.76365068 vCost: 2.74865372
(5/10) > Epoch: 500 cost: 1.60991848 vCost: 2.40157684
(6/10) > Epoch: 600 cost: 1.54441812 vCost: 2.22941997
       * Epoch: 664 vCost: 2.15865535

Epoch 664 > vCost Converged with threshold 0.001. Or performance degraded.


(array([ 5.7298e-01, -2.5039e-03,  3.0372e-01,  7.1983e-01,  2.6108e-04,
         9.2114e-01,  5.7559e-01,  2.2344e-01]),
 0.8296507702577295)