# Improved Implementation for Stochastic Linear Regression
Adds L2 regularization.\
Includes efficiency improvements.\
After training converges, check the reported cost to confirm that the fit is acceptable.

In [9]:
import numpy as np

In [10]:
from algorithms import LinearRegression

In [11]:
# Print the docstring of LinearRegression.fit to list its training parameters and defaults.
help(LinearRegression.fit)

Help on function fit in module algorithms.linear_regression:

fit(self, X, y, epochs=100, alpha=0.01, Lambda=0.0001, error_threshold=0.001, validation_size=0.2, output_limit=10)
    Fit the linear regression model to the training data.
    
    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix for training.
    y : numpy.ndarray
        Target values.
    epochs : int, optional
        Number of iterations over the training set (default is DEFAULT_EPOCHS).
    alpha : float, optional
        Learning rate for gradient descent (default is DEFAULT_ALPHA).
    Lambda : float, optional
        Regularization rate for L2 regularization (default is DEFAULT_LAMBDA).
    error_threshold : float, optional
        Minimal decrease in cost to avoid early stopping (default is DEFAULT_ERROR_THRESHOLD).
    validation_size : float, optional
        Fraction of the data to reserve for validation (default is DEFAULT_VALIDATION_SIZE).
    output_limit : int, optional
        Freque

## Usage

In [12]:
# Noise-free synthetic problem with a known solution: weights (5.55, 11.22) and
# intercept 50, so the values returned by fit() can be checked by eye.
# A seeded generator makes the sampled features identical on every
# Restart & Run All (the original unseeded np.random.rand call did not).
# NOTE(review): any shuffling or validation split done inside fit() may use its
# own random source; this seed only pins the generated dataset.
rng = np.random.default_rng(42)
X = rng.random((1000, 2))
y = 5.55 * X[:, 0] + 11.22 * X[:, 1] + 50

m = LinearRegression()
# Last expression of the cell: its return value is displayed as the cell output.
m.fit(X, y, epochs=1000, alpha=0.2, error_threshold=0.00001, output_limit=10)

(0/10) > Epoch: 0 cost: 3312.03793210 vCost: 3366.56602828
(1/10) > Epoch: 100 cost: 0.50846949 vCost: 0.49097533
(2/10) > Epoch: 200 cost: 0.02054858 vCost: 0.01991962
(3/10) > Epoch: 300 cost: 0.00102947 vCost: 0.00100105
       * Epoch: 344 vCost: 0.00032685

Epoch 344 > Validation cost converged within threshold 1e-05 or performance degraded.


(array([ 5.5221, 11.17  ]), np.float64(50.04611032535716))

In [13]:
from sklearn.datasets import make_regression

In [14]:
# 20-feature regression problem of which 19 features are informative.
# random_state pins the generated dataset so the run is reproducible; without it
# make_regression draws a different problem on every execution.
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    n_informative=19,
    random_state=42,
)

m = LinearRegression()
# Last expression of the cell: its return value is displayed as the cell output.
m.fit(X, y, epochs=100, alpha=0.5, error_threshold=0.01, output_limit=10)

(0/10) > Epoch: 0 cost: 64064.48329779 vCost: 50283.80112023
(1/10) > Epoch: 10 cost: 7.51719903 vCost: 7.01634490
(2/10) > Epoch: 20 cost: 0.00807893 vCost: 0.00782682
       * Epoch: 28 vCost: 0.00475715

Epoch 28 > Validation cost converged within threshold 0.01 or performance degraded.


(array([1.6726e+01, 9.7937e+01, 7.1087e+01, 3.0723e-03, 2.3855e+01,
        8.5078e+01, 9.4439e+01, 4.5841e+01, 1.3823e+01, 5.4478e+01,
        9.4214e+01, 5.8567e+00, 1.4339e+01, 9.1789e+01, 7.2753e+01,
        6.5580e+01, 3.3559e+01, 2.6675e+01, 2.1721e+01, 8.9356e+00]),
 np.float64(-0.0017604227477098985))

In [15]:
from sklearn.datasets import fetch_california_housing

In [16]:
# Load the California housing dataset; as_frame=True returns the data and target as pandas objects.
california = fetch_california_housing(as_frame=True)

In [17]:
# Take the feature table and the target column from the dataset bundle and
# convert each from its pandas container to a plain NumPy array.
X = california["data"].to_numpy()
y = california["target"].to_numpy()

In [18]:
from sklearn.preprocessing import StandardScaler
# Standardize the features (zero mean, unit variance per column) before gradient descent.
scaler = StandardScaler()
# Fit the scaler on X and replace X with its standardized values.
# NOTE(review): the scaler is fit on all rows, including those fit() later
# reserves for validation via validation_size.
X = scaler.fit_transform(X)

In [19]:
# Train on the standardized California housing data with a small learning rate,
# reserving one third of the rows for validation.
m = LinearRegression()
# Last expression of the cell: its return value is displayed as the cell output.
m.fit(
    X,
    y,
    epochs=25,
    alpha=0.0001,
    error_threshold=1 / 1000,
    validation_size=1 / 3,
    output_limit=10,
)

(0/10) > Epoch: 0 cost: 5.45977716 vCost: 6.08267042
(1/10) > Epoch: 2 cost: 2.81220186 vCost: 2.53382627
(2/10) > Epoch: 4 cost: 2.80991919 vCost: 2.47483540
(3/10) > Epoch: 6 cost: 2.80903275 vCost: 2.47112168
(4/10) > Epoch: 8 cost: 2.80755582 vCost: 2.47025420
(5/10) > Epoch: 10 cost: 2.80603965 vCost: 2.46956599
(6/10) > Epoch: 12 cost: 2.80452216 vCost: 2.46888966
(7/10) > Epoch: 14 cost: 2.80300577 vCost: 2.46821454
       * Epoch: 15 vCost: 2.46787718

Epoch 15 > Validation cost converged within threshold 0.001 or performance degraded.


(array([0.2187, 0.8749, 0.8419, 0.3172, 0.0179, 0.5083, 0.0411, 0.0218]),
 np.float64(2.0260348337620897))