# Improved Implementation for Stochastic Linear Regression
Adds L2 regularization.\
Includes efficiency improvements.\
It is recommended to check the cost after convergence.

In [1]:
# FIX docs

In [2]:
import numpy as np

## Implementation

In [3]:
class LinearRegression:
    """Linear regression trained by per-sample gradient descent with L2 regularization.

    Training holds out the tail of the data as a validation set and stops
    early once the validation cost has failed to improve by more than
    ``error_threshold`` for 10 consecutive epochs.
    """

    DEFAULT_EPOCHS = 1000
    DEFAULT_ALPHA = 0.01
    DEFAULT_LAMBDA = 0.0001
    DEFAULT_ERROR_THRESHOLD = 0.001
    DEFAULT_VALIDATION_SIZE = 0.2

    def __init__(self):
        # Convergence bookkeeping. fit() resets both on every call; they are
        # initialised here so convergence_test() can also be used on its own.
        self.c = 0         # consecutive epochs without sufficient improvement
        self.EXIT = False  # set to True by convergence_test() to stop training

    def compute_cost(self, y, y_, Lambda, W, m):
        """Return the mean squared error plus the L2 penalty.

        Parameters
        ----------
        y : numpy.ndarray
            Target values.
        y_ : numpy.ndarray
            Predicted values.
        Lambda : float
            L2 regularization rate.
        W : numpy.ndarray
            Current weights.
        m : int
            Sample count used to scale the penalty term, ``Lambda / (2 * m)``.
        """
        return np.mean((y-y_)**2) + ((np.sum(W**2)) * Lambda/(2*m))

    def validation_split(self, X, y, validation_size=DEFAULT_VALIDATION_SIZE):
        """Split X and y into a training part and a validation part.

        The split is positional (no shuffling): the first
        ``int(len(X) * (1 - validation_size))`` rows are used for training and
        the remaining rows for validation.

        Returns
        -------
        tuple
            ``(X_train, y_train, X_val, y_val)``
        """
        val = int(X.shape[0] * (1 - validation_size))
        return X[:val], y[:val], X[val:], y[val:]

    def log_current(self, k, num_out, output_limit, cost, vcost, alter=False):
        """Print the training progress for epoch ``k``.

        With ``alter=True`` only the epoch and the validation cost are printed
        (used for the final line on early exit or interruption); ``num_out``,
        ``output_limit`` and ``cost`` are ignored in that mode.
        """
        if alter:  # For printing at arbitrary epoch, w vCost only
            print(f"       > Epoch: {k}",
                  f"vCost: {vcost:.8f}")
            return None

        print(f"({k//num_out}/{output_limit}) > Epoch: {k}",
              f"cost: {cost:.8f}",
              f"vCost: {vcost:.8f}")

    def convergence_test(self, current_cost, past_cost, error_threshold, k):
        """Update the convergence counter and raise the exit flag when due.

        An epoch counts towards convergence when the validation cost improved
        by at most ``error_threshold`` compared to the previous epoch; this
        includes the case where it got worse. After 10 such epochs in a row
        ``self.EXIT`` is set to True. Any epoch with a larger improvement
        resets the counter.
        """
        if past_cost - current_cost <= error_threshold:
            self.c += 1
            if self.c >= 10:
                self.log_current(k=k, num_out=0, output_limit=0, cost=0, vcost=current_cost, alter=True)
                print(f"\nEpoch {k} > vCost Converged with threshold {error_threshold}. OR Performance degraded.")
                self.EXIT = True  # Also triggers on validation perf degradation (overfit)
        else:
            self.c = 0  # Streak broken: restart the consecutive count

    def single_step(self, Xi, yi, m, W, b, alpha, Lambda):
        """Perform one gradient-descent update from a single sample.

        ``W`` is updated in place (and also returned); the updated ``b`` is
        returned. The weight step is scaled by ``alpha / m`` whereas the bias
        step is scaled by ``alpha`` only.

        Returns
        -------
        tuple
            ``(W, b)`` after the update.
        """
        y_i = np.dot(Xi, W) + b
        res = yi - y_i

        # res is (target - prediction), so these are descent directions
        # and are added to the parameters below.
        dJ_dW = np.dot(res, Xi) - Lambda * W
        dJ_db = res.mean()

        W += dJ_dW * alpha / m
        b += dJ_db * alpha

        return W, b

    def fit(self, X, y,
            epochs = DEFAULT_EPOCHS,
            alpha = DEFAULT_ALPHA,
            Lambda=DEFAULT_LAMBDA,
            error_threshold = DEFAULT_ERROR_THRESHOLD,
            validation_size = DEFAULT_VALIDATION_SIZE,
            output_limit=10):
        """Fit the linear regression model to the given data.

        Parameters
        ----------
        X : numpy.ndarray
            2-D feature matrix, one row per sample.

        y : numpy.ndarray
            Target values, one per row of X.

        epochs : int, default=1000
            Number of complete iterations through X

        alpha : float, default=0.01
            Constant Learning Rate

        Lambda : float, default=0.0001
            Rate for l2 Regularization

        error_threshold : float, default=0.001
            Threshold for vCost convergence

        validation_size : float, default=0.2
            Fraction of data for validation, 0 <= vs < 1

        output_limit : int, default=10
            Number of iterations to show

        Returns
        -------
        W : numpy.ndarray
            The optimized weights.
        b : float
            The optimized intercept.

        Raises
        ------
        ValueError
            If ``output_limit`` is not greater than 0.
        """
        if output_limit <= 0:
            raise ValueError("Output limit should be greater than 0")

        # Logging interval in epochs. Clamped to 1: when epochs < output_limit
        # the floor division yields 0, which previously caused a
        # ZeroDivisionError in the logging code.
        num_out = max(1, epochs // output_limit)
        np.set_printoptions(precision=4)  # NOTE: changes NumPy's global print settings

        X, y, X_val, y_val = self.validation_split(X, y, validation_size)
        m, n = X.shape

        W = np.random.rand(n)
        b = np.random.rand()

        y_ = np.dot(X, W) + b
        y_val_ = np.dot(X_val, W) + b

        cost = self.compute_cost(y, y_, Lambda, W, m)
        past_cost = self.compute_cost(y_val, y_val_, Lambda, W, m)

        self.c = 0  # to count convergence for consecutive iterations
        self.EXIT = False  # Exit flag for convergence

        self.log_current(0, num_out, output_limit, cost, past_cost)  # Initial Out

        # Pre-bind the names the KeyboardInterrupt handler reads, so that an
        # interrupt during the very first epoch cannot hit an unbound local.
        k = 0
        current_cost = past_cost

        try:
            for k in range(1, epochs+1):
                # One pass over the training samples, updating after each one
                for i in range(m):
                    W, b = self.single_step(X[i], y[i], m, W, b, alpha, Lambda)

                # Validation cost of this epoch, shared by logging and the
                # convergence test
                y_val_ = np.dot(X_val, W) + b
                current_cost = self.compute_cost(y_val, y_val_, Lambda, W, m)  # vCost

                # LOG OUTPUT
                if k % num_out == 0:
                    y_ = np.dot(X, W) + b
                    cost = self.compute_cost(y, y_, Lambda, W, m)
                    self.log_current(k, num_out, output_limit, cost, current_cost)

                # CONVERGENCE
                self.convergence_test(current_cost, past_cost, error_threshold, k)

                if self.EXIT:
                    return (W, b)

                past_cost = current_cost

        # CTRL C: report the latest state and return what has been learned so far
        except KeyboardInterrupt:
            self.log_current(k=k, num_out=0, output_limit=0, cost=0, vcost=current_cost, alter=True)
            print(f"\nTerminated! Returned: Weights: {W}, Bias: {b}")
            return (W, b)

        return (W, b)

## Usage

In [4]:
# Noise-free synthetic target: y = 5.55*x0 + 11.22*x1 + 50
X = np.random.rand(1000, 2)
y = 5.55 * X[:, 0] + 11.22 * X[:, 1] + 50

m = LinearRegression()
m.fit(
    X,
    y,
    epochs=1000,
    alpha=0.2,
    error_threshold=1e-5,
    output_limit=10,
)

(0/10) > Epoch: 0 cost: 3321.12922313 vCost: 3313.17398696
(1/10) > Epoch: 100 cost: 0.42088014 vCost: 0.45458323
(2/10) > Epoch: 200 cost: 0.01643032 vCost: 0.01774087
(3/10) > Epoch: 300 cost: 0.00080804 vCost: 0.00087143
       > Epoch: 339 vCost: 0.00031887

Epoch 339 > vCost Converged with threshold 1e-05. OR Performance degraded.


(array([ 5.5352, 11.1625]), 50.036011338322)

In [5]:
from sklearn.datasets import make_regression

In [6]:
# 20-feature regression problem generated by scikit-learn
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    n_informative=20,
)

m = LinearRegression()
m.fit(
    X,
    y,
    epochs=100,
    alpha=0.5,
    error_threshold=0.01,
    output_limit=10,
)

(0/10) > Epoch: 0 cost: 69740.02828327 vCost: 73192.81481812
(1/10) > Epoch: 10 cost: 5.38961859 vCost: 6.36190649
(2/10) > Epoch: 20 cost: 0.00800590 vCost: 0.00874449
       > Epoch: 28 vCost: 0.00509619

Epoch 28 > vCost Converged with threshold 0.01. OR Performance degraded.


(array([74.2808,  3.8328, 59.6396,  9.0241, 79.6588, 73.3384, 99.199 ,
        21.1876, 29.7783, 39.7521, 85.9187, 16.5606, 93.8983, 90.0927,
        35.1949, 69.8429, 30.6432,  7.2502,  7.8456, 63.0211]),
 0.002001552437994436)

In [7]:
from sklearn.datasets import fetch_california_housing

In [8]:
# Load the California housing data and keep only the first 1000 samples
X, y = (part[:1000] for part in fetch_california_housing(return_X_y=True))

In [9]:
# Fit on the housing subset, holding out the last third for validation
m = LinearRegression()
m.fit(
    X,
    y,
    epochs=1000,
    alpha=0.0001,
    error_threshold=1/1000,
    validation_size=1/3,
    output_limit=10,
)

(0/10) > Epoch: 0 cost: 714311.95952983 vCost: 2277008.25312139
(1/10) > Epoch: 100 cost: 6.95963769 vCost: 9.48829066
(2/10) > Epoch: 200 cost: 3.52168257 vCost: 4.92413118
(3/10) > Epoch: 300 cost: 2.55081789 vCost: 3.42121042
(4/10) > Epoch: 400 cost: 2.22878181 vCost: 2.83760587
(5/10) > Epoch: 500 cost: 2.07992179 vCost: 2.55591369
(6/10) > Epoch: 600 cost: 1.97967397 vCost: 2.38571419
(7/10) > Epoch: 700 cost: 1.89528935 vCost: 2.26252746
       > Epoch: 761 vCost: 2.19934425

Epoch 761 > vCost Converged with threshold 0.001. OR Performance degraded.


(array([8.7617e-01, 3.7969e-03, 2.7513e-01, 4.0998e-01, 2.1620e-04,
        2.7794e-01, 8.2371e-01, 3.1061e-01]),
 2.92644336959984)