# Improved Implementation for Stochastic Linear Regression
With L2 regularization.\
With efficiency improvements.\
It is recommended to check the cost after convergence.

In [1]:
import numpy as np

* parameter assignment reduced performance

## Implementation

In [2]:
class LinearRegression:
    """Linear regression trained by per-sample gradient descent with L2 regularization.

    The model is ``y_hat = X @ W + b``.  ``fit`` holds out the tail of the
    data as a validation set and stops early once the validation cost has
    failed to improve by more than ``error_threshold`` for ``PATIENCE``
    consecutive epochs.

    Attributes set by ``fit``
    -------------------------
    W : numpy.ndarray
        Weight vector, one entry per feature.
    b : float
        Intercept.
    cost, vcost : float
        Most recently computed training / validation cost.
    """

    DEFAULT_EPOCHS = 1000
    DEFAULT_ALPHA = 0.01
    DEFAULT_LAMBDA = 0.0001
    DEFAULT_ERROR_THRESHOLD = 0.001
    DEFAULT_VALIDATION_SIZE = 0.2

    # Consecutive non-improving epochs required before training stops early.
    PATIENCE = 10

    def __init__(self):
        # Parameters are created by fit(); None marks an unfitted model.
        self.W = None
        self.b = None

    def compute_cost(self, y, y_):
        """Return the mean squared error of ``y_`` against ``y`` plus the L2 penalty.

        The penalty is ``Lambda * sum(W ** 2) / (2 * m)`` where ``m`` is the
        number of training rows, regardless of how many rows ``y`` has.
        """
        mse = np.mean((y - y_) ** 2)
        penalty = np.sum(self.W ** 2) * self.Lambda / (2 * self.m)
        return mse + penalty

    def validation_split(self, X, y):
        """Split X and y into a leading training part and a trailing validation part.

        Returns
        -------
        tuple
            ``(X_train, y_train, X_val, y_val)``; the last
            ``validation_size`` fraction of the rows is the validation set.
            No shuffling is performed.
        """
        val = int(X.shape[0] * (1 - self.validation_size))
        return X[:val], y[:val], X[val:], y[val:]

    def log_current(self, k, alter=False):
        """Print the training state at epoch ``k``.

        With ``alter=True`` only the epoch and validation cost are printed;
        this form is used for the final line before an early exit.
        """
        if alter:  # For printing at arbitrary epoch, with vCost only
            print(f"       * Epoch: {k}",
                  f"vCost: {self.vcost:.8f}")
            return

        print(f"({k//self.num_out}/{self.output_limit}) > Epoch: {k}",
              f"cost: {self.cost:.8f}",
              f"vCost: {self.vcost:.8f}")

    def convergence_test(self, k):
        """Update the stall counter and raise ``self.EXIT`` after ``PATIENCE`` stalls.

        An epoch counts as stalled when the validation cost dropped by at
        most ``error_threshold`` since the previous epoch; this also covers
        the case where the validation cost went up (possible overfit).
        """
        if self.last_vcost - self.vcost <= self.error_threshold:
            self.c += 1
            if self.c >= self.PATIENCE:
                self.log_current(k=k, alter=True)
                print(f"\nEpoch {k} > vCost Converged with threshold {self.error_threshold}. OR Performance degraded.")
                self.EXIT = True  # Also set on validation performance degradation
        else:
            self.c = 0  # Only consecutive stalled epochs count

    def single_step(self, Xi, yi):
        """Update ``W`` and ``b`` in place from a single sample ``(Xi, yi)``.

        The weight step is scaled by ``alpha / m`` while the bias step is
        scaled by ``alpha`` only.
        """
        y_i = np.dot(Xi, self.W) + self.b
        res = yi - y_i

        # res = y - y_hat, so these are negative gradients and are added below.
        dJ_dW = np.dot(res, Xi) - self.Lambda * self.W
        dJ_db = res.mean()

        self.W += dJ_dW * self.alpha / self.m
        self.b += dJ_db * self.alpha

    def fit(self, X, y,
            epochs=DEFAULT_EPOCHS,
            alpha=DEFAULT_ALPHA,
            Lambda=DEFAULT_LAMBDA,
            error_threshold=DEFAULT_ERROR_THRESHOLD,
            val_size=DEFAULT_VALIDATION_SIZE,
            output_limit=10):
        """Fit the linear regression model to the given data.

        Parameters
        ----------
        X : array-like of shape (samples, features)
            Input data.

        y : array-like of shape (samples,)
            Target values.

        epochs : int, default=1000
            Number of complete iterations through X.

        alpha : float, default=0.01
            Constant learning rate.

        Lambda : float, default=0.0001
            Rate for L2 regularization.

        error_threshold : float, default=0.001
            Threshold for vCost convergence.

        val_size : float, default=0.2
            Fraction of the data held out for validation, 0 <= val_size < 1.
            With no validation rows the validation cost is NaN and early
            stopping never triggers.

        output_limit : int, default=10
            Number of progress lines to show over the full run.

        Returns
        -------
        W : numpy.ndarray
            The optimized weights.
        b : float
            The optimized intercept.

        Raises
        ------
        ValueError
            If ``output_limit`` is not positive, ``val_size`` is outside
            ``[0, 1)``, ``X`` is not 2-D, ``X`` and ``y`` disagree on the
            number of samples, or the training split is empty.
        """
        if output_limit <= 0:
            raise ValueError("Output limit should be greater than 0")
        if not 0 <= val_size < 1:
            raise ValueError("val_size should satisfy 0 <= val_size < 1")

        # Accept lists and other array-likes, not only ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X should be a 2-D array of shape (samples, features)")
        if y.shape[:1] != X.shape[:1]:
            raise ValueError("X and y should contain the same number of samples")

        self.alpha = alpha
        self.Lambda = Lambda
        self.validation_size = val_size
        self.error_threshold = error_threshold

        self.epochs = epochs
        self.output_limit = output_limit
        # Logging interval in epochs; at least 1 so that epochs < output_limit
        # does not lead to a division by zero in the logging code.
        self.num_out = max(1, self.epochs // self.output_limit)

        # Global NumPy setting, kept so printed weights stay at 4 digits.
        np.set_printoptions(precision=4)

        X_train, y_train, X_val, y_val = self.validation_split(X, y)
        self.m, self.n = X_train.shape
        if self.m == 0:
            raise ValueError("No training samples left after the validation split")

        self.W = np.random.rand(self.n)
        self.b = np.random.rand()

        self.cost = self.compute_cost(y_train, np.dot(X_train, self.W) + self.b)
        self.vcost = self.compute_cost(y_val, np.dot(X_val, self.W) + self.b)
        self.last_vcost = self.vcost

        self.c = 0  # Count of consecutive stalled epochs
        self.EXIT = False  # Exit flag for convergence

        self.log_current(0)  # Initial output

        k = 0  # Keeps the interrupt handler valid before the first epoch starts
        try:
            for k in range(1, self.epochs + 1):
                # One pass of per-sample updates over the training set.
                for Xi, yi in zip(X_train, y_train):
                    self.single_step(Xi, yi)

                # Validation cost is needed every epoch; compute it once.
                self.vcost = self.compute_cost(y_val, np.dot(X_val, self.W) + self.b)

                # Training cost is only needed on logging epochs.
                if k % self.num_out == 0:
                    self.cost = self.compute_cost(y_train, np.dot(X_train, self.W) + self.b)
                    self.log_current(k)

                self.convergence_test(k)
                if self.EXIT:
                    return (self.W, self.b)

                self.last_vcost = self.vcost

        except KeyboardInterrupt:
            # Ctrl-C: report where training stopped and return what we have.
            self.log_current(k=k, alter=True)
            print(f"\nTerminated! Returned: Weights: {self.W}, Bias: {self.b}")
            return (self.W, self.b)

        return (self.W, self.b)

## Usage

In [3]:
# Noise-free synthetic target: y = 5.55*x0 + 11.22*x1 + 50 on 1000 uniform samples.
m = LinearRegression()
X = np.random.rand(1000,2)
y = 5.55*X[:,0] + 11.22*X[:,1] + 50
# %timeit (IPython magic) calls fit repeatedly, so the training log is printed once per timing run.
%timeit m.fit(X, y ,epochs= 1000, alpha = 0.2, error_threshold = 0.001, output_limit=10)

(0/10) > Epoch: 0 cost: 3328.33567934 vCost: 3344.31967759
(1/10) > Epoch: 100 cost: 0.49500454 vCost: 0.44454954
       * Epoch: 191 vCost: 0.02196689

Epoch 191 > vCost Converged with threshold 0.001. OR Performance degraded.
(0/10) > Epoch: 0 cost: 3316.70215377 vCost: 3331.49244356
(1/10) > Epoch: 100 cost: 0.42706651 vCost: 0.38301200
       * Epoch: 186 vCost: 0.02237819

Epoch 186 > vCost Converged with threshold 0.001. OR Performance degraded.
(0/10) > Epoch: 0 cost: 3377.06099678 vCost: 3393.11899922
(1/10) > Epoch: 100 cost: 0.47524958 vCost: 0.42839274
       * Epoch: 190 vCost: 0.02179729

Epoch 190 > vCost Converged with threshold 0.001. OR Performance degraded.
(0/10) > Epoch: 0 cost: 3386.84113269 vCost: 3402.78754242
       * Epoch: 77 vCost: 0.96790565

Terminated! Returned: Weights: [3.9028 8.246 ], Bias: 52.415040479616344
(0/10) > Epoch: 0 cost: 3320.27924196 vCost: 3335.66159563
(1/10) > Epoch: 100 cost: 0.46465453 vCost: 0.41665345
       * Epoch: 189 vCost: 0.022

In [3]:
from sklearn.datasets import make_regression

In [6]:
# Reproducible 10000-sample, 2-feature regression problem (random_state=0).
X, y = make_regression(n_samples=10000,n_features=2, n_informative=2, random_state=0)
m = LinearRegression()
# %timeit (IPython magic) calls fit repeatedly, so the training log is printed once per timing run.
%timeit m.fit(X, y ,epochs= 100, alpha = 0.25, error_threshold = 0.01, output_limit=10)

(0/10) > Epoch: 0 cost: 11845.98233804 vCost: 11608.66898195
(1/10) > Epoch: 10 cost: 130.99993674 vCost: 131.44726339
(2/10) > Epoch: 20 cost: 1.00056007 vCost: 1.00491690
(3/10) > Epoch: 30 cost: 0.00985083 vCost: 0.00989963
       * Epoch: 38 vCost: 0.00073651

Epoch 38 > vCost Converged with threshold 0.01. OR Performance degraded.
(0/10) > Epoch: 0 cost: 11946.96998784 vCost: 11707.40429251
(1/10) > Epoch: 10 cost: 132.38022888 vCost: 132.85210995
(2/10) > Epoch: 20 cost: 1.01139236 vCost: 1.01594832
(3/10) > Epoch: 30 cost: 0.00994652 vCost: 0.00999712
       * Epoch: 38 vCost: 0.00074011

Epoch 38 > vCost Converged with threshold 0.01. OR Performance degraded.
(0/10) > Epoch: 0 cost: 11852.69574842 vCost: 11613.16591518
(1/10) > Epoch: 10 cost: 131.09052969 vCost: 131.53894900
(2/10) > Epoch: 20 cost: 1.00126004 vCost: 1.00562592
(3/10) > Epoch: 30 cost: 0.00985693 vCost: 0.00990582
       * Epoch: 38 vCost: 0.00073674

Epoch 38 > vCost Converged with threshold 0.01. OR Performa

In [6]:
from sklearn.datasets import fetch_california_housing

In [7]:
# Load the California housing data and keep only the first 1000 rows.
X,y = fetch_california_housing(return_X_y=True)
X = X[:1000]
y = y[:1000]

In [8]:
# Fit with the trailing third of the rows held out for validation (val_size=1/3).
m = LinearRegression()
m.fit(X, y ,epochs= 1000, alpha = 0.0001, error_threshold = 1/1000, val_size=1/3 ,output_limit=10)

(0/10) > Epoch: 0 cost: 23277.33947452 vCost: 92554.16385094
(1/10) > Epoch: 100 cost: 11.24434409 vCost: 14.00525613
(2/10) > Epoch: 200 cost: 3.98412860 vCost: 5.28210467
(3/10) > Epoch: 300 cost: 2.03675686 vCost: 2.67244059
(4/10) > Epoch: 400 cost: 1.49958577 vCost: 1.81804002
(5/10) > Epoch: 500 cost: 1.34182463 vCost: 1.50244786
       * Epoch: 595 vCost: 1.37304108

Epoch 595 > vCost Converged with threshold 0.001. OR Performance degraded.


(array([ 3.0799e-01, -3.0867e-03,  2.3938e-01,  5.0303e-01,  2.9250e-04,
         9.7142e-01,  7.8337e-01,  2.9349e-01]),
 2.9273326806109066)