# Improved Implementation of Linear Regression with Stochastic Gradient Descent
With L2 regularization.\
With efficiency improvements.\
It is recommended to check the cost after convergence.

In [1]:
import numpy as np

## Implementation

In [2]:
class LinearRegression:
    """Linear regression model with L2 regularization.

    The model is trained by ``fit``, which applies one gradient step per
    training sample (stochastic gradient descent) and stops early once the
    validation cost has stopped improving for 10 consecutive epochs.

    State written by ``fit`` / ``convergence_test``:
      * ``self.c``    -- number of consecutive epochs without sufficient
                         improvement of the validation cost.
      * ``self.EXIT`` -- set to True when training should stop early.
    """

    DEFAULT_EPOCHS = 1000
    DEFAULT_ALPHA = 0.01
    DEFAULT_LAMBDA = 0.0001
    DEFAULT_ERROR_THRESHOLD = 0.001
    DEFAULT_VALIDATION_SIZE = 0.2

    def compute_cost(self, y, y_, Lambda, W, m):
        """Return the mean squared error plus the L2 penalty.

        The penalty term is ``Lambda * sum(W**2) / (2 * m)``.
        """
        return np.mean((y - y_) ** 2) + ((np.sum(W ** 2)) * Lambda / (2 * m))

    def validation_split(self, X, y, validation_size=DEFAULT_VALIDATION_SIZE):
        """Split X and y into a training part and a validation part.

        The split is positional (no shuffling): the leading
        ``1 - validation_size`` fraction of rows is used for training and
        the trailing rows for validation.

        Returns
        -------
        tuple
            ``(X_train, y_train, X_val, y_val)``
        """
        val = int(X.shape[0] * (1 - validation_size))
        return X[:val], y[:val], X[val:], y[val:]

    def log_current(self, k, num_out, output_limit, cost, vcost, alter=False):
        """Print training progress for epoch ``k``.

        With ``alter=True`` only the epoch and the validation cost are
        printed (used when training exits at an arbitrary epoch); the
        ``num_out``, ``output_limit`` and ``cost`` arguments are then ignored.
        """
        if alter:  # For printing at arbitrary epoch, with vCost only
            print(f"       * Epoch: {k}",
                  f"vCost: {vcost:.8f}")
            return None

        print(f"({k//num_out}/{output_limit}) > Epoch: {k}",
              f"cost: {cost:.8f}",
              f"vCost: {vcost:.8f}")

    def convergence_test(self, current_cost, past_cost, error_threshold, k):
        """Update the convergence counter and set ``self.EXIT`` when done.

        An epoch counts as "not improving" when the validation cost dropped
        by at most ``error_threshold`` (this includes the cost going up,
        i.e. validation performance degrading). After 10 such epochs in a
        row the final state is printed and ``self.EXIT`` is set to True.
        Any improving epoch resets the counter.
        """
        if past_cost - current_cost <= error_threshold:
            self.c += 1
            if self.c >= 10:
                self.log_current(k=k, num_out=0, output_limit=0, cost=0, vcost=current_cost, alter=True)
                print(f"\nEpoch {k} > vCost Converged with threshold {error_threshold}. Or performance degraded.")
                self.EXIT = True  # Also set in case of validation perf degradation (overfit)
        else:
            self.c = 0  # Only consecutive non-improving epochs count

    def single_step(self, Xi, yi, m, W, b, alpha, Lambda):
        """Perform a single gradient-descent step on one sample.

        ``W`` is updated in place (and also returned); ``b`` is returned
        as a new value.

        Returns
        -------
        tuple
            ``(W, b)`` after the update.
        """
        y_i = np.dot(Xi, W) + b
        res = yi - y_i

        # Descent direction: residual-weighted features minus the L2 term.
        dJ_dW = np.dot(res, Xi) - Lambda * W
        dJ_db = res.mean()

        W += dJ_dW * alpha / m
        b += dJ_db * alpha

        return W, b

    def fit(self, X, y,
            epochs=DEFAULT_EPOCHS,
            alpha=DEFAULT_ALPHA,
            Lambda=DEFAULT_LAMBDA,
            error_threshold=DEFAULT_ERROR_THRESHOLD,
            validation_size=DEFAULT_VALIDATION_SIZE,
            output_limit=10):
        """Fit the linear regression model to the given data.

        Parameters
        ----------
        X : array-like of shape (samples, features)
            Input data; converted with ``numpy.asarray``.

        y : array-like of shape (samples,)
            Target values; converted with ``numpy.asarray``.

        epochs : int, default=1000
            Number of complete iterations through X

        alpha : float, default=0.01
            Constant Learning Rate

        Lambda : float, default=0.0001
            Rate for l2 Regularization

        error_threshold : float, default=0.001
            Threshold for vCost convergence

        validation_size : float, default=0.2
            Fraction of data held out for validation, 0 <= vs < 1

        output_limit : int, default=10
            Number of iterations to show

        Returns
        -------
        W : numpy.ndarray
            The optimized weights.
        b : float
            The optimized intercept.

        Raises
        ------
        ValueError
            If ``output_limit`` is not greater than 0.

        Notes
        -----
        Calls ``numpy.set_printoptions(precision=4)``, which changes how
        arrays are printed for the whole process. Pressing Ctrl-C during
        training returns the parameters reached so far instead of raising.
        """
        if output_limit <= 0:
            raise ValueError("Output limit should be greater than 0")

        # Accept plain sequences as well as arrays.
        X = np.asarray(X)
        y = np.asarray(y)

        # Log every `num_out` epochs. Clamp to 1 so that epochs < output_limit
        # does not produce a zero divisor in the `//` and `%` below.
        num_out = max(1, epochs // output_limit)
        np.set_printoptions(precision=4)

        X, y, X_val, y_val = self.validation_split(X, y, validation_size)
        m, n = X.shape

        W = np.random.rand(n)
        b = np.random.rand()

        y_ = np.dot(X, W) + b
        y_val_ = np.dot(X_val, W) + b

        cost = self.compute_cost(y, y_, Lambda, W, m)
        past_cost = self.compute_cost(y_val, y_val_, Lambda, W, m)

        self.c = 0  # to count convergence for consecutive iterations
        self.EXIT = False  # Exit flag for convergence

        # Pre-bind the names the KeyboardInterrupt handler reads, so an
        # interrupt during the very first epoch cannot hit an unbound local.
        k = 0
        current_cost = past_cost

        self.log_current(0, num_out, output_limit, cost, past_cost)  # Initial Out

        try:
            for k in range(1, epochs + 1):
                # One SGD pass: one parameter update per training sample.
                for i in range(m):
                    W, b = self.single_step(X[i], y[i], m, W, b, alpha, Lambda)

                # Validation cost for this epoch; used for both logging and
                # the convergence check.
                y_val_ = np.dot(X_val, W) + b
                current_cost = self.compute_cost(y_val, y_val_, Lambda, W, m)

                if k % num_out == 0:
                    y_ = np.dot(X, W) + b
                    cost = self.compute_cost(y, y_, Lambda, W, m)
                    self.log_current(k, num_out, output_limit, cost, current_cost)

                self.convergence_test(current_cost, past_cost, error_threshold, k)

                if self.EXIT:
                    return (W, b)

                past_cost = current_cost

        except KeyboardInterrupt:
            # Ctrl-C: report where training stopped and return what we have.
            self.log_current(k=k, num_out=0, output_limit=0, cost=0, vcost=current_cost, alter=True)
            print(f"\nTerminated! Returned: Weights: {W}, Bias: {b}")
            return (W, b)

        return (W, b)

## Usage

In [3]:
# Sanity check on noise-free synthetic data: 1000 samples with 2 uniform
# random features, and a target that is an exact linear function of them
# (weights 5.55 and 11.22, intercept 50).
m = LinearRegression()
X = np.random.rand(1000,2)
y = 5.55*X[:,0] + 11.22*X[:,1] + 50
# fit returns the tuple (W, b); as the last expression of the cell it is displayed.
m.fit(X, y ,epochs= 1000, alpha = 0.2, error_threshold = 0.00001, output_limit=10)

(0/10) > Epoch: 0 cost: 3292.26205816 vCost: 3300.62779780
(1/10) > Epoch: 100 cost: 0.57985605 vCost: 0.56633012
(2/10) > Epoch: 200 cost: 0.02371749 vCost: 0.02311546
(3/10) > Epoch: 300 cost: 0.00118735 vCost: 0.00115551
       * Epoch: 349 vCost: 0.00033135

Epoch 349 > vCost Converged with threshold 1e-05. Or performance degraded.


(array([ 5.5224, 11.1695]), 50.030962700252616)

In [4]:
from sklearn.datasets import make_regression

In [5]:
# Generated regression problem: 1000 samples, 20 features, of which 19 are
# requested to be informative (n_informative=19).
X, y = make_regression(n_samples=1000,n_features=20, n_informative=19)
m = LinearRegression()
# Train for at most 100 epochs; validation_size is left at its default (0.2).
m.fit(X, y ,epochs= 100, alpha = 0.5, error_threshold = 0.01, output_limit=10)

(0/10) > Epoch: 0 cost: 57864.10626818 vCost: 54437.60538561
(1/10) > Epoch: 10 cost: 17.26988229 vCost: 17.59787839
(2/10) > Epoch: 20 cost: 0.01519120 vCost: 0.01551065
       * Epoch: 29 vCost: 0.00637328

Epoch 29 > vCost Converged with threshold 0.01. Or performance degraded.


(array([4.7264e+01, 3.8149e+01, 6.6247e+01, 1.0610e+01, 1.4063e+00,
        9.7339e+00, 9.4993e+01, 5.5637e+01, 7.6699e+01, 7.3682e+00,
        7.1376e+00, 8.0231e+01, 9.3040e+01, 1.0579e+01, 6.2135e+01,
        2.0753e-03, 7.3065e+01, 4.7029e+01, 7.9275e+01, 4.8127e+01]),
 -0.04081532634282112)

In [6]:
from sklearn.datasets import fetch_california_housing

In [7]:
# Load the California housing data as an (X, y) pair and keep only the
# first 1000 rows of each.
X,y = fetch_california_housing(return_X_y=True)
X = X[:1000]
y = y[:1000]

In [8]:
# Fit on the housing subset, holding out the trailing third of the rows for
# validation (validation_size=1/3; the split is positional, not shuffled).
# NOTE(review): features are passed in unscaled -- presumably the reason for
# the much smaller learning rate here; confirm.
m = LinearRegression()
m.fit(X, y ,epochs= 1000, alpha = 0.0001, error_threshold = 1/1000, validation_size=1/3 ,output_limit=10)

(0/10) > Epoch: 0 cost: 6243.75563907 vCost: 27799.18435143
(1/10) > Epoch: 100 cost: 21.91755759 vCost: 28.41120184
(2/10) > Epoch: 200 cost: 7.10794364 vCost: 10.60002698
(3/10) > Epoch: 300 cost: 3.19015635 vCost: 5.32289154
(4/10) > Epoch: 400 cost: 2.12917374 vCost: 3.60590534
(5/10) > Epoch: 500 cost: 1.82019433 vCost: 2.96388810
(6/10) > Epoch: 600 cost: 1.71095421 vCost: 2.67676470
(7/10) > Epoch: 700 cost: 1.65606817 vCost: 2.52062848
       * Epoch: 762 vCost: 2.45352610

Epoch 762 > vCost Converged with threshold 0.001. Or performance degraded.


(array([ 4.2867e-01, -9.2242e-03,  5.9767e-01,  1.2986e-01,  2.7606e-04,
         8.3591e-01,  5.0878e-01,  2.2545e-01]),
 3.7203317223511996)