# Improved Implementation for Stochastic Linear Regression
With L2 regularization.\
With efficiency improvements.\
It is recommended to check the cost after convergence.

In [1]:
import numpy as np

## Implementation

In [2]:
class LinearRegression:
    """Linear regression with L2 regularization, trained one sample at a time.

    The class keeps no fitted state: ``fit`` returns the learned weights and
    intercept directly to the caller.
    """

    DEFAULT_EPOCHS = 1000
    DEFAULT_ALPHA = 0.01
    DEFAULT_LAMBDA = 0.0001

    def compute_cost(self, y, y_, Lambda, W, m):
        """Return the mean squared error plus the L2 penalty.

        Parameters
        ----------
        y : numpy.ndarray
            Target values.
        y_ : numpy.ndarray
            Predicted values.
        Lambda : float
            L2 regularization rate.
        W : numpy.ndarray
            Current weights.
        m : int
            Sample count used to scale the penalty term.

        Returns
        -------
        float
            ``mean((y - y_)**2) + Lambda * sum(W**2) / (2 * m)``.
        """
        return np.mean((y-y_)**2) + ((np.sum(W**2)) * Lambda/(2*m))

    def validation_split(self, X, y, validation_size):
        """Split the data into a leading training part and a trailing validation part.

        The rows are not shuffled: the first ``1 - validation_size`` fraction
        is returned as training data and the remainder as validation data.

        Returns
        -------
        tuple
            ``(X_train, y_train, X_val, y_val)``.
        """
        val = int(X.shape[0] * (1 - validation_size))
        return X[:val], y[:val], X[val:], y[val:]

    def log_current(self, k, num_out, output_limit, cost, vcost):
        """Print the progress counter, epoch, training cost and validation cost."""
        print(f"({k//num_out}/{output_limit}) > Epoch: {k}",
              f"Cost: {cost:.8f}",
              f"vCost: {vcost:.8f}")

    def convergence_test(self, current_cost, past_cost, error_threshold, k):
        """Report whether the cost improved by less than ``error_threshold``.

        The test also succeeds when the cost got worse (a negative
        improvement), which stops training once validation performance
        degrades. A message is printed when the test succeeds.

        Returns
        -------
        bool
            True when training should stop, False otherwise.
        """
        if past_cost - current_cost < error_threshold:
            print(f"\nEpoch: {k} vCost Converged with threshold: {error_threshold}. Returning W and b")
            print(f"Past: {past_cost}, Curr: {current_cost}")
            return True
        return False

    def single_step(self, Xi, yi, m, W, b, alpha, Lambda):
        """Perform one gradient step using a single sample.

        Parameters
        ----------
        Xi : numpy.ndarray
            Feature values of the sample.
        yi : float
            Target value of the sample.
        m : int
            Number of training samples; scales the weight step.
        W : numpy.ndarray
            Current weights. Updated in place by the ``+=`` below.
        b : float
            Current intercept.
        alpha : float
            Learning rate.
        Lambda : float
            L2 regularization rate.

        Returns
        -------
        tuple
            The updated ``(W, b)``.
        """
        prediction = np.dot(Xi, W) + b
        res = yi - prediction

        # Both terms point in the descent direction, so they are added below.
        dJ_dW = np.dot(res, Xi) - Lambda * W
        dJ_db = res.mean()

        # NOTE: the weight step is divided by m while the intercept step is
        # not. This asymmetry is kept so existing hyper-parameters behave
        # exactly as before.
        W += dJ_dW * alpha / m
        b += dJ_db * alpha

        return W, b

    def fit(self, X, y,
            epochs = DEFAULT_EPOCHS,
            alpha = DEFAULT_ALPHA,
            Lambda=DEFAULT_LAMBDA,
            error_threshold = 0.001,
            validation_size = 0.2,
            output_limit=10):
        """Fit the linear regression model to the given data.

        Training stops early when the validation cost improves by less than
        ``error_threshold`` between two consecutive epochs, or on
        ``KeyboardInterrupt``; the current parameters are returned either way.

        As a side effect this calls ``np.set_printoptions(precision=4)``,
        which changes NumPy's global print precision.

        Parameters
        ----------
        X : array-like
            Feature matrix with one row per sample (two-dimensional).
        y : array-like
            Target values, one per row of ``X``.
        epochs : int, default=1000
            Number of complete iterations through X.
        alpha : float, default=0.01
            Constant learning rate.
        Lambda : float, default=0.0001
            Rate for L2 regularization.
        error_threshold : float, default=0.001
            Minimum per-epoch improvement of the validation cost required
            to keep training.
        validation_size : float, default=0.2
            Fraction of the trailing rows held out for validation.
        output_limit : int, default=10
            Number of progress lines to show.

        Returns
        -------
        W : numpy.ndarray
            The optimized weights.
        b : float
            The optimized intercept.

        Raises
        ------
        ValueError
            If ``output_limit`` is not positive, or if the validation split
            leaves no training samples.
        """
        if output_limit<=0:
            raise ValueError("Output limit should be greater than 0")

        # Clamp to 1 so epochs < output_limit cannot cause a division by zero
        # in the modulo below or in log_current.
        num_out = max(1, epochs // output_limit)
        np.set_printoptions(precision=4)

        # Accept lists and other array-likes; arrays pass through uncopied.
        X = np.asarray(X)
        y = np.asarray(y)

        X, y, X_val, y_val = self.validation_split(X, y, validation_size)
        m, n = X.shape
        if m == 0:
            raise ValueError("No training samples left after the validation split")

        W = np.random.rand(n)
        b = np.random.rand()

        y_ = np.dot(X, W) + b
        y_val_ = np.dot(X_val, W) + b

        # The training sample count m scales the penalty for both costs.
        cost = self.compute_cost(y, y_, Lambda, W, m)
        past_cost = self.compute_cost(y_val, y_val_, Lambda, W, m)

        self.log_current(0, num_out, output_limit, cost, past_cost)  # Initial output

        try:
            for k in range(1, epochs+1):
                for i in range(m):
                    W, b = self.single_step(X[i], y[i], m, W, b, alpha, Lambda)

                # The validation cost is needed every epoch for the
                # convergence test, so compute it once and reuse it for logging.
                y_val_ = np.dot(X_val, W) + b
                current_cost = self.compute_cost(y_val, y_val_, Lambda, W, m)

                if k % num_out == 0:
                    y_ = np.dot(X, W) + b
                    cost = self.compute_cost(y, y_, Lambda, W, m)
                    self.log_current(k, num_out, output_limit, cost, current_cost)

                if self.convergence_test(current_cost, past_cost, error_threshold, k):
                    return (W, b)
                past_cost = current_cost

        except KeyboardInterrupt:
            print(f"\nTerminated! Returned: Weights: {W}, Bias: {b}")
            return (W, b)
        return (W, b)

## Usage

In [3]:
# Noise-free synthetic target: y = 5.55*x0 + 11.22*x1 + 50.
X = np.random.rand(1000, 2)
y = 5.55 * X[:, 0] + 11.22 * X[:, 1] + 50

# The cell's last expression displays the returned (weights, intercept).
m = LinearRegression()
m.fit(
    X,
    y,
    epochs=1000,
    alpha=0.1,
    error_threshold=0.0001,
    output_limit=10,
)

(0/10) > Epoch: 0 Cost: 3303.82611436 vCost: 3322.71967301
(1/10) > Epoch: 100 Cost: 2.19364647 vCost: 2.05647876
(2/10) > Epoch: 200 Cost: 0.39142207 vCost: 0.36909581
(3/10) > Epoch: 300 Cost: 0.07095134 vCost: 0.06732987
(4/10) > Epoch: 400 Cost: 0.01325814 vCost: 0.01266527

Epoch: 445 vCost Converged with threshold: 0.0001. Returning W and b
Past: 0.006175914768884979, Curr: 0.006076941587158946


(array([ 5.4037, 10.9918]), 50.18668912546416)

In [4]:
from sklearn.datasets import make_regression

In [5]:
# Random regression problem: 15 features, of which 5 are informative.
m = LinearRegression()
X, y = make_regression(
    n_samples=1000,
    n_features=15,
    n_informative=5,
)
m.fit(
    X,
    y,
    epochs=500,
    alpha=0.05,
    error_threshold=0.0001,
    output_limit=10,
)

(0/10) > Epoch: 0 Cost: 14897.01193591 vCost: 17379.55506474
(1/10) > Epoch: 50 Cost: 127.79093386 vCost: 155.93963417
(2/10) > Epoch: 100 Cost: 1.30657459 vCost: 1.57948688
(3/10) > Epoch: 150 Cost: 0.01895122 vCost: 0.02231728

Epoch: 187 vCost Converged with threshold: 0.0001. Returning W and b
Past: 0.002608850163827872, Curr: 0.0025159759434306853


(array([ 1.6014e+01,  1.8524e-03,  9.5916e+01,  2.4372e-03, -7.3535e-03,
        -2.8131e-04,  4.3107e+01,  2.5044e-04, -1.4360e-03, -7.7187e-03,
         6.4066e+01, -8.2397e-03,  2.8600e-03,  6.6828e-04,  1.2954e+01]),
 0.009881927105736972)

In [6]:
from sklearn.datasets import fetch_california_housing

In [7]:
# Keep only the first 100 rows of the features and of the targets.
X, y = (arr[:100] for arr in fetch_california_housing(return_X_y=True))

In [8]:
# Fit on the housing subset with a much smaller learning rate and more epochs
# than the earlier cells.
m = LinearRegression()
m.fit(
    X,
    y,
    epochs=10000,
    alpha=0.00001,
    error_threshold=0.0001,
    output_limit=10,
)

(0/10) > Epoch: 0 Cost: 487270.28261543 vCost: 892616.20691145
(1/10) > Epoch: 1000 Cost: 5.55564243 vCost: 14.85591705
(2/10) > Epoch: 2000 Cost: 1.50895727 vCost: 6.11432916
(3/10) > Epoch: 3000 Cost: 1.02802804 vCost: 4.10038092
(4/10) > Epoch: 4000 Cost: 1.05533599 vCost: 3.55100596
(5/10) > Epoch: 5000 Cost: 1.10649970 vCost: 3.36154732

Epoch: 5228 vCost Converged with threshold: 0.0001. Returning W and b
Past: 3.336832879809592, Curr: 3.3367329199867024


(array([ 4.0604e-01, -1.4808e-02,  1.6626e-01,  6.1842e-01, -6.0630e-05,
         9.0226e-01,  7.5197e-01,  2.6131e-01]),
 0.5599305684216496)