### Import dependencies

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import zscore
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from numpy.random import randn
from scipy.stats import pearsonr
from numpy import corrcoef

from sklearn.metrics import r2_score

from sklearn.datasets import make_regression
from matplotlib import pyplot

### Function to simulate dataset for testing algorithms

In [2]:
def generate_dataset(n_samples, n_features, effective_rank, noise, random_state=6, coef=True):
    '''
    Simulate a regression dataset with sklearn's make_regression.

    n_samples, n_features, effective_rank, noise and random_state are
    forwarded unchanged to make_regression.

    coef --- when truthy, the coefficients of the generating linear model
             are returned as the third element; otherwise that element is None.

    Returns (X, y, coef):
        X    --- DataFrame with columns named col_1 ... col_<n_features>
        y    --- Series holding the target values
        coef --- generating coefficients, or None when coef is falsy
    '''
    generated = make_regression(n_samples=n_samples,
                                n_features=n_features,
                                effective_rank=effective_rank,
                                noise=noise,
                                random_state=random_state,
                                coef=coef)
    # add later:
    # bias, n_informative

    # make_regression yields (X, y, coef) only when coef is truthy and (X, y)
    # otherwise, so a fixed three-way unpack would fail for coef=False.
    if coef:
        features, target, true_coef = generated
    else:
        features, target = generated
        true_coef = None

    X = pd.DataFrame(features)
    y = pd.Series(target)
    X.columns = [f'col_{i+1}' for i in range(len(X.columns))]

    return X, y, true_coef


# noise          --- The standard deviation of the Gaussian noise applied to the output.
# effective_rank --- The approximate number of singular vectors required to explain most of
#                    the input data by linear combinations. Using this kind of singular spectrum
#                    in the input allows the generator to reproduce the correlations often
#                    observed in practice.

### Dataset generation and Normalization

In [3]:
# Simulate 5000 samples with 10 features (effective_rank=1, noise std 0.3).
X, y, coef_data = generate_dataset(n_samples=5000, n_features=10, effective_rank=1, noise=0.3)

# Standardise every feature column and wrap the result back into a labelled frame.
# NOTE(review): the scaler is fit on the full dataset, i.e. before the train/test
# split below, so the scaling statistics include the test rows -- confirm this is intended.
scaler = StandardScaler()
X_norm = pd.DataFrame(
    scaler.fit_transform(X),
    columns=[f'col_{i+1}' for i in range(len(X.columns))],
)
X_norm.head()

Unnamed: 0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10
0,-1.350628,-0.769798,-1.174399,-0.292397,0.374329,-1.351269,1.183567,0.361889,1.528273,-0.964404
1,1.005382,1.95373,0.043104,1.297864,-0.334015,0.706164,-2.325961,0.042009,-0.523241,1.47828
2,-0.886513,-1.142556,-0.441451,0.449528,0.750047,-0.906594,-0.543024,0.752435,0.048777,-1.303672
3,0.182814,0.59604,0.922319,0.120746,-0.028927,0.345319,0.539579,-0.481327,-0.701598,0.542308
4,1.710477,-0.188596,0.542471,0.307411,-2.159518,0.903103,0.570584,-0.72315,-0.675225,-0.812805


### Train on Scikit-learn algorithms

In [4]:
# Hold out half of the normalised samples for testing; the seed is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_norm,
    y,
    test_size=0.5,
    random_state=6,
)

In [5]:
# Show (rows, columns) of the train and test partitions.
X_train.shape, X_test.shape

((2500, 10), (2500, 10))

In [6]:
# Baseline model: scikit-learn Ridge with penalty strength alpha=1.
ridge = Ridge(alpha=1)

# 5-fold cross-validation on the training half only.
val_scores = cross_val_score(ridge, X_train, y_train, cv=5)

# Fit on the whole training half, then score on the held-out half.
ridge.fit(X_train, y_train)
test_score = ridge.score(X_test, y_test)

print("----- Ridge Regression -----")
print(f'Cross validation scores : {val_scores}')
print(f'Test score : {test_score}')

----- Ridge Regression -----
Cross validation scores : [0.95491717 0.95673209 0.95641968 0.95123234 0.95783682]
Test score : 0.9537393206228691


### Helper functions

In [7]:
def normalize(col):
    '''
    Standardise a column to mean zero and std. deviation equal to 1 (z-score).

    The result is centred on 0 and is NOT confined to the range 0..1.

    col --- any object exposing .mean() and .std() and iterable over its
            values (e.g. a pandas Series or a NumPy array). The standard
            deviation is whatever col.std() returns, so its ddof convention
            follows the container (pandas defaults to ddof=1, NumPy to ddof=0).

    Returns a list with one standardised value per element of col.
    A constant column (std. deviation of exactly 0) maps to all zeros
    instead of dividing by zero.
    '''
    mean = col.mean()
    std_dev = col.std()

    if std_dev == 0:
        # every value equals the mean, so each deviation is 0; avoid 0/0 -> NaN
        return [0.0 for _ in col]

    return [(x - mean) / std_dev for x in col]

def score(y_true, y_pred):
    '''
    Return r2_score(y_true, y_pred) for the given targets and predictions.
    '''
    return r2_score(y_true, y_pred)

def predict(feat, coef):
    '''
    Make prediction for a row (instance)

    feat --- 1-D feature values of one instance (pandas Series, NumPy array
             or list); values are always read by position, never by label.
    coef --- sequence where coef[0] is the bias and coef[i+1] is the weight
             of the i-th feature. Entries beyond len(feat) + 1 are ignored.

    Returns coef[0] + sum_i coef[i+1] * feat[i].
    Raises IndexError when coef holds fewer than len(feat) + 1 entries.
    '''
    # Convert to a plain array so integer positions are never interpreted as
    # index labels (a Series with string or non-default integer labels would
    # otherwise be looked up by label, or rely on a deprecated fallback).
    values = np.asarray(feat, dtype=float)
    n_features = values.shape[0]

    weights = np.asarray(coef[1:n_features + 1], dtype=float)
    if weights.shape[0] != n_features:
        raise IndexError(
            f'coef needs {n_features + 1} entries (bias + one per feature), got {len(coef)}'
        )

    return coef[0] + values @ weights

### SGD-based algorithms

In [20]:
def sgd_ridge(train_X, label, lr, n_epoch, alpha):
    '''
    Fit a ridge-penalised linear model with per-sample gradient steps.

    train_X --- 2-D features (DataFrame or array), one row per sample
    label   --- targets, paired with the rows of train_X by position
    lr      --- step size applied to every update
    n_epoch --- number of full passes over the rows
    alpha   --- penalty strength; the term alpha * weight is added on every
                per-sample step (the bias is not penalised)

    Rows are visited in their stored order on every epoch.
    Returns a list [bias, w_1, ..., w_n_features].
    '''
    # Plain arrays make all access positional (independent of index/column
    # labels) and avoid building a Series object for every row.
    features = np.asarray(train_X, dtype=float)
    targets = np.asarray(label, dtype=float)

    bias = 0.0
    weights = np.zeros(features.shape[1])

    for _ in range(n_epoch):
        for row, target in zip(features, targets):
            # error of the current model on this single sample
            loss = bias + row @ weights - target

            bias -= lr * loss    # update bias coefficient
            # right-hand side is evaluated with the pre-update weights
            weights -= lr * (loss * row + alpha * weights)    # update features' coefficients

    return [float(bias), *weights.tolist()]

### BGD-based algorithms

In [21]:
def get_derivative_bias(label, train_X, coef):
    '''
    Mean prediction error over all rows, used as the derivative w.r.t. the bias.

    label   --- targets, paired with the rows of train_X by position
    train_X --- 2-D features (DataFrame or array), one row per sample
    coef    --- [bias, w_1, ..., w_n_features]

    Returns sum(prediction - target) / number of rows as a float.
    Raises ZeroDivisionError when train_X has no rows, and ValueError when
    label and train_X differ in length.
    '''
    # Plain arrays: positional access only, and one matrix-vector product
    # replaces a Python-level loop over rows.
    features = np.asarray(train_X, dtype=float)
    targets = np.asarray(label, dtype=float)
    params = np.asarray(coef, dtype=float)

    n_samples, n_features = features.shape
    residuals = params[0] + features @ params[1:n_features + 1] - targets

    # float(...) keeps the division in Python so an empty input still raises
    return float(residuals.sum()) / n_samples


def get_derivative_coef(label, train_X, coef, alpha, j):
    '''
    Derivative of the ridge objective w.r.t. the weight of the j-th feature.

    label   --- targets, paired with the rows of train_X by position
    train_X --- 2-D features (DataFrame or array), one row per sample
    coef    --- [bias, w_1, ..., w_n_features]
    alpha   --- penalty strength
    j       --- 0-based position of the feature column (its weight is coef[j+1])

    Returns (sum(error * feature_j) + alpha * coef[j+1]) / number of rows;
    the penalty is added once to the summed data term before dividing.
    Raises ZeroDivisionError when train_X has no rows, and ValueError when
    label and train_X differ in length.
    '''
    # Plain arrays so that column j is always selected by position,
    # whatever the column or index labels of a DataFrame input are.
    features = np.asarray(train_X, dtype=float)
    targets = np.asarray(label, dtype=float)
    params = np.asarray(coef, dtype=float)

    n_samples, n_features = features.shape
    residuals = params[0] + features @ params[1:n_features + 1] - targets

    gradient = float(residuals @ features[:, j] + alpha * params[j + 1])
    return gradient / n_samples


def bgd_ridge(train_X, label, lr, n_epoch, alpha):
    '''
    Fit a ridge-penalised linear model from full-dataset derivatives.

    train_X --- 2-D features, one row per sample
    label   --- targets, paired with the rows of train_X by position
    lr      --- step size applied to every update
    n_epoch --- number of epochs
    alpha   --- penalty strength passed to get_derivative_coef

    Within an epoch each coefficient is written back as soon as its
    derivative is computed, so later derivatives in the same epoch already
    see the earlier updates (sequential, not simultaneous, updates).

    Returns a list [bias, w_1, ..., w_n_features].
    '''
    coef = [0.0] * (train_X.shape[1] + 1)

    for _ in range(n_epoch):

        # bias first: derivative at the current coefficients, then step
        coef[0] -= lr * get_derivative_bias(label, train_X, coef)

        # then each feature weight in turn, using the already-updated coefficients
        for j in range(train_X.shape[1]):
            coef[j + 1] -= lr * get_derivative_coef(label, train_X, coef, alpha, j)

    return coef

### Train and evaluate algorithms

#### Train SGD-based algorithms

In [22]:
# Train ridge reg - SGD (step size 0.01, 40 epochs, penalty strength 1)
coef_sgd_ridge = sgd_ridge(X_train, y_train, lr=0.01, n_epoch=40, alpha=1)

In [23]:
# Evaluate ridge reg - SGD: predict every held-out row, then score against y_test
y_preds_ridge = [predict(feat, coef_sgd_ridge) for _, feat in X_test.iterrows()]

score(y_test, y_preds_ridge)

0.7089951966689771

#### Train BGD-based algorithms

In [24]:
# Train ridge reg - BGD (step size 0.02, 40 epochs, penalty strength 1)
coef_bgd_ridge = bgd_ridge(X_train, y_train, lr=0.02, n_epoch=40, alpha=1)

In [25]:
# Evaluate ridge reg - BGD: predict every held-out row, then score against y_test
y_preds_ridge_bgd = [predict(feat, coef_bgd_ridge) for _, feat in X_test.iterrows()]

score(y_test, y_preds_ridge_bgd)

0.7794602857605928