# Setup

In [1]:
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize

import numpy as np
import sys

sys.path.append('../')

In [2]:
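# TODO: use this packaged implementation once it is complete; for now the
# SimpleLinearRegression class defined later in this notebook rebinds the name.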
from splearn.linear_model import SimpleLinearRegression

# Dataset

In [3]:
# Load the diabetes regression data, rescale the features, and hold out 20%
# of the samples for testing (fixed seed so the split is reproducible).
# NOTE(review): sklearn's `normalize` rescales each row (sample), not each
# column (feature) -- confirm that per-sample scaling is what is intended.
diabetes_dataset = datasets.load_diabetes()
X = normalize(diabetes_dataset.data)
y = diabetes_dataset.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [4]:
# Inspect the normalized feature matrix (the repr abbreviates large arrays with "...").
X

array([[ 0.32100597,  0.42726811,  0.52014127, ..., -0.02185454,
         0.16784162, -0.14876892],
       [-0.01166166, -0.27661579, -0.31895198, ..., -0.24471534,
        -0.4233959 , -0.57132977],
       [ 0.6574065 ,  0.39059633,  0.34258958, ..., -0.0199788 ,
         0.02207134, -0.1998475 ],
       ...,
       [ 0.42500219,  0.51642207, -0.1620822 , ..., -0.11289848,
        -0.47769422,  0.15784799],
       [-0.4486945 , -0.44049627,  0.38544135, ...,  0.26207406,
         0.43937866, -0.25586467],
       [-0.19283439, -0.18931106, -0.30969841, ..., -0.16747894,
        -0.01789509,  0.01299519]])

# scikit-learn implementation

In [5]:
# Reference model: ordinary least squares from scikit-learn.
# The `normalize` keyword was removed from LinearRegression in scikit-learn 1.2
# (passing it raises TypeError), so it is no longer given here; the features
# are already rescaled when the dataset is prepared, and `normalize=False`
# was a no-op anyway.
sklearn_clf = LinearRegression(fit_intercept=True, copy_X=True)
sklearn_clf.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [6]:
# Report goodness of fit and a sample of the learned parameters for the
# scikit-learn baseline; the custom implementation is compared against these.
for label, value in (
    ("Train R^2:", sklearn_clf.score(X_train, y_train)),
    ("Test R^2:", sklearn_clf.score(X_test, y_test)),
    ("First 3 Coefficients:", sklearn_clf.coef_[:3]),
    ("Last 3 Coefficients:", sklearn_clf.coef_[-3:]),
    ("Intercept:", sklearn_clf.intercept_),
):
    print(label, value)

Train R^2: 0.5475828671286389
Test R^2: 0.29393257447748
First 3 Coefficients: [ -2.66124073 -30.93607012  84.31072006]
Last 3 Coefficients: [39.62071238 82.56911657  9.7183491 ]
Intercept: 154.6599304907271


# Simple Python implementation

In [7]:
# Check the training split's dimensions, (n_samples, n_features), before fitting.
X_train.shape

(353, 10)

In [8]:
class SimpleLinearRegression():
    """Ordinary least-squares linear regression with an intercept term.

    After ``fit`` the learned parameters are exposed as:

    * ``coef_``      -- one weight per input feature.
    * ``intercept_`` -- the bias term (weight of the appended ones column).
    """

    def fit(self, X, y, method='closed', learning_rate=2e-3, n_iter=100000,
            random_state=None, verbose=True):
        """Estimate the weights that minimise the residual sum of squares.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Design matrix; a column of ones is appended internally.
        y : ndarray of shape (n_samples,)
            Target values.
        method : {'closed', 'gd'}, default='closed'
            ``'closed'`` solves the least-squares problem directly;
            ``'gd'`` runs full-batch gradient descent from a random start.
        learning_rate : float, default=2e-3
            Step size for ``method='gd'``.
        n_iter : int, default=100000
            Number of gradient steps for ``method='gd'``.
        random_state : int or None, default=None
            Seed for the random initial weights of ``method='gd'``. ``None``
            draws from NumPy's global random state.
        verbose : bool, default=True
            When true, ``method='gd'`` prints the objective and gradient norm
            ten times over the course of the run.

        Returns
        -------
        self : SimpleLinearRegression
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported solvers.
        """
        # Fail fast with a clear message instead of an UnboundLocalError later.
        if method not in ('closed', 'gd'):
            raise ValueError(
                "Unknown method %r; expected 'closed' or 'gd'." % (method,)
            )

        n, p = X.shape
        # Append a column of ones so the last weight plays the role of the intercept.
        X = np.concatenate([X, np.ones((n, 1))], axis=1)

        if method == 'closed':
            # Solve the least-squares problem without forming inv(X^T X):
            # this is better conditioned and still returns a (minimum-norm)
            # solution when features are collinear.
            beta = np.linalg.lstsq(X, y, rcond=None)[0]

        else:  # method == 'gd'
            if random_state is None:
                rng = np.random
            else:
                rng = np.random.RandomState(random_state)
            beta = rng.uniform(low=-1.0, high=1.0, size=(p + 1, ))

            # These products do not depend on beta; compute them once.
            Xty = X.T.dot(y)
            XtX = X.T.dot(X)
            log_every = max(n_iter // 10, 1)

            for i in range(n_iter):
                # Gradient of ||y - X beta||^2 with respect to beta.
                gradient = -2 * Xty + 2 * XtX.dot(beta)
                if verbose and i % log_every == 0:
                    residual = y - X.dot(beta)
                    print("Iter %i: Obj: %.5f; Norm of gradient: %.5f" % (i, residual.T.dot(residual), np.linalg.norm(gradient)))
                beta = beta - learning_rate * gradient

        # TODO: sgd / mini-batch

        self.coef_ = beta[:-1]
        self.intercept_ = beta[-1]
        return self

    def score(self, X, y):
        """Return the coefficient of determination R^2 of the fit on (X, y).

        Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the residual
        sum of squares of the predictions and ``SS_tot`` is the sum of squared
        deviations of ``y`` from its mean.
        """
        y_hat = X.dot(self.coef_) + self.intercept_
        residual = y - y_hat
        centered = y - np.mean(y)

        ss_res = residual.T.dot(residual)
        ss_tot = centered.T.dot(centered)

        return 1 - ss_res / ss_tot
        

In [9]:
# Fit the custom model with the closed-form solver and report the same
# metrics that were printed for the scikit-learn baseline.
print('=====Closed Form=====')
my_clf = SimpleLinearRegression()
my_clf.fit(X_train, y_train, method='closed')
for label, value in (
    ("Train R^2:", my_clf.score(X_train, y_train)),
    ("Test R^2:", my_clf.score(X_test, y_test)),
    ("First 3 Coefficients:", my_clf.coef_[:3]),
    ("Last 3 Coefficients:", my_clf.coef_[-3:]),
    ("Intercept:", my_clf.intercept_),
):
    print(label, value)

=====Closed Form=====
Train R^2: 0.5475828671286389
Test R^2: 0.29393257447748
First 3 Coefficients: [ -2.66124073 -30.93607012  84.31072006]
Last 3 Coefficients: [39.62071238 82.56911657  9.7183491 ]
Intercept: 154.65993049072708


In [10]:
# Fit the custom model with gradient descent and report the same metrics
# that were printed for the scikit-learn baseline.
print('=====Gradient Descent=====')
my_clf = SimpleLinearRegression()
my_clf.fit(X_train, y_train, method='gd')
for label, value in (
    ("Train R^2:", my_clf.score(X_train, y_train)),
    ("Test R^2:", my_clf.score(X_test, y_test)),
    ("First 3 Coefficients:", my_clf.coef_[:3]),
    ("Last 3 Coefficients:", my_clf.coef_[-3:]),
    ("Intercept:", my_clf.intercept_),
):
    print(label, value)

=====Gradient Descent=====
Iter 0: Obj: 10313302.73339; Norm of gradient: 108773.22398
Iter 10000: Obj: 979092.32474; Norm of gradient: 0.00027
Iter 20000: Obj: 979092.32474; Norm of gradient: 0.00000
Iter 30000: Obj: 979092.32474; Norm of gradient: 0.00000
Iter 40000: Obj: 979092.32474; Norm of gradient: 0.00000
Iter 50000: Obj: 979092.32474; Norm of gradient: 0.00000
Iter 60000: Obj: 979092.32474; Norm of gradient: 0.00000
Iter 70000: Obj: 979092.32474; Norm of gradient: 0.00000
Iter 80000: Obj: 979092.32474; Norm of gradient: 0.00000
Iter 90000: Obj: 979092.32474; Norm of gradient: 0.00000
Train R^2: 0.5475828671286389
Test R^2: 0.2939325744774798
First 3 Coefficients: [ -2.66124073 -30.93607012  84.31072006]
Last 3 Coefficients: [39.62071238 82.56911657  9.7183491 ]
Intercept: 154.65993049072713
