In [31]:
import numpy as np
import pandas as pd
from sklearn import model_selection, preprocessing, linear_model

In [6]:
df = pd.read_csv('/Users/spodeti/Downloads/airfoil_self_noise.dat', sep='\t', header=None)

In [9]:
data = df.values

In [14]:
X = data[:, :-1]

In [16]:
(m, n) = X.shape

In [19]:
Y = np.reshape(data[:, -1], (m,1))

In [23]:
# scaling
X_scaled = preprocessing.scale(X)

In [34]:
# split data into train, test
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X_scaled, Y, test_size=.3)

In [26]:
def cost_function(X, Y, theta):
    return (1/(2*X.shape[0]))* np.sum(np.square(np.dot(X, theta) - Y), axis=0)[0]

In [43]:
def batch_gradient_descent(X, Y, alpha=0.01, error=1e-9):
    (m,n) = X.shape
    
    X = np.hstack([np.ones((m,1)), X])
    
    theta = np.zeros((n+1,1))
    
    no_of_iter = 0
    
    while True:
        cost_prev = cost_function(X, Y, theta)
        theta -= (alpha/m)*np.dot(X.T, np.dot(X,theta)-Y)
        cost_curr = cost_function(X, Y, theta)
        
        if abs(cost_prev-cost_curr)<error:
            break
            
        no_of_iter += 1
        
    return (theta, no_of_iter)

In [80]:
def stochastic_gradient_descent(X, Y, alpha=0.01, error=1e-9):
    (m,n) = X.shape
    
    X = np.hstack([np.ones((m,1)), X])
    
    theta = np.zeros((n+1,1))
    
    no_of_iter = 0
    
    np.random.shuffle(X)
    
    while True:
        cost_prev = cost_function(X, Y, theta)
        theta -= (alpha)*np.reshape(X[0, :], (1, n+1)).T *(np.dot(X[0, :], theta)-Y)[0]
        cost_curr = cost_function(X, Y, theta)
        
        if abs(cost_prev-cost_curr)<error:
            break
            
        no_of_iter += 1
        
    return (theta, no_of_iter)

In [33]:
lgr = linear_model.LinearRegression()

In [35]:
lgr.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [37]:
lgr.intercept_

array([124.83690631])

In [38]:
lgr.coef_

array([[-4.09942068, -2.32463967, -3.46476775,  1.57074524, -2.09306523]])

In [78]:
(theta_batch, no_of_iter_batch) = batch_gradient_descent(X_train, Y_train)

In [81]:
(theta_stochastic, no_of_iter_stochastic) = stochastic_gradient_descent(X_train, Y_train)

In [82]:
no_of_iter_batch

3715

In [83]:
no_of_iter_stochastic

375

In [84]:
theta_batch

array([[124.83692101],
       [ -4.09923522],
       [ -2.32327066],
       [ -3.46420773],
       [  1.57061418],
       [ -2.09409888]])

In [85]:
theta_stochastic

array([[ 19.42443606],
       [-15.84871774],
       [ 18.11617476],
       [ -7.25967981],
       [  5.78861813],
       [ 37.95004547]])