In [45]:
import numpy as np
import pandas as pd
from sklearn import preprocessing, model_selection
from sklearn.metrics import mean_squared_error

In [46]:
names = ["Expenses per car mile (pence)", "Car miles per year (1000s)", "Percent of Double Deckers in fleet", "Percent of fleet on fuel oil", "Receipts per car mile (pence)"]

In [47]:
# Load the dataset
df = pd.read_excel('Book1.xlsx', header=None, names=names)

In [48]:
df.head()

Unnamed: 0,Expenses per car mile (pence),Car miles per year (1000s),Percent of Double Deckers in fleet,Percent of fleet on fuel oil,Receipts per car mile (pence)
0,19.76,6235,100.0,100.0,25.1
1,17.85,46230,43.67,84.53,19.23
2,19.96,7360,65.51,81.57,21.42
3,16.8,28715,45.16,93.33,18.11
4,18.2,21934,49.2,83.07,19.24


In [49]:
data = df.values

In [50]:
(m, n) = data.shape

In [51]:
X = data[:, :-1]

In [52]:
Y = np.reshape(data[:, -1], (m,1))

In [53]:
# scale the  data
X = preprocessing.scale(X)

In [54]:
# split the data into train and test data 
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=.3)

In [55]:
def hypothesis(X, theta):
    return np.exp(np.dot(X, theta))

In [56]:
def cost_function(X, Y, theta):
    return (1/(2*X.shape[0]))* np.sum(np.square(hypothesis(X, theta) - Y), axis=0)[0]

In [57]:
def batch_gradient_descent(X, Y, alpha=0.001, error=1e-9):
    (m,n) = X.shape
    
    X = np.hstack([np.ones((m,1)), X])
    
    theta = np.zeros((n+1,1))
    
    no_of_iter = 0
    
    while True:
        cost_prev = cost_function(X, Y, theta)
        theta -= (alpha/m)*np.dot(X.T, hypothesis(X, theta)**2 - hypothesis(X, theta)*Y)
        cost_curr = cost_function(X, Y, theta)
                
        if abs(cost_prev - cost_curr) < error:
            break
            
        no_of_iter += 1
        
    return (theta, no_of_iter)

    

In [58]:
def stochastic_gradient_descent(X, Y, alpha=0.001, error=1e-9):
    (m,n) = X.shape
    
    X = np.hstack([np.ones((m,1)), X])
    
    theta = np.zeros((n+1,1))
    
    no_of_iter = 0
    
    # shuffle the data
    np.random.shuffle(X)
    
    while True:
        cost_prev = cost_function(X, Y, theta)
        theta -= (alpha)*np.reshape(X[0, :]*(hypothesis(X[0, :], theta)**2 - Y[0, :]*hypothesis(X[0, :], theta)), (n+1,1))
        cost_curr = cost_function(X, Y, theta)
                
        if abs(cost_prev - cost_curr) < error:
            break
            
        no_of_iter += 1
        
    return (theta, no_of_iter)



In [59]:
(theta_batch, no_of_iter_batch) = batch_gradient_descent(X_train, Y_train)

In [60]:
theta_batch, no_of_iter_batch

(array([[2.97785471],
        [0.08755536],
        [0.01307634],
        [0.01880968],
        [0.0414489 ]]), 224)

In [61]:
(theta_stochastic, no_of_iter_stochastic) = stochastic_gradient_descent(X_train, Y_train)

In [62]:
theta_stochastic, no_of_iter_stochastic

(array([[ 2.07853676],
        [ 0.08871727],
        [ 0.22416967],
        [-0.13685565],
        [ 1.28695788]]), 84)

In [63]:
X_test_unbias = np.hstack([np.ones((X_test.shape[0], 1)), X_test])

In [64]:
# metrics for quality of predictions, using mean_squared_error
score_batch = mean_squared_error(hypothesis(X_test_unbias, theta_batch), Y_test)

In [65]:
score_batch

5.5008214783938705

In [66]:
#using R-square
r2_batch = metrics.r2_score(hypothesis(X_test_unbias, theta_batch), Y_test)

In [67]:
r2_batch

0.05843014410726577

In [68]:
score_stochastic = mean_squared_error(hypothesis(X_test_unbias, theta_stochastic), Y_test)

In [69]:
score_stochastic

214.60899510169176

In [70]:
#using R-square
r2_stochastic = metrics.r2_score(hypothesis(X_test_unbias, theta_stochastic), Y_test)

In [71]:
r2_stochastic

-3.0351352376893246