In [1]:
import pandas as pd
import numpy as np
import operator
import json

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt

In [2]:
# Read the admissions table from the notebook's working directory and preview
# its first five rows (the preview is this cell's displayed output).
data = pd.read_csv(filepath_or_buffer="admission_data.csv")
data.head(n=5)

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,242,317,103,2,2.5,2.0,8.15,0,0.65
1,334,319,108,3,3.0,3.5,8.54,1,0.71
2,4,322,110,3,3.5,2.5,8.67,1,0.8
3,45,326,113,5,4.5,4.0,9.4,1,0.91
4,232,319,106,3,3.5,2.5,8.33,1,0.74


In [3]:
# Predictor columns and the single target column used by the model.
feature_columns = ['GRE Score', 'TOEFL Score', 'University Rating', 'SOP', 'LOR', 'CGPA', 'Research']
target_columns = ['Chance of Admit']

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data[feature_columns], data[target_columns], test_size=0.2, random_state=0)

In [4]:
# Standardise every feature to zero mean / unit variance using statistics
# computed on the TRAINING split only, then apply the same shift and scale to
# the test split so no test information leaks into the preprocessing.

# Work on explicit copies: writing columns into the frames returned by
# train_test_split can make pandas emit SettingWithCopyWarning, since it may
# treat them as slices of `data`.
X_train = X_train.copy()
X_test = X_test.copy()

for col in X_train:
    mean = X_train[col].mean()
    std = X_train[col].std()
    if not std > 0:
        # Zero (or undefined) spread: dividing would fill the column with
        # NaN/inf. Leave such columns untouched -- this also keeps a constant
        # 'Ones' bias column intact if this cell is re-run after it was added.
        continue
    X_train[col] = (X_train[col] - mean) / std
    X_test[col] = (X_test[col] - mean) / std


In [5]:
# Append a constant column of ones to both splits so the model's last
# coefficient acts as the intercept term.
for frame in (X_train, X_test):
    frame['Ones'] = 1

In [6]:
class LinearRegression:
    """Linear regression fitted with full-batch gradient descent.

    Three loss functions are supported, selected by name:
    'mean_squared_error', 'mean_absolute_error' and
    'mean_absolute_percentage_error'. No intercept is added automatically;
    include a constant column in X if one is wanted.
    """

    # Coefficients as a row vector of shape (1, n_features); None until train().
    theta = None

    # Loss names accepted by compute_error() and compute_gradient().
    SUPPORTED_ERRORS = (
        'mean_squared_error',
        'mean_absolute_error',
        'mean_absolute_percentage_error',
    )

    def _check_error_function(self, error_function):
        """Raise ValueError when error_function is not a supported loss name."""
        if error_function not in self.SUPPORTED_ERRORS:
            raise ValueError(
                'Unknown error_function {0!r}; expected one of {1}'.format(
                    error_function, ', '.join(self.SUPPORTED_ERRORS)))

    @staticmethod
    def _as_column(values):
        """Return values as a float column vector of shape (m, 1)."""
        return np.asarray(values, dtype=float).reshape(-1, 1)

    def predict(self, X):
        """Return predictions for X as an array of shape (m, 1).

        X may be a DataFrame or a 2-D array with one column per coefficient.
        Raises AttributeError if the model has not been trained yet.
        """
        if self.theta is None:
            raise AttributeError('theta is not set; call train() before predict()')
        return np.dot(np.asarray(X, dtype=float), self.theta.T)

    def compute_error(self, y_pred, y_actual, error_function):
        """Return the chosen loss averaged over len(y_actual) samples.

        Both inputs are flattened first, so a (m, 1) prediction can safely be
        compared with a (m,) target without broadcasting into a (m, m) matrix.
        For 'mean_absolute_percentage_error' a zero in y_actual yields inf/nan.
        Raises ValueError for an unsupported error_function.
        """
        self._check_error_function(error_function)
        m = len(y_actual)
        actual = np.asarray(y_actual, dtype=float).ravel()
        residual = np.asarray(y_pred, dtype=float).ravel() - actual
        if error_function == 'mean_squared_error':
            total = np.sum(residual * residual)
        elif error_function == 'mean_absolute_error':
            total = np.sum(np.absolute(residual))
        else:  # mean_absolute_percentage_error
            total = np.sum(np.absolute(residual / actual))
        return (1.0 / float(m)) * total

    def compute_gradient(self, X, h, Y, error_function):
        """Return the (sub)gradient of the loss w.r.t. theta, shape (n_features,).

        X is the (m, n_features) design matrix, h the current predictions and
        Y the targets; h and Y are coerced to column vectors.
        Raises ValueError for an unsupported error_function.
        """
        self._check_error_function(error_function)
        m = len(Y)
        X = np.asarray(X, dtype=float)
        targets = self._as_column(Y)
        residual = self._as_column(h) - targets
        if error_function == 'mean_squared_error':
            per_sample = 2.0 * residual
        elif error_function == 'mean_absolute_error':
            # np.sign is the subgradient of |r|; unlike r/|r| it gives 0 (not
            # NaN) when a prediction matches its target exactly.
            per_sample = np.sign(residual)
        else:  # mean_absolute_percentage_error
            # d/dh |(h - Y)/Y| = sign(h - Y)/|Y|, again without the 0/0 case.
            per_sample = np.sign(residual) / np.absolute(targets)
        return np.sum(X * per_sample, axis=0) / float(m)

    def train(self, X_train, y_train, alpha, max_epochs, error_function,
              random_state=None):
        """Fit theta by running max_epochs steps of batch gradient descent.

        alpha is the learning rate. theta starts from uniform random values in
        [0, 1); pass random_state (an int seed) for a reproducible start,
        otherwise numpy's global random state is used.
        """
        n_features = X_train.shape[1]
        if random_state is None:
            self.theta = np.random.rand(1, n_features)
        else:
            self.theta = np.random.RandomState(random_state).rand(1, n_features)

        # The data does not change between epochs, so convert it only once.
        X = np.asarray(X_train, dtype=float)
        Y = self._as_column(y_train)
        for _ in range(max_epochs):
            h = np.dot(X, self.theta.T)
            # The (n_features,) gradient broadcasts against the (1, n_features) theta.
            self.theta = self.theta - alpha * self.compute_gradient(X, h, Y, error_function)

In [7]:
def _fit_and_report(banner, error_function, alpha, max_epochs):
    """Train one model on the notebook's split and print its train/test error.

    banner is the header line printed above the results; error_function,
    alpha and max_epochs are forwarded to LinearRegression.train(), and the
    same error_function is used to score both splits.
    Returns (model, train_predictions, test_predictions, train_error, test_error).
    """
    model = LinearRegression()
    model.train(X_train, y_train, alpha, max_epochs, error_function)
    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)
    train_error = model.compute_error(train_pred, y_train.values, error_function)
    test_error = model.compute_error(test_pred, y_test.values, error_function)

    # print('...') with a single string argument behaves the same on Python 2
    # and Python 3; print('') stands in for the bare Python 2 `print`.
    print(banner)
    print('')
    print('Train set error : ' + str(train_error))
    print('Test set error : ' + str(test_error))
    print('')
    print('****************************************************')
    print('')
    return model, train_pred, test_pred, train_error, test_error


# Each loss gets its own learning rate and epoch budget.
ln_mse, y_train_pred_mse, y_test_pred_mse, train_mse, test_mse = _fit_and_report(
    '*****************MEAN SQUARED ERROR*****************',
    'mean_squared_error', 0.1, 1000)

ln_mae, y_train_pred_mae, y_test_pred_mae, train_mae, test_mae = _fit_and_report(
    '*****************MEAN ABSOLUTE ERROR*****************',
    'mean_absolute_error', 0.001, 3000)

ln_mape, y_train_pred_mape, y_test_pred_mape, train_mape, test_mape = _fit_and_report(
    '*****************MEAN ABSOLUTE PERCENTAGE ERROR*****************',
    'mean_absolute_percentage_error', 0.01, 1000)


*****************MEAN SQUARED ERROR*****************

Train set error : 0.0037527355348514232
Test set error : 0.003245019579682091

****************************************************

*****************MEAN ABSOLUTE ERROR*****************

Train set error : 0.042241032609651154
Test set error : 0.04044349269203792

****************************************************

*****************MEAN ABSOLUTE PERCENTAGE ERROR*****************

Train set error : 0.07061836543195336
Test set error : 0.06240717928287569

****************************************************

