# Importing Libraries

In [246]:
import pandas as pd
import numpy as np
import math
from sklearn.cross_validation import cross_val_score, train_test_split

# Reading Data

In [247]:
data = pd.read_excel('Folds5x2_pp.xlsx')

In [248]:
data.columns

Index(['AT', 'V', 'AP', 'RH', 'PE'], dtype='object')

In [249]:
data.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [250]:
data.columns = ['AT', 'V', 'AP', 'RH', 'Target']

# Normalization Test and Train Data

In [267]:
X_train, X_test, y_train, y_test = train_test_split(data.iloc[:,0:4], data.iloc[:,4], test_size=0.2)
X_train_min = X_train.min()
X_train_range = X_train.max() - X_train.min() 
X_train_std = (X_train - X_train_min) / X_train_range
X_test_std = (X_test - X_train_min) / X_train_range
y_train_min = y_train.min()
y_train_range = y_train.max() - y_train.min()
y_train_std = (y_train - y_train_min) / y_train_range
y_test_std = (y_test - y_train_min) / y_train_range

In [268]:
data_scaled = data.iloc[:,0:4]/data.iloc[:,0:4].std() - data.iloc[:,0:4].mean()/data.iloc[:,0:4].std()

In [269]:
data_scaled['Target'] = data.iloc[:,4]

In [270]:
data_scaled.head()

Unnamed: 0,AT,V,AP,RH,Target
0,-0.629486,-0.987245,1.820393,-0.009519,463.26
1,0.74187,0.68101,1.141803,-0.97457,444.37
2,-1.951195,-1.172956,-0.185068,1.289772,488.56
3,0.162197,0.237191,-0.508366,0.228148,446.48
4,-1.185007,-1.32247,-0.678435,1.596616,473.9


# RMSE Function- For calculating Error betwen predicted and actual values later

In [271]:
def root_mse(y_pred, y_test):
    sse = (y_test - y_pred) ** 2
    mse = sse.mean()
    return np.sqrt(mse)

# Cost Function

In [272]:
#Activation Function MSE
def activation_function(Target, features, coefficients):
    return ((Target - np.dot(features, coefficients))**2).mean()

# Predictor Function

In [273]:
# Predictor function
def predictor(features, coefficients):
    return np.dot(features, coefficients)

# Defining Linear Regression Function

In [274]:
def linear_regressor(features, dep_variable, alpha = 0.05, iterations = 100):
    intercept = np.ones((features.shape[0],1))
    features = np.column_stack((intercept, features))
    coeff = np.zeros(features.shape[1])
    for i in range(iterations):
        coeff[0] = coeff[0] + 2*(dep_variable-np.dot(features, coeff)).mean()*alpha
        coeff[1] = coeff[1] + 2*((dep_variable-np.dot(features, coeff))*features[:,1]).mean()*alpha
        coeff[2] = coeff[2] + 2*((dep_variable-np.dot(features, coeff))*features[:,2]).mean()*alpha
        coeff[3] = coeff[3] + 2*((dep_variable-np.dot(features, coeff))*features[:,3]).mean()*alpha
        coeff[4] = coeff[4] + 2*((dep_variable-np.dot(features, coeff))*features[:,4]).mean()*alpha
    return coeff
                           
                           
    

In [275]:
result = linear_regressor(X_train_std, y_train_std, alpha = 0.01, iterations = 100)

# Coefficients of linear function

In [276]:
result

array([0.22920439, 0.02040835, 0.02528122, 0.14466287, 0.17029544])

# Adding intercept to the features and predicting values for train data

In [277]:
np.dot(np.column_stack((np.ones(len(X_train)),X_train)),result)

array([162.78361508, 158.06405817, 163.91882191, ..., 162.47869894,
       163.87441259, 162.87442772])

In [278]:
y_train.shape

(7654,)

In [279]:
data.iloc[:,0:4].shape

(9568, 4)

# Train set RMSE

In [280]:
intercept = np.ones((X_train_std.shape[0],1))
features = np.column_stack((intercept, X_train_std))

y_pred = np.dot(features, result)
print('Train Set RMSE: {0}'.format(root_mse(y_pred, y_train_std)))

Train Set RMSE: 0.21467737561086755


# Test set RMSE

In [281]:
intercept = np.ones((X_test_std.shape[0],1))
features = np.column_stack((intercept, X_test_std))

y_pred_test = np.dot(features, result)
print('Train Set RMSE: {0}'.format(root_mse(y_pred_test, y_test_std)))

Train Set RMSE: 0.21372886387846096
