In [62]:
import numpy as np
import pyexcel as p
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model as lm
from sklearn.metrics import mean_squared_error

In [22]:
# Load the Excel workbook into a pandas DataFrame and preview its first rows.
fname = 'data.xlsx'
dff = pd.read_excel(io=fname)
dff.head(n=5)

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [21]:
# Divide the data into input and output DataFrames.
# Inputs are the first 4 columns: Temperature (AT), Exhaust Vacuum (V),
# Ambient Pressure (AP) and Relative Humidity (RH).
# Output is the remaining column: net hourly electrical energy output (PE).
#
# .copy() makes both frames independent of `dff`, so assigning into `x` or `y`
# later does not raise SettingWithCopyWarning and can never touch the raw data.
y = dff[dff.columns[4:]].copy()
x = dff[dff.columns[0:4]].copy()
x.head()

Unnamed: 0,AT,V,AP,RH
0,14.96,41.76,1024.07,73.17
1,25.18,62.96,1020.04,59.08
2,5.11,39.4,1012.16,92.14
3,20.86,57.32,1010.24,76.64
4,10.82,37.5,1009.23,96.62


In [41]:
# In this section we perform feature scaling (min-max normalisation to [0, 1]).
# Ranges of the 4 features are as follows:
# Temperature (T) in the range 1.81°C to 37.11°C,
# Ambient Pressure (AP) in the range 992.89-1033.30 millibar,
# Relative Humidity (RH) in the range 25.56% to 100.16%,
# Exhaust Vacuum (V) in the range 25.36-81.56 cm Hg.
#
# The arithmetic is vectorised: DataFrame - Series aligns on column labels, so
# every column is scaled with its own min/range in one pass. `x` is rebound to
# the new frame rather than written into column by column, which avoids
# SettingWithCopyWarning when `x` is a selection of another frame.
feature_min = x.min()
feature_range = x.max() - feature_min
# A constant column has zero range; divide by 1 instead so it maps to 0.0
# rather than NaN/inf (which would break model fitting downstream).
feature_range = feature_range.where(feature_range != 0, 1.0)
x = (x - feature_min) / feature_range

x.head(n=5)

Unnamed: 0,AT,V,AP,RH
0,0.372521,0.291815,0.771591,0.638204
1,0.66204,0.669039,0.671863,0.44933
2,0.093484,0.249822,0.476862,0.892493
3,0.53966,0.568683,0.429349,0.684718
4,0.255241,0.216014,0.404355,0.952547


In [43]:
# Partition the inputs and target into train and test sets with sklearn's
# train_test_split: 30% of the rows go to the test set, seeded with random_state=42.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.3, random_state=42
)
# Report the shape of each input partition, train first and then test.
for subset in (xtrain, xtest):
    print(subset.shape)

(6697, 4)
(2871, 4)


In [76]:
# Different models will be used to predict PE and their scores compared.
# First model: LinearRegression from sklearn.linear_model.

# fit() returns the estimator itself, so construction and training are chained.
reg1 = lm.LinearRegression().fit(xtrain.values, ytrain.values)

# Predict on both partitions so train and test error can be compared.
predictions_train = reg1.predict(xtrain.values)
predictions_test = reg1.predict(xtest.values)

msetrain = mean_squared_error(y_true=ytrain.values, y_pred=predictions_train)
msetest = mean_squared_error(y_true=ytest.values, y_pred=predictions_test)

# Build the report lines first, then print test before train (same order as before).
test_report = "Mean squared Error of test data set : " + str(msetest)
train_report = "Mean squared Error of train data set: " + str(msetrain)
print(test_report)
print(train_report)

Mean squared Error of test data set : 21.2398569382
Mean squared Error of train data set: 20.5808397257


In [49]:
# Preview the first five target values of the held-out test set.
ytest.head(n=5)

Unnamed: 0,PE
2513,455.27
9411,436.31
8745,440.68
9085,434.4
4950,482.06
