## Neural networks assignment- Q2

#### Q) Predicting turbine energy yield (TEY) using ambient variables as features

In [1]:
# to supress the warnings
import warnings
warnings.filterwarnings('ignore')

In [39]:
#Import the required libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV,KFold
from sklearn.metrics import mean_squared_error
from math import sqrt

In [3]:
# Read the data
data = pd.read_csv('gas_turbines.csv')
data.head()

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
0,6.8594,1007.9,96.799,3.5,19.663,1059.2,550.0,114.7,10.605,3.1547,82.722
1,6.785,1008.4,97.118,3.4998,19.728,1059.3,550.0,114.72,10.598,3.2363,82.776
2,6.8977,1008.8,95.939,3.4824,19.779,1059.4,549.87,114.71,10.601,3.2012,82.468
3,7.0569,1009.2,95.249,3.4805,19.792,1059.6,549.99,114.72,10.606,3.1923,82.67
4,7.3978,1009.7,95.15,3.4976,19.765,1059.7,549.98,114.72,10.612,3.2484,82.311


In [4]:
#more information about the data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15039 entries, 0 to 15038
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      15039 non-null  float64
 1   AP      15039 non-null  float64
 2   AH      15039 non-null  float64
 3   AFDP    15039 non-null  float64
 4   GTEP    15039 non-null  float64
 5   TIT     15039 non-null  float64
 6   TAT     15039 non-null  float64
 7   TEY     15039 non-null  float64
 8   CDP     15039 non-null  float64
 9   CO      15039 non-null  float64
 10  NOX     15039 non-null  float64
dtypes: float64(11)
memory usage: 1.3 MB


In [5]:
# Describing the data
data.describe()

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
count,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0
mean,17.764381,1013.19924,79.124174,4.200294,25.419061,1083.79877,545.396183,134.188464,12.102353,1.972499,68.190934
std,7.574323,6.41076,13.793439,0.760197,4.173916,16.527806,7.866803,15.829717,1.103196,2.222206,10.470586
min,0.5223,985.85,30.344,2.0874,17.878,1000.8,512.45,100.17,9.9044,0.000388,27.765
25%,11.408,1008.9,69.75,3.7239,23.294,1079.6,542.17,127.985,11.622,0.858055,61.3035
50%,18.186,1012.8,82.266,4.1862,25.082,1088.7,549.89,133.78,12.025,1.3902,66.601
75%,23.8625,1016.9,90.0435,4.5509,27.184,1096.0,550.06,140.895,12.578,2.1604,73.9355
max,34.929,1034.2,100.2,7.6106,37.402,1100.8,550.61,174.61,15.081,44.103,119.89


In [6]:
# Splitting the data
X = data.loc[:,['AT','AP','AH','AFDP','GTEP','TIT','TAT','CDP','CO','NOX']]
y = data.loc[:,['TEY']]

#### Standardizing the data

In [7]:
sc = StandardScaler()
X_array  = sc.fit_transform(X)
y_array = sc.fit_transform(y)

In [8]:
pd.DataFrame(X_array).describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
count,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0
mean,-2.320107e-16,-1.92528e-14,1.844983e-16,3.810001e-16,1.107344e-16,-2.324212e-15,1.744899e-15,3.640356e-16,1.9533550000000002e-17,-6.862579000000001e-17
std,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033,1.000033
min,-2.276462,-4.266288,-3.536594,-2.779497,-1.806771,-5.021933,-4.188141,-1.992416,-0.8874862,-3.861033
25%,-0.8392292,-0.670651,-0.6796337,-0.626693,-0.5091458,-0.2540512,-0.4101146,-0.4354335,-0.5015202,-0.6578107
50%,0.05566605,-0.06227861,0.2277844,-0.01854065,-0.08075681,0.2965544,0.571257,-0.07011925,-0.2620452,-0.1518527
75%,0.8051309,0.5772924,0.7916582,0.4612196,0.4228638,0.738249,0.5928675,0.431168,0.08455882,0.5486567
max,2.266234,3.27597,1.528011,4.486233,2.871006,1.028678,0.6627839,2.700105,18.95949,4.937717


#### Tuning of Hyperparameters :- Batch Size and Epochs

In [9]:
def basemodel():
    model = Sequential()
    model.add(Dense(10, input_dim = 10,activation ='relu'))
    model.add(Dense(4,activation= 'relu'))
    model.add(Dense(1,activation = 'sigmoid'))
    model.compile(loss='mse', optimizer = 'adam')
    return model

In [10]:
model = KerasRegressor(build_fn=basemodel,verbose =0)
# Define the grid search parameters
batch_size = [5,10,15]
epochs = [10,50,100]
# Make a dictionary of the grid search parameters
param_grid = dict(batch_size = batch_size,epochs = epochs)
# Build and fit the GridSearchCV
grid = GridSearchCV(estimator = model ,param_grid = param_grid,cv = KFold(),verbose = 10)
grid_result = grid.fit(X_array,y_array)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START batch_size=5, epochs=10.....................................
[CV 1/5; 1/9] END .....batch_size=5, epochs=10;, score=-0.686 total time=  40.1s
[CV 2/5; 1/9] START batch_size=5, epochs=10.....................................
[CV 2/5; 1/9] END .....batch_size=5, epochs=10;, score=-0.437 total time=  37.9s
[CV 3/5; 1/9] START batch_size=5, epochs=10.....................................
[CV 3/5; 1/9] END .....batch_size=5, epochs=10;, score=-0.502 total time=  37.6s
[CV 4/5; 1/9] START batch_size=5, epochs=10.....................................
[CV 4/5; 1/9] END .....batch_size=5, epochs=10;, score=-0.485 total time=  37.6s
[CV 5/5; 1/9] START batch_size=5, epochs=10.....................................
[CV 5/5; 1/9] END .....batch_size=5, epochs=10;, score=-0.682 total time=  38.1s
[CV 1/5; 2/9] START batch_size=5, epochs=50.....................................
[CV 1/5; 2/9] END .....batch_size=5, epochs=50;, 

In [11]:
# Summarize the results
print('Best : {}, using {}'.format(grid_result.best_score_,grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
  print('{},{} with: {}'.format(mean, stdev, param)) 

Best : -0.5581401646137237, using {'batch_size': 10, 'epochs': 100}
-0.5584311306476593,0.10494539179610432 with: {'batch_size': 5, 'epochs': 10}
-0.5583658576011657,0.10494892456473533 with: {'batch_size': 5, 'epochs': 50}
-0.5582134127616882,0.10480869008786008 with: {'batch_size': 5, 'epochs': 100}
-0.5586721658706665,0.10489552100211819 with: {'batch_size': 10, 'epochs': 10}
-0.5582415401935578,0.10489922688715839 with: {'batch_size': 10, 'epochs': 50}
-0.5581401646137237,0.10490166552440919 with: {'batch_size': 10, 'epochs': 100}
-0.5588429152965546,0.10494782863907733 with: {'batch_size': 15, 'epochs': 10}
-0.5582571923732758,0.1049337309567765 with: {'batch_size': 15, 'epochs': 50}
-0.5581496119499206,0.10498314244003577 with: {'batch_size': 15, 'epochs': 100}


In [12]:
grid_result.best_score_

-0.5581401646137237

In [13]:
grid_result.best_params_

{'batch_size': 10, 'epochs': 100}

In [None]:
#### Tuning of Hyperparameters :- learning rate and dro[pout rate]

In [14]:
from keras.layers import Dropout

# Defining the model

def create_model(learning_rate,dropout_rate):
    model = Sequential()
    model.add(Dense(10,input_dim = 10,kernel_initializer = 'normal',activation = 'relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(4,input_dim = 10,kernel_initializer = 'normal',activation = 'relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1,activation = 'sigmoid'))
    model.compile(loss = 'mean_squared_error',optimizer = 'Adam')
    return model

# Create the model

model = KerasRegressor(build_fn = create_model,verbose = 0,batch_size = 5,epochs = 50)

# Define the grid search parameters

learning_rate = [0.001,0.01,0.1]
dropout_rate = [0.0,0.1,0.2]

# Make a dictionary of the grid search parameters

param_grids = dict(learning_rate = learning_rate,dropout_rate = dropout_rate)

# Build and fit the GridSearchCV

grid = GridSearchCV(estimator = model,param_grid = param_grids,cv = KFold(),verbose = 10)
grid_result = grid.fit(X_array,y_array)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 1/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=-0.686 total time= 3.2min
[CV 2/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 2/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=-0.437 total time= 3.2min
[CV 3/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 3/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=-0.501 total time= 3.3min
[CV 4/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 4/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=-0.485 total time= 3.2min
[CV 5/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 5/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=-0.682 total time= 3.3min
[CV 1/5; 2/9] START dropout_rate=0.0, learning_rate=0.01........................
[CV 

In [15]:
# Summarize the results
print('Best : {}, using {}'.format(grid_result.best_score_,grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
  print('{},{} with: {}'.format(mean, stdev, param)) 

Best : -0.5580870866775512, using {'dropout_rate': 0.0, 'learning_rate': 0.001}
-0.5580870866775512,0.1049326175494691 with: {'dropout_rate': 0.0, 'learning_rate': 0.001}
-0.5582700133323669,0.1048858955468957 with: {'dropout_rate': 0.0, 'learning_rate': 0.01}
-0.5581759750843048,0.10482607593926076 with: {'dropout_rate': 0.0, 'learning_rate': 0.1}
-0.5589388072490692,0.10546260300000063 with: {'dropout_rate': 0.1, 'learning_rate': 0.001}
-0.5587832152843475,0.10475629201845782 with: {'dropout_rate': 0.1, 'learning_rate': 0.01}
-0.5583952784538269,0.10482878747109459 with: {'dropout_rate': 0.1, 'learning_rate': 0.1}
-0.5594920337200164,0.10474106050356645 with: {'dropout_rate': 0.2, 'learning_rate': 0.001}
-0.5588758587837219,0.10460235508530728 with: {'dropout_rate': 0.2, 'learning_rate': 0.01}
-0.5599914371967316,0.10388366741526837 with: {'dropout_rate': 0.2, 'learning_rate': 0.1}


#### Tuning of Hyperparameters:- Activation Function and Kernel Initializer

In [16]:
# Defining the model

def create_model(activation_function,init):
    model = Sequential()
    model.add(Dense(8,input_dim = 10,kernel_initializer = init,activation = activation_function))
    model.add(Dropout(0.1))
    model.add(Dense(4,input_dim = 10,kernel_initializer = init,activation = activation_function))
    model.add(Dropout(0.1))
    model.add(Dense(1,activation = 'sigmoid'))
    model.compile(loss = 'mean_squared_error',optimizer = 'Adam')
    return model

# Create the model

model = KerasRegressor(build_fn = create_model,verbose = 0,batch_size = 5,epochs = 50)

# Define the grid search parameters
activation_function = ['softmax','relu','tanh','linear']
init = ['uniform','normal','zero']

# Make a dictionary of the grid search parameters
param_grids = dict(activation_function = activation_function,init = init)

# Build and fit the GridSearchCV

grid = GridSearchCV(estimator = model,param_grid = param_grids,cv = KFold(),verbose = 10)
grid_result = grid.fit(X_array,y_array)


Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5; 1/12] START activation_function=softmax, init=uniform..................
[CV 1/5; 1/12] END activation_function=softmax, init=uniform;, score=-0.687 total time= 4.3min
[CV 2/5; 1/12] START activation_function=softmax, init=uniform..................
[CV 2/5; 1/12] END activation_function=softmax, init=uniform;, score=-0.441 total time= 4.1min
[CV 3/5; 1/12] START activation_function=softmax, init=uniform..................
[CV 3/5; 1/12] END activation_function=softmax, init=uniform;, score=-0.504 total time= 4.2min
[CV 4/5; 1/12] START activation_function=softmax, init=uniform..................
[CV 4/5; 1/12] END activation_function=softmax, init=uniform;, score=-0.490 total time= 4.2min
[CV 5/5; 1/12] START activation_function=softmax, init=uniform..................
[CV 5/5; 1/12] END activation_function=softmax, init=uniform;, score=-0.685 total time= 4.2min
[CV 1/5; 2/12] START activation_function=softmax, init=norm

[CV 3/5; 10/12] END activation_function=linear, init=uniform;, score=-0.502 total time= 4.2min
[CV 4/5; 10/12] START activation_function=linear, init=uniform..................
[CV 4/5; 10/12] END activation_function=linear, init=uniform;, score=-0.486 total time= 4.1min
[CV 5/5; 10/12] START activation_function=linear, init=uniform..................
[CV 5/5; 10/12] END activation_function=linear, init=uniform;, score=-0.683 total time= 4.1min
[CV 1/5; 11/12] START activation_function=linear, init=normal...................
[CV 1/5; 11/12] END activation_function=linear, init=normal;, score=-0.687 total time= 4.8min
[CV 2/5; 11/12] START activation_function=linear, init=normal...................
[CV 2/5; 11/12] END activation_function=linear, init=normal;, score=-0.438 total time= 4.4min
[CV 3/5; 11/12] START activation_function=linear, init=normal...................
[CV 3/5; 11/12] END activation_function=linear, init=normal;, score=-0.502 total time= 4.3min
[CV 4/5; 11/12] START activa

In [17]:
# Summarize the results
print('Best : {}, using {}'.format(grid_result.best_score_,grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
  print('{},{} with: {}'.format(mean, stdev, param)) 

Best : -0.559031754732132, using {'activation_function': 'linear', 'init': 'uniform'}
-0.5614878714084626,0.10374859034181723 with: {'activation_function': 'softmax', 'init': 'uniform'}
-0.5611711144447327,0.10585347782579181 with: {'activation_function': 'softmax', 'init': 'normal'}
-0.5610242903232574,0.10358185050505364 with: {'activation_function': 'softmax', 'init': 'zero'}
-0.559269517660141,0.1048255057308508 with: {'activation_function': 'relu', 'init': 'uniform'}
-0.5594012439250946,0.10406878034007588 with: {'activation_function': 'relu', 'init': 'normal'}
-1.0116025447845458,0.28296185438968774 with: {'activation_function': 'relu', 'init': 'zero'}
-0.5595818400382996,0.10456897578647592 with: {'activation_function': 'tanh', 'init': 'uniform'}
-0.5595149576663971,0.1046886231868828 with: {'activation_function': 'tanh', 'init': 'normal'}
-1.0115487933158875,0.28305318649342687 with: {'activation_function': 'tanh', 'init': 'zero'}
-0.559031754732132,0.10477300096303928 with: {'

#### Tuning of Hyperparameter :-Number of Neurons in activation layer

In [18]:
# Defining the model

def create_model(neuron1,neuron2):
    model = Sequential()
    model.add(Dense(neuron1,input_dim = 10,kernel_initializer = 'uniform',activation = 'tanh'))
    model.add(Dropout(0.2))
    model.add(Dense(neuron2,input_dim = neuron1,kernel_initializer = 'uniform',activation = 'tanh'))
    model.add(Dropout(0.1))
    model.add(Dense(1,activation = 'sigmoid'))
    model.compile(loss = 'mean_squared_error',optimizer = 'Adam')
    return model

# Create the model

model = KerasRegressor(build_fn = create_model,verbose = 0,batch_size = 5,epochs = 50)

# Define the grid search parameters

neuron1 = [4,8,16]
neuron2 = [2,4,8]

# Make a dictionary of the grid search parameters

param_grids = dict(neuron1 = neuron1,neuron2 = neuron2)

# Build and fit the GridSearchCV

grid = GridSearchCV(estimator = model,param_grid = param_grids,cv = KFold(),verbose = 10)
grid_result = grid.fit(X_array,y_array) 

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 1/5; 1/9] END ........neuron1=4, neuron2=2;, score=-0.688 total time= 4.1min
[CV 2/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 2/5; 1/9] END ........neuron1=4, neuron2=2;, score=-0.440 total time= 4.1min
[CV 3/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 3/5; 1/9] END ........neuron1=4, neuron2=2;, score=-0.503 total time= 4.1min
[CV 4/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 4/5; 1/9] END ........neuron1=4, neuron2=2;, score=-0.489 total time= 4.2min
[CV 5/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 5/5; 1/9] END ........neuron1=4, neuron2=2;, score=-0.685 total time= 4.1min
[CV 1/5; 2/9] START neuron1=4, neuron2=4........................................
[CV 1/5; 2/9] END ........neuron1=4, neuron2=4;, 

In [19]:
# Summarize the results
print('Best : {}, using {}'.format(grid_result.best_score_,grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
  print('{},{} with: {}'.format(mean, stdev, param)) 

Best : -0.559312641620636, using {'neuron1': 16, 'neuron2': 8}
-0.5609307885169983,0.10470615317435386 with: {'neuron1': 4, 'neuron2': 2}
-0.560339766740799,0.10492846192997078 with: {'neuron1': 4, 'neuron2': 4}
-0.5600373923778534,0.10463744948202593 with: {'neuron1': 4, 'neuron2': 8}
-0.5602546572685242,0.104670363206434 with: {'neuron1': 8, 'neuron2': 2}
-0.5597877383232117,0.10479683337831196 with: {'neuron1': 8, 'neuron2': 4}
-0.5593795239925384,0.10468535984573756 with: {'neuron1': 8, 'neuron2': 8}
-0.5601932466030121,0.10459467620424419 with: {'neuron1': 16, 'neuron2': 2}
-0.559420382976532,0.10470163576520046 with: {'neuron1': 16, 'neuron2': 4}
-0.559312641620636,0.10463602590658258 with: {'neuron1': 16, 'neuron2': 8}


#### Training model with optimum values of Hyperparameters

In [25]:
# Defining the model

def create_model():
    model = Sequential()
    model.add(Dense(16,input_dim = 10,kernel_initializer = 'uniform',activation = 'linear'))
    model.add(Dropout(0.0))
    model.add(Dense(8,input_dim = 16,kernel_initializer = 'uniform',activation = 'linear'))
    model.add(Dropout(0.0))
    model.add(Dense(1,activation = 'sigmoid'))
    
    #adam = Adam(lr = 0.001) #sgd = SGD(lr=learning_rate, momentum=momentum, decay=decay_rate, nesterov=False)
    model.compile(loss = 'mean_squared_error',optimizer = 'Adam')
    return model

# Create the model

model = KerasRegressor(build_fn = create_model,verbose = 10,batch_size = 5,epochs = 100)

# Fitting the model

model.fit(X_array,y_array)

# Predicting using trained model

y_predict = model.predict(X_array)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

ValueError: continuous is not supported

In [30]:
#Predicted values
y_predict

array([4.3105541e-07, 4.5774726e-07, 4.7827700e-07, ..., 8.9488559e-09,
       2.2263411e-08, 6.8635806e-08], dtype=float32)

In [34]:
#Converting the preicted standardized values
pred_val= sc.inverse_transform(y_predict)
pred_val

array([134.18848, 134.18848, 134.18848, ..., 134.18846, 134.18846,
       134.18846], dtype=float32)

In [35]:
#Converting the standardized y values to actaul y values
actual_val = sc.inverse_transform(y_array)
actual_val

array([[114.7 ],
       [114.72],
       [114.71],
       ...,
       [110.19],
       [110.74],
       [111.58]])

In [36]:
#Calculating the mean_squared_error
mean_squared_error(actual_val,pred_val)

140.00301794557598

In [38]:
#Calculating the Root_mean_squared_error
RMSE =sqrt(mean_squared_error(actual_val,pred_val))
RMSE

11.832287096989152