<a href="https://colab.research.google.com/github/mayankdeshmukh/Data-Science-and-Machine-Learning/blob/main/Neural_Network_Gas_Turbine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural Network - Gas Turbine

### Problem Statement - Predicting Turbine Energy Yield (TEY) using ambient variables as features.


## Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV, KFold

## Importing Dataset

In [2]:
# Location of the raw CSV (Colab upload directory).
DATA_PATH = "/content/gas_turbines.csv"

# Read the sensor readings into a DataFrame and display it.
turbine = pd.read_csv(DATA_PATH)
turbine

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
0,6.8594,1007.9,96.799,3.5000,19.663,1059.2,550.00,114.70,10.605,3.1547,82.722
1,6.7850,1008.4,97.118,3.4998,19.728,1059.3,550.00,114.72,10.598,3.2363,82.776
2,6.8977,1008.8,95.939,3.4824,19.779,1059.4,549.87,114.71,10.601,3.2012,82.468
3,7.0569,1009.2,95.249,3.4805,19.792,1059.6,549.99,114.72,10.606,3.1923,82.670
4,7.3978,1009.7,95.150,3.4976,19.765,1059.7,549.98,114.72,10.612,3.2484,82.311
...,...,...,...,...,...,...,...,...,...,...,...
15034,9.0301,1005.6,98.460,3.5421,19.164,1049.7,546.21,111.61,10.400,4.5186,79.559
15035,7.8879,1005.9,99.093,3.5059,19.414,1046.3,543.22,111.78,10.433,4.8470,79.917
15036,7.2647,1006.3,99.496,3.4770,19.530,1037.7,537.32,110.19,10.483,7.9632,90.912
15037,7.0060,1006.8,99.008,3.4486,19.377,1043.2,541.24,110.74,10.533,6.2494,93.227


In [3]:
# Remove fully duplicated rows, keeping the first occurrence of each,
# then report the resulting (rows, columns).
turbine1 = turbine.drop_duplicates(keep="first")
turbine1.shape

(15039, 11)

In [4]:
# Count missing values in each column of the de-duplicated frame.
turbine1.isna().sum()

AT      0
AP      0
AH      0
AFDP    0
GTEP    0
TIT     0
TAT     0
TEY     0
CDP     0
CO      0
NOX     0
dtype: int64

In [5]:
# Summarise column dtypes, non-null counts and memory usage.
turbine1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15039 entries, 0 to 15038
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      15039 non-null  float64
 1   AP      15039 non-null  float64
 2   AH      15039 non-null  float64
 3   AFDP    15039 non-null  float64
 4   GTEP    15039 non-null  float64
 5   TIT     15039 non-null  float64
 6   TAT     15039 non-null  float64
 7   TEY     15039 non-null  float64
 8   CDP     15039 non-null  float64
 9   CO      15039 non-null  float64
 10  NOX     15039 non-null  float64
dtypes: float64(11)
memory usage: 1.4 MB


In [6]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
turbine1.describe()

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
count,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0
mean,17.764381,1013.19924,79.124174,4.200294,25.419061,1083.79877,545.396183,134.188464,12.102353,1.972499,68.190934
std,7.574323,6.41076,13.793439,0.760197,4.173916,16.527806,7.866803,15.829717,1.103196,2.222206,10.470586
min,0.5223,985.85,30.344,2.0874,17.878,1000.8,512.45,100.17,9.9044,0.000388,27.765
25%,11.408,1008.9,69.75,3.7239,23.294,1079.6,542.17,127.985,11.622,0.858055,61.3035
50%,18.186,1012.8,82.266,4.1862,25.082,1088.7,549.89,133.78,12.025,1.3902,66.601
75%,23.8625,1016.9,90.0435,4.5509,27.184,1096.0,550.06,140.895,12.578,2.1604,73.9355
max,34.929,1034.2,100.2,7.6106,37.402,1100.8,550.61,174.61,15.081,44.103,119.89


## Data Visualization

In [None]:
# Grid of pairwise plots across every column of turbine1.
# NOTE(review): with 11 columns and ~15k rows this cell is likely slow to render.
sns.pairplot(turbine1)

In [None]:
# Annotated heatmap of the pairwise column correlations.
corr_matrix = turbine1.corr()

fig, ax = plt.subplots(figsize=(14, 6))
sns.heatmap(corr_matrix, cmap='magma', annot=True, fmt=".3f", ax=ax)
plt.show()

In [None]:
# TEY against each of the first four features, one panel per feature.
#
# These columns are continuous, so a scatter plot with a fitted regression
# line (regplot) is the appropriate view. A violinplot would treat every
# distinct x value as its own category, and `scatter_kws` is a regplot
# option, not a violinplot one.
f, axes = plt.subplots(2, 2, figsize = (12, 8))

# axes.ravel() walks the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1].
for ax, feature in zip(axes.ravel(), ['AT', 'AP', 'AH', 'AFDP']):
    sns.regplot(x = feature, y = 'TEY', data = turbine1, scatter_kws = {'alpha': 0.6}, ax = ax)

plt.tight_layout()
plt.show()

In [None]:
# TEY against each of the remaining process features, one panel per feature.
#
# These columns are continuous, so a scatter plot with a fitted regression
# line (regplot) is the appropriate view. A violinplot would treat every
# distinct x value as its own category, and `scatter_kws` is a regplot
# option, not a violinplot one.
f, axes = plt.subplots(2, 2, figsize = (12, 8))

# axes.ravel() walks the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1].
for ax, feature in zip(axes.ravel(), ['GTEP', 'TIT', 'TAT', 'CDP']):
    sns.regplot(x = feature, y = 'TEY', data = turbine1, scatter_kws = {'alpha': 0.6}, ax = ax)

plt.tight_layout()
plt.show()

## Machine Learning Algorithm - Neural Network

In [None]:
# NOTE(review): this seeds NumPy only; the Keras backend may need its own
# seed for training runs to be repeatable - confirm.
np.random.seed(8)

# Build features/target from the de-duplicated frame (turbine1), which is the
# frame every earlier inspection and plotting cell works on.
features = turbine1.drop(['TEY'], axis = 1)

# Standardise every feature to zero mean / unit variance. The raw columns sit
# on very different scales (e.g. AP around 1000 vs. AFDP around 4), which
# saturates the tanh/sigmoid layers of the network when fed unscaled.
# NOTE(review): the scaler is fitted on all rows, including those Keras later
# holds out through validation_split - fit on the training part only if a
# strictly leak-free validation score is required.
X = pd.DataFrame(
    StandardScaler().fit_transform(features),
    columns = features.columns,
    index = features.index,
)
Y = turbine1['TEY']
X

In [None]:
# Display the target Series (the TEY column).
Y

## Batch Size and Epochs

In [None]:
# Feed-forward regressor: 10 input features -> three hidden layers of 26 units
# -> one linear output unit.
model = Sequential()
model.add(Dense(26, input_dim = 10, activation = 'relu'))
model.add(Dense(26, activation = 'tanh'))
model.add(Dense(26, activation = 'sigmoid'))
# TEY is a single continuous value, so the output layer must have exactly one
# unit with a linear activation. A 26-unit final layer would emit 26
# predictions per sample for a scalar target.
model.add(Dense(1, activation = 'linear'))

In [None]:
# Configure training: Adam optimizer, mean-squared-error loss, and MAPE
# tracked as the reported metric.
model.compile(
    optimizer = 'Adam',
    loss = 'mse',
    metrics = ['mean_absolute_percentage_error'],
)

In [None]:
# Train for 20 epochs in mini-batches of 20, holding out 33% of the rows for
# validation; the per-epoch metrics are kept in `history`.
history = model.fit(
    X,
    Y,
    batch_size = 20,
    epochs = 20,
    validation_split = 0.33,
)

In [None]:
# Score the fitted model on the full X/Y (this includes the rows it was
# trained on) and report the metric stored at index 1.
scores = model.evaluate(X, Y)
metric_name, metric_value = model.metrics_names[1], scores[1]
print("%s: %.2f%%" % (metric_name, metric_value))

In [None]:
# Labels for the values held in `scores` (same positions).
model.metrics_names

In [None]:
# Raw values returned by model.evaluate.
scores

In [None]:
# Names of the per-epoch series recorded while fitting.
history.history.keys()

In [None]:
# Learning curves: MAPE per epoch on the training rows and on the rows held
# out by validation_split.
plt.plot(history.history['mean_absolute_percentage_error'])
plt.plot(history.history['val_mean_absolute_percentage_error'])
plt.title('model_mean_absolute_percentage_error')
plt.xlabel('epoch')
plt.ylabel('mean absolute percentage error (%)')
# The second curve comes from the validation split, not from a separate test
# set, so it is labelled accordingly.
plt.legend(['train', 'validation'], loc = 'upper left')
plt.show()

## Hyperparameters all at once
The hyperparameter optimization was carried out by tuning two hyperparameters at a time, so we may have missed the best combination of values. Performance can be improved further by searching for the optimal values of all hyperparameters at once, as shown in the code snippet below.