# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

# Importing the Dataset

In [None]:
# Hard-coded absolute location of the data set; change DATA_PATH if the CSV lives elsewhere.
DATA_PATH = '/content/gas_turbines.csv'
df = pd.read_csv(DATA_PATH)
# Preview the first rows only instead of echoing the whole frame.
df.head()

# Problem Statement

The goal is to predict the turbine energy yield (TEY) using ambient variables as features.




# Attribute Information:

The explanations of sensor measurements and their brief statistics are given below.

Variable (Abbr.) Unit Min Max Mean
Ambient temperature (AT) C -6.23 37.10 17.71

Ambient pressure (AP) mbar 985.85 1036.56 1013.07

Ambient humidity (AH) (%) 24.08 100.20 77.87

Air filter difference pressure (AFDP) mbar 2.09 7.61 3.93

Gas turbine exhaust pressure (GTEP) mbar 17.70 40.72 25.56

Turbine inlet temperature (TIT) C 1000.85 1100.89 1081.43

Turbine after temperature (TAT) C 511.04 550.61 546.16

Compressor discharge pressure (CDP) mbar 9.85 15.16 12.06

Turbine energy yield (TEY) MWH 100.02 179.50 133.51

Carbon monoxide (CO) mg/m3 0.00 44.10 2.37

Nitrogen oxides (NOx) mg/m3 25.90 119.91 65.29

# Data Exploration

In [None]:
# Print the (rows, columns) shape, then display the first five rows.
print(df.shape)
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

In [None]:
# Column names, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Column labels of the data set.
df.columns

In [None]:
# Data type of each column.
df.dtypes

In [None]:
# Rows that are exact repeats of an earlier row (the first occurrence is not flagged).
df[df.duplicated()]

In [None]:
# Shape of the duplicate-row subset; the first element is the number of duplicated rows.
df[df.duplicated()].shape

In [None]:
# Column labels again. NOTE(review): this repeats an earlier cell's output and could be dropped.
df.columns

# Define the categorical variables and numeric variables

In [None]:
# Gather every column whose dtype is not the generic object ('O') dtype.
numerical_features = []
for feature in df.columns:
    if df[feature].dtypes != 'O':
        numerical_features.append(feature)

n_numerical = len(numerical_features)
print('Number of numerical variables:', n_numerical)

# Preview the first rows of the numeric columns
numeric_preview = df[numerical_features]
numeric_preview.head()

Numerical variables are usually of two types:

1) Discrete variables

2) Continuous variables

In [None]:

# A numeric column counts as discrete when it holds fewer than 25 distinct values
# (NaN, if present, is counted as a value, matching len(unique())).
discrete_feature = [
    name
    for name in numerical_features
    if df[name].nunique(dropna=False) < 25
]
discrete_count = len(discrete_feature)
print('Discrete Variables Count: {}'.format(discrete_count))

In [None]:
# Every numeric column that was not classified as discrete is treated as continuous.
continuous_feature = list(
    filter(lambda name: name not in discrete_feature, numerical_features)
)
continuous_count = len(continuous_feature)
print('Continuous Feature Count {}'.format(continuous_count))

In [None]:
# Columns stored with the object ('O') dtype are reported as categorical.
categorical = []
for var in df.columns:
    if df[var].dtype == 'O':
        categorical.append(var)

n_categorical = len(categorical)
print('There are {} categorical variables\n'.format(n_categorical))
print('The categorical variables are :\n', categorical)
print('\n')

# Correlation Matrix

In [None]:
# Pairwise correlation between the columns of df.
df.corr()

# Data Visualization

In [None]:
# Annotated heat map of the pairwise correlations on a wide canvas.
fig = plt.figure(figsize=(18, 8))
correlations = df.corr()
sns.heatmap(correlations, annot=True)
# Tilt the column labels so they do not overlap.
plt.xticks(rotation=45)

In [None]:
# Columns to plot. The previous list here (FFMC, DMC, DC, ISI, ...) named columns
# from a different data set that do not exist in the gas-turbine frame.
continuous = ['AT', 'AP', 'AH', 'AFDP', 'GTEP', 'TIT', 'TAT', 'TEY', 'CDP', 'CO', 'NOX']


In [None]:
# Names of the gas-turbine columns that the distribution plots iterate over.
continuous = [
    'AT',
    'AP',
    'AH',
    'AFDP',
    'GTEP',
    'TIT',
    'TAT',
    'TEY',
    'CDP',
    'CO',
    'NOX',
]

In [None]:
# One box plot per column to inspect spread and outliers.
columns = ['AT', 'AP', 'AH', 'AFDP', 'GTEP', 'TIT', 'TAT', 'TEY', 'CDP', 'CO',
           'NOX']
for col in columns:
  plt.figure()
  # Pass the series by keyword: the meaning of a positional first argument
  # differs between seaborn releases (x vs. data), which changes orientation
  # or triggers a warning.
  sns.boxplot(x=df[col])
  plt.title('Box plot of {}'.format(col))
  plt.show()

In [None]:
# Draw one distribution plot per column listed in `continuous`.
displot_options = dict(height=4, aspect=2, palette='deep')
for feature in continuous:
  sns.displot(data=df, x=feature, **displot_options)
  plt.show()

In [None]:
# One violin plot per column listed in `continuous`.
for feature in continuous:
  plt.figure()
  # Keyword argument keeps the plot orientation stable across seaborn versions
  # (a positional series is interpreted differently by different releases).
  sns.violinplot(x=df[feature])
  plt.title('Violin plot of {}'.format(feature))
  plt.show()

# Distribution of the Target Variable (TEY)

In [None]:
# TEY is a continuous measurement, so a count plot would draw one bar per
# distinct value; a histogram shows the distribution of the target instead.
pal = sns.color_palette('rainbow')
sns.histplot(data=df, x='TEY', bins=50, kde=True, color=pal[0])
plt.xticks(fontsize = 12)
plt.xlabel('TEY')
plt.ylabel('count')
plt.title('gas_turbine')

# Defining Dependent and Independent variable

In [None]:
# Target: the TEY column. Features: every remaining column of df.
y = df['TEY']
x = df.drop(columns=['TEY'])

In [None]:
# Frequency of each distinct feature row.
# NOTE(review): the features look continuous, so most rows are probably unique - confirm this cell is useful.
x.value_counts()

In [None]:
# Frequency of each distinct TEY value.
# NOTE(review): TEY is a continuous target, so these counts say little about its distribution.
y.value_counts()

# Scaling the Data

In [None]:
# Standardization: learn the scaling statistics from every row of x and
# apply them in a single step.
a = StandardScaler()
x_standardized = a.fit_transform(x)

In [None]:
# Shape of the scaled feature array (rows, features).
x_standardized.shape

In [None]:
# Display the scaled features as a DataFrame (columns are positional integers, not the original names).
pd.DataFrame(x_standardized)

# define train_test_split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
# Note that the unscaled feature frame `x` is what gets split here.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Shapes of the training and test feature sets.
x_train.shape,x_test.shape

# Tuning the Hyperparameters :- Batch size and Epochs

In [None]:
from sklearn.model_selection import GridSearchCV, KFold
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import adam_v2

In [None]:
# Create model
def create_model():
  model = Sequential()
  model.add(Dense(30, input_dim = 10, kernel_initializer='uniform',activation='relu')) 
  model.add(Dense(25, kernel_initializer='uniform',activation='relu'))
  model.add(Dense(1, kernel_initializer='uniform',activation='sigmoid'))

  adam = adam_v2.Adam(lr = 0.01)

  model.compile(loss = 'binary_crossentropy',
                optimizer = adam,
                metrics = ['accuracy'])
  
  return model

In [None]:
# Create the model
model = KerasClassifier(build_fn = create_model, verbose = 0)


# Define the grid search parameter
batch_size = [10,20,40,60]
epochs = [10,50,100,150]

#Make a dictionary of the grid search parameters
param_grid = dict(batch_size = batch_size,
                  epochs = epochs)

# Build and fit the GridSearchCV
grid = GridSearchCV(estimator = model,
                    param_grid = param_grid,
                    cv = KFold(),
                    verbose = 10)

grid_result = grid.fit(x_standardized, y)

In [None]:
# Report the winning configuration, then every candidate with its mean/std CV score.
best_score = grid_result.best_score_
best_params = grid_result.best_params_
print('Best : {}, using {}' .format(best_score, best_params))

cv_table = grid_result.cv_results_
means, stds, params = (cv_table['mean_test_score'],
                       cv_table['std_test_score'],
                       cv_table['params'])

for mean, stdev, param in zip(means, stds, params):
  print('{},{} with: {}'.format(mean, stdev, param))

# Tuning of Hyperparameters:-Learning Rate and Drop out rate

In [None]:
from keras.layers import Dropout
# Defining the model
def create_model(learning_rate, dropout_rate):
  model1 = Sequential()
  model1.add(Dense(30, input_dim = 10, kernel_initializer='uniform',activation='relu'))
  model1.add(Dropout(dropout_rate))
  model1.add(Dense(25, input_dim = 10, kernel_initializer='uniform',activation='relu'))
  model1.add(Dropout(dropout_rate))
  model1.add(Dense(1,activation='sigmoid'))

  adam = adam_v2.Adam(lr = learning_rate)

  model1.compile(loss = 'binary_crossentropy',
                optimizer = adam,
                metrics = ['accuracy'])
  return model1

In [None]:
# Create the model
model1 = KerasClassifier(build_fn = create_model, 
                        verbose = 0,
                        batch_size = 10,
                        epochs = 10)


# Define the grid search parameter
learning_rate = [0.001,0.01,0.1]
dropout_rate= [0.0,0.1,0.2]

#Make a dictionary of the grid search parameters
param_grid = dict(learning_rate = learning_rate,
                  dropout_rate = dropout_rate)

# Build and fit the GridSearchCV
grid = GridSearchCV(estimator = model1,
                    param_grid = param_grid,
                    cv = KFold(),
                    verbose = 10)

grid_result = grid.fit(x_standardized, y)

In [None]:
# Best learning-rate/dropout combination first, then the full score table.
headline = 'Best : {}, using {}' .format(grid_result.best_score_, grid_result.best_params_)
print(headline)

search_results = grid_result.cv_results_
means = search_results['mean_test_score']
stds = search_results['std_test_score']
params = search_results['params']

for mean, stdev, param in zip(means, stds, params):
  row = '{},{} with: {}'.format(mean, stdev, param)
  print(row)

# Tuning of Hyperparameters:- Activation Function and Kernel Initializer

In [None]:
# Defining the model

def create_model(activation_function,init):
    """Build and compile the regression network for the activation/initializer search.

    Args:
        activation_function: activation used by both hidden layers.
        init: kernel initializer used by both hidden layers.

    Returns:
        A compiled keras Sequential model with one linear output unit, trained
        with mean squared error because TEY is a continuous target.
    """
    model2 = Sequential()
    model2.add(Dense(30,input_dim = 10,kernel_initializer = init,activation = activation_function))
    model2.add(Dropout(0.0))
    # Only the first layer needs input_dim; later layers infer their input size.
    model2.add(Dense(25,kernel_initializer = init,activation = activation_function))
    model2.add(Dropout(0.0))
    model2.add(Dense(1,activation = 'linear'))

    # `learning_rate` is the current argument name; `lr` is a deprecated alias.
    adam = adam_v2.Adam(learning_rate = 0.001)
    model2.compile(loss = 'mean_squared_error',optimizer = adam,metrics = ['mae'])
    return model2

# Create the model (regression wrapper: the target is continuous, not a class label)

model2 = KerasRegressor(build_fn = create_model,verbose = 0,batch_size = 20,epochs = 150)

# Define the grid search parameters
activation_function = ['softmax','relu','tanh','linear']
init = ['uniform','normal','zero']

# Make a dictionary of the grid search parameters
param_grids = dict(activation_function = activation_function,init = init)

# Build and fit the GridSearchCV.
# Candidates are ranked by cross-validated R^2 (higher is better).

grid = GridSearchCV(estimator = model2,param_grid = param_grids,cv = KFold(),scoring = 'r2',verbose = 10)
grid_result = grid.fit(x_standardized,y)

In [None]:
# Best activation/initializer pair first, then one line per candidate.
winner_score, winner_params = grid_result.best_score_, grid_result.best_params_
print('Best : {}, using {}'.format(winner_score, winner_params))
results = grid_result.cv_results_
means = results['mean_test_score']
stds = results['std_test_score']
params = results['params']
for mean, stdev, param in zip(means, stds, params):
  line = '{},{} with: {}'.format(mean, stdev, param)
  print(line)

# Tuning of Hyperparameters:- Number of Neurons in the Hidden Layers

In [None]:
# Defining the model

def create_model(neuron1,neuron2):
    """Build and compile the regression network for the hidden-layer-width search.

    Args:
        neuron1: number of units in the first hidden layer.
        neuron2: number of units in the second hidden layer.

    Returns:
        A compiled keras Sequential model with one linear output unit, trained
        with mean squared error because TEY is a continuous target.
    """
    model3 = Sequential()
    model3.add(Dense(neuron1,input_dim = 10,kernel_initializer = 'uniform',activation = 'linear'))
    model3.add(Dropout(0.0))
    # Only the first layer needs input_dim; later layers infer their input size.
    model3.add(Dense(neuron2,kernel_initializer = 'uniform',activation = 'linear'))
    model3.add(Dropout(0.0))
    model3.add(Dense(1,activation = 'linear'))

    # `learning_rate` is the current argument name; `lr` is a deprecated alias.
    adam = adam_v2.Adam(learning_rate = 0.001)
    model3.compile(loss = 'mean_squared_error',optimizer = adam,metrics = ['mae'])
    return model3

# Create the model (regression wrapper: the target is continuous, not a class label)

model3 = KerasRegressor(build_fn = create_model,verbose = 0,batch_size = 10,epochs = 10)

# Define the grid search parameters

neuron1 = [4,8,16,18,25,30]
neuron2 = [2,4,8,16,18,25]

# Make a dictionary of the grid search parameters

param_grids = dict(neuron1 = neuron1,neuron2 = neuron2)

# Build and fit the GridSearchCV.
# Candidates are ranked by cross-validated R^2 (higher is better).

grid = GridSearchCV(estimator = model3,param_grid = param_grids,cv = KFold(),scoring = 'r2',verbose = 10)
grid_result = grid.fit(x_standardized,y)

In [None]:
# Best layer widths first, then mean/std CV score for each width combination.
print('Best : {}, using {}'.format(grid_result.best_score_,
                                   grid_result.best_params_))
outcome = grid_result.cv_results_
means, stds, params = outcome['mean_test_score'], outcome['std_test_score'], outcome['params']
for mean, stdev, param in zip(means, stds, params):
  summary_line = '{},{} with: {}'.format(mean, stdev, param)
  print(summary_line)

# Training model with optimum values of Hyperparameters

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Defining the model

def create_model():
    """Build and compile the regression network with the selected hyperparameters.

    Returns:
        A compiled keras Sequential model (4 and 2 linear hidden units) with a
        single linear output unit, trained with mean squared error because TEY
        is a continuous target.
    """
    model4 = Sequential()
    model4.add(Dense(4,input_dim = 10,kernel_initializer = 'uniform',activation = 'linear'))
    model4.add(Dropout(0.0))
    # Only the first layer needs input_dim; later layers infer their input size.
    model4.add(Dense(2,kernel_initializer = 'uniform',activation = 'linear'))
    model4.add(Dropout(0.0))
    model4.add(Dense(1,activation = 'linear'))

    # `learning_rate` is the current argument name; `lr` is a deprecated alias.
    adam = adam_v2.Adam(learning_rate = 0.001)
    model4.compile(loss = 'mean_squared_error',optimizer = adam,metrics = ['mae'])
    return model4

# Create the model (regression wrapper: the target is continuous, not a class label)

model4 = KerasRegressor(build_fn = create_model,verbose = 0,batch_size = 10,epochs = 10)

# Keep 30% of the scaled rows aside so that R^2 is measured on rows the
# network never saw, instead of on its own training data.
x_fit, x_holdout, y_fit, y_holdout = train_test_split(x_standardized, y, test_size = 0.3, random_state = 42)

# Fitting the model

model4.fit(x_fit,y_fit)

# Predicting using trained model

y_predict = model4.predict(x_holdout)

# Printing the metrics (held-out R^2)
print(r2_score(y_holdout,y_predict))

# **Building Neural Networks Model using Optimal Values**

In [None]:
 # Create the ANN regression model for TEY
n_features = x_train.shape[1]

# x_train holds the raw (unscaled) features, so standardisation is done inside
# the network. The statistics are learned from the training rows only, and the
# model can then be fed raw features at evaluation time.
feature_normalizer = keras.layers.Normalization(axis=-1)
feature_normalizer.adapt(np.asarray(x_train, dtype='float32'))

model = Sequential()
model.add(keras.Input(shape=(n_features,)))
model.add(feature_normalizer)
# Defining the first layer of the model
model.add(Dense(units=4, kernel_initializer='uniform', activation='linear'))
model.add(Dropout(0.0))
# Defining the Second layer of the model
model.add(Dense(units=2, kernel_initializer='uniform', activation='linear'))
model.add(Dropout(0.0))
# The output neuron is a single fully connected node.
# TEY is a continuous value, so the output is linear; a sigmoid would confine
# predictions to (0, 1).
model.add(Dense(1, kernel_initializer='uniform', activation='linear'))

# Mean squared error is the regression loss; 'mae' is the readable error metric.
# 'accuracy' is not meaningful for regression and is kept only so that code
# reading history.history['accuracy'] / model.metrics_names[1] keeps running.
model.compile(optimizer='Adam',loss='mean_squared_error', metrics=['accuracy', 'mae'])

# Training the model with best parameters
# NOTE(review): the hidden-layer sizes and activations come from searches that
# were scored as classification; re-run the searches and revisit these values.
history = model.fit(x_train, y_train, validation_split=0.33, batch_size = 10, epochs = 10)

In [None]:
# Evaluate the model on the held-out test rows only; scoring on the full data
# set would mix in the rows the network was trained on.
scores = model.evaluate(x_test, y_test)
# Report the loss and every compiled metric by name (raw values, not percentages).
for metric_name, metric_value in zip(model.metrics_names, scores):
    print("%s: %.4f" % (metric_name, metric_value))

In [None]:
# Names of the values returned by model.evaluate (loss first, then the compiled metrics).
model.metrics_names

In [None]:
# Raw values returned by model.evaluate, in the order of model.metrics_names.
scores

In [None]:
# Visualize training history

# List every series recorded during model.fit (one list of per-epoch values per key).
history.history.keys()

In [None]:
#Summarize history for each tracked metric
import matplotlib.pyplot as plt

# Plot whichever metrics the model was compiled with instead of assuming an
# 'accuracy' key exists; the loss is plotted separately.
tracked_metrics = [key for key in history.history
                   if key != 'loss' and not key.startswith('val_')]
for metric_name in tracked_metrics:
    plt.figure()
    plt.plot(history.history[metric_name], label='train')
    validation_key = 'val_' + metric_name
    if validation_key in history.history:
        # This curve comes from the validation_split passed to fit, not from a test set.
        plt.plot(history.history[validation_key], label='validation')
    plt.title('model ' + metric_name)
    plt.ylabel(metric_name)
    plt.xlabel('epoch')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
#Summarize history for loss
import matplotlib.pyplot as plt
plt.plot(history.history['loss'], label='train')
# 'val_loss' is measured on the validation_split carved out during fit,
# so it is labelled as validation rather than test.
plt.plot(history.history['val_loss'], label='validation')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Layer-by-layer overview of the final network with output shapes and parameter counts.
model.summary()