In [35]:
# import the packages
# import the library package 
import pandas 
import numpy

import matplotlib.pyplot as plt 

from sklearn.feature_selection import RFE 
from sklearn.ensemble import ExtraTreesRegressor,RandomForestRegressor 
from sklearn.metrics import mean_absolute_error,explained_variance_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression 
from sklearn.model_selection import cross_val_score

# create your MLP in keras 
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasRegressor 
from sklearn.model_selection import KFold
from keras.optimizers import Adam

# fix random seed for reproducibility 
seed = 7
numpy.random.seed(seed)


In [36]:
# load the dataset 
gas = pandas.read_csv("gas_turbines.csv",delimiter=',')
gas

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
0,6.8594,1007.9,96.799,3.5000,19.663,1059.2,550.00,114.70,10.605,3.1547,82.722
1,6.7850,1008.4,97.118,3.4998,19.728,1059.3,550.00,114.72,10.598,3.2363,82.776
2,6.8977,1008.8,95.939,3.4824,19.779,1059.4,549.87,114.71,10.601,3.2012,82.468
3,7.0569,1009.2,95.249,3.4805,19.792,1059.6,549.99,114.72,10.606,3.1923,82.670
4,7.3978,1009.7,95.150,3.4976,19.765,1059.7,549.98,114.72,10.612,3.2484,82.311
...,...,...,...,...,...,...,...,...,...,...,...
15034,9.0301,1005.6,98.460,3.5421,19.164,1049.7,546.21,111.61,10.400,4.5186,79.559
15035,7.8879,1005.9,99.093,3.5059,19.414,1046.3,543.22,111.78,10.433,4.8470,79.917
15036,7.2647,1006.3,99.496,3.4770,19.530,1037.7,537.32,110.19,10.483,7.9632,90.912
15037,7.0060,1006.8,99.008,3.4486,19.377,1043.2,541.24,110.74,10.533,6.2494,93.227


In [37]:
gas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15039 entries, 0 to 15038
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      15039 non-null  float64
 1   AP      15039 non-null  float64
 2   AH      15039 non-null  float64
 3   AFDP    15039 non-null  float64
 4   GTEP    15039 non-null  float64
 5   TIT     15039 non-null  float64
 6   TAT     15039 non-null  float64
 7   TEY     15039 non-null  float64
 8   CDP     15039 non-null  float64
 9   CO      15039 non-null  float64
 10  NOX     15039 non-null  float64
dtypes: float64(11)
memory usage: 1.3 MB


In [38]:
gas.describe()

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
count,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0,15039.0
mean,17.764381,1013.19924,79.124174,4.200294,25.419061,1083.79877,545.396183,134.188464,12.102353,1.972499,68.190934
std,7.574323,6.41076,13.793439,0.760197,4.173916,16.527806,7.866803,15.829717,1.103196,2.222206,10.470586
min,0.5223,985.85,30.344,2.0874,17.878,1000.8,512.45,100.17,9.9044,0.000388,27.765
25%,11.408,1008.9,69.75,3.7239,23.294,1079.6,542.17,127.985,11.622,0.858055,61.3035
50%,18.186,1012.8,82.266,4.1862,25.082,1088.7,549.89,133.78,12.025,1.3902,66.601
75%,23.8625,1016.9,90.0435,4.5509,27.184,1096.0,550.06,140.895,12.578,2.1604,73.9355
max,34.929,1034.2,100.2,7.6106,37.402,1100.8,550.61,174.61,15.081,44.103,119.89


In [39]:
gas.head()

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
0,6.8594,1007.9,96.799,3.5,19.663,1059.2,550.0,114.7,10.605,3.1547,82.722
1,6.785,1008.4,97.118,3.4998,19.728,1059.3,550.0,114.72,10.598,3.2363,82.776
2,6.8977,1008.8,95.939,3.4824,19.779,1059.4,549.87,114.71,10.601,3.2012,82.468
3,7.0569,1009.2,95.249,3.4805,19.792,1059.6,549.99,114.72,10.606,3.1923,82.67
4,7.3978,1009.7,95.15,3.4976,19.765,1059.7,549.98,114.72,10.612,3.2484,82.311


In [40]:
gas.shape

(15039, 11)

In [41]:
gas.dtypes # datatypes

AT      float64
AP      float64
AH      float64
AFDP    float64
GTEP    float64
TIT     float64
TAT     float64
TEY     float64
CDP     float64
CO      float64
NOX     float64
dtype: object

In [42]:
print("correlation:",gas.corr(method='pearson'))

correlation:             AT        AP        AH  ...       CDP        CO       NOX
AT    1.000000 -0.412953 -0.549432  ... -0.100705 -0.088588 -0.600006
AP   -0.412953  1.000000  0.042573  ...  0.131198  0.041614  0.256744
AH   -0.549432  0.042573  1.000000  ... -0.182010  0.165505  0.143061
AFDP -0.099333  0.040318 -0.119249  ...  0.727152 -0.334207 -0.037299
GTEP -0.049103  0.078575 -0.202784  ...  0.993784 -0.508259 -0.208496
TIT   0.093067  0.029650 -0.247781  ...  0.887238 -0.688272 -0.231636
TAT   0.338569 -0.223479  0.010859  ... -0.744740  0.063404  0.009888
TEY  -0.207495  0.146939 -0.110272  ...  0.988473 -0.541751 -0.102631
CDP  -0.100705  0.131198 -0.182010  ...  1.000000 -0.520783 -0.169103
CO   -0.088588  0.041614  0.165505  ... -0.520783  1.000000  0.316743
NOX  -0.600006  0.256744  0.143061  ... -0.169103  0.316743  1.000000

[11 rows x 11 columns]


In [67]:
# split into input and output values 
x=gas.iloc[:,0:10]
y=gas.iloc[:,10]

In [48]:
print(x)
print(y)

           AT      AP      AH    AFDP  ...     TAT     TEY     CDP      CO
0      6.8594  1007.9  96.799  3.5000  ...  550.00  114.70  10.605  3.1547
1      6.7850  1008.4  97.118  3.4998  ...  550.00  114.72  10.598  3.2363
2      6.8977  1008.8  95.939  3.4824  ...  549.87  114.71  10.601  3.2012
3      7.0569  1009.2  95.249  3.4805  ...  549.99  114.72  10.606  3.1923
4      7.3978  1009.7  95.150  3.4976  ...  549.98  114.72  10.612  3.2484
...       ...     ...     ...     ...  ...     ...     ...     ...     ...
15034  9.0301  1005.6  98.460  3.5421  ...  546.21  111.61  10.400  4.5186
15035  7.8879  1005.9  99.093  3.5059  ...  543.22  111.78  10.433  4.8470
15036  7.2647  1006.3  99.496  3.4770  ...  537.32  110.19  10.483  7.9632
15037  7.0060  1006.8  99.008  3.4486  ...  541.24  110.74  10.533  6.2494
15038  6.9279  1007.2  97.533  3.4275  ...  545.85  111.58  10.583  4.9816

[15039 rows x 10 columns]
0        82.722
1        82.776
2        82.468
3        82.670
4        

In [54]:
# feature selection 
model = ExtraTreesRegressor()
rfe = RFE(model,3)
model1 = rfe.fit(x,y)

print("Numbers of features:",model1.n_features_)
print("Selected features:",model1.support_)
print("Feature ranking :",model1.ranking_)

Numbers of features: 3
Selected features: [ True False False False False  True False  True False False]
Feature ranking : [1 8 5 7 2 1 6 1 4 3]


In [68]:
 # define model 
def create_model():
  model = Sequential()
  model.add(Dense(12,input_dim = 12,kernel_initializer='uniform',activation='relu'))
  model.add(Dense(6,kernel_initializer='uniform',activation='relu'))
  model.add(Dense(1,kernel_initializer='uniform',activation='relu'))

  adam=Adam(learning_rate=0.1)

  # compile model 
  model.compile (loss='mean_squared_error',optimizer = adam)
  return model



In [71]:
# evaluate the model 
estimator = KerasRegressor(build_fn=create_model,epochs=600,batch_size=5,verbose = 0)
kfold = KFold(n_splits=10,random_state=seed)
results = cross_val_score(estimator,x,y,cv=kfold)
print("results")

ValueError: in user code:

    /usr/local/lib/python3.7/dist-packages/keras/engine/training.py:830 train_function  *
        return step_function(self, iterator)
    /usr/local/lib/python3.7/dist-packages/keras/engine/training.py:813 run_step  *
        outputs = model.train_step(data)
    /usr/local/lib/python3.7/dist-packages/keras/engine/training.py:770 train_step  *
        y_pred = self(x, training=True)
    /usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py:989 __call__  *
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /usr/local/lib/python3.7/dist-packages/keras/engine/input_spec.py:248 assert_input_compatibility  *
        raise ValueError(

    ValueError: Input 0 of layer sequential_240 is incompatible with the layer: expected axis -1 of input shape to have value 12 but received input with shape (5, 10)


ValueError: in user code:

    /usr/local/lib/python3.7/dist-packages/keras/engine/training.py:830 train_function  *
  

results


ValueError: in user code:

    /usr/local/lib/python3.7/dist-packages/keras/engine/training.py:830 train_function  *
        return step_function(self, iterator)
    /usr/local/lib/python3.7/dist-packages/keras/engine/training.py:813 run_step  *
        outputs = model.train_step(data)
    /usr/local/lib/python3.7/dist-packages/keras/engine/training.py:770 train_step  *
        y_pred = self(x, training=True)
    /usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py:989 __call__  *
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /usr/local/lib/python3.7/dist-packages/keras/engine/input_spec.py:248 assert_input_compatibility  *
        raise ValueError(

    ValueError: Input 0 of layer sequential_249 is incompatible with the layer: expected axis -1 of input shape to have value 12 but received input with shape (None, 10)


