In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import keras_tuner
import keras

from tensorflow.keras import layers
from keras_tuner import RandomSearch


import warnings
warnings.filterwarnings('ignore')

In [14]:
df=pd.read_csv('Real_Combine.csv')

In [15]:
df

Unnamed: 0,T,TM,Tm,SLP,H,VV,V,VM,PM 2.5
0,7.4,9.8,4.8,1017.6,93,0.5,4.3,9.4,219.720833
1,7.8,12.7,4.4,1018.5,87,0.6,4.4,11.1,182.187500
2,6.7,13.4,2.4,1019.4,82,0.6,4.8,11.1,154.037500
3,8.6,15.5,3.3,1018.7,72,0.8,8.1,20.6,223.208333
4,12.4,20.9,4.4,1017.3,61,1.3,8.7,22.2,200.645833
...,...,...,...,...,...,...,...,...,...
1088,18.1,24.0,11.2,1015.4,56,1.8,15.9,25.9,288.416667
1089,17.8,25.0,10.7,1015.8,54,2.3,9.4,22.2,256.833333
1090,13.9,24.5,11.4,1015.0,95,0.6,8.7,14.8,169.000000
1091,16.3,23.0,9.8,1016.9,78,1.1,7.4,16.5,186.041667


In [16]:
X=df.iloc[:,:-1] ## independent features
y=df.iloc[:,-1] ## dependent features

##### Hyperparameters

* How many number of hidden layers we should have?
* How many number of neurons we should have in hidden layers?
* Learning Rate

In [17]:
def build_model(hp):
    model = keras.Sequential()
    for i in range(hp.Int('num_layers', 2, 20)): # num_layers is a parameter we choose to tune the number of layers in the model
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    model.add(layers.Dense(1, activation='linear'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='mean_absolute_error',
        metrics=['mean_absolute_error'])
    return model

In [19]:
tuner = RandomSearch(
    build_model,
    objective='val_mean_absolute_error',
    max_trials=5,
    executions_per_trial=3,
    directory='project',
    project_name='Air Quality Index')

In [20]:
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 20, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [21]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [22]:
tuner.search(X_train, y_train,
             epochs=5,
             validation_data=(X_test, y_test))

Trial 5 Complete [00h 00m 17s]
val_mean_absolute_error: 63.51962661743164

Best val_mean_absolute_error So Far: 52.173964182535805
Total elapsed time: 00h 01m 05s


In [23]:
tuner.results_summary()

Results summary
Results in project\Air Quality Index
Showing 10 best trials
Objective(name="val_mean_absolute_error", direction="min")

Trial 3 summary
Hyperparameters:
num_layers: 6
units_0: 384
units_1: 32
learning_rate: 0.01
units_2: 288
units_3: 96
units_4: 320
units_5: 160
units_6: 256
units_7: 320
units_8: 224
units_9: 480
units_10: 256
units_11: 160
units_12: 384
units_13: 480
units_14: 96
units_15: 480
units_16: 160
units_17: 256
Score: 52.173964182535805

Trial 2 summary
Hyperparameters:
num_layers: 17
units_0: 512
units_1: 128
learning_rate: 0.01
units_2: 160
units_3: 160
units_4: 32
units_5: 128
units_6: 352
units_7: 128
units_8: 96
units_9: 224
units_10: 352
units_11: 128
units_12: 512
units_13: 448
units_14: 64
units_15: 512
units_16: 448
units_17: 64
Score: 59.23166020711263

Trial 1 summary
Hyperparameters:
num_layers: 17
units_0: 480
units_1: 160
learning_rate: 0.001
units_2: 128
units_3: 512
units_4: 32
units_5: 32
units_6: 288
units_7: 192
units_8: 224
units_9: 512
un