In [None]:
!pip install TPOT
import tensorflow
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from hyperopt import hp,fmin,tpe,STATUS_OK,Trials
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
import numpy as np
from tpot import TPOTClassifier
import time

# **Loading the dataset**

In [None]:
# Loading the iris flower dataset
iris = datasets.load_iris()
x = iris.data 
y = iris.target

In [None]:
# Loading the Fashion MNIST data
# (fx_train, fy_train), (fx_test,fy_test) = tensorflow.keras.datasets.fashion_mnist.load_data()
# fx_train = fx_train.reshape(fx_train.shape[0],fx_train.shape[1] * fx_train.shape[2])
# fx_test = fx_test.reshape(fx_test.shape[0],fx_test.shape[1] * fx_test.shape[2])

# **Splitting and preprocessing the data**

In [None]:
from sklearn import preprocessing
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25 , random_state = 21)
x_train = preprocessing.normalize(x_train)
x_test = preprocessing.normalize(x_test)
x_train = x_train*2 - 1
x_test = x_test*2 - 1

# **Classification Problem with Random Forest Classifier by selecting hyper-parameters ourself**

In [None]:
rfc = RandomForestClassifier(criterion = "gini", max_depth = 50, max_features = 'log2', 
                               min_samples_leaf = 0.25, min_samples_split = 0.5, n_estimators = 50,
                             )
rfc.fit(x_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=50, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=0.25, min_samples_split=0.5,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
pred = rfc.predict(x_test)

In [None]:
print("Accuracy = ", accuracy_score(y_test, pred))

Accuracy =  0.7368421052631579


In [None]:
rfc.get_params

<bound method BaseEstimator.get_params of RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=50, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=0.25, min_samples_split=0.5,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)>

# **Bayesian optimization for hyperparameter search in Random Forest Classifier**

In [None]:
def Optimization(search_space):

  estimators = search_space['n_estimators']
  criterion = search_space['criterion']
  depth = search_space['max_depth']
  features = search_space['max_features']
  leaf = search_space['min_samples_leaf']
  split = search_space['min_samples_split']

  rfc_model = RandomForestClassifier(criterion = criterion, max_depth = depth, max_features = features, 
                               min_samples_leaf = leaf, min_samples_split = split, n_estimators = estimators,
                              )
    
  accuracy = cross_val_score(rfc_model, x_train, y_train, cv = 5).mean()
  return {'loss': -accuracy, 'status': STATUS_OK }

Defining the search space for bayesian optimization

In [None]:
c = ['entropy', 'gini']
f = ['auto', 'sqrt','log2', None]
d = np.arange(start = 10 , stop = 1001 , step = 10)
n = np.arange(start = 50 , stop = 1501 , step = 50)


search_space = {'criterion': hp.choice('criterion', c ),
        'max_depth': hp.choice('max_depth', d),
        'max_features': hp.choice('max_features', f),
        'min_samples_leaf': hp.uniform('min_samples_leaf', 0, 0.5),
        'min_samples_split' : hp.uniform ('min_samples_split', 0, 1),
        'n_estimators' : hp.choice('n_estimators', n)
    }

In [None]:
params = fmin(fn= Optimization,  space= search_space, algo= tpe.suggest, max_evals = 100, trials= Trials())

100%|██████████| 100/100 [08:48<00:00,  5.29s/it, best loss: -0.9826086956521738]


In [None]:
params

{'criterion': 0,
 'max_depth': 66,
 'max_features': 2,
 'min_samples_leaf': 0.1918227402950685,
 'min_samples_split': 0.0845171143668161,
 'n_estimators': 19}

In [None]:
rfc = RandomForestClassifier( n_estimators = n[params['n_estimators']] ,criterion = c[params['criterion']] ,
                             max_depth = d[params['max_depth']] , max_features = f[params['max_features']] ,
                             min_samples_leaf = params['min_samples_leaf'] , min_samples_split = params['min_samples_split']
                             )

In [None]:
rfc.fit(x_train , y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=670, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=0.1918227402950685,
                       min_samples_split=0.0845171143668161,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
pred = rfc.predict(x_test)
print("Accuracy = ", accuracy_score(y_test, pred))

Accuracy =  0.8947368421052632


# **Genetic Algorithm for Hyperparameter search in Random Forest Classifier**

Defining the search space

In [None]:
c = ['entropy', 'gini']
f = ['auto', 'sqrt','log2', None]
d = np.arange(start = 10 , stop = 1000 , step = 10)
n = np.arange(start = 50 , stop = 1501 , step = 50)
split = np.arange(0, 1, 0.001)
leaf = np.arange(0, 0.5, 0.001)

search_space = {'n_estimators': n,'criterion': c, 'max_features': f, 'max_depth': d,
                'min_samples_split': split, 'min_samples_leaf': leaf
                }

In [None]:
rfc_tpot = TPOTClassifier(generations= 10, population_size= 24, offspring_size= 12,
                                 verbosity= 2, early_stop= 12,
                                 config_dict={'sklearn.ensemble.RandomForestClassifier': search_space}, 
                                 cv = 5, scoring = 'accuracy')

In [None]:
rfc_tpot.fit(x_train , y_train)

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=144.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 0.9735177865612649

Generation 2 - Current best internal CV score: 0.9826086956521738

Generation 3 - Current best internal CV score: 0.9826086956521738

Generation 4 - Current best internal CV score: 0.9826086956521738

Generation 5 - Current best internal CV score: 0.9826086956521738

Generation 6 - Current best internal CV score: 0.9826086956521738

Generation 7 - Current best internal CV score: 0.9826086956521738

Generation 8 - Current best internal CV score: 0.9826086956521738

Generation 9 - Current best internal CV score: 0.9826086956521738

Generation 10 - Current best internal CV score: 0.9826086956521738

Best pipeline: RandomForestClassifier(CombineDFs(input_matrix, input_matrix), criterion=gini, max_depth=90, max_features=auto, min_samples_leaf=0.203, min_samples_split=0.163, n_estimators=1250)


TPOTClassifier(config_dict={'sklearn.ensemble.RandomForestClassifier': {'criterion': ['entropy',
                                                                                      'gini'],
                                                                        'max_depth': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260,
       270, 280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380, 390,
       400, 410, 420, 430, 440, 450, 460, 470, 480, 490, 500, 510, 520,
       530, 540, 550, 560, 570, 580, 590, 60...
               crossover_rate=0.1, cv=5, disable_update_check=False,
               early_stop=12, generations=10, log_file=None,
               max_eval_time_mins=5, max_time_mins=None, memory=None,
               mutation_rate=0.9, n_jobs=1, offspring_size=12,
               periodic_checkpoint_folder=None, population_size=24,
               random_state=None, scoring='accuracy', su

In [None]:
accuracy = rfc_tpot.score(x_test, y_test)
print(accuracy)

0.9210526315789473


# **Classification Problem with deep neural network architecture by selecting hyper-parameters ourself**

Defining the search space

In [None]:
!pip install hyperas

In [None]:
!pip install np_utils

Collecting np_utils
[?25l  Downloading https://files.pythonhosted.org/packages/b6/18/5704a782fd72727a9e63198fcc76fadb86975f45bcdf579c10f668329508/np_utils-0.5.12.1.tar.gz (61kB)
[K     |█████▍                          | 10kB 15.8MB/s eta 0:00:01[K     |██████████▊                     | 20kB 21.4MB/s eta 0:00:01[K     |████████████████                | 30kB 23.6MB/s eta 0:00:01[K     |█████████████████████▍          | 40kB 26.1MB/s eta 0:00:01[K     |██████████████████████████▊     | 51kB 28.1MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 6.9MB/s 
Building wheels for collected packages: np-utils
  Building wheel for np-utils (setup.py) ... [?25l[?25hdone
  Created wheel for np-utils: filename=np_utils-0.5.12.1-cp37-none-any.whl size=57133 sha256=8dc9a2a500dad42413e5b00f3206b9cd1265b05c51ab1a223c2859fd13eaf7eb
  Stored in directory: /root/.cache/pip/wheels/92/4b/81/206efd0d01330a96f3aebe5021d2d5f0b264b7ade827c306ef
Successfully built np-utils
Installing col

In [None]:
import hyperas
from hyperas import optim
from hyperas.distributions import choice, uniform
from keras.utils.np_utils import to_categorical
import keras
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation

In [None]:
classes = 3
y_train = to_categorical(y_train, classes)
y_test = to_categorical(y_test, classes)

# **Defining the model**

In [None]:
initial_model = Sequential()

initial_model.add(Dense(512, input_shape=(4,)))
initial_model.add(Activation("relu"))
initial_model.add(Dropout(0.2))

initial_model.add(Dense(256))
initial_model.add(Activation("relu"))
initial_model.add(Dropout(0.2))

initial_model.add(Dense(classes))
initial_model.add(Activation('softmax'))

initial_model.compile(loss='categorical_crossentropy', metrics=['accuracy'],optimizer="adam")
initial_model.fit(x_train, y_train,
              batch_size=64,
              epochs=20,              
              validation_split = 0.2)



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fe28670c050>

In [None]:
score, acc = initial_model.evaluate(x_test, y_test, verbose=1)



# **Bayesian Optimization for Hyperparameter search on deep neural network architecture**

Defining the search space

In [None]:
hlayers = [ 2, 3 , 4 , 5]
hidden_units = [ 64, 128, 256, 512 , 1024 ]
activation = [ 'relu' , 'sigmoid']
optimizer = ['adam', 'rmsprop', 'sgd']
batch_size = [ 64, 128, 256 , 512]
lr = [ 0.001, 0.005, 0.01, 0.05, 0.1, 0.5]
epochs = np.arange(start = 10, stop = 101, step = 10)

In [None]:
space = {'Dense1': hp.choice('Dense1', hidden_units ),
         'Dense2': hp.choice('Dense2', hidden_units ),
         'Dense3': hp.choice('Dense3', hidden_units ),
         'Dense4': hp.choice('Dense4', hidden_units ),
         'Dense5': hp.choice('Dense5', hidden_units ),
         'Dense6': hp.choice('Dense6', hidden_units ),
         'Activation1': hp.choice('Activation1', activation ),
         'Activation2': hp.choice('Activation2', activation ),
        #  'Activation3': hp.choice('Activation3', activation ),

         'Dropout1': hp.uniform('Dropout1', 0, 1),
         'Dropout2': hp.uniform('Dropout2', 0, 1),
         'Dropout3': hp.uniform('Dropout3', 0, 1),
         'Dropout4': hp.uniform('Dropout4', 0, 1),
         'Dropout5': hp.uniform('Dropout5', 0, 1),
         'Dropout6': hp.uniform('Dropout6', 0, 1),
         
         'hidden_layers': hp.choice('hidden_layers', hlayers),
         'Optimizer': hp.choice('Optimizer', optimizer),

         'learning_rate': hp.choice('learning_rate', lr),
         'epochs': hp.choice('epochs' , epochs),
        'batch_size': hp.choice('batch_size' , batch_size)

        
    }

In [None]:
def Optimization_deep(space):
  model = Sequential()

  model.add(Dense(space['Dense1'], input_shape=(4,)))
  model.add(Activation(space['Activation1']))
  model.add(Dropout(space['Dropout1']))

  model.add(Dense(space['Dense2']))
  model.add(Activation(space['Activation2']))
  model.add(Dropout(space['Dropout2']))
    
  if space['hidden_layers'] >=2:
    model.add(Dense(space['Dense3']))
    model.add(Activation(space['Activation2']))
    model.add(Dropout(space['Dropout3']))

  if space['hidden_layers'] >=3:
    model.add(Dense(space['Dense4']))
    model.add(Activation(space['Activation2']))
    model.add(Dropout(space['Dropout4']))

  if space['hidden_layers'] >=4:
    model.add(Dense(space['Dense5']))
    model.add(Activation(space['Activation2']))
    model.add(Dropout(space['Dropout5']))

  if space['hidden_layers'] == 5:
    model.add(Dense(space['Dense6']))
    model.add(Activation(space['Activation2']))
    model.add(Dropout(space['Dropout6']))
        
  model.add(Dense(classes))
  model.add(Activation('softmax'))

  adam = keras.optimizers.Adam(lr=space['learning_rate'])
  rmsprop = keras.optimizers.RMSprop(lr=space['learning_rate'])
  sgd = keras.optimizers.SGD(lr=space['learning_rate'])

  temp = space['Optimizer']
  if temp == 'adam':
      optim = adam
  elif temp == 'rmsprop':
      optim = rmsprop
  else:
      optim = sgd

  model.compile(loss='categorical_crossentropy', metrics=['accuracy'],optimizer=optim)
  model.fit(x_train, y_train,
              batch_size = space['batch_size'],
              epochs=space['epochs'],              
              validation_split = 0.2)
  score, acc = model.evaluate(x_test, y_test, verbose=1)
  print('Test accuracy:', acc)
  return {'loss': -acc, 'status': STATUS_OK, 'model': model}



In [None]:
params = fmin(fn= Optimization_deep, space = space, algo= tpe.suggest, max_evals = 50, trials= Trials())

Epoch 1/70
  0%|          | 0/50 [00:00<?, ?it/s, best loss: ?]

  "The `lr` argument is deprecated, use `learning_rate` instead.")



 - ETA: 0s - loss: 1.1289 - accuracy: 0.3594

 - 1s 203ms/step - loss: 3.6227 - accuracy: 0.3258 - val_loss: 1.3379 - val_accuracy: 0.3913

Epoch 2/70
 - ETA: 0s - loss: 1.3057 - accuracy: 0.3594

 - 0s 50ms/step - loss: 1.3476 - accuracy: 0.3596 - val_loss: 2.8259 - val_accuracy: 0.1739

Epoch 3/70
 - ETA: 0s - loss: 2.0436 - accuracy: 0.3906

 - 0s 50ms/step - loss: 2.1648 - accuracy: 0.3708 - val_loss: 1.8281 - val_accuracy: 0.4348

Epoch 4/70
 - ETA: 0s - loss: 1.8137 - accuracy: 0.3750

 - 0s 49ms/step - loss: 1.6675 - accuracy: 0.3820 - val_loss: 1.1433 - val_accuracy: 0.3913

Epoch 5/70
 - ETA: 0s - loss: 1.3949 - accuracy: 0.3125


In [None]:
params

Testing neural network with the hyperparameters selected by Bayesian optimization

In [None]:
final_model = Sequential()

final_model.add(Dense(hidden_units[params['Dense1']], input_shape=(4,)))
final_model.add(Activation(activation[params['Activation1']]))
final_model.add(Dropout(params['Dropout1']))

final_model.add(Dense(hidden_units[params['Dense2']]))
final_model.add(Activation(activation[params['Activation2']]))
final_model.add(Dropout(params['Dropout2']))
    
if space['hidden_layers'] >=2:
  final_model.add(Dense(hidden_units[params['Dense3']]))
  final_model.add(Activation(activation[params['Activation3']]))
  final_model.add(Dropout(params['Dropout3']))

if space['hidden_layers'] >= 3:
  final_model.add(Dense(hidden_units[params['Dense4']]))
  final_model.add(Activation(activation[params['Activation3']]))
  final_model.add(Dropout(params['Dropout4']))

if space['hidden_layers'] >= 4:
  final_model.add(Dense(hidden_units[params['Dense5']]))
  final_model.add(Activation(activation[params['Activation3']]))
  final_model.add(Dropout(params['Dropout5']))

if space['hidden_layers'] == 5:
  final_model.add(Dense(hidden_units[params['Dense6']]))
  final_model.add(Activation(activation[params['Activation3']]))
  final_model.add(Dropout(params['Dropout6']))
        
final_model.add(Dense(classes))
final_model.add(Activation('softmax'))

adam = keras.optimizers.Adam(lr=lr[params['learning_rate']])
rmsprop = keras.optimizers.RMSprop(lr=lr[params['learning_rate']])
sgd = keras.optimizers.SGD(lr=lr[params['learning_rate']])

temp = optimizer[params['Optimizer']]
if temp == 'adam':
    optim = adam
elif temp == 'rmsprop':
    optim = rmsprop
else:
    optim = sgd

final_model.compile(loss='categorical_crossentropy', metrics=['accuracy'],optimizer=optim)
final_model.fit(x_train, y_train,
            batch_size= batch_size[params['batch_size']],
            epochs = epochs[params['epochs']],              
            validation_split = 0.2)


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fb3d6665ed0>

In [2]:
score, acc = final_model.evaluate(x_test, y_test, verbose=1)
print('Test accuracy:', acc)

0.8953420958518982


# **Genetic Algorithm for hyperparameter search in deep neural network architecture**

In [None]:
hlayers = [ 2,3]
hidden_units = [ 64, 128, 256, 512 , 1024 ]
activation = [ 'relu' , 'logistic']
optimizer = ['adam', 'lbfgs', 'sgd']
batch_size = [ 32, 64, 128, 256 , 512]
lr = [ 0.001, 0.005, 0.01, 0.05, 0.1, 0.5]

In [None]:
hidden_layer_sizes = []
for i in hidden_units:
  hidden_layer_sizes.append((i,))

In [None]:
for i in hidden_units:
  for j in hidden_units:
    hidden_layer_sizes.append((i,j))

In [None]:
for i in hidden_units:
  for j in hidden_units:
    for k in hidden_units:
      hidden_layer_sizes.append((i,j,k))

Defining the search space

In [None]:
search_space = {'hidden_layer_sizes': hidden_layer_sizes,
                'activation': activation,
                'solver': optimizer,
                'learning_rate_init': lr,
                'batch_size' : batch_size
                }

In [None]:
NN_tpot = TPOTClassifier(generations = 5, population_size= 12, offspring_size= 6,
                                 verbosity= 2, early_stop= 12,
                                 config_dict={'sklearn.neural_network.MLPClassifier': search_space}, 
                                 cv = 5, scoring = 'accuracy')

In [None]:
NN_tpot.fit(x_train, y_train)

In [None]:
accuracy = NN_tpot.score(x_test, y_test)
print(accuracy)

0.9473684210526315
