# Automated Tuning of Neural Networks Hyperparameters using Evolutionary Algorithms

In [1]:
import numpy as np
from collections import Counter

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import os
import random
import pandas as pd
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [5]:
batch_size=100
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.CIFAR10('./data/', train=True, download=True,
        transform=torchvision.transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.CIFAR10('./data/', train=False, download=True,
        transform=torchvision.transforms.ToTensor()),
batch_size=batch_size)

test_size = 10000  # may

batch_size_train = 50
n_epochs = 5

Files already downloaded and verified
Files already downloaded and verified


In [6]:
def EA_parent_init():
    EA_dict={'pooling':['pool_max','pool_avg'],
             'stride':[1,2],
             'padding':[0,1,2],
             'activation':['act_sigm','act_relu'],
            'loss': ['loss_cross','loss_nll'],
            'optimizer':['opt_sgd','opt_adam'] ,
            'kernel_size':[1,2,3,4]}
    l1=len(EA_dict['pooling'])
    l2=len(EA_dict['stride'])
    l3=len(EA_dict['padding'])
    l4=len(EA_dict['activation'])
    l5=len(EA_dict['loss'])
    l6=len(EA_dict['optimizer'])
    l7=len(EA_dict['kernel_size'])
    
    pooling=random.choices(EA_dict['pooling'], [(1/l1)]*l1 , k=1)[0]
    stride=random.choices(EA_dict['stride'], [(1/l2)]*l2 , k=1)[0]
    padding=random.choices(EA_dict['padding'], [(1/l3)]*l3 , k=1)[0]
    activation=random.choices(EA_dict['activation'], [(1/l4)]*l4 , k=1)[0]
    loss=random.choices(EA_dict['loss'], [(1/l5)]*l5 , k=1)[0]
    optimizer=random.choices(EA_dict['optimizer'], [(1/l6)]*l6 , k=1)[0]
    kernel=random.choices(EA_dict['kernel_size'], [(1/l7)]*l7 , k=1)[0]

    parent=[stride, padding ,optimizer,activation,pooling,loss,kernel]
    return parent

def mutation(parent, r=0.2):
    parent_dict={'pooling':['pool_max','pool_avg'],
             'stride':[1,2],
             'padding':[0,1,2],
             'activation':['act_sigm','act_relu'],
            'loss': ['loss_cross','loss_nll'],
            'optimizer':['opt_sgd','opt_adam'] ,
            'kernel_size':[3,4,5,6]}
    l_params= ['stride', 'padding' ,'optimizer','activation',
               'pooling','loss','kernel_size']
    i=0
    child=[]
    for params in l_params:
        l1 =len(parent_dict[params])
        prob=[round(r/(l1-1) ,4)]*l1
        value_parent=parent[i]
        i=i+1
        idx=parent_dict[params].index(value_parent)
        prob[idx]=1-r
        value_child=random.choices(parent_dict[params], prob , k=1)[0]
        child.append(value_child)
    return child

def get_params_fromEA(init=True, parent=None):
    if(init ==True):
        EA_params=EA_parent_init()
    else: #mutating over parent
        EA_params=mutation(parent)
    parent=EA_params
    stride,padding=EA_params[0],EA_params[1] #this will come EA
    opt ,act, pool,loss,kernel_size=EA_params[2],EA_params[3],EA_params[4],EA_params[5],EA_params[6] #this will come EA
    DL_params_dict={
        'cnn1':nn.Conv2d(3, 6, kernel_size=kernel_size, padding=padding),
        'cnn2':nn.Conv2d(6, 16, kernel_size=kernel_size),
        'cnn3':nn.Conv2d(16, 120, kernel_size=kernel_size),
        'cnn4':nn.Conv2d(120, 84, 1),
        'act_sigm': nn.Sigmoid(),
        'act_relu':nn.ReLU(),
        'pool_max':nn.MaxPool2d(2, stride=stride),
        'pool_avg':nn.AvgPool2d(2),
        'cnn_last':nn.Conv2d(84, 10, 1),
        'loss_cross':nn.CrossEntropyLoss(),
        'loss_nll':nn.NLLLoss()
    }
    lenet = nn.Sequential(
    DL_params_dict['cnn1'],
    DL_params_dict[act],
    DL_params_dict[pool],
    DL_params_dict['cnn2'], 
    DL_params_dict[act],
    DL_params_dict[pool],
    DL_params_dict['cnn3'],
    DL_params_dict[act],
    DL_params_dict['cnn4'], 
    DL_params_dict[act],
    DL_params_dict['cnn_last'],
    nn.Flatten())
    
    return lenet ,opt ,DL_params_dict[loss] , parent



In [10]:


def train_DL(best_accuracy,best_model, gene):

    lenet,opt=gene[0],gene[1]
    if opt=='opt_sgd':optimizer=optim.SGD(lenet.parameters(),lr=0.01)
    elif opt=='opt_adam':optimizer= optim.Adam(lenet.parameters())

    loss_fn=gene[2]


    for epoch in range(n_epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            p = lenet(data)
            train_loss = loss_fn(p, target)

            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
        m = 0
        for batch_idx, (data, target) in enumerate(test_loader):
            m = m + torch.sum(torch.argmax(lenet(data), dim=1) == target).item()
    accuracy=m
    print("test", epoch, m, "of", test_size, "correctly classified")
    if(m > best_accuracy):
        best_accuracy=accuracy
        best_model=[lenet , opt, loss_fn]
        torch.save(lenet.state_dict(), "./best_model_so_far.pt")
    return accuracy, best_model 

In [11]:
best_accuracy=0
best_model=None
df=pd.DataFrame(columns=['iteration','stride', 'padding' ,'optimizer','activation',
                         'pooling','loss','kernel','accuracy'])
gene=get_params_fromEA() #parent initialization
parent=gene[-1]
for iterations in range(20):
    print('='*10,'Iteration ',iterations,'\n', '='*10,
          'stride, padding ,optimizer,activation,pooling,loss,kernel', gene[-1])
    try:
        accuracy,model=train_DL(best_accuracy,best_model,gene)
    except:
        print('incompatible model parameters',
             'stride, padding ,optimizer,activation,pooling,loss,kernel', gene[-1])
        accuracy=0
        print('Best accuracy so far and best model', best_accuracy, model)
    
    df.loc[iterations]=[iterations,parent[0],parent[1],parent[2], parent[3],parent[4],parent[5], accuracy]
    
    gene=get_params_fromEA(init=False,parent=parent) #mutation over parent
    
    parent=gene[-1]
    
    if(accuracy>best_accuracy):
        best_accuracy =accuracy
        best_model=gene
            

    


test 4 738 of 10000 correctly classified
test 4 2908 of 10000 correctly classified
test 4 5282 of 10000 correctly classified
test 4 3614 of 10000 correctly classified
test 4 1531 of 10000 correctly classified
test 4 2145 of 10000 correctly classified
test 4 1000 of 10000 correctly classified
test 4 1070 of 10000 correctly classified
test 4 1997 of 10000 correctly classified
test 4 1089 of 10000 correctly classified
test 4 2417 of 10000 correctly classified
test 4 2595 of 10000 correctly classified
test 4 3846 of 10000 correctly classified
test 4 5527 of 10000 correctly classified
test 4 379 of 10000 correctly classified
test 4 1000 of 10000 correctly classified
test 4 375 of 10000 correctly classified
test 4 371 of 10000 correctly classified
test 4 336 of 10000 correctly classified
test 4 121 of 10000 correctly classified


In [12]:
df

Unnamed: 0,iteration,stride,padding,optimizer,activation,pooling,"loss,kernel",accuracy
0,0,2,0,opt_sgd,act_relu,pool_avg,loss_cross,738
1,1,2,0,opt_adam,act_relu,pool_avg,loss_cross,2908
2,2,1,0,opt_adam,act_relu,pool_avg,loss_cross,5282
3,3,2,0,opt_adam,act_sigm,pool_avg,loss_cross,3614
4,4,2,2,opt_adam,act_sigm,pool_max,loss_cross,1531
5,5,2,2,opt_adam,act_sigm,pool_avg,loss_cross,2145
6,6,2,2,opt_adam,act_sigm,pool_avg,loss_nll,1000
7,7,1,2,opt_adam,act_sigm,pool_max,loss_cross,1070
8,8,2,2,opt_adam,act_sigm,pool_max,loss_cross,1997
9,9,2,2,opt_adam,act_sigm,pool_max,loss_cross,1089


In [23]:
df.to_csv('model_20iterations.csv')

In [14]:
df=df.sort_values(by='accuracy',ascending=False)

In [16]:
kernel=[3,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,3]
df['kernel']=kernel

In [19]:

df=df.rename(columns={'loss,kernel':'loss'})

In [22]:
df=df[['iteration','stride', 'padding' ,'optimizer','activation',
                         'pooling','loss','kernel','accuracy']]

In [24]:
df

Unnamed: 0,iteration,stride,padding,optimizer,activation,pooling,loss,kernel,accuracy
13,13,2,0,opt_adam,act_relu,pool_max,loss_cross,3,5527
2,2,1,0,opt_adam,act_relu,pool_avg,loss_cross,4,5282
12,12,2,0,opt_adam,act_sigm,pool_max,loss_cross,5,3846
3,3,2,0,opt_adam,act_sigm,pool_avg,loss_cross,5,3614
1,1,2,0,opt_adam,act_relu,pool_avg,loss_cross,5,2908
11,11,2,2,opt_adam,act_sigm,pool_max,loss_cross,5,2595
10,10,2,2,opt_adam,act_sigm,pool_max,loss_cross,5,2417
5,5,2,2,opt_adam,act_sigm,pool_avg,loss_cross,5,2145
8,8,2,2,opt_adam,act_sigm,pool_max,loss_cross,5,1997
4,4,2,2,opt_adam,act_sigm,pool_max,loss_cross,5,1531
