In [31]:
from pprint import pprint
from collections import defaultdict, OrderedDict
from random import choice, seed, randint
from tqdm import tqdm
from itertools import repeat, product, combinations, cycle
from pathlib import Path
from string import ascii_lowercase

import numpy as np
import pandas as pd

from torch import optim
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
from torch import (load, amax as pt_amax, max as pt_max, ones, save, no_grad, stack, numel, tensor, 
                   manual_seed, sigmoid, tanh, add, mul, sub, div, amin as pt_amin, cat,
                  maximum, minimum, device, cuda, rand, prod, median, log as pt_log, round as pt_round,
                  isnan, flatten, mean)
from torch import nn
from torch.nn import functional as F
from torch.optim import Adam
#from torchviz import make_dot
import torchvision
import torchvision.transforms as transforms

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

from deap import creator as ga_cr, base as ga_b, algorithms as ga_algo, tools as ga_t

In [32]:
#np.set_printoptions(suppress=True)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [33]:
device = device("cuda" if cuda.is_available() else "cpu")
# device = "cpu"
print("==> Device:", device)

==> Device: cpu


In [34]:
# Data
#X_train, y_train = load('/kaggle/input/pytorch-mnist/training.pt')

# This code cut and pasted inside eval fun

#X_test, y_test = load('/kaggle/input/pytorch-mnist/test.pt')
#X_train.shape, y_train.shape, X_test.shape, y_test.shape
#num = randint(1, len(img_F))
#img = img_F[num][0]
#imshow(img)
#img.shape

In [35]:
class NET(nn.Module):
    def __init__(self, 
                 batch_size,
                 params={
                     #
                    'feature_maps': 2, 
                    'f_kernel_size': 5, 
                    'f_pad_size': 2,
                    'pools': 'MaxPool2d', 
                    'p_kernel_size': 3, 
                    'f_activs': 'ELU',
                     #
                    #'f_maps_h': 2, 
                    'f_kernel_size_h': 5, 
                    'f_pad_size_h': 2,
                    'pools_h': 'MaxPool2d', 
                    'p_kernel_size_h': 3, 
                    'f_activs_h': 'ELU',
                    'times_hidden': 2,
                     #
                    'linear_dim': 10, 
                    'l_activs': 'ELU',
                 },
                 outputs_dim=5,
                ):
        super().__init__()
        
        self.batch_size = batch_size
        
        # inputs model
        self.f_maps = params['feature_maps']
        self.f_k_size = params['f_kernel_size']
        self.f_pad_size = params['f_pad_size']
        self.pool = params['pools']
        self.p_k_size = params['p_kernel_size']
        self.f_activs = params['f_activs']
        
        # hidden model
        #self.f_maps_h = params['f_maps_h']
        self.f_k_size_h = params['f_kernel_size_h']
        self.f_pad_size_h = params['f_pad_size_h']
        self.pool_h = params['pools_h']
        self.p_k_size_h = params['p_kernel_size_h']
        self.f_activs_h = params['f_activs_h']
        self.times_hidden = params['times_hidden']
        
        # outputs model
        self.linear_dim = params['linear_dim']
        self.l_activs = params['l_activs']
        self.outputs_dim = outputs_dim  
        
        self.inputs_model = nn.Sequential(
            nn.Conv2d(3, 
                      self.f_maps, 
                      self.f_k_size, 
                      padding=(self.f_pad_size, self.f_pad_size),
                      padding_mode='reflect'),
            nn.Dropout2d(p=0.25),
            nn.__getattribute__(self.f_activs)(),
            nn.__getattribute__(self.pool)(kernel_size=self.p_k_size, 
                                           stride=1))
        
        self.hidden_model = nn.Sequential(
            nn.Conv2d(self.f_maps, 
                      self.f_maps, 
                      self.f_k_size_h, 
                      padding=(self.f_pad_size_h, self.f_pad_size_h),
                      padding_mode='reflect'),
            nn.Dropout2d(p=0.25),
            nn.__getattribute__(self.f_activs_h)(),
            nn.__getattribute__(self.pool_h)(kernel_size=self.p_k_size_h, 
                                             stride=1))
        
        self._hidden_to_output_helper()
                
        self.output_model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.hidden_model_out.shape[1], self.linear_dim),
            nn.__getattribute__(self.l_activs)(),
            nn.Softmax(dim=-1)
        )
        
    def _hidden_to_output_helper(self):
        self.hidden_model_out = self.inputs_model(ones(self.batch_size,3,48,48))
        for _ in range(self.times_hidden):
            self.hidden_model_out = self.hidden_model(self.hidden_model_out)
        self.hidden_model_out = nn.Flatten()(self.hidden_model_out)
        
    def forward(self, x):
        """Forward"""
        res = self.inputs_model(x)
        for _ in range(self.times_hidden):
            res = self.hidden_model(res)
        res = self.output_model(res)
        return res
    
    def count_weights_biases(self):
        return int(sum(p.numel() for p in self.parameters() if p.requires_grad))
    
#net = NET(batch_size=16).train()
#print(f'forward: {net(ones(16, 3, 48, 48).float()).shape}')

In [36]:
class GA_Pytorch():
    def __init__(self, 
                 params, 
                 eval_func,
                 eval_weights,
                 #X_train,
                 #X_test,
                 #y_train,
                 #y_test,
                 #lr=0.0001,
                 sel_tournsize=2, 
                 cx_uniform_prob=0.5, 
                 mut_shuffle_idx_prob=0.1, 
                 n_pop=20, 
                 n_gen=10, 
                 n_hof=5, 
                 cx_prob=0.5, 
                 mut_prob=0.1, 
                 n_jobs=1
                ):
        self.params = params
        self.eval_func = eval_func
        self.eval_weights = eval_weights
        
        #self.X_train = X_train
        #self.X_test = X_test
        #self.y_train = y_train
        #self.y_test = y_test
        #self.image_folder = image_folder
        #self.batch_size = batch_size
        #self.lr = lr
        
        self.sel_tournsize = sel_tournsize
        self.cx_uniform_prob = cx_uniform_prob
        self.mut_shuffle_idx_prob = mut_shuffle_idx_prob
        self.n_pop = n_pop
        self.n_gen = n_gen
        self.n_hof = n_hof
        self.cx_prob = cx_prob
        self.mut_prob = mut_prob
        
        self.n_jobs = n_jobs

        self._pad_params()
        self._create_fitness_and_indiv()
        self._register_indiv_and_pop_generators()
        self._register_eval_func()
        self._register_selection_crossover_mutation_methods()

    def _pad_params(self):
        """Pad params for crossover shuffle idx method"""
        assert isinstance(self.params, dict), 'Params must be a dict, i.e. estimator.get_params()'
        params_count = {k: len(v) for k,v in self.params.items()}
        max_length, max_key = -99, ''
        for k, v in params_count.items():
            if v <= max_length:
                continue
            else:
                max_key = k
                max_length = v
        assert isinstance(max_length, int), 'The max length between all params must be an int'
        # cycle through params for max length param, otherwise infinite cycle
        values_padded = (cycle(v) if k!=max_key else v for k,v in self.params.items())
        values_padded = zip(*values_padded)  # ('a', 1, 14), ('b', 2, 16), ('c', 3, 16) ...
        values_padded = zip(*values_padded)  # ('a', 'b', 'c'), (1, 2, 3), (14, 15, 16)...
        padded_params = {}
        for k, v in zip(self.params, values_padded):
            padded_params[k] = v
        self.padded_params = padded_params
        print('Params padded')

    def _create_fitness_and_indiv(self):
        """Create GA individual and fitness entities (classes)"""
        ga_cr.create('Fitness', ga_b.Fitness, weights=self.eval_weights)
        ga_cr.create('Individual', list, fitness=ga_cr.Fitness)
        print('GA entities created')

    def _gen_params_to_ga(self):
        """Generate index for each param for individual"""
        max_dict = len(self.padded_params)
        max_length = len(list(self.padded_params.values())[0])
        idxs = [randint(0, max_length-1) for _ in range(max_dict)]
        return idxs
    
    def _register_indiv_and_pop_generators(self):
        """Register GA individual and population generators"""
        self.tb = ga_b.Toolbox()

        if self.n_jobs > 1:
            from multiprocessing import Pool
            pool = Pool()
            self.tb.register("map", pool.map)

        self.tb.register("individual", ga_t.initIterate, ga_cr.Individual, self._gen_params_to_ga)
        #print('indiv', self.tb.individual())
        self.tb.register("population", ga_t.initRepeat, list, self.tb.individual)
        #print('population', self.tb.population(n=2))
        print('GA entities\' methods registered')
        
    def _register_eval_func(self):
        """Set GA evaluate individual function"""
        self.tb.register("evaluate",
                        self.eval_func,
                        padded_params=self.padded_params,
                        #image_folder=self.image_folder,
                        #X_train=self.X_train,
                        #X_test=self.X_test, 
                        #y_train=self.y_train, 
                        #y_test=self.y_test,
                        #batch_size=self.batch_size,
                        #lr=self.lr
                        )
        #print(list(self.tb.evaluate(indiv) for indiv in self.tb.population(3)))
        print('GA eval function registered')
    
    def _register_selection_crossover_mutation_methods(self):
        self.tb.register("select", ga_t.selTournament, tournsize=self.sel_tournsize)
        self.tb.register("mate", ga_t.cxUniform, indpb=self.cx_uniform_prob)
        self.tb.register("mutate", ga_t.mutShuffleIndexes, indpb=self.mut_shuffle_idx_prob)
        print('GA sel-cx-mut methods registered')
        
    def run_ga_search(self):
        """GA Search"""
        pop = self.tb.population(n=self.n_pop)
        hof = ga_t.HallOfFame(self.n_hof)

        # Stats stdout
        #stats = ga_t.Statistics(lambda ind: ind.fitness.values )
        stats1 = ga_t.Statistics(lambda ind: ind.fitness.values[0] )
        stats2 = ga_t.Statistics(lambda ind: ind.fitness.values[1] )
        stats3 = ga_t.Statistics(lambda ind: ind.fitness.values[2] )
        stats = ga_t.MultiStatistics(accuracy=stats1, risk=stats2, complexity=stats3)
        stats.register("median", np.median)
        #stats.register("std", np.std)
        #stats.register("min", np.min)
        #stats.register("max", np.max)

        # History
        #hist = tools.History()
        #toolbox.decorate("select", hist.decorator)
        #tb.decorate("mate", hist.decorator)
        #tb.decorate("mutate", hist.decorator)
        #hist.update(pop)

        # GA Run
        pop, log = ga_algo.eaSimple(pop, self.tb, cxpb=self.cx_prob, 
                                    mutpb=self.mut_prob, ngen=self.n_gen, 
                                    stats=stats, halloffame=hof, verbose=True)
        
        # Convert back params
        hof_ = {}
        for i in range(self.n_hof):
            hof_['hof_' + str(i)] = self._ga_to_params(hof[i])

        return pop, log, hof_
    
    def _ga_to_params(self, idx_params):
        """Convert back idx to params"""
        res = {}
        for (k,v), idx in zip(self.padded_params.items(), idx_params):
            res[k] = v[idx]
        return res

In [37]:
net_params = {
    # image_folder
    'padding_mode': ['constant', 'edge', 'reflect', 'symmetric'],
    
    # inputs model
    'feature_maps': np.linspace(5, 7, 3).astype(int),
    'f_kernel_size': np.linspace(1, 3, 3).astype(int),
    'f_pad_size': np.linspace(2, 4, 3).astype(int),
    'pools': ['MaxPool2d', 'AvgPool2d'],
    'p_kernel_size': np.linspace(6, 8, 3).astype(int),
    'f_activs': ['ReLU', 'CELU', 'SELU', 'ELU', 'Softsign'],
    
    # hidden model
    'f_maps_h': np.linspace(8, 10, 3).astype(int),
    'f_kernel_size_h': np.linspace(2, 4, 3).astype(int),
    'f_pad_size_h': np.linspace(1, 3, 3).astype(int),
    'pools_h': ['MaxPool2d', 'AvgPool2d'],
    'p_kernel_size_h': np.linspace(4, 6, 3).astype(int),
    'f_activs_h': ['ReLU', 'CELU', 'SELU', 'ELU', 'Softsign'],
    'times_hidden': np.linspace(1, 2, 2).astype(int),
    
    # output model
    'linear_dim': np.linspace(16, 32, 16).astype(int),
    'l_activs': ['ReLU', 'CELU', 'SELU', 'ELU', 'Softsign'],
    
    # learning
    'batch_size': np.linspace(8, 64, 10).astype(int).tolist(),
    'lr': np.linspace(0.0001, 0.01, 10),
}

def net_eval_indiv(individual, padded_params):
    """Evaluate individual's genes (estimator's params)"""
    # Params
    indiv_params = {k : list(v)[idx] for (k,v), idx in zip(padded_params.items(), individual)}
    image_folder_params = {k:v for k,v in indiv_params.items() if k=='padding_mode'}
    net_params = {k:v for k,v in indiv_params.items() if k in [
        'feature_maps', 'f_kernel_size', 'f_pad_size', 'pools', 'p_kernel_size',
        'f_activs', 'f_maps_h', 'f_kernel_size_h', 'f_pad_size_h', 'pools_h', 
        'p_kernel_size_h', 'f_activs_h', 'times_hidden', 'linear_dim', 'l_activs'
    ]}
    learning_params = {k:v for k,v in indiv_params.items() if k in ['batch_size', 'lr']}    
    
    # Data
    trans = transforms.Compose([
                transforms.Resize(size=(64,64)), 
                # transforms.RandomRotation(degrees=[-5, 5], ),
                transforms.RandomCrop(size=(48,48), padding_mode=image_folder_params['padding_mode']),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    image_folder = ImageFolder('/kaggle/input/flower-photos-by-the-tensorflow-team/flower_photos', 
                        transform=trans)

    # Net
    try:
        net = NET(batch_size=learning_params['batch_size'], params=net_params)
        net = net.to(device)

        # Optimizer
        optimizer = Adam(net.parameters(), lr=learning_params['lr'])
        criterion = nn.NLLLoss()

        # Train
        #train_ds = DS(X_train, y_train)  # TODO refactor out
        train_dl = DataLoader(image_folder,
                            batch_size=learning_params['batch_size'],
                            shuffle=True,
                            num_workers=1,
                            drop_last=True)

        for epoch in range(1):
            #running_loss = []
            train_correct = 0
            train_total = 0
            for i, (inputs, labels) in enumerate(train_dl):
                if i <= 25:
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    outputs = net(inputs)
                    outputs = pt_log(outputs)

                    optimizer.zero_grad()
                    loss = criterion(outputs, labels).mean()
                    loss.backward()
                    optimizer.step()

                    # print statistics
                    #running_loss.append(loss.item())
                    _, predicted = pt_max(outputs.data, 1)
                    train_total += labels.size(0)
                    train_correct += (predicted == labels).sum().item()
                    train_accuracy = train_correct / train_total * 100
                    #print(f'TRAIN {train_accuracy:^5.2f} %', end=' ')
                else:
                    break
    except BaseException as e:
        print(e)
        return (0.01, 10, 1000000,)
        
    # Eval
    """with no_grad():
        net = net.eval()
        test_ds = DS(X_test, y_test)  # TODO refactor out
        test_dl = DataLoader(test_ds,
                            batch_size=batch_size,
                            shuffle=True,
                            drop_last=True)
        #running_loss = []
        test_correct = 0
        test_total = 0
        for i, (inputs, labels) in enumerate(test_dl):
            if i <= 100:
                #inputs = inputs.cuda()
                #labels = labels.cuda()
                outputs = net(inputs)

                # print statistics
                #running_loss.append(loss.item())
                _, predicted = pt_max(outputs.data, 1)
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()
                test_accuracy = test_correct / test_total * 100
            else:
                break
        #print(f'TEST {test_accuracy:^5.2f} %')"""
        
    # Risk
    inputs = inputs.to(device)
    risk = mean(prod(net(inputs)*10, dim=1))
    if isnan(risk):
        risk = 10
    else:
        risk = float(risk)
        
    # Complexity
    compl = net.count_weights_biases()

    return (train_accuracy, risk, compl,)

net_weights = (1, -1, -1)

In [38]:
net_ga_params = GA_Pytorch(net_params,
                           net_eval_indiv,
                           net_weights)
pop, log, hof = net_ga_params.run_ga_search()

Params padded
GA entities created
GA entities' methods registered
GA eval function registered
GA sel-cx-mut methods registered




   	      	       accuracy       	      complexity      	             risk             
   	      	----------------------	----------------------	------------------------------
gen	nevals	gen	median 	nevals	gen	median	nevals	gen	median     	nevals
0  	20    	0  	21.4062	20    	0  	235096	20    	0  	1.50029e-11	20    
1  	4     	1  	22.2294	4     	1  	207076	4     	1  	1.57712e-11	4     
2  	14    	2  	22.14  	14    	2  	220985	14    	2  	5.32358e-14	14    
3  	14    	3  	21.3757	14    	3  	231485	14    	3  	1.28642e-14	14    
4  	10    	4  	22.7376	10    	4  	226011	10    	4  	1.71375e-14	10    
5  	11    	5  	22.2633	11    	5  	171575	11    	5  	1.31688e-14	11    
6  	13    	6  	23.338 	13    	6  	200695	13    	6  	1.04175e-14	13    
7  	14    	7  	22.8304	14    	7  	210695	14    	7  	7.85668e-14	14    
8  	10    	8  	22.6331	10    	8  	180985	10    	8  	1.78147e-13	10    
9  	10    	9  	23.3728	10    	9  	190695	10    	9  	1.63024e-13	10    
10 	9     	10 	23.2249	9     	10 	195632	9 

In [39]:
pd.DataFrame(list(hof.values()))

Unnamed: 0,padding_mode,feature_maps,f_kernel_size,f_pad_size,pools,p_kernel_size,f_activs,f_maps_h,f_kernel_size_h,f_pad_size_h,pools_h,p_kernel_size_h,f_activs_h,times_hidden,linear_dim,l_activs,batch_size,lr
0,reflect,5,3,2,MaxPool2d,6,CELU,10,4,1,MaxPool2d,6,ELU,2,25,ELU,26,0.0012
1,reflect,5,3,3,MaxPool2d,6,CELU,9,4,1,MaxPool2d,6,ELU,1,25,ELU,26,0.0012
2,reflect,7,3,3,AvgPool2d,6,SELU,9,2,1,MaxPool2d,6,CELU,1,17,ELU,39,0.0023
3,reflect,7,1,4,AvgPool2d,6,ELU,10,4,2,MaxPool2d,6,SELU,1,25,ELU,51,0.0012
4,reflect,7,3,2,MaxPool2d,6,CELU,8,4,3,MaxPool2d,6,ELU,2,28,ELU,26,0.0012
