In [None]:
# Start from a clean namespace: -s is a "soft" reset (clears the namespace only),
# -f skips the confirmation prompt.
%reset -sf

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the input directory and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# !pip install graphviz
# !pip install torchviz

In [None]:
from pprint import pprint
from collections import defaultdict, OrderedDict
from random import choice, seed, randint
from tqdm import tqdm
from itertools import repeat, product, combinations, cycle
from pathlib import Path
from string import ascii_lowercase

from torch import optim
from torch.utils.data import DataLoader, Dataset
from torch import (load, amax as pt_amax, max as pt_max, ones, save, no_grad, stack, numel, tensor, 
                   manual_seed, sigmoid, tanh, add, mul, sub, div, amin as pt_amin, cat,
                  maximum, minimum, device, cuda, rand, prod, median, log as pt_log, round as pt_round,
                  isnan)
from torch import nn
from torch.nn import functional as F
from torch.optim import Adam
#from torchviz import make_dot
import torchvision
import torchvision.transforms as transforms

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

from deap import creator as ga_cr, base as ga_b, algorithms as ga_algo, tools as ga_t

In [None]:
# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

In [None]:
# `device` is imported above as the torch.device class and is rebound here to an
# instance of it. Resolve the class first so that re-running this cell does not
# fail with "TypeError: 'torch.device' object is not callable".
_device_cls = device if isinstance(device, type) else type(device)
device = _device_cls("cuda" if cuda.is_available() else "cpu")
# device = "cpu"
print("==> Device:", device)

In [None]:
class DS(Dataset):
    """Dataset pairing a tensor of 2-D maps with a tensor of labels.

    Indexing returns ``(map, label)`` where the map gains a leading axis of
    size 1 and is cast to float, and the label is cast to long.
    """

    def __init__(self, maps, labels) -> None:
        self.maps, self.labels = maps, labels

    def __len__(self):
        # The number of samples is driven by the labels
        return len(self.labels)

    def __getitem__(self, idx):
        sample = self.maps[idx].unsqueeze(0)  # prepend a size-1 axis
        target = self.labels[idx]
        return sample.float(), target.long()

# Data
# Each .pt file is unpacked into an (inputs, labels) pair.
# NOTE(review): `load` is torch.load, which unpickles the file - only load files you trust.
X_train, y_train = load('/kaggle/input/pytorch-mnist/training.pt')
X_test, y_test = load('/kaggle/input/pytorch-mnist/test.pt')
# Bare tuple expression: shows the four shapes as cell output
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
class NET(nn.Module):
    """Single-block CNN classifier for 1x28x28 inputs with configurable layers.

    The feature extractor is Conv2d -> Dropout2d(0.5) -> pooling -> activation,
    followed by one Linear layer; ``forward`` returns softmax probabilities.

    Parameters
    ----------
    batch_size : int
        Kept as ``self.batch_size`` for backward compatibility. ``forward``
        accepts batches of any size.
    params : dict
        Keys: 'feature_maps', 'f_kernel_size', 'f_stride_size' (convolution),
        'pools' (name of a pooling class in ``torch.nn``), 'p_kernel_size',
        'p_stride_size' (pooling) and 'activs' (name of an activation class
        in ``torch.nn``). The default dict is only read, never modified.
    outputs_dim : int
        Number of output classes.
    """
    def __init__(self, 
                 batch_size,
                 params={
                    'feature_maps': 2, 
                    'f_kernel_size': 2, 
                    'f_stride_size': 2, 
                    'pools': 'MaxPool2d', 
                    'p_kernel_size': 2, 
                    'p_stride_size': 2, 
                    'activs': 'ELU'},
                 outputs_dim=10,
                ):
        super().__init__()
        
        # Cast to plain ints so NumPy integer scalars (e.g. taken from an
        # integer array of candidate values) are accepted as layer sizes.
        self.f_maps = int(params['feature_maps'])
        self.f_k_size = int(params['f_kernel_size'])
        self.f_s_size = int(params['f_stride_size'])
        self.pool = params['pools']
        self.p_k_size = int(params['p_kernel_size'])
        self.p_s_size = int(params['p_stride_size'])
        self.activ = params['activs']
        self.outputs_dim = outputs_dim  # MNIST
        self.batch_size = batch_size
        
        # Pooling and activation classes are looked up by name in torch.nn
        self.model = nn.Sequential(
            nn.Conv2d(1, 
                      self.f_maps, 
                      self.f_k_size, 
                      stride=self.f_s_size),
            nn.Dropout2d(p=0.5),
            getattr(nn, self.pool)(kernel_size=self.p_k_size, 
                                   stride=self.p_s_size),
            getattr(nn, self.activ)())
        
        # Probe with a single dummy image (no autograd graph needed) to learn
        # how many features one sample has after the feature extractor.
        with no_grad():
            probe = self.model(ones(1, 1, 28, 28))
        self.model_out_shape = int(probe[0].numel())
        self.output = nn.Linear(self.model_out_shape, self.outputs_dim)
                
    def forward(self, x):
        """Return class probabilities of shape (x.shape[0], outputs_dim)."""
        res = self.model(x)
        # Flatten per sample using the actual batch size of `x`, so batches
        # whose size differs from `self.batch_size` work as well.
        res = res.reshape(res.shape[0], self.model_out_shape)
        res = F.softmax(self.output(res), dim=1)
        return res
    
    def count_weights_biases(self):
        """Return the total number of trainable scalar parameters."""
        return int(sum(p.numel() for p in self.parameters() if p.requires_grad))
    
#net = NET(BATCH_SIZE).train()
#num = np.random.randint(0, len(X_train))
#_ = plt.imshow(X_train[num])
#f'single image:, {X_train[num].shape}'
#f'out: {net(ones(BATCH_SIZE, 1, 28, 28).float()).shape}'

In [None]:
class GA_Pytorch():
    """Genetic-algorithm search over a grid of hyper-parameters, built on DEAP.

    An individual is a list with one integer gene per entry of ``params``; each
    gene is an index into that entry's candidate values. Candidate sequences
    are padded (by cycling) to a common length so that any gene is a valid
    index for any entry, which keeps individuals valid when
    ``mutShuffleIndexes`` moves genes between positions.

    Parameters
    ----------
    params : dict
        Maps a parameter name to a non-empty sized iterable of candidates.
    eval_func : callable
        Called as ``eval_func(individual, padded_params=..., X_train=...,
        X_test=..., y_train=..., y_test=..., batch_size=..., lr=...)`` and
        must return a tuple of fitness values. ``run_ga_search`` reports
        statistics for exactly three values, labelled accuracy, risk and
        complexity.
    eval_weights : tuple
        Passed as ``weights`` to the DEAP fitness class.
    X_train, X_test, y_train, y_test, batch_size, lr
        Forwarded unchanged to ``eval_func``.
    sel_tournsize, cx_uniform_prob, mut_shuffle_idx_prob
        Settings of the tournament selection, uniform crossover and
        shuffle-indexes mutation operators.
    n_pop, n_gen, n_hof : int
        Population size, number of generations, hall-of-fame capacity.
    cx_prob, mut_prob : float
        Crossover / mutation probabilities given to ``eaSimple``.
    n_jobs : int
        When greater than 1, evaluations are mapped over a
        ``multiprocessing.Pool`` with that many workers; call ``close()``
        when done to release them.
    """
    def __init__(self, 
                 params, 
                 eval_func,
                 eval_weights,
                 X_train,
                 X_test,
                 y_train,
                 y_test,
                 batch_size=64,
                 lr=0.0001,
                 sel_tournsize=2, 
                 cx_uniform_prob=0.5, 
                 mut_shuffle_idx_prob=0.1, 
                 n_pop=50, 
                 n_gen=20, 
                 n_hof=5, 
                 cx_prob=0.5, 
                 mut_prob=0.1, 
                 n_jobs=1
                ):
        self.params = params
        self.eval_func = eval_func
        self.eval_weights = eval_weights
        
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self.batch_size = batch_size
        self.lr = lr
        
        self.sel_tournsize = sel_tournsize
        self.cx_uniform_prob = cx_uniform_prob
        self.mut_shuffle_idx_prob = mut_shuffle_idx_prob
        self.n_pop = n_pop
        self.n_gen = n_gen
        self.n_hof = n_hof
        self.cx_prob = cx_prob
        self.mut_prob = mut_prob
        
        self.n_jobs = n_jobs
        self.pool = None  # worker pool, only created when n_jobs > 1

        self._pad_params()
        self._create_fitness_and_indiv()
        self._register_indiv_and_pop_generators()
        self._register_eval_func()
        self._register_selection_crossover_mutation_methods()

    def _pad_params(self):
        """Pad every candidate sequence to the longest one by cycling its values.

        Needed by the shuffle-indexes mutation: after genes swap positions,
        each gene must still be a valid index for the parameter it lands on.

        Raises
        ------
        ValueError
            If a parameter has no candidate values.
        """
        assert isinstance(self.params, dict), 'Params must be a dict, i.e. estimator.get_params()'
        options = {k: list(v) for k, v in self.params.items()}
        empty = [k for k, v in options.items() if len(v) == 0]
        if empty:
            raise ValueError(f'Params without any candidate value: {empty}')
        max_length = max((len(v) for v in options.values()), default=0)
        # ('a', 'b', 'c'), (1, 2, 1), (14, 14, 14) ... all of length max_length
        self.padded_params = {
            k: tuple(v[i % len(v)] for i in range(max_length))
            for k, v in options.items()
        }
        print('Params padded')

    def _create_fitness_and_indiv(self):
        """Create GA individual and fitness entities (classes)"""
        ga_cr.create('Fitness', ga_b.Fitness, weights=self.eval_weights)
        ga_cr.create('Individual', list, fitness=ga_cr.Fitness)
        print('GA entities created')

    def _gen_params_to_ga(self):
        """Draw one random index per parameter; the list is an individual's genes."""
        n_genes = len(self.padded_params)
        # All padded sequences share one length, so the first is representative
        n_options = len(list(self.padded_params.values())[0])
        return [randint(0, n_options - 1) for _ in range(n_genes)]
    
    def _register_indiv_and_pop_generators(self):
        """Register GA individual and population generators"""
        self.tb = ga_b.Toolbox()

        if self.n_jobs > 1:
            from multiprocessing import Pool
            # Honour n_jobs and keep a handle so the workers can be released
            self.pool = Pool(processes=self.n_jobs)
            self.tb.register("map", self.pool.map)

        self.tb.register("individual", ga_t.initIterate, ga_cr.Individual, self._gen_params_to_ga)
        self.tb.register("population", ga_t.initRepeat, list, self.tb.individual)
        print('GA entities\' methods registered')
        
    def _register_eval_func(self):
        """Set GA evaluate individual function"""
        self.tb.register("evaluate",
                        self.eval_func,
                        padded_params=self.padded_params,
                        X_train=self.X_train,
                        X_test=self.X_test, 
                        y_train=self.y_train, 
                        y_test=self.y_test,
                        batch_size=self.batch_size,
                        lr=self.lr)
        print('GA eval function registered')
    
    def _register_selection_crossover_mutation_methods(self):
        """Register tournament selection, uniform crossover and shuffle-indexes mutation."""
        self.tb.register("select", ga_t.selTournament, tournsize=self.sel_tournsize)
        self.tb.register("mate", ga_t.cxUniform, indpb=self.cx_uniform_prob)
        self.tb.register("mutate", ga_t.mutShuffleIndexes, indpb=self.mut_shuffle_idx_prob)
        print('GA sel-cx-mut methods registered')
        
    def run_ga_search(self):
        """Run ``eaSimple`` and return ``(population, logbook, hall_of_fame)``.

        The hall of fame is returned as a dict ``{'hof_<rank>': params}`` with
        the genes converted back to parameter values. It can hold fewer than
        ``n_hof`` entries when the search found fewer distinct individuals.
        """
        pop = self.tb.population(n=self.n_pop)
        hof = ga_t.HallOfFame(self.n_hof)

        # Stats stdout: one statistics object per fitness component
        stats = ga_t.MultiStatistics(
            accuracy=ga_t.Statistics(lambda ind: ind.fitness.values[0]),
            risk=ga_t.Statistics(lambda ind: ind.fitness.values[1]),
            complexity=ga_t.Statistics(lambda ind: ind.fitness.values[2]))
        stats.register("avg", np.mean)

        # GA Run
        pop, log = ga_algo.eaSimple(pop, self.tb, cxpb=self.cx_prob, 
                                    mutpb=self.mut_prob, ngen=self.n_gen, 
                                    stats=stats, halloffame=hof, verbose=True)
        
        # Convert back params; iterate over what the hall of fame actually
        # holds instead of assuming it contains n_hof individuals.
        hof_ = {'hof_' + str(i): self._ga_to_params(ind) for i, ind in enumerate(hof)}

        return pop, log, hof_
    
    def _ga_to_params(self, idx_params):
        """Convert an individual's index genes back to a dict of parameter values."""
        return {k: v[idx] for (k, v), idx in zip(self.padded_params.items(), idx_params)}

    def close(self):
        """Shut down the worker pool (if any) and fall back to the builtin ``map``."""
        if self.pool is not None:
            self.pool.close()
            self.pool.join()
            self.pool = None
            self.tb.register("map", map)

In [None]:
# Candidate values per NET hyper-parameter (search space of the GA)
net_params = {
    # convolution
    'feature_maps': np.arange(1, 11, dtype=int),
    'f_kernel_size': np.arange(1, 5, dtype=int),
    'f_stride_size': np.arange(1, 5, dtype=int),
    # pooling
    'pools': ['MaxPool2d', 'AvgPool2d'],
    'p_kernel_size': np.arange(1, 5, dtype=int),
    'p_stride_size': np.arange(1, 5, dtype=int),
    # activation
    'activs': ['ReLU', 'CELU', 'SELU', 'ELU', 'Softsign'],
    #'norms': ['']
}

def net_eval_indiv(individual, padded_params, X_train, X_test, y_train, y_test, batch_size, lr,
                   max_train_batches=201, max_test_batches=101):
    """Evaluate individual's genes (estimator's params)

    Builds a NET from the parameters selected by ``individual``, trains it for
    a limited number of batches and scores it on a limited number of test
    batches.

    Parameters
    ----------
    individual : sequence of int
        One index per entry of ``padded_params``.
    padded_params : dict
        Parameter name -> indexable sequence of candidate values.
    X_train, y_train, X_test, y_test
        Inputs and labels, wrapped in ``DS`` datasets.
    batch_size : int
        Batch size of both data loaders (incomplete batches are dropped).
    lr : float
        Adam learning rate.
    max_train_batches, max_test_batches : int
        Upper bounds on the number of batches used for training / testing.

    Returns
    -------
    tuple
        ``(test_accuracy, risk, compl)``: accuracy in percent; the median over
        the last evaluated test batch of the per-sample product of
        (class probability * 10), or 10.0 when that is NaN; and the number of
        trainable parameters.

    Raises
    ------
    ValueError
        If not a single full test batch could be evaluated.
    """

    # Params
    indiv_params = {k: v[idx] for (k, v), idx in zip(padded_params.items(), individual)}
    
    # Net
    net = NET(batch_size, params=indiv_params)
    
    # Optimizer
    optimizer = Adam(net.parameters(), lr=lr)
    criterion = nn.NLLLoss()
    
    # Train
    train_dl = DataLoader(DS(X_train, y_train),
                          batch_size=batch_size,
                          shuffle=True,
                          drop_last=True)
    net.train()
    for batch_idx, (inputs, labels) in enumerate(train_dl):
        if batch_idx >= max_train_batches:
            break
        optimizer.zero_grad()
        # NET returns probabilities; clamp before the log so an underflowed
        # probability of exactly 0 cannot produce -inf losses / NaN gradients.
        log_probs = pt_log(net(inputs).clamp_min(1e-12))
        loss = criterion(log_probs, labels)  # NLLLoss already averages over the batch
        loss.backward()
        optimizer.step()
        
    # Eval
    test_dl = DataLoader(DS(X_test, y_test),
                         batch_size=batch_size,
                         shuffle=True,
                         drop_last=True)
    net.eval()
    test_correct = 0
    test_total = 0
    last_probs = None
    with no_grad():
        for batch_idx, (inputs, labels) in enumerate(test_dl):
            if batch_idx >= max_test_batches:
                break
            last_probs = net(inputs)
            _, predicted = pt_max(last_probs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()

    if test_total == 0:
        raise ValueError('No full test batch was evaluated; '
                         'check batch_size against the test set size')
    test_accuracy = test_correct / test_total * 100
        
    # Risk, computed on the probabilities of the last evaluated test batch
    risk = median(prod(last_probs * 10, dim=1))
    risk = 10.0 if isnan(risk) else float(risk)
        
    # Complexity
    compl = net.count_weights_biases()

    return (test_accuracy, risk, compl,)

# Fitness weights, one per value returned by net_eval_indiv (test_accuracy, risk, compl).
# In DEAP a positive weight means the value is maximised and a negative one minimised.
net_weights = (1, -1, -1)

In [None]:
# Build the search with the class's default GA settings, then run it
net_ga_params = GA_Pytorch(
    params=net_params,
    eval_func=net_eval_indiv,
    eval_weights=net_weights,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)
pop, log, hof = net_ga_params.run_ga_search()

In [None]:
# One row per hall-of-fame entry, one column per hyper-parameter
pd.DataFrame(list(hof.values()))

In [None]:
# Backup

class ARCH_NET(nn.Module):
    """Randomly wired fully connected network.

    Two initial Linear layers read the input. Every further node picks a pair
    of already existing layers, merges their outputs with a random element-wise
    operation and passes the result through its own Linear layer and a random
    activation. Nodes that no other node consumes are reduced with an
    element-wise max and mapped to log-probabilities.

    Call ``create_random_arch()`` once before using the network.

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_dim : int
        Width of every initial and hidden layer.
    output_dim : int
        Number of output classes.
    nodes : int
        Number of hidden nodes created by ``create_random_arch``.
    """
    def __init__(
        self,
        input_size=784,
        hidden_dim=10,
        output_dim=10,
        nodes=5
    ):
        super().__init__()
        
        self.input_size = input_size
        self.hidden_dim = hidden_dim
        self.nodes = nodes
        self.activs = [F.selu, F.relu, F.celu, F.elu, sigmoid, F.logsigmoid, F.softplus, F.softsign, tanh]
        self.opers = [add, sub, mul, maximum, minimum]

        # Layer names: 'a'..'z', 'aa'..'zz', ...
        self.abc = [letter*j for j in range(1, 99) for letter in ascii_lowercase]

        # Candidate inputs for the next node; rebuilt by create_random_arch
        self.options = []

        self.init_layers = nn.ModuleDict()  # required for optim
        self.init_activs = OrderedDict()

        # pair -> layer name; the other hidden_* dicts are keyed by that name,
        # the *_str dicts by pair.
        self.pairs = OrderedDict()
        self.hidden_layers = nn.ModuleDict()
        self.hidden_activs = OrderedDict()
        self.hidden_activs_str = OrderedDict()
        self.hidden_opers = OrderedDict()
        self.hidden_opers_str = OrderedDict()

        self.to_output = nn.Linear(hidden_dim, output_dim)
        
    def create_initial_layer(self):
        """Creates the NN initial layer"""
        for i in range(2):
            self.init_layers['i' + str(i)] = nn.Linear(self.input_size, self.hidden_dim)
            self.init_activs['i' + str(i)] = choice(self.activs)

    def create_node(self, options):
        """Create a NN node fed by a not-yet-used pair of ``options``.

        Returns the pair that was actually used for the new node.

        Raises
        ------
        ValueError
            If every pair of ``options`` is already in use.
        """
        # Sample among unused pairs only, instead of retrying recursively
        candidates = [p for p in combinations(options, 2) if p not in self.pairs]
        if not candidates:
            raise ValueError('No unused pair of inputs is left to build a new node')
        pair = choice(candidates)
        self.options.append(pair)

        # Compute the layer name once so all per-node dicts share the same key
        name = self.abc[len(self.hidden_layers)]

        self.hidden_opers[name] = choice(self.opers)
        self.hidden_opers_str[pair] = self.hidden_opers[name].__name__

        self.pairs[pair] = name
        self.hidden_layers[name] = nn.Linear(self.hidden_dim, self.hidden_dim)

        self.hidden_activs[name] = choice(self.activs)
        self.hidden_activs_str[pair] = self.hidden_activs[name].__name__

        return pair
            
    def create_random_arch(self):
        """Create a random NN Architecture"""
        self.create_initial_layer()
        for _ in range(self.nodes):
            # Any initial layer or previously created node can feed the next node
            self.options = list(self.init_layers.keys())
            self.options.extend(list(self.pairs.keys()))
            self.create_node(self.options)
        
    def forward(self, x):
        """Forward/Predict: return log-probabilities (size-1 axes squeezed out).

        Raises
        ------
        RuntimeError
            If the architecture has no hidden node yet.
        """
        if not self.pairs:
            raise RuntimeError('Architecture is empty: call create_random_arch() before forward()')

        # Outputs of every layer computed so far: initial layers are keyed by
        # name, hidden nodes by their input pair.
        acts = {}

        # Pass inputs through initial layers...
        for key, lay in self.init_layers.items():
            # training=self.training: F.dropout defaults to training=True and
            # would otherwise keep dropping units in eval mode.
            dropped = F.dropout(lay(x), p=0.2, training=self.training)
            acts[key] = self.init_activs[key](dropped)
        
        # ...then, through hidden layers (in creation order)
        for pair, name in self.pairs.items():
            merged = self.hidden_opers[name](acts[pair[0]], acts[pair[1]])
            dropped = F.dropout(self.hidden_layers[name](merged), p=0.2, training=self.training)
            acts[pair] = self.hidden_activs[name](dropped)
        
        # Ending layers are the nodes that no other node takes as an input
        consumed = {operand for pair in self.pairs for operand in pair}
        end_layers = [acts[pair] for pair in reversed(list(self.pairs)) if pair not in consumed]
        
        # Outputs
        end_layers = stack(end_layers, dim=0)
        end_layers = pt_amax(end_layers, dim=0)
        end_layers = self.to_output(end_layers)
        end_layers = F.log_softmax(end_layers, dim=-1).squeeze()
        return end_layers
    
#net = ARCH_NET(hidden_dim=64, nodes=5).train()
#net.create_random_arch()
#r = net(ones(1,784))