In [1]:
# Clear the interactive namespace without prompting, so names left over from a
# previous run of the notebook cannot leak into this one.
%reset -sf

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one (later cells rely on this to show several results at once).
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
from random import seed

from torch import manual_seed
from torch.cuda import is_available

# Run on the GPU when torch can see one; otherwise stay on the CPU.
DEVI = "cpu" if not is_available() else "cuda"
print("==> Device:", DEVI)

# Seed torch and the stdlib `random` module (the latter is what the GA code
# draws gene indices from) so repeated runs start from the same state.
manual_seed(16)
seed(16)

==> Device: cpu


<torch._C.Generator at 0x7faa18a277f0>

In [3]:
from pathlib import Path

from torch import load
from torch.utils.data import Dataset, DataLoader

# Kaggle input directory holding the pre-built tensors.
path = Path('/kaggle/input/mangafacespt')
list(path.glob('*'))

# NOTE(review): torch's `load` unpickles the files, so only point this at
# datasets you trust.
imgs, imgs_te, labs, labs_te = (
    load(path / file_name)
    for file_name in ('imgs.pt', 'imgs_te.pt', 'labs.pt', 'labs_te.pt')
)

# Sanity check: images and labels must agree on the number of samples.
tuple(tensor.shape for tensor in (imgs, imgs_te, labs, labs_te))

[PosixPath('/kaggle/input/mangafacespt/imgs.pt'),
 PosixPath('/kaggle/input/mangafacespt/imgs_te.pt'),
 PosixPath('/kaggle/input/mangafacespt/labs_te.pt'),
 PosixPath('/kaggle/input/mangafacespt/labs.pt')]

(torch.Size([1383, 3, 128, 128]),
 torch.Size([48, 3, 128, 128]),
 torch.Size([1383]),
 torch.Size([48]))

In [4]:
# Default mini-batch size used by the data loaders during fitness evaluation.
BATCH_SIZE = 64
# Default learning rate handed to the optimizer during fitness evaluation.
LR = 5e-4

In [5]:
from torch import nn
from torch import float as pt_float, ones

class NET(nn.Module):
    """Three-stage CNN binary classifier whose layer sizes are searchable.

    Each stage is ``[BatchNorm ->] Conv2d -> Dropout(0.5) -> activation``; the
    first stage has no batch norm. All layers are lazy, so input channel counts
    and the flattened feature size are inferred on the first forward pass.

    Args:
        l1, l2, l3: Number of output channels of conv stage 1, 2 and 3.
        k1, k2, k3: Kernel size of conv stage 1, 2 and 3.
        a1, a2, a3: Name of a ``torch.nn`` activation class (e.g. ``'ReLU'``)
            applied at the end of stage 1, 2 and 3.

    Raises:
        AttributeError: If an activation name is not an attribute of ``torch.nn``.
    """

    def __init__(self, l1, k1, a1, l2, k2, a2, l3, k3, a3):
        super().__init__()

        # Stage 1 is the only stage that is not preceded by batch norm.
        self.cnn1 = self._conv_stage(l1, k1, a1, batch_norm=False)
        self.cnn2 = self._conv_stage(l2, k2, a2)
        self.cnn3 = self._conv_stage(l3, k3, a3)

        # Two log-probabilities per sample (pairs with nn.NLLLoss).
        self.out1 = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(2),
            nn.LogSoftmax(dim=-1))

        self.model1 = nn.Sequential(
            self.cnn1,
            self.cnn2,
            self.cnn3,
            self.out1)

    @staticmethod
    def _conv_stage(channels, kernel, activation, batch_norm=True):
        """Build one ``[BatchNorm ->] Conv -> Dropout -> activation`` stage."""
        layers = [nn.LazyBatchNorm2d()] if batch_norm else []
        layers += [
            nn.LazyConv2d(channels, kernel),
            nn.Dropout(0.5),
            # Look the activation class up by name; getattr is the idiomatic
            # spelling of the former nn.__getattribute__(name) call.
            getattr(nn, activation)(),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        return self.model1(x)

    def count_weights_biases(self):
        """Return the number of trainable scalar parameters.

        NOTE(review): the layers are lazy, so this is presumably only
        meaningful after a forward pass has materialised them - confirm
        before calling it on a freshly constructed network.
        """
        return int(sum(p.numel() for p in self.parameters() if p.requires_grad))
    
#net = NET(10, 2, 'SELU', 10, 2, 'SELU', 10, 2, 'SELU').to(DEVI)
#f'Dry run'
#net(ones(1, 3, 95, 95).to(DEVI, dtype=pt_float))

In [6]:
class GA_Pytorch():
    """Genetic-algorithm hyper-parameter search driven by DEAP.

    An individual is a list of integer indices, one per hyper-parameter; each
    index selects a value from that parameter's (padded) candidate list.

    Args:
        params: Dict mapping a hyper-parameter name to a non-empty sequence of
            candidate values.
        eval_func: Callable ``eval_func(individual, padded_params=..., img_tr=...,
            batch_size=..., lr=...)`` returning one fitness value per weight.
        eval_weights: Tuple of objective weights passed to the DEAP fitness class.
        img_tr: Data object forwarded untouched to ``eval_func``.
        batch_size: Batch size forwarded to ``eval_func``.
        lr: Learning rate forwarded to ``eval_func``.
        sel_tournsize: Tournament size for selection.
        cx_uniform_prob: Per-gene exchange probability for uniform crossover.
        mut_shuffle_idx_prob: Per-gene probability for shuffle-index mutation.
        n_pop: Population size.
        n_gen: Number of generations.
        n_hof: Maximum number of individuals kept in the hall of fame.
        cx_prob: Probability of mating two individuals.
        mut_prob: Probability of mutating an individual.
        n_jobs: When greater than 1, fitness evaluation is mapped over a
            ``multiprocessing.Pool`` with this many worker processes.
    """

    def __init__(self,
                 params,
                 eval_func,
                 eval_weights,
                 img_tr,
                 batch_size=BATCH_SIZE,
                 lr=LR,
                 sel_tournsize=2,
                 cx_uniform_prob=0.5,
                 mut_shuffle_idx_prob=0.1,
                 n_pop=40,
                 n_gen=10,
                 n_hof=5,
                 cx_prob=0.5,
                 mut_prob=0.1,
                 n_jobs=1
                ):
        self.params = params
        self.eval_func = eval_func
        self.eval_weights = eval_weights

        self.img_tr = img_tr
        self.batch_size = batch_size
        self.lr = lr

        self.sel_tournsize = sel_tournsize
        self.cx_uniform_prob = cx_uniform_prob
        self.mut_shuffle_idx_prob = mut_shuffle_idx_prob
        self.n_pop = n_pop
        self.n_gen = n_gen
        self.n_hof = n_hof
        self.cx_prob = cx_prob
        self.mut_prob = mut_prob

        self.n_jobs = n_jobs
        # Worker pool; only created when n_jobs > 1.
        self._pool = None

        self._pad_params()
        self._create_fitness_and_indiv()
        self._register_indiv_and_pop_generators()
        self._register_eval_func()
        self._register_selection_crossover_mutation_methods()

    def _pad_params(self):
        """Repeat every candidate list cyclically up to the longest list's length.

        With equal lengths any gene index is valid for any parameter, which
        the shuffle-index mutation (it moves indices between gene positions)
        appears to rely on. The result is stored in ``self.padded_params`` as
        a dict of tuples, in the same key order as ``self.params``.

        Raises:
            AssertionError: If ``self.params`` is not a dict.
            ValueError: If ``self.params`` is empty or a parameter has no
                candidate values (either would yield an empty search space).
        """
        assert isinstance(self.params, dict), 'Params must be a dict, i.e. estimator.get_params()'
        if not self.params:
            raise ValueError('Params must contain at least one entry')

        lengths = {k: len(v) for k, v in self.params.items()}
        empty_keys = [k for k, n in lengths.items() if n == 0]
        if empty_keys:
            raise ValueError(f'Params without candidate values: {empty_keys}')

        max_length = max(lengths.values())
        # zip against range() bounds the otherwise infinite cycle().
        self.padded_params = {
            k: tuple(value for value, _ in zip(cycle(v), range(max_length)))
            for k, v in self.params.items()
        }
        print('Params padded')

    def _create_fitness_and_indiv(self):
        """Create the DEAP ``Fitness`` and ``Individual`` classes in ``creator``."""
        ga_cr.create('Fitness', ga_b.Fitness, weights=self.eval_weights)
        ga_cr.create('Individual', list, fitness=ga_cr.Fitness)
        print('GA entities created')

    def _gen_params_to_ga(self):
        """Return one random candidate index per hyper-parameter (a genome)."""
        n_genes = len(self.padded_params)
        # Every padded list has the same length, so the first one is representative.
        n_choices = len(next(iter(self.padded_params.values())))
        return [randint(0, n_choices - 1) for _ in range(n_genes)]

    def _register_indiv_and_pop_generators(self):
        """Create the toolbox and register individual/population factories."""
        self.tb = ga_b.Toolbox()

        if self.n_jobs > 1:
            from multiprocessing import Pool
            # Honour n_jobs (a bare Pool() would use every CPU) and keep a
            # handle so close() can release the workers.
            self._pool = Pool(processes=self.n_jobs)
            self.tb.register("map", self._pool.map)

        self.tb.register("individual", ga_t.initIterate, ga_cr.Individual, self._gen_params_to_ga)
        self.tb.register("population", ga_t.initRepeat, list, self.tb.individual)
        print('GA entities\' methods registered')

    def _register_eval_func(self):
        """Register ``eval_func`` as the toolbox's ``evaluate`` with its fixed kwargs."""
        self.tb.register("evaluate",
                         self.eval_func,
                         padded_params=self.padded_params,
                         img_tr=self.img_tr,
                         batch_size=self.batch_size,
                         lr=self.lr)
        print('GA eval function registered')

    def _register_selection_crossover_mutation_methods(self):
        """Register tournament selection, uniform crossover and shuffle-index mutation."""
        self.tb.register("select", ga_t.selTournament, tournsize=self.sel_tournsize)
        self.tb.register("mate", ga_t.cxUniform, indpb=self.cx_uniform_prob)
        self.tb.register("mutate", ga_t.mutShuffleIndexes, indpb=self.mut_shuffle_idx_prob)
        print('GA sel-cx-mut methods registered')

    def run_ga_search(self):
        """Run the evolutionary search.

        Returns:
            tuple: ``(pop, log, hof_)`` - the final population, the logbook
            returned by ``eaSimple``, and a dict ``{'hof_<i>': params}`` with
            the decoded hyper-parameters of each hall-of-fame individual.
        """
        pop = self.tb.population(n=self.n_pop)
        hof = ga_t.HallOfFame(self.n_hof)

        # One statistics object per objective; assumes eval_func returns
        # (accuracy, risk, complexity) in that order.
        stats1 = ga_t.Statistics(lambda ind: ind.fitness.values[0])
        stats2 = ga_t.Statistics(lambda ind: ind.fitness.values[1])
        stats3 = ga_t.Statistics(lambda ind: ind.fitness.values[2])
        stats = ga_t.MultiStatistics(accuracy=stats1, risk=stats2, complexity=stats3)
        stats.register("avg", mean)

        pop, log = ga_algo.eaSimple(pop, self.tb, cxpb=self.cx_prob,
                                    mutpb=self.mut_prob, ngen=self.n_gen,
                                    stats=stats, halloffame=hof, verbose=True)

        # Iterate over what the hall of fame actually holds: it can contain
        # fewer than n_hof entries, in which case hof[i] would raise IndexError.
        hof_ = {'hof_' + str(i): self._ga_to_params(indiv)
                for i, indiv in enumerate(hof)}

        return pop, log, hof_

    def _ga_to_params(self, idx_params):
        """Decode a genome (list of indices) back into a ``{name: value}`` dict."""
        return {k: v[idx]
                for (k, v), idx in zip(self.padded_params.items(), idx_params)}

    def close(self):
        """Shut down the worker pool created for ``n_jobs > 1`` (no-op otherwise).

        Call this once no further searches will be run on this instance.
        """
        if self._pool is not None:
            self._pool.close()
            self._pool.join()
            self._pool = None

In [7]:
from numpy import mean, linspace, inf

def _size_grid():
    """Return 30 integer candidates spanning 1..20 (truncated, so values repeat)."""
    return linspace(1, 20, 30).astype(int)


def _activation_names():
    """Return the candidate ``torch.nn`` activation class names."""
    return ['ReLU', 'CELU', 'SELU', 'ELU', 'Softsign']


# Search space: per conv stage, the channel count (l*), kernel size (k*) and
# activation (a*). Key order matters - it defines the gene order of an individual.
net_params = {
    'l1': _size_grid(), 'k1': _size_grid(), 'a1': _activation_names(),
    'l2': _size_grid(), 'k2': _size_grid(), 'a2': _activation_names(),
    'l3': _size_grid(), 'k3': _size_grid(), 'a3': _activation_names(),
}

def net_eval_indiv(individual, padded_params, img_tr, batch_size, lr):
    """Score one GA individual by briefly training and testing the network it encodes.

    Args:
        individual: Sequence of indices, one per key of ``padded_params``.
        padded_params: Dict of equally long candidate sequences; gene ``i``
            selects from the ``i``-th entry.
        img_tr: Pair ``(train_data, test_data)``; each element is a sequence of
            ``(image, label)`` samples usable by ``DataLoader``.
        batch_size: Batch size for both loaders.
        lr: Learning rate for Adam.

    Returns:
        tuple: ``(test_accuracy, risk, compl)`` where

        * ``test_accuracy`` is the percentage of correct predictions on the
          evaluated test batches,
        * ``risk`` is the median over the last evaluated test batch of
          ``100 * p0 * p1`` (product of the two class probabilities, so it
          shrinks as predictions get more confident); 10 if it is NaN or no
          batch was evaluated,
        * ``compl`` is the number of trainable parameters.

        If the architecture cannot process a sample-sized input, the fixed
        penalty tuple ``(0.01, 1e-10, 1e7)`` is returned instead.
    """
    # Batches consumed per evaluation; kept small so one individual is cheap to score.
    max_train_batches = 4
    max_test_batches = 6

    # Decode genes into NET keyword arguments.
    indiv_params = {k: v[idx] for (k, v), idx in zip(padded_params.items(), individual)}

    train_data, test_data = img_tr[0], img_tr[1]

    # Build the net and dry-run it once: this materialises the lazy layers and
    # rejects architectures whose kernels do not fit the image. The probe must
    # live on the same device as the net (a CPU tensor fed to a CUDA net fails
    # and would reject every individual), and it takes its shape from the data
    # rather than a hard-coded size.
    net = NET(**indiv_params).to(DEVI).train()
    sample_shape = tuple(train_data[0][0].shape)
    try:
        net(ones(1, *sample_shape, device=DEVI))
    except Exception as e:  # not BaseException: Ctrl-C must still interrupt the search
        print('=> Possible Arch Error:', e)
        return (0.01, (1/10)**10, 1e7)

    optimizer = Adam(net.parameters(), lr=lr)
    criterion = nn.NLLLoss()

    train_dl = DataLoader(train_data,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=1,
                          drop_last=True)

    # Train
    for epoch in range(1):
        for i, (inputs, labels) in enumerate(train_dl):
            if i >= max_train_batches:
                # Stop instead of iterating the rest of the loader for nothing.
                break
            optimizer.zero_grad()
            outputs = net(inputs.to(DEVI, dtype=pt_float))
            # NLLLoss already averages over the batch.
            loss = criterion(outputs, labels.to(DEVI, dtype=long))
            loss.backward()
            optimizer.step()

    test_dl = DataLoader(test_data,
                         batch_size=batch_size,
                         shuffle=True,
                         num_workers=1)

    # Test
    net = net.eval()
    test_correct = 0
    test_total = 0
    outputs = None

    with no_grad():  # evaluation needs no autograd graph
        for j, (inputs, labels) in enumerate(test_dl):
            if j >= max_test_batches:
                break
            outputs = net(inputs.to(DEVI, dtype=pt_float))
            _, predicted = pt_max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels.to(DEVI, dtype=long)).sum().item()

    # Defined even when no test batch was seen.
    test_accuracy = test_correct / test_total * 100 if test_total else 0.0

    # Risk, computed from the last evaluated test batch (its outputs are reused
    # rather than running a second forward pass on the same inputs).
    if outputs is None:
        risk = 10
    else:
        risk = median(prod(outputs.exp() * 10, dim=1))
        risk = 10 if isnan(risk) else float(risk)

    # Complexity
    compl = net.count_weights_biases()

    return (test_accuracy, risk, compl,)

# Objective weights for the DEAP fitness, in the order net_eval_indiv returns
# its values: (accuracy, risk, complexity). Per DEAP's convention a positive
# weight is maximised and a negative one minimised.
net_weights = (1, -1, -1)

In [8]:
from itertools import cycle
from deap import creator as ga_cr, base as ga_b, algorithms as ga_algo, tools as ga_t
from random import randint
from numpy import mean
from torch.optim import Adam
from torch import max as pt_max, no_grad, median, prod, isnan, long, float as pt_float
from torch.utils.data import DataLoader

# Pair every image with its label; the evaluation function expects a
# (train_samples, test_samples) tuple.
train_pairs = list(zip(imgs, labs))
test_pairs = list(zip(imgs_te, labs_te))

net_ga_params = GA_Pytorch(
    params=net_params,
    eval_func=net_eval_indiv,
    eval_weights=net_weights,
    img_tr=(train_pairs, test_pairs),
)

# Final population, per-generation logbook, and decoded hall-of-fame params.
pop, log, hof = net_ga_params.run_ga_search()

Params padded
GA entities created
GA entities' methods registered
GA eval function registered
GA sel-cx-mut methods registered




   	      	       accuracy       	      complexity      	         risk         
   	      	----------------------	----------------------	----------------------
gen	nevals	avg    	gen	nevals	avg   	gen	nevals	avg    	gen	nevals
0  	40    	63.8542	0  	40    	247483	0  	40    	5.51826	0  	40    
1  	23    	73.9583	1  	23    	291376	1  	23    	4.78529	1  	23    
2  	32    	76.0937	2  	32    	268675	2  	32    	3.8712 	2  	32    
3  	19    	86.1458	3  	19    	326777	3  	19    	2.46817	3  	19    
4  	17    	81.4583	4  	17    	333043	4  	17    	1.70634	4  	17    
5  	24    	82.9687	5  	24    	377200	5  	24    	2.07319	5  	24    
6  	23    	88.2812	6  	23    	402539	6  	23    	0.910941	6  	23    
7  	23    	90     	7  	23    	456694	7  	23    	0.645114	7  	23    
8  	15    	91.6667	8  	15    	517009	8  	15    	0.0105053	8  	15    
9  	21    	87.6562	9  	21    	503901	9  	21    	0.0297392	9  	21    
10 	20    	91.6667	10 	20    	527809	10 	20    	0.0299962	10 	20    


In [9]:
# Display the decoded hyper-parameter sets of the hall-of-fame individuals.
hof

{'hof_0': {'l1': 13,
  'k1': 15,
  'a1': 'Softsign',
  'l2': 3,
  'k2': 2,
  'a2': 'ELU',
  'l3': 9,
  'k3': 1,
  'a3': 'SELU'},
 'hof_1': {'l1': 7,
  'k1': 10,
  'a1': 'CELU',
  'l2': 1,
  'k2': 5,
  'a2': 'ELU',
  'l3': 18,
  'k3': 2,
  'a3': 'ELU'},
 'hof_2': {'l1': 6,
  'k1': 6,
  'a1': 'ReLU',
  'l2': 1,
  'k2': 5,
  'a2': 'Softsign',
  'l3': 18,
  'k3': 2,
  'a3': 'ELU'},
 'hof_3': {'l1': 2,
  'k1': 6,
  'a1': 'Softsign',
  'l2': 1,
  'k2': 5,
  'a2': 'SELU',
  'l3': 18,
  'k3': 1,
  'a3': 'ELU'},
 'hof_4': {'l1': 7,
  'k1': 3,
  'a1': 'CELU',
  'l2': 3,
  'k2': 5,
  'a2': 'ReLU',
  'l3': 18,
  'k3': 1,
  'a3': 'Softsign'}}

In [10]:
# Save the hall-of-fame hyper-parameters to disk.

from pandas import DataFrame
# NOTE(review): this rebinds `load`, which an earlier cell imported from torch;
# any later bare `load(...)` call will hit joblib's loader instead.
from joblib import dump, load

# One column per hall-of-fame entry, one row per hyper-parameter.
DataFrame(hof)
# NOTE(review): joblib.dump writes joblib's pickle-based format, not JSON, so
# the '.json' extension is misleading - read the file back with joblib.load.
dump(hof, 'best_params.json')

Unnamed: 0,hof_0,hof_1,hof_2,hof_3,hof_4
l1,13,7,6,2,7
k1,15,10,6,6,3
a1,Softsign,CELU,ReLU,Softsign,CELU
l2,3,1,1,1,3
k2,2,5,5,5,5
a2,ELU,ELU,Softsign,SELU,ReLU
l3,9,18,18,18,18
k3,1,2,2,1,1
a3,SELU,ELU,ELU,ELU,Softsign


['best_params.json']