In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
from torch import manual_seed
from random import seed
# Seed PyTorch and the stdlib `random` module with the same value for repeatable runs.
# manual_seed returns the torch Generator, which is why its repr is displayed below the cell.
manual_seed(16)
seed(16)

<torch._C.Generator at 0x7f57d683b070>

In [3]:
from pprint import pprint
from pathlib import Path

# Root directory of the dataset on Kaggle; the second line lists its immediate children.
path = Path('/kaggle/input/hydrangea-dataset-compressed/data_CNN')
list(path.glob('*'))

[PosixPath('/kaggle/input/hydrangea-dataset-compressed/data_CNN/test'),
 PosixPath('/kaggle/input/hydrangea-dataset-compressed/data_CNN/train')]

In [4]:
# Sub-directories of the dataset root holding the train and test splits.
train_path = path / 'train'
test_path = path / 'test'

In [5]:
from torch.cuda import is_available

# Device string used for every .to(devi) call: GPU when torch reports CUDA is available, else CPU.
devi = 'cuda' if is_available() else 'cpu'
devi

'cuda'

In [6]:
# Number of passes over the (capped) training batches in net_eval_indiv.
EPOCHS = 10
# Mini-batch size; GA_Pytorch forwards it to the evaluation function's DataLoaders.
BATCH_SIZE = 128
# Learning rate; GA_Pytorch forwards it to the evaluation function's Adam optimiser.
LR = 0.0005

In [7]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, Normalize, Resize, ColorJitter, GaussianBlur, RandomHorizontalFlip, RandomVerticalFlip

# Training pipeline: random augmentation first, then resize to 95x95,
# tensor conversion and per-channel normalisation with mean 0.5 / std 0.5.
transf = Compose([
    RandomHorizontalFlip(),
    RandomVerticalFlip(),
    ColorJitter(0.1, 0.1, 0.1),
    GaussianBlur(1),
    Resize((95,95)),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Evaluation pipeline: same resize / tensor conversion / normalisation, but
# without the random augmentations, so the test score is repeatable and is
# measured on unmodified images.
transf_eval = Compose([
    Resize((95,95)),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

img_tr = ImageFolder(train_path, transform=transf)
# Displays the shape of the first transformed training image.
img_tr[0][0].shape

img_te = ImageFolder(test_path, transform=transf_eval)

torch.Size([3, 95, 95])

In [8]:
from torch import nn
from torch import float as pt_float, ones

class NET(nn.Module):
    """Three-stage CNN classifier whose stage settings are hyper-parameters.

    Each stage is ``LazyConv2d -> Dropout(0.5) -> activation``; the head is
    ``Flatten -> LazyLinear -> LogSoftmax``, so the forward pass returns
    log-probabilities.

    Parameters
    ----------
    l1, l2, l3 : int
        Number of output channels of the 1st / 2nd / 3rd convolution.
    k1, k2, k3 : int
        Kernel size of the 1st / 2nd / 3rd convolution.
    a1, a2, a3 : str
        Name of a class in ``torch.nn`` used as the stage activation
        (e.g. ``'SELU'``).  An unknown name raises ``AttributeError``.
    n_classes : int, default 10
        Number of outputs of the final linear layer.
    """

    def __init__(self,
                 l1, k1, a1, l2, k2, a2, l3, k3, a3,
                 n_classes=10):
        super().__init__()

        self.cnn1 = self._conv_stage(l1, k1, a1)
        self.cnn2 = self._conv_stage(l2, k2, a2)
        self.cnn3 = self._conv_stage(l3, k3, a3)

        self.out = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(n_classes),
            nn.LogSoftmax(dim=-1))

        self.model = nn.Sequential(
            self.cnn1,
            self.cnn2,
            self.cnn3,
            self.out
        )

    @staticmethod
    def _conv_stage(out_channels, kernel_size, activation):
        """Build one ``LazyConv2d -> Dropout(0.5) -> activation`` stage."""
        # Plain getattr is the idiomatic way to resolve a class by name.
        activation_cls = getattr(nn, activation)
        return nn.Sequential(
            nn.LazyConv2d(out_channels, kernel_size),
            nn.Dropout(0.5),
            activation_cls())

    def forward(self, x):
        """Return the log-probabilities produced by the full pipeline for ``x``."""
        return self.model(x)

    def count_weights_biases(self):
        """Return the number of trainable scalar parameters as an ``int``.

        NOTE: the lazy layers only get concrete parameter shapes during the
        first forward pass, so call this after a dry run.
        """
        return int(sum(p.numel() for p in self.parameters() if p.requires_grad))
    
# Dry run: one dummy batch materialises the lazy layers.  The dummy input uses
# the same 3x95x95 shape as the transformed dataset images, so the lazy
# convolutions bind to the channel count the real data has.
net = NET(10, 2, 'SELU', 10, 2, 'SELU', 10, 2, 'SELU').to(devi)
f'Dry run: {net(ones(1, 3, 95, 95).to(devi, dtype=pt_float)).shape}'



'Dry run: torch.Size([1, 10])'

In [21]:
class GA_Pytorch():
    """Genetic-algorithm hyper-parameter search built on DEAP.

    Every hyper-parameter is given as a collection of candidate values.  All
    collections are padded (by cycling) to a common length, and an individual
    is a list holding one index per hyper-parameter into the padded
    collections.

    Parameters
    ----------
    params : dict
        Maps hyper-parameter name -> non-empty sized collection of candidates.
    eval_func : callable
        Registered as the toolbox ``evaluate`` function with the keyword
        arguments ``padded_params``, ``train_path``, ``test_path``,
        ``batch_size`` and ``lr``; it receives the individual as first
        positional argument and must return one value per fitness weight.
    eval_weights : tuple
        Passed as ``weights`` to the DEAP fitness class.
    train_df, test_df
        Forwarded untouched to ``eval_func`` as ``train_path`` / ``test_path``.
    sel_tournsize, cx_uniform_prob, mut_shuffle_idx_prob
        Settings of the registered ``selTournament``, ``cxUniform`` and
        ``mutShuffleIndexes`` operators.
    n_pop, n_gen, n_hof
        Population size, number of generations, hall-of-fame capacity.
    cx_prob, mut_prob
        Passed to ``eaSimple`` as ``cxpb`` / ``mutpb``.
    n_jobs : int
        When greater than 1, a multiprocessing pool with ``n_jobs`` workers is
        registered as the toolbox ``map``; release it with :meth:`close`.
    """

    def __init__(self,
                 params,
                 eval_func,
                 eval_weights,
                 #
                 train_df,
                 test_df,
                 #
                 sel_tournsize=2,
                 cx_uniform_prob=0.5,
                 mut_shuffle_idx_prob=0.1,
                 n_pop=50,
                 n_gen=20,
                 n_hof=5,
                 cx_prob=0.5,
                 mut_prob=0.1,
                 #
                 n_jobs=1
                ):
        self.params = params
        self.eval_func = eval_func
        self.eval_weights = eval_weights

        self.train_df=train_df
        self.test_df=test_df

        self.sel_tournsize = sel_tournsize
        self.cx_uniform_prob = cx_uniform_prob
        self.mut_shuffle_idx_prob = mut_shuffle_idx_prob
        self.n_pop = n_pop
        self.n_gen = n_gen
        self.n_hof = n_hof
        self.cx_prob = cx_prob
        self.mut_prob = mut_prob

        self.n_jobs = n_jobs
        # Worker pool, created only when n_jobs > 1 (see close()).
        self._pool = None

        self._pad_params()
        self._create_fitness_and_indiv()
        self._register_indiv_and_pop_generators()
        self._register_eval_func()
        self._register_selection_crossover_mutation_methods()

    def _pad_params(self):
        """Pad every candidate collection to the length of the longest one.

        Shorter collections are repeated cyclically, so every gene can use the
        same index range.  The result is stored in ``self.padded_params`` as a
        dict of tuples.

        Raises
        ------
        ValueError
            If ``params`` is empty or any hyper-parameter has no candidates;
            either case would otherwise yield an empty search space.
        """
        assert isinstance(self.params, dict), 'Params must be a dict, i.e. estimator.get_params()'
        if not self.params:
            raise ValueError('Params must contain at least one hyper-parameter')
        without_values = [k for k, v in self.params.items() if len(v) == 0]
        if without_values:
            raise ValueError(f'Params without any candidate value: {without_values}')

        max_length = max(len(v) for v in self.params.values())
        # zip(range(n), cycle(v)) takes exactly n items from the endless cycle.
        self.padded_params = {
            k: tuple(value for _, value in zip(range(max_length), cycle(v)))
            for k, v in self.params.items()
        }
        print('Params padded')

    def _create_fitness_and_indiv(self):
        """Create the DEAP ``Fitness`` and ``Individual`` classes."""
        ga_cr.create('Fitness', ga_b.Fitness, weights=self.eval_weights)
        ga_cr.create('Individual', list, fitness=ga_cr.Fitness)
        print('GA entities created')

    def _gen_params_to_ga(self):
        """Return one random index per hyper-parameter (the genes of an individual)."""
        n_genes = len(self.padded_params)
        n_choices = len(next(iter(self.padded_params.values())))
        return [randint(0, n_choices - 1) for _ in range(n_genes)]

    def _register_indiv_and_pop_generators(self):
        """Create the toolbox and register the individual / population generators."""
        self.tb = ga_b.Toolbox()

        if self.n_jobs > 1:
            from multiprocessing import Pool
            # Honour n_jobs and keep a handle so the pool can be released later.
            self._pool = Pool(processes=self.n_jobs)
            self.tb.register("map", self._pool.map)

        self.tb.register("individual", ga_t.initIterate, ga_cr.Individual, self._gen_params_to_ga)
        self.tb.register("population", ga_t.initRepeat, list, self.tb.individual)
        print('GA entities\' methods registered')

    def _register_eval_func(self):
        """Register ``eval_func`` as the toolbox ``evaluate`` function.

        The padded parameters, both datasets and the module-level
        ``BATCH_SIZE`` / ``LR`` values are bound here as keyword arguments.
        """
        self.tb.register("evaluate",
                        self.eval_func,
                        padded_params=self.padded_params,
                        train_path=self.train_df,
                        test_path=self.test_df,
                        batch_size=BATCH_SIZE,
                        lr=LR)
        print('GA eval function registered')

    def _register_selection_crossover_mutation_methods(self):
        """Register tournament selection, uniform crossover and shuffle-index mutation."""
        self.tb.register("select", ga_t.selTournament, tournsize=self.sel_tournsize)
        self.tb.register("mate", ga_t.cxUniform, indpb=self.cx_uniform_prob)
        self.tb.register("mutate", ga_t.mutShuffleIndexes, indpb=self.mut_shuffle_idx_prob)
        print('GA sel-cx-mut methods registered')

    def run_ga_search(self):
        """Run ``eaSimple`` and return ``(pop, log, hof_)``.

        ``pop`` and ``log`` are what ``eaSimple`` returns.  ``hof_`` maps
        ``'hof_<rank>'`` to the decoded hyper-parameters of each hall-of-fame
        entry; it has at most ``n_hof`` entries.
        """
        # Imported locally because a later cell rebinds the global name `mean`
        # to torch.mean.
        from numpy import mean as np_mean

        pop = self.tb.population(n=self.n_pop)
        hof = ga_t.HallOfFame(self.n_hof)

        # One statistics chapter per objective.  The first three keep the names
        # used for the (accuracy, risk, complexity) fitness of this notebook.
        objective_names = ('accuracy', 'risk', 'complexity')
        per_objective = {}
        for i in range(len(self.eval_weights)):
            name = objective_names[i] if i < len(objective_names) else f'objective_{i}'
            # i=i binds the current index; a plain closure would see the last one.
            per_objective[name] = ga_t.Statistics(lambda ind, i=i: ind.fitness.values[i])
        stats = ga_t.MultiStatistics(**per_objective)
        stats.register("avg", np_mean)

        # GA Run
        pop, log = ga_algo.eaSimple(pop, self.tb, cxpb=self.cx_prob,
                                    mutpb=self.mut_prob, ngen=self.n_gen,
                                    stats=stats, halloffame=hof, verbose=True)

        # Decode the hall of fame.  Iterate over what it really holds: it can
        # contain fewer than n_hof individuals.
        hof_ = {}
        for i, best in enumerate(hof):
            hof_['hof_' + str(i)] = self._ga_to_params(best)

        return pop, log, hof_

    def _ga_to_params(self, idx_params):
        """Translate an individual (list of indices) back into a ``{name: value}`` dict."""
        res = {}
        for (k,v), idx in zip(self.padded_params.items(), idx_params):
            res[k] = v[idx]
        return res

    def close(self):
        """Shut down the worker pool, if one was created, and restore the builtin ``map``."""
        pool = getattr(self, '_pool', None)
        if pool is not None:
            pool.close()
            pool.join()
            self._pool = None
            self.tb.register("map", map)

In [22]:
from numpy import mean, linspace, inf

# Search space for NET: per conv stage the channel count (l*), the kernel
# size (k*) -- both the integers 1..20 -- and the activation class name (a*).
net_params = dict([
    ('l1', linspace(1, 20, 20).astype(int)),
    ('k1', linspace(1, 20, 20).astype(int)),
    ('a1', ['ReLU', 'CELU', 'SELU', 'ELU', 'Softsign']),
    ('l2', linspace(1, 20, 20).astype(int)),
    ('k2', linspace(1, 20, 20).astype(int)),
    ('a2', ['ReLU', 'CELU', 'SELU', 'ELU', 'Softsign']),
    ('l3', linspace(1, 20, 20).astype(int)),
    ('k3', linspace(1, 20, 20).astype(int)),
    ('a3', ['ReLU', 'CELU', 'SELU', 'ELU', 'Softsign']),
])

def net_eval_indiv(individual, padded_params, train_path, test_path, batch_size, lr,
                   max_batches=51):
    """Train and score the network encoded by one GA individual.

    Parameters
    ----------
    individual : sequence of int
        One index per hyper-parameter into ``padded_params``.
    padded_params : dict
        Hyper-parameter name -> equally long sequence of candidate values.
    train_path, test_path
        Despite the names these are the *datasets* handed to ``DataLoader``
        (GA_Pytorch forwards its ``train_df`` / ``test_df`` here).
    batch_size : int
        Batch size of both loaders.
    lr : float
        Learning rate of the Adam optimiser.
    max_batches : int, default 51
        Upper bound on the batches used per training epoch and for the test
        pass.

    Returns
    -------
    tuple
        ``(test_accuracy, risk, compl)``: test accuracy in percent, the median
        over the last scored test batch of ``prod(10 * p)`` taken over the
        class probabilities, and the number of trainable parameters.  If the
        dry run fails, the fixed penalty ``(0.01, 1e-10, 1000000)`` is returned.

    Raises
    ------
    ValueError
        If no test batch could be evaluated (e.g. the test set is smaller than
        ``batch_size`` while ``drop_last=True``).
    """

    # Params: decode the genes into keyword arguments for NET
    indiv_params = {k : list(v)[idx] for (k,v), idx in zip(padded_params.items(), individual)}

    # Net
    net = NET(**indiv_params).to(devi)
    try:
        net(ones(1,3,95,95).to(devi))
    except Exception as e:
        # Exception (not BaseException), so Ctrl-C / kernel interrupt still
        # stops the search instead of being scored as a bad architecture.
        print('=> Possible Arch Error:', e)
        return (0.01, (1/10)**10, 1000000)

    # Optimizer
    optimizer = Adam(net.parameters(), lr=lr)
    criterion = nn.NLLLoss()

    # Data: built from the datasets passed in, not from notebook globals
    train_dl = DataLoader(train_path,
                   batch_size=batch_size,
                   drop_last=True,
                   shuffle=True)

    test_dl = DataLoader(test_path,
                   batch_size=batch_size,
                   drop_last=True,
                   shuffle=True)

    # Train
    net.train()
    for epoch in range(EPOCHS):
        for i, (inputs, labels) in enumerate(train_dl):
            if i >= max_batches:
                break
            outputs = net(inputs.to(devi))

            optimizer.zero_grad()
            loss = criterion(outputs, labels.to(devi))
            loss.backward()
            optimizer.step()

    # Eval
    net = net.eval()
    test_correct = 0
    test_total = 0
    last_outputs = None
    with no_grad():
        for i, (inputs, labels) in enumerate(test_dl):
            if i >= max_batches:
                break
            labels = labels.to(devi)
            last_outputs = net(inputs.to(devi))
            _, predicted = pt_max(last_outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()

        if last_outputs is None:
            raise ValueError('No test batch was evaluated: the test dataset yields no '
                             'full batch for this batch_size, or max_batches is 0')

        # Risk: reuse the outputs of the last scored batch (no extra forward pass).
        # The factor 10 presumably matches NET's default of 10 outputs -- TODO confirm.
        risk = median(prod(last_outputs.exp()*10, dim=1))

    test_accuracy = test_correct / test_total * 100
    risk = 10 if isnan(risk) else float(risk)

    # Complexity
    compl = net.count_weights_biases()

    return (test_accuracy, risk, compl,)

# One weight per value returned by net_eval_indiv, in the order (test_accuracy, risk, compl).
# Passed to GA_Pytorch as eval_weights; in DEAP a positive weight presumably means
# "maximise" and a negative one "minimise" -- confirm against the DEAP Fitness docs.
net_weights = (1, -1, -1)

In [23]:
from itertools import cycle
from deap import creator as ga_cr, base as ga_b, algorithms as ga_algo, tools as ga_t
from random import randint
from numpy import mean
from torch.optim import Adam
from torch import max as pt_max, no_grad, median, prod, isnan

# Build the search with the remaining GA settings left at their defaults, then
# run it.  run_ga_search returns the final population, the logbook and the
# decoded hall of fame.
net_ga_params = GA_Pytorch(
    params=net_params,
    eval_func=net_eval_indiv,
    eval_weights=net_weights,
    train_df=img_tr,
    test_df=img_te,
)
pop, log, hof = net_ga_params.run_ga_search()

Params padded
GA entities created
GA entities' methods registered
GA eval function registered
GA sel-cx-mut methods registered


KeyboardInterrupt: 

### Hyperparameters
***

In [None]:
# Same values as the hyper-parameter cell near the top of the notebook.
# Number of epochs of the baseline training loop.
EPOCHS = 10
# Mini-batch size of the tr_dl / te_dl loaders.
BATCH_SIZE = 128
# Learning rate of the Adam optimisers.
LR = 0.0005

### Data
***

In [None]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, Normalize, Resize, ColorJitter, GaussianBlur, RandomHorizontalFlip, RandomVerticalFlip

# Training pipeline: random augmentation first, then resize to 95x95,
# tensor conversion and per-channel normalisation with mean 0.5 / std 0.5.
transf = Compose([
    RandomHorizontalFlip(),
    RandomVerticalFlip(),
    ColorJitter(0.1, 0.1, 0.1),
    GaussianBlur(1),
    Resize((95,95)),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Evaluation pipeline: no random augmentation, so test scores are repeatable.
transf_eval = Compose([
    Resize((95,95)),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_path
img_tr = ImageFolder(train_path, transform=transf)
img_tr[0][0].shape
# shuffle=True: ImageFolder lists samples class by class, so unshuffled
# batches would each contain (almost) a single class.
tr_dl = DataLoader(img_tr,
                   batch_size=BATCH_SIZE,
                   num_workers=1,
                   drop_last=True,
                   shuffle=True)

img_te = ImageFolder(test_path, transform=transf_eval)
# The test loader must wrap the test dataset (it previously wrapped img_tr,
# which made "test accuracy" a second measurement on the training data).
te_dl = DataLoader(img_te,
                   batch_size=BATCH_SIZE,
                   num_workers=1,
                   drop_last=True)

## Baseline
***

In [None]:
from torch import nn, numel, float as pt_float, long as pt_long, mean, as_tensor, rand
from torch.optim import Adam

In [None]:
# linear, no dropout, no batchnorm, no cnn, 

class BL_NET(nn.Module):
    """Baseline multilayer perceptron with two output classes.

    The input is flattened and passed through ``in_features -> 64 -> 32 -> 2``
    linear layers with ReLU in between; the output is log-probabilities.
    The parameter count is printed once on construction.
    """

    def __init__(self, in_features=3*95*95):
        super().__init__()
        self.in_features = in_features

        layers = [
            nn.Flatten(),  # C * H * W
            nn.Linear(self.in_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2),
            nn.LogSoftmax(dim=-1),
        ]
        self.model = nn.Sequential(*layers)

        self.count_w_b()

    def forward(self, x):
        """Return the log-probabilities for the batch ``x``."""
        log_probs = self.model(x)
        return log_probs

    def count_w_b(self):
        """Print the total number of parameter elements of the module."""
        total = 0
        for param in self.parameters():
            total += numel(param)
        print(f'# Params: {total:,}')
    
# Imported here so this cell does not rely on an import made in an earlier, unrelated cell.
from torch import no_grad

bl_net = BL_NET().to(devi)

opt = Adam(bl_net.parameters(), lr=LR)
criterion = nn.NLLLoss(reduction='mean')

# Train the baseline for EPOCHS passes over tr_dl, printing the mean batch
# accuracy and the mean batch loss after every epoch.
bl_net = bl_net.train()
for epoch in range(EPOCHS):
    running_acc = []
    running_loss = []
    for images, labels in tr_dl:
        images = images.to(devi, dtype=pt_float)
        labels = labels.to(devi, dtype=pt_long)

        out = bl_net(images)
        loss = criterion(out, labels)

        opt.zero_grad()
        loss.backward()
        opt.step()

        # Store plain Python floats: appending the loss tensor itself would
        # keep every batch's autograd graph alive until the end of the epoch.
        running_acc.append(((out.argmax(dim=1) == labels).sum() / len(labels)).item())
        running_loss.append(loss.item())

    print(f'Epoch {epoch:^3} | Accuracy {mean(as_tensor(running_acc)):^3.2f} | Loss: {mean(as_tensor(running_loss)):^3.2f}')

# Evaluate on te_dl in eval mode; no_grad avoids building autograd graphs
# that are never used.
bl_net = bl_net.eval()
test_acc = []
with no_grad():
    for images, labels in te_dl:
        images = images.to(devi, dtype=pt_float)
        labels = labels.to(devi, dtype=pt_long)

        out = bl_net(images)

        test_acc.append(((out.argmax(dim=1) == labels).sum() / len(labels)).item())

print(f'Test Accuracy {mean(as_tensor(test_acc)):^3.2f}')

## CNN
***

In [None]:
class INPUTS(nn.Module):
    """Stem of the CNN: 3 -> 32 channel 3x3 convolution, batch norm, 2x2 max pool, Softshrink."""

    def __init__(self):
        super().__init__()

        stem_layers = (
            nn.Conv2d(3, 32, 3),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2),
            nn.Softshrink(),
        )
        self.inputs = nn.Sequential(*stem_layers)

    def forward(self, x):
        """Apply the stem to the image batch ``x``."""
        features = self.inputs(x)
        return features

# Smoke test on one random 3x95x95 image; the bare expression displays the output shape.
# Expected: 95 -> 93 after the unpadded 3x3 conv, -> 46 after the 2x2 max pool.
inputs_block = INPUTS()
inputs_block(rand(1,3,95,95)).shape

In [None]:
class BLOCK(nn.Module):
    """Shape-preserving 32-channel convolution block.

    The unpadded 3x3 convolution trims one pixel per border; the trailing
    zero padding of width 1 restores the spatial size.
    """

    def __init__(self):
        super().__init__()

        body_layers = [
            nn.Conv2d(32, 32, 3),
            nn.Dropout2d(0.25),
            nn.BatchNorm2d(32),
            nn.Softshrink(),
            nn.ConstantPad2d(1, 0),
        ]
        self.block = nn.Sequential(*body_layers)

    def forward(self, x):
        """Apply the block to the feature map ``x``."""
        transformed = self.block(x)
        return transformed

# Smoke test: the output should keep the 32x46x46 shape (the 3x3 conv removes 2 pixels
# per dimension, the padding of 1 on each side adds them back).
block = BLOCK()
block(rand(1,32,46,46)).shape


In [None]:
class RES_BLOCK(nn.Module):
    """Residual wrapper: adds the input to ``block``'s output, then 2x2 average-pools.

    ``block`` must return a tensor that can be added to its input.
    """

    def __init__(self, block):
        super().__init__()
        self.block = block

        self.pool = nn.AvgPool2d(2)

    def forward(self, x):
        """Return ``pool(block(x) + x)``."""
        transformed = self.block(x)
        return self.pool(transformed + x)

# Wrap the BLOCK instance created above; the 2x2 average pool should halve 46x46 to 23x23.
res_block = RES_BLOCK(block)
res_block(rand(1,32,46,46)).shape


In [None]:
class LINEAR(nn.Module):
    """Classification head: ``Flatten -> Linear -> Dropout -> Linear -> LogSoftmax``.

    Parameters
    ----------
    in_features : int, default 800
        Number of features after flattening.
    n_classes : int, default 2
        Number of output classes.

    The hidden width is ``int(in_features ** 0.5)``.  No dropout is applied
    to the final logits: zeroing class scores at random only adds noise to
    the training loss and to the accuracy measured in train mode.
    """

    def __init__(self, in_features=800, n_classes=2):
        super().__init__()

        hidden = int(in_features**0.5)
        self.linear = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, hidden),
            nn.Dropout(0.25),
            nn.Linear(hidden, n_classes),
            nn.LogSoftmax(dim=-1))

    def forward(self, x):
        """Return the log-probabilities for the feature batch ``x``."""
        return self.linear(x)

# Smoke test of the head: three passes through the same res_block shrink 46 -> 23 -> 11 -> 5,
# so Flatten yields 32*5*5 = 800 features, matching the first Linear layer.
linear = LINEAR()
linear(res_block(
    res_block(
        res_block(rand(1,32,46,46))))).shape


In [None]:
# class ARM...

In [None]:
class CNN(nn.Module):
    """Full model: stem, three passes through ``res_block``, then the head.

    The same ``res_block`` instance occupies all three middle positions, so
    those stages share one set of weights.  The parameter count is printed
    once on construction.
    """

    def __init__(self, inputs_block, res_block, linear):
        super().__init__()

        self.inputs_block = inputs_block
        self.res_block = res_block
        self.linear = linear

        stages = [self.inputs_block]
        stages += [self.res_block] * 3
        stages.append(self.linear)
        self.model = nn.Sequential(*stages)

        self.count_w_b()

    def forward(self, x):
        """Run the batch ``x`` through all stages."""
        result = self.model(x)
        return result

    def count_w_b(self):
        """Print the total number of parameter elements of the module."""
        total = 0
        for param in self.parameters():
            total += numel(param)
        print(f'# Params: {total:,}')

# Imported here so this cell does not rely on an import made in an earlier, unrelated cell.
from torch import no_grad

cnn_net = CNN(inputs_block, res_block, linear).to(devi)
# Shape check on one real batch; the batch is moved to the model's device
# first (a CPU batch fails against a model on 'cuda').
cnn_net(next(iter(tr_dl))[0].to(devi, dtype=pt_float)).shape

opt = Adam(cnn_net.parameters(), lr=LR)
criterion = nn.NLLLoss(reduction='mean')

# Train for 25 epochs, printing the mean batch accuracy and loss per epoch.
cnn_net = cnn_net.train()
for epoch in range(25):
    running_acc = []
    running_loss = []
    for images, labels in tr_dl:
        images = images.to(devi, dtype=pt_float)
        labels = labels.to(devi, dtype=pt_long)

        out = cnn_net(images)
        loss = criterion(out, labels)

        opt.zero_grad()
        loss.backward()
        opt.step()

        # Store plain Python floats: appending the loss tensor itself would
        # keep every batch's autograd graph alive until the end of the epoch.
        running_acc.append(((out.argmax(dim=1) == labels).sum() / len(labels)).item())
        running_loss.append(loss.item())

    print(f'Epoch {epoch:^3} | Accuracy {mean(as_tensor(running_acc)):^3.2f} | Loss (-log(softmax())): {mean(as_tensor(running_loss)):^3.2f}')

# Evaluate on te_dl in eval mode; no_grad avoids building autograd graphs
# that are never used.
cnn_net = cnn_net.eval()
test_acc = []
with no_grad():
    for images, labels in te_dl:
        images = images.to(devi, dtype=pt_float)
        labels = labels.to(devi, dtype=pt_long)

        out = cnn_net(images)

        test_acc.append(((out.argmax(dim=1) == labels).sum() / len(labels)).item())

print(f'Test Accuracy {mean(as_tensor(test_acc)):^3.2f}')