In [1]:
!nvidia-smi

Mon Sep 30 10:32:06 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.104      Driver Version: 410.104      CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    39W / 300W |      0MiB / 16130MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [2]:
from radam import RAdam, PlainRAdam, AdamW
from am_softmax import AMSoftmaxLoss, AngleSimpleLinear

In [4]:
import os
import numpy as np 
import pandas as pd
from PIL import Image
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn
import torch.utils.data as D
from torch.optim.lr_scheduler import ExponentialLR
from torchvision import models, transforms as T
import torch.nn.functional as F

from ignite.engine import Events, create_supervised_evaluator, create_supervised_trainer
from ignite.metrics import Loss, Accuracy
from ignite.contrib.handlers.tqdm_logger import ProgressBar
from ignite.handlers import  EarlyStopping, ModelCheckpoint

#import torch.nn.parallel
#import torch.distributed as dist
#import torch.utils.data.distributed
#from torch.multiprocessing import Pool, Process, set_start_method
#import apex
#from apex import amp
#from apex.parallel import DistributedDataParallel as DDP
#from apex.fp16_utils import *
#from apex import amp, optimizers
#from apex.multi_tensor_apply import multi_tensor_applier

import warnings
warnings.filterwarnings('ignore')

## Config

In [5]:
# Central settings for the experiment; later cells read these keys via config[...].
config = {
    'SEED': 42,  # passed to seed_torch() and used as random_state for the train/val split
    'CLASSES': 1108,  # number of output classes given to the model
    'PATH_DATA': '/home/tienen/kaggle_dataset_drugs/',  # root folder holding train.csv, test.csv and the image folders
    'DEVICE': 'cuda',
    'BATCH_SIZE': 8,
    'VAL_SIZE': 0.05,  # fraction of train.csv held out for validation
    'MODEL_NAME': 'DenseNet201_AMSLoss',  # used for the checkpoint folder, TensorBoard run and submission file names
    'USE_ANGULAR': True,  # True -> AngleSimpleLinear head, False -> plain nn.Linear head
    'USE_BN': True,  # True -> BatchNorm2d is applied to the input channels before the backbone
    'LR': 1e-4,
    'LR_STR': '1e-4',  # textual form of LR embedded in file names; must be kept in sync with LR by hand
    'TURN_OFF_ON_N_EPOCHS': 1,  # epochs during which only the head (and input BN) are trained; 0 disables the warm-up
}

In [6]:
def seed_torch(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and configures cuDNN for repeatable behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    import os
    import random

    random.seed(seed)
    # Only affects Python processes started after this point; the hash seed of
    # the running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU, not just the current one; safe to call without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may select different kernels between runs, which defeats the
    # deterministic flag above.
    torch.backends.cudnn.benchmark = False
    
seed_torch(config['SEED'])

## Model

In [7]:
class DenseNet(nn.Module):
    """DenseNet-201 backbone adapted to multi-channel images with a selectable head.

    The ImageNet-pretrained convolutional features are reused. The first
    convolution is replaced by one accepting ``num_channels`` inputs and
    initialised from the channel-wise mean of the pretrained RGB kernel.

    Args:
        num_classes: Number of output classes.
        num_channels: Number of channels in the input images.
        use_bn: If True, apply ``BatchNorm2d`` to the raw input before the backbone.
        use_angular: If True, use ``AngleSimpleLinear`` as the classification
            head (attribute ``fc_angular``); otherwise a plain ``nn.Linear``
            (attribute ``fc``).
    """

    def __init__(self, num_classes=1000, num_channels=6, use_bn=False, use_angular=False):
        super().__init__()
        self.use_angular = use_angular
        self.use_bn = use_bn
        if self.use_bn:
            # Sized from num_channels so non-default channel counts work.
            self.bn = nn.BatchNorm2d(num_channels)

        preloaded = models.densenet201(pretrained=True)
        self.features = preloaded.features

        # Swap the 3-channel stem for a num_channels one, giving every input
        # channel the mean of the pretrained RGB kernels.
        new_conv = nn.Conv2d(num_channels, 64, 7, 2, 3, bias=False)
        trained_kernel = self.features.conv0.weight
        with torch.no_grad():
            mean_kernel = torch.mean(trained_kernel, 1)
            new_conv.weight[:, :] = torch.stack([mean_kernel] * num_channels, dim=1)
        self.features.conv0 = new_conv

        # Width of the pooled feature vector, taken from the pretrained
        # classifier instead of being hard-coded.
        num_features = preloaded.classifier.in_features
        if self.use_angular:
            self.fc_angular = AngleSimpleLinear(num_features, num_classes)
        else:
            self.fc = nn.Linear(in_features=num_features, out_features=num_classes, bias=True)

    def forward(self, x):
        """Return the head's output for a batch of images ``x``."""
        if self.use_bn:
            x = self.bn(x)
        x = self.features(x)
        # Global average pooling followed by flattening to (batch, features).
        x = F.adaptive_avg_pool2d(x, (1, 1)).view(x.size(0), -1)

        head = self.fc_angular if self.use_angular else self.fc
        return head(x)

---

In [8]:
# Build the network from the config flags and move it to the configured device.
# The trailing ';' suppresses the module repr in the cell output.
model = DenseNet(num_classes=config['CLASSES'], use_bn=config['USE_BN'], use_angular=config['USE_ANGULAR'])
model.to(config['DEVICE']);

In [9]:
# Loss and optimizer. The plain cross-entropy alternative is kept for reference:
# criterion = nn.CrossEntropyLoss()
criterion = AMSoftmaxLoss(margin_type='cos').to(config['DEVICE'])
# Adam over all model parameters with the learning rate from the config.
optimizer = torch.optim.Adam(model.parameters(), lr=config['LR'])

## Dataset

In [10]:
class ImagesDS(D.Dataset):
    """Dataset yielding one multi-channel image per row of a metadata frame.

    Each channel is a separate PNG located at
    ``<img_dir>/<mode>/<experiment>/Plate<plate>/<well>_s<site>_w<channel>.png``.

    Args:
        df: DataFrame with ``experiment``, ``well`` and ``plate`` columns, plus
            ``sirna`` (read when ``mode == 'train'``) or ``id_code`` (otherwise).
        img_dir: Root directory of the images.
        mode: Sub-directory to read from; ``'train'`` also makes items carry
            the integer label instead of the id code.
        site: Imaging site number used in the file name.
        channels: Channel numbers to load, in stacking order.
    """

    def __init__(self, df, img_dir, mode='train', site=1, channels=(1, 2, 3, 4, 5, 6)):
        self.records = df.to_records(index=False)
        # Copy so that neither the default nor a caller's list is shared
        # between instances.
        self.channels = list(channels)
        self.site = site
        self.mode = mode
        self.img_dir = img_dir
        self.len = df.shape[0]

    @staticmethod
    def _load_img_as_tensor(file_name):
        """Open an image file and convert it with ``ToTensor``."""
        with Image.open(file_name) as img:
            return T.ToTensor()(img)

    def _get_img_path(self, index, channel):
        """Build the path of one channel image for the row at ``index``."""
        record = self.records[index]
        file_name = f'{record.well}_s{self.site}_w{channel}.png'
        return '/'.join([self.img_dir, self.mode, record.experiment, f'Plate{record.plate}', file_name])

    def __getitem__(self, index):
        """Return ``(image, sirna)`` in train mode, ``(image, id_code)`` otherwise."""
        paths = [self._get_img_path(index, ch) for ch in self.channels]
        # Concatenate the per-channel tensors along dimension 0.
        img = torch.cat([self._load_img_as_tensor(img_path) for img_path in paths])
        if self.mode == 'train':
            return img, int(self.records[index].sirna)
        return img, self.records[index].id_code

    def __len__(self):
        """Number of rows in the metadata frame."""
        return self.len

In [11]:
# Training metadata: one row per well, labelled by the `sirna` column.
df = pd.read_csv(config['PATH_DATA']+'/train.csv')
# Hold out VAL_SIZE of the rows for validation, stratified by sirna so the
# class proportions are preserved; seeded for a repeatable split.
df_train, df_val = train_test_split(df, test_size=config['VAL_SIZE'], stratify=df.sirna, random_state=config['SEED'])
# Test metadata.
df_test = pd.read_csv(config['PATH_DATA']+'/test.csv')

In [12]:
# Every row has an image for site 1 and for site 2 (the `_s1_` / `_s2_` files);
# both are used as independent training samples.
ds_1 = ImagesDS(df_train, config['PATH_DATA'], site=1, mode='train')
ds_2 = ImagesDS(df_train, config['PATH_DATA'], site=2, mode='train')
ds = D.ConcatDataset([ds_1, ds_2])

# Validation rows come from train.csv, so they also use mode='train': the
# images live under the train folder and the items must carry the sirna label.
ds_val_1 = ImagesDS(df_val, config['PATH_DATA'], site=1, mode='train')
ds_val_2 = ImagesDS(df_val, config['PATH_DATA'], site=2, mode='train')
ds_val = D.ConcatDataset([ds_val_1, ds_val_2])

# The two test sites stay separate so their predictions can be averaged per well.
ds_test_1 = ImagesDS(df_test, config['PATH_DATA'], site=1, mode='test')
ds_test_2 = ImagesDS(df_test, config['PATH_DATA'], site=2, mode='test')

# Data loaders used by the training, validation and prediction cells. They were
# referenced but never defined, so the notebook failed on a fresh kernel.
# Pass num_workers=... to speed up image loading.
train_loader = D.DataLoader(ds, batch_size=config['BATCH_SIZE'], shuffle=True)
val_loader = D.DataLoader(ds_val, batch_size=config['BATCH_SIZE'], shuffle=False)
# The test loaders are iterated in lockstep and their per-batch outputs are
# stacked afterwards, so they must not shuffle; batch_size=1 keeps every
# batch the same shape.
test_loader_1 = D.DataLoader(ds_test_1, batch_size=1, shuffle=False)
test_loader_2 = D.DataLoader(ds_test_2, batch_size=1, shuffle=False)

## Ignite magic starts here
Let's define the metrics we will track and create the Ignite engines (a trainer and an evaluator) that train and validate our model.

In [13]:
# Metrics computed by the validation evaluator; `Loss` wraps the same criterion
# that is used for training.
metrics = {
    'loss': Loss(criterion),
    'accuracy': Accuracy(),
}

# Ignite engines: the trainer runs the optimisation steps, the evaluator
# computes the metrics above. Both run on the configured device.
trainer = create_supervised_trainer(model, optimizer, criterion, device=config['DEVICE'])
val_evaluator = create_supervised_evaluator(model, metrics=metrics, device=config['DEVICE'])

In [14]:
# Attach to our trainer a function to run a validator at the end of each epoch
@trainer.on(Events.EPOCH_COMPLETED)
def compute_and_display_val_metrics(engine):
    """Run the validation evaluator and print its loss and accuracy.

    Args:
        engine: The training engine that fired the event; only its epoch
            counter is read.
    """
    epoch = engine.state.epoch
    # Named val_metrics so it does not shadow the module-level `metrics` dict.
    val_metrics = val_evaluator.run(val_loader).metrics
    print("Validation Results - Epoch: {}  Average Loss: {:.4f} | Accuracy: {:.4f} "
          .format(epoch, val_metrics['loss'], val_metrics['accuracy']))

In [15]:
# A constant learning rate for the whole run is probably not ideal here,
# so shrink it by 5% after every epoch.
lr_scheduler = ExponentialLR(optimizer, gamma=0.95)

@trainer.on(Events.EPOCH_COMPLETED)
def update_lr_scheduler(engine):
    """Advance the scheduler by one step and print the new learning rate."""
    lr_scheduler.step()
    first_group = optimizer.param_groups[0]
    current_lr = float(first_group['lr'])
    print("Learning rate: {}".format(current_lr))

In [16]:
# Early stopping, so the number of epochs does not have to be tuned by hand:
# training ends once validation accuracy has not improved for 4 evaluations.
def _val_accuracy(engine):
    """Score used by early stopping: accuracy of the finished evaluation run."""
    return engine.state.metrics['accuracy']

handler = EarlyStopping(patience=4, score_function=_val_accuracy, trainer=trainer)
val_evaluator.add_event_handler(Events.COMPLETED, handler)

In [17]:
# Let's warmup our last linear layer by freezing all the other layers for a couple of epochs
@trainer.on(Events.EPOCH_STARTED)
def turn_on_layers(engine):
    """Freeze the backbone during warm-up and unfreeze it afterwards.

    At the start of epoch 1 only the children named ``fc``, ``fc_angular``
    and ``bn`` stay trainable. Once the epoch number exceeds
    ``config['TURN_OFF_ON_N_EPOCHS']`` every parameter is made trainable
    again. Nothing happens when that setting is 0 or negative.

    Args:
        engine: The training engine that fired the event; only its epoch
            counter is read.
    """
    warmup_epochs = config['TURN_OFF_ON_N_EPOCHS']
    if warmup_epochs <= 0:
        return

    epoch = engine.state.epoch
    if epoch == 1:
        for name, child in model.named_children():
            trainable = name in ('fc', 'fc_angular', 'bn')
            pbar.log_message(name + (' is unfrozen' if trainable else ' is frozen'))
            for param in child.parameters():
                param.requires_grad = trainable

    # Only act (and log) while something is still frozen, so the message is
    # not repeated on every later epoch.
    if epoch > warmup_epochs and any(not param.requires_grad for param in model.parameters()):
        pbar.log_message("Turn on all the layers")
        for param in model.parameters():
            param.requires_grad = True

In [18]:
# Keep the weights from the end of each epoch (at most the 10 newest files)
# so that any epoch can be reloaded later.
checkpoints = ModelCheckpoint(
    dirname=config['MODEL_NAME'],
    filename_prefix='all_exps',
    save_interval=1,
    n_saved=10,
    create_dir=True,
    require_empty=False,
)
trainer.add_event_handler(
    Events.EPOCH_COMPLETED,
    checkpoints,
    {config['MODEL_NAME']+'_lr={}'.format(config['LR_STR']): model},
)

In [19]:
# tqdm-based progress bar for the training engine; the value returned by each
# training step is displayed under the key 'loss'.
pbar = ProgressBar(bar_format='')
pbar.attach(trainer, output_transform=lambda step_output: {'loss': step_output})

In [21]:
# Let's log some interesting information about our learning process to Tensorboard
# (Does not work in kaggle kernels, you need to have TensorBoardX installed)

import os
if 'KAGGLE_WORKING_DIR' not in os.environ:  #  If we are not on kaggle server
    # Imported here because the logger is an optional dependency; explicit
    # names instead of a wildcard keep the notebook namespace predictable.
    from ignite.contrib.handlers.tensorboard_logger import OutputHandler, TensorboardLogger
    tb_logger = TensorboardLogger("board/"+config['MODEL_NAME'])
    # Training loss after every iteration.
    tb_logger.attach(trainer, log_handler=OutputHandler(tag="training", output_transform=lambda loss: {'loss': loss}),
                     event_name=Events.ITERATION_COMPLETED)

    # Validation metrics after every evaluator run, plotted against the trainer's counter.
    tb_logger.attach(val_evaluator, log_handler=OutputHandler(tag="validation", metric_names=["accuracy", "loss"],
                     another_engine=trainer),event_name=Events.EPOCH_COMPLETED)

    # Close the logger when training finishes rather than immediately, so it
    # is still open while the handlers above are firing.
    @trainer.on(Events.COMPLETED)
    def close_tb_logger(engine):
        """Release the TensorBoard writer once the training run is over."""
        tb_logger.close()

## Training

In [22]:
# Train for at most 20 epochs; the EarlyStopping handler attached to the
# validation evaluator may end the run sooner.
trainer.run(train_loader, max_epochs=20)

bn is unfrozen
features is frozen
fc_angular is unfrozen


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 1  Average Loss: 21.5494 | Accuracy: 0.0192 
Learning rate: 9.5e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 2  Average Loss: 19.7147 | Accuracy: 0.1361 
Learning rate: 9.025e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 3  Average Loss: 18.2339 | Accuracy: 0.3220 
Learning rate: 8.573749999999999e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 4  Average Loss: 17.5369 | Accuracy: 0.3891 
Learning rate: 8.145062499999998e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 5  Average Loss: 16.9738 | Accuracy: 0.4351 
Learning rate: 7.737809374999998e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 6  Average Loss: 16.5153 | Accuracy: 0.4688 
Learning rate: 7.350918906249998e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 7  Average Loss: 16.1953 | Accuracy: 0.4860 
Learning rate: 6.983372960937497e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 8  Average Loss: 15.8290 | Accuracy: 0.5159 
Learning rate: 6.634204312890622e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 9  Average Loss: 16.7451 | Accuracy: 0.4261 
Learning rate: 6.30249409724609e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 10  Average Loss: 15.9029 | Accuracy: 0.4915 
Learning rate: 5.987369392383786e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 11  Average Loss: 16.0825 | Accuracy: 0.4745 
Learning rate: 5.688000922764596e-05
Turn on all the layers


HBox(children=(IntProgress(value=0, max=8673), HTML(value='')))

Validation Results - Epoch: 12  Average Loss: 15.9597 | Accuracy: 0.4890 
Learning rate: 5.4036008766263664e-05


<ignite.engine.engine.State at 0x7fc8422553c8>

## Prediction for test

In [13]:
# Epoch whose checkpoint is used for prediction.
n_epochs = 8

model = DenseNet(num_classes=config['CLASSES'], use_bn=config['USE_BN'], use_angular=config['USE_ANGULAR'])
# The file name must mirror what the checkpoint handler writes:
# '<prefix>_<key>_<epoch>.pth' with key '<MODEL_NAME>_lr=<LR_STR>'. The '='
# was missing here, so the path did not match the saved files.
checkpoint_path = '{0}/all_exps_{0}_lr={1}_{2}.pth'.format(config['MODEL_NAME'], config['LR_STR'], n_epochs)
# Load onto the CPU first so this also works on a machine without the GPU the
# weights were saved from; the model is moved to the target device below.
checkpoint = torch.load(checkpoint_path, map_location='cpu')
model.load_state_dict(checkpoint)
model.to(config['DEVICE'])
model.eval();

In [14]:
predicted = []  # one array of averaged scores per batch

# zip() stops silently at the shorter iterable, so check the lengths up front.
assert len(test_loader_1) == len(test_loader_2), 'test loaders must have the same number of batches'

with torch.no_grad():
    # zip() has no length, so pass `total` to get a real progress bar.
    for (x1, id1), (x2, id2) in tqdm_notebook(zip(test_loader_1, test_loader_2), total=len(test_loader_1)):
        # Both sites must describe the same wells, otherwise averaging is meaningless.
        assert list(id1) == list(id2), 'test loaders are not aligned'

        x1 = x1.to(config['DEVICE'])
        output1 = model(x1)

        x2 = x2.to(config['DEVICE'])
        output2 = model(x2)

        # Average the raw model outputs of the two sites of each well.
        result = 0.5*(output1 + output2)
        predicted.append(result.cpu().numpy())

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [15]:
# Join the per-batch outputs along the sample axis. Unlike stack + squeeze this
# works for any batch size (including a smaller final batch), and re-running
# the cell on the already joined 2-D array leaves it unchanged.
predicted = np.concatenate([np.atleast_2d(batch) for batch in predicted], axis=0)
predicted.shape

(19897, 1108)

In [16]:
submission = pd.read_csv(config['PATH_DATA'] + '/test.csv')
# One row of scores per test row, in the same order.
assert len(predicted) == len(submission), 'number of predictions does not match test.csv'
submission['sirna'] = np.argmax(predicted, axis=1).astype(int)
# to_csv does not create missing folders.
os.makedirs('submits', exist_ok=True)
submission.to_csv('submits/{}_{}epochs_lr{}.csv'.format(config['MODEL_NAME'], n_epochs, config['LR_STR']),
                  index=False, columns=['id_code','sirna'])

In [17]:
# Preview the first rows of the plain (argmax) submission.
submission.head()

Unnamed: 0,id_code,experiment,plate,well,sirna
0,HEPG2-08_1_B03,HEPG2-08,1,B03,855
1,HEPG2-08_1_B04,HEPG2-08,1,B04,466
2,HEPG2-08_1_B05,HEPG2-08,1,B05,836
3,HEPG2-08_1_B06,HEPG2-08,1,B06,731
4,HEPG2-08_1_B07,HEPG2-08,1,B07,307


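## Use leak

In the training data every siRNA occurs on exactly three distinct plate numbers (asserted below); the fourth is derived from them. Each test experiment is matched to one of the four resulting columns of `plate_groups` using the model's own predictions, and each well's prediction is then restricted to the siRNAs that this column places on the well's plate.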

In [18]:
# plate_groups[s, :] lists plate numbers for siRNA s: the three seen in
# train.csv (in value_counts order) followed by the remaining one.
n_classes = config['CLASSES']
plate_groups = np.zeros((n_classes, 4), int)
for sirna in range(n_classes):
    grp = df.loc[df.sirna==sirna,:].plate.value_counts().index.values
    assert len(grp) == 3
    plate_groups[sirna,0:3] = grp
    # Assumes plates are numbered 1..4 (sum 10), so the unseen plate is the remainder.
    plate_groups[sirna,3] = 10 - grp.sum()

print(plate_groups[:10,:])

[[4 2 3 1]
 [1 3 4 2]
 [2 4 1 3]
 [1 3 4 2]
 [3 1 2 4]
 [1 3 4 2]
 [1 3 4 2]
 [2 4 1 3]
 [1 3 4 2]
 [4 2 3 1]]


In [19]:
all_test_exp = df_test.experiment.unique()

# group_plate_probs[e, j]: fraction of wells in test experiment e whose
# predicted siRNA is placed, by column j of plate_groups, on the plate the
# well actually sits on.
group_plate_probs = np.zeros((len(all_test_exp), 4))
for idx, experiment in enumerate(all_test_exp):
    in_experiment = df_test.experiment == experiment
    preds = submission.loc[in_experiment, 'sirna'].values
    plates = df_test.loc[in_experiment, 'plate'].values
    assert len(preds) == len(plates)

    # Look up the plate each column assigns to the predicted siRNA and compare
    # it with the real plate. Same result as the one-hot / repeated-mask
    # formulation, without building (n_wells x n_classes) arrays.
    group_plate_probs[idx] = (plate_groups[preds, :] == plates[:, np.newaxis]).mean(axis=0)

In [20]:
# For every test experiment pick the plate_groups column with the highest agreement.
exp_to_group = group_plate_probs.argmax(1)
print(exp_to_group)

[3 1 0 0 0 0 2 2 3 0 0 3 1 0 0 0 2 3]


In [21]:
# this is the function that rules out 75% of the sirnas according to the selected assignment

def select_plate_group(pp_mult, idx):
    """Mask out the siRNAs that cannot be on each well's plate.

    Args:
        pp_mult: Float array of scores, one row per well of test experiment
            ``all_test_exp[idx]`` and one column per siRNA. Modified in place.
        idx: Position of the experiment in ``all_test_exp``.

    Returns:
        The same array, with every excluded entry set to ``-inf``.
    """
    sub_test = df_test.loc[df_test.experiment == all_test_exp[idx],:]
    assert len(pp_mult) == len(sub_test)
    # Plate assigned to each siRNA by the experiment's group (one row) versus
    # the plate of each well (one column); broadcasting compares all pairs.
    expected_plate = plate_groups[np.newaxis, :, exp_to_group[idx]]
    actual_plate = sub_test.plate.values[:, np.newaxis]
    mask = expected_plate != actual_plate
    # The scores are raw model outputs and may be negative, so filling with 0
    # could let an excluded siRNA win the argmax; -inf never can.
    pp_mult[mask] = -np.inf
    return pp_mult

In [22]:
# Start from the plain submission and overwrite the label of every test row
# with the best siRNA that survives the plate-group masking.
sub = submission.copy()

for idx, experiment in enumerate(all_test_exp):
    indices = (df_test.experiment == experiment)
    # Work on a copy: select_plate_group modifies its argument in place.
    masked_scores = select_plate_group(predicted[indices, :].copy(), idx)
    sub.loc[indices,'sirna'] = masked_scores.argmax(1)

In [23]:
# to_csv does not create missing folders.
os.makedirs('submits', exist_ok=True)
sub.to_csv('submits/{}_{}epochs_lr{}_plates_leak.csv'.format(config['MODEL_NAME'], n_epochs, config['LR_STR']),
           index=False, columns=['id_code','sirna'])

In [24]:
# Fraction of test rows whose predicted siRNA is unchanged by the plate-group masking.
print((sub.sirna == submission.sirna).mean())

0.6434638387696637


In [25]:
# Number of distinct siRNA labels predicted before and after the masking.
len(submission.sirna.unique()), len(sub.sirna.unique())

(1096, 1105)

In [26]:
# Preview the first rows of the plate-leak submission.
sub.head()

Unnamed: 0,id_code,experiment,plate,well,sirna
0,HEPG2-08_1_B03,HEPG2-08,1,B03,855
1,HEPG2-08_1_B04,HEPG2-08,1,B04,710
2,HEPG2-08_1_B05,HEPG2-08,1,B05,836
3,HEPG2-08_1_B06,HEPG2-08,1,B06,609
4,HEPG2-08_1_B07,HEPG2-08,1,B07,878
