In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.backends.cudnn.benchmark = True

import os
import cv2
import shutil
import pandas as pd
import numpy as np
import pretrainedmodels
import matplotlib.pyplot as plt

from apex import amp
from time import time
from warnings import filterwarnings
from tqdm.notebook import tqdm
from efficientnet_pytorch import EfficientNet
from torch.utils.data import Dataset, DataLoader
import argparse

import gc
from PIL import Image
from albumentations import *
from albumentations.pytorch import ToTensorV2
from torchvision.transforms import ToPILImage
from sklearn.model_selection import StratifiedKFold

In [2]:
# Inference configuration for the checkpoint ensemble.
# Original author's note: .9998 LB

# Filesystem locations.
test_root = 'data/test_images'
model_dir = 'checkpoints'

# Checkpoint files (inside model_dir) that make up the ensemble,
# grouped here by filename prefix.
model_names = [
    'final_b4_0_stage1.pt',
    'final_b4_1_stage1.pt',
    'final_iv4_0_stage1.pt',
    'final_iv4_1_stage1.pt',
    'final_iv4_2_stage1.pt',
    'final_se50_0_stage1.pt',
    'final_se50_1_stage1.pt',
    'final_mixxl_0_stage1.pt',
    'final_mixxl_1_stage1.pt',
]

# Data loading / preprocessing sizes.
batch_size = 128
image_size = 384

# One weight per entry of model_names, in the same order. Within each
# filename-prefix group the weights add up to 1 (2 x 1/2 or 3 x 1/3).
weights = torch.tensor([
    1/2, 1/2,
    1/3, 1/3, 1/3,
    1/2, 1/2,
    1/2, 1/2,
])
# Exponent applied to the predicted probabilities before ensembling.
temperature = 10

device = 'cuda:0'

In [3]:
# Master Ensemble

In [4]:
# Test-time preprocessing pipeline: resize to a square of side image_size
# using Lanczos interpolation, normalize with Normalize()'s library defaults,
# then convert to a tensor.
_test_steps = [
    Resize(image_size, image_size, interpolation=cv2.INTER_LANCZOS4),
    Normalize(),
    ToTensorV2(),
]
test_transform = Compose(_test_steps)

In [5]:
# When no explicit checkpoint list is configured, use every file found in
# the checkpoint directory.
if model_names is None:
    model_names = list(os.listdir(model_dir))

# Load every checkpoint with torch.jit.load, move it to the target device,
# cast it to half precision and switch it to eval mode.
models = []
for checkpoint_name in model_names:
    checkpoint_path = os.path.join(model_dir, checkpoint_name)
    model = torch.jit.load(checkpoint_path).to(device).half()
    model.eval()
    models.append(model)

# Disabled by the original author (kept for reference, never executed):
#   if weights is None:
#       weights = torch.ones(len(model_names))
#   weights = torch.tensor(weights) / len(weights)

import json
train_path = 'data/train.json'
with open(train_path, 'r') as fp:
    train = dict(json.load(fp))
# Map each category id from the training annotations to its name.
id_map = {category['id']: category['name'] for category in train['categories']}

In [6]:
class TrashDataset_test(torch.utils.data.Dataset):
    """Dataset that serves test images listed in a DataFrame.

    Each row's ``id`` column value is turned into ``<root>/<id>.png``.

    Args:
        df: DataFrame with an ``id`` column holding the image identifiers.
        root: Directory containing the ``.png`` files.
        transform: Optional callable invoked as ``transform(image=array)``
            that returns a mapping with an ``'image'`` entry; when ``None``
            the raw array is returned.
    """

    def __init__(self, df, root='data', transform=None):
        self.transform = transform
        self.df = df
        self.root = root

    def __len__(self):
        """Return the number of rows in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one image and return ``{'img': image, 'name': id_as_str}``."""
        name = str(self.df['id'].values[idx])
        path = os.path.join(self.root, name + '.png')
        # Image.open is lazy: the context manager guarantees the file handle
        # is released once the pixels have been copied into the array.
        # convert('RGB') makes palette / greyscale / RGBA PNGs 3-channel so
        # the downstream transform always sees an (H, W, 3) array; it leaves
        # the pixel values of images that are already RGB unchanged.
        with Image.open(path) as pil_image:
            image = np.array(pil_image.convert('RGB'))
        if self.transform is not None:
            image = self.transform(image=image)['image']
        return {'img': image, 'name': name}

In [7]:
# Build the test dataset from the id list in data/test.csv and wrap it in a
# batched loader (default ordering, 8 worker processes).
test_frame = pd.read_csv('data/test.csv')
testset = TrashDataset_test(test_frame, test_root, test_transform)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    num_workers=8,
)
print(len(testset), 'test')

10000 test


In [8]:
# Test-time augmentation views: the untouched batch, a flip along the last
# axis, a flip along the second-to-last axis, and a flip along both.
tta_flips = [None, (-1,), (-2,), (-1, -2)]

names = []
model_preds = [[] for _ in models]
with torch.no_grad():
    for batch in tqdm(testloader):
        half_batch = batch['img'].to(device).half()
        for model_idx, net in enumerate(models):
            # Accumulate softmax probabilities over the four views, in order.
            tta_sum = None
            for dims in tta_flips:
                view = half_batch if dims is None else torch.flip(half_batch, dims)
                probs = F.softmax(net(view), dim=1)
                tta_sum = probs if tta_sum is None else tta_sum + probs
            # Average the views and store the result in float32.
            model_preds[model_idx].append((tta_sum / 4).float())
        names.extend(batch['name'])
# N_models x num_samples x n_classes
model_preds = torch.stack([torch.cat(chunks) for chunks in model_preds])

HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))




In [9]:
# Combine the per-model predictions (N_models x num_samples x n_classes)
# into one label per sample and write the submission file.
if len(weights) != model_preds.shape[0]:
    raise ValueError(
        'expected one weight per model, got %d weights for %d models'
        % (len(weights), model_preds.shape[0])
    )

# Temperature sharpen, then renormalize so every (model, sample) row is a
# probability distribution over classes again.
model_preds_ = model_preds ** temperature
model_preds_ = model_preds_ / model_preds_.sum(2, keepdim=True)

# Apply the per-model weights AFTER normalizing. Multiplying them in before
# the per-row normalization (as was done previously) cancels every weight,
# because each row is divided by a sum that carries the same scalar factor.
model_weights = weights.to(device=model_preds_.device, dtype=model_preds_.dtype).view(-1, 1, 1)
# Weighted average over models; dividing by the weight total keeps the
# reported confidence in [0, 1].
ensemble_probs = (model_preds_ * model_weights).sum(0) / model_weights.sum()
ensemble_conf, ensemble_preds = ensemble_probs.cpu().max(1)

# NOTE(review): the +1 assumes submission labels are class index + 1 -
# confirm against the category ids in data/train.json.
preds = ensemble_preds.numpy() + 1
df = pd.DataFrame({'id':names, 'label':preds})
df.to_csv(os.path.join(model_dir, 'stage2_submission.csv'), index=False)
df.head()

Unnamed: 0,id,label
0,a2567c4c8625,33
1,5cd9c7b56d6e,142
2,cc222c7f2318,80
3,cbf0afa29462,24
4,445dcea229a0,126
