In [1]:
import gc
import os
import sys
import time
import random
import logging
import datetime as dt

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.nn.functional as F
import torchvision as vision

from torch.optim.lr_scheduler import CosineAnnealingLR

from pathlib import Path
from PIL import Image
from contextlib import contextmanager

from joblib import Parallel, delayed
from tqdm import tqdm
from fastprogress import master_bar, progress_bar

from sklearn.model_selection import KFold
from sklearn.metrics import fbeta_score


In [2]:
#!mkdir -p /tmp/.torch/models/
#!wget -O /tmp/.torch/models/se_resnet152-d17c99b7.pth http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth
#import pretrainedmodels
torch.cuda.is_available()

True

In [3]:
@contextmanager
def timer(name="Main", logger=None):
    t0 = time.time()
    yield
    msg = f"[{name}] done in {time.time() - t0} s"
    if logger is not None:
        logger.info(msg)
    else:
        print(msg)
        

def get_logger(name="Main", tag="exp", log_dir="log/"):
    log_path = Path(log_dir)
    path = log_path / tag
    path.mkdir(exist_ok=True, parents=True)

    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)

    fh = logging.FileHandler(
        path / (dt.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + ".log"))
    sh = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        "%(asctime)s %(name)s %(levelname)s %(message)s")

    fh.setFormatter(formatter)
    sh.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(sh)
    return logger


def seed_torch(seed=1029):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

In [4]:
logger = get_logger(name="Main", tag="Pytorch-VGG16")

In [5]:
@contextmanager
def timer(name="Main", logger=None):
    t0 = time.time()
    yield
    msg = f"[{name}] done in {time.time() - t0} s"
    if logger is not None:
        logger.info(msg)
    else:
        print(msg)
        

def get_logger(name="Main", tag="exp", log_dir="log/"):
    log_path = Path(log_dir)
    path = log_path / tag
    path.mkdir(exist_ok=True, parents=True)

    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)

    fh = logging.FileHandler(
        path / (dt.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + ".log"))
    sh = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        "%(asctime)s %(name)s %(levelname)s %(message)s")

    fh.setFormatter(formatter)
    sh.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(sh)
    return logger


def seed_torch(seed=1029):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

In [25]:
!ls ../input
labels = pd.read_csv("../input/imet-2019-fgvc6/labels.csv")
train = pd.read_csv("../input/imet-2019-fgvc6/train.csv")
sample = pd.read_csv("../input/imet-2019-fgvc6/sample_submission.csv")
train.head()

cultures = [x for x in labels.attribute_name.values if x.startswith("culture")]
tags = [x for x in labels.attribute_name.values if x.startswith("tag")]
len(cultures), len(tags)

imet-2019-fgvc6  pytorch-pretrained-image-models


(398, 705)

In [72]:
import cv2
def split_culture_tag(x):
    cultures_ = list()
    tags_ = list()
    for i in x.split(" "):
        if int(i) <= len(cultures):
            cultures_.append(i)
        else:
            tags_.append(str(int(i) - len(cultures)))
    if not cultures_:
        cultures_.append(str(len(cultures)))
    if not tags_:
        tags_.append(str(len(tags)))
    return " ".join(cultures_), " ".join(tags_)

culture_ids = list()
tag_ids = list()

for v in tqdm(train.attribute_ids.values):
    c, t = split_culture_tag(v)
    culture_ids.append(c)
    tag_ids.append(t)

num_classes_c = len(cultures) + 1
num_classes_t = len(tags) + 1

train["culture_ids"] = culture_ids
train["tag_ids"] = tag_ids


def obtain_y_c(ids):
    y = np.zeros(num_classes_c)
    for idx in ids.split(" "):
        y[int(idx)] = 1
    return y

def obtain_y_t(ids):
    y = np.zeros(num_classes_t)
    for idx in ids.split(" "):
        y[int(idx)] = 1
    return y

paths = ["../input/imet-2019-fgvc6/train/{}.png".format(x) for x in train.id.values]

targets_c = np.array([obtain_y_c(y) for y in train.culture_ids.values])
targets_t = np.array([obtain_y_t(y) for y in train.tag_ids.values])
print(targets_c.shape)

def rem_bkg(img):
    y_size,x_size,col = img.shape
    
    for y in range(y_size):
        for r in range(1,6):
            col = img[y, x_size-r] 
            img[np.where((img == col).all(axis = 2))] = [255,255,255]
        for l in range(5):
            col = img[y, l] 
            img[np.where((img == col).all(axis = 2))] = [255,255,255]

    for x in range(x_size):
        for d in range(1,6):
            col = img[y_size-d, x] 
            img[np.where((img == col).all(axis = 2))] = [255,255,255]
        for u in range(5):
            col = img[u, x] 
            img[np.where((img == col).all(axis = 2))] = [255,255,255]
    
    return img

class ImageDataLoader(data.DataLoader):
    def __init__(self, root_dir: Path, 
                 df: pd.DataFrame, 
                 mode="train", 
                 transforms=None):
        self._root = root_dir
        self.transform = transforms[mode]
        self._img_id = (df["id"] + ".png").values
        
    def __len__(self):
        return len(self._img_id)
    
    def __getitem__(self, idx):
        img_id = self._img_id[idx]
        file_name = self._root / img_id
        img = Image.open(file_name)
        #img = cv2.imread(file_name.absolute().as_posix())[...,[2, 1, 0]]
        #img = rem_bkg(img)
        #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        #img = Image.fromarray(img)
        if self.transform:
            img = self.transform(img)
            
        return [img]
    
    
data_transforms = {
    'train': vision.transforms.Compose([
        vision.transforms.RandomResizedCrop(224),
        vision.transforms.RandomHorizontalFlip(),
        vision.transforms.ToTensor(),
        vision.transforms.Normalize(
            [0.485, 0.456, 0.406], 
            [0.229, 0.224, 0.225])
    ]),
    'val': vision.transforms.Compose([
        vision.transforms.Resize(256),
        vision.transforms.CenterCrop(224),
        vision.transforms.ToTensor(),
        vision.transforms.Normalize(
            [0.485, 0.456, 0.406], 
            [0.229, 0.224, 0.225])
    ]),
}

data_transforms["test"] = data_transforms["val"]




  0%|          | 0/109237 [00:00<?, ?it/s][A[A[A


 20%|██        | 22206/109237 [00:00<00:00, 222046.98it/s][A[A[A


 39%|███▉      | 42957/109237 [00:00<00:00, 217474.70it/s][A[A[A


 59%|█████▊    | 63910/109237 [00:00<00:00, 215015.39it/s][A[A[A


 78%|███████▊  | 85587/109237 [00:00<00:00, 215537.16it/s][A[A[A


 97%|█████████▋| 106072/109237 [00:00<00:00, 212214.53it/s][A[A[A


100%|██████████| 109237/109237 [00:00<00:00, 210271.21it/s][A[A[A

(109237, 399)


In [81]:
class IMetDataset(data.Dataset):
    def __init__(self, tensor, device="cuda:0", labels=None):
        self.tensor = tensor
        self.labels = labels
        self.device= device
        
    def __len__(self):
        return self.tensor.size(0)
    
    def __getitem__(self, idx):
        tensor = self.tensor[idx, :]
        if self.labels is not None:
            label = self.labels[idx]
            label_tensor = torch.zeros((1, 1103))
            y_c = torch.FloatTensor(targets_c[idx]).to(self.device)
            y_t = torch.FloatTensor(targets_t[idx]).to(self.device)
            for i in label:
                label_tensor[0, int(i)] = 1
            label_tensor = label_tensor.to(self.device)
            return [tensor, [y_c, y_t]]
        else:
            return [tensor]

In [82]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        
    def forward(self, x):
        return x


class Densenet121(nn.Module):
    def __init__(self, pretrained: Path):
        super(Densenet121, self).__init__()
        self.densenet121 = vision.models.densenet121()
        self.densenet121.load_state_dict(torch.load(pretrained))
        self.densenet121.classifier = Classifier()
        
        dense = nn.Sequential(*list(self.densenet121.children())[:-1])
        for param in dense.parameters():
            param.requires_grad = False
        
    def forward(self, x):
        return self.densenet121(x)
    
class Resnet50(nn.Module):
    def __init__(self, pretrained: Path):
        super(Resnet50, self).__init__()
        self.resnet50 = vision.models.resnet50()
        self.resnet50.load_state_dict(torch.load(pretrained))
        self.resnet50.classifier = Classifier()
        
        dense = nn.Sequential(*list(self.resnet50.children())[:-1])
        for param in dense.parameters():
            param.requires_grad = False
        
    def forward(self, x):
        return self.resnet50(x)
    
    
class MultiLayerPerceptron(nn.Module):
    def __init__(self):
        super(MultiLayerPerceptron, self).__init__()
        self.linear1 = nn.Linear(1024, 1024)
        self.relu = nn.ReLU()
        self.linear11 = nn.Linear(1024, 1024)
        self.relu2 = nn.ReLU()
        self.linear2 = nn.Linear(1024, 1103)
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = self.relu(self.linear1(x))
        x = self.relu2(self.linear11(x))
        x = self.dropout(x)
        return self.sigmoid(self.linear2(x))
    
class MultiLayerPerceptron1(nn.Module):
    def __init__(self):
        super(MultiLayerPerceptron1, self).__init__()
        self.linear1 = nn.Linear(1024, 1024)
        self.relu = nn.ReLU()
        self.linear11 = nn.Linear(1024, 1024)
        self.relu2 = nn.ReLU()
        self.linear2 = nn.Linear(1024, 399)
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = self.relu(self.linear1(x))
        x = self.relu2(self.linear11(x))
        x = self.dropout(x)
        return self.sigmoid(self.linear2(x))
    
class MultiLayerPerceptron2(nn.Module):
    def __init__(self):
        super(MultiLayerPerceptron2, self).__init__()
        self.linear1 = nn.Linear(1024, 1024)
        self.relu = nn.ReLU()
        self.linear11 = nn.Linear(1024, 1024)
        self.relu2 = nn.ReLU()
        self.linear2 = nn.Linear(1024, 706)
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = self.relu(self.linear1(x))
        x = self.relu2(self.linear11(x))
        x = self.dropout(x)
        return self.sigmoid(self.linear2(x))

In [83]:
train_dataset = ImageDataLoader(
    root_dir=Path("../input/imet-2019-fgvc6/train/"),
    df=train,
    mode="train",
    transforms=data_transforms)
train_loader = data.DataLoader(dataset=train_dataset,
                               shuffle=False,
                               batch_size=64)
test_dataset = ImageDataLoader(
    root_dir=Path("../input/imet-2019-fgvc6/test/"),
    df=sample,
    mode="test",
    transforms=data_transforms)
test_loader = data.DataLoader(dataset=test_dataset,
                              shuffle=False,
                              batch_size=64)

In [36]:
from torchvision import models
def get_feature_vector(df, loader, device):
    matrix = torch.zeros((df.shape[0], 1024)).to(device)
    model = Densenet121('../input/pytorch-pretrained-image-models/densenet121.pth') #Resnet50('../input/pytorch-pretrained-image-models/resnet50.pth')

    model.to(device)
    batch = loader.batch_size
    for i, (i_batch,) in tqdm(enumerate(loader)):
        i_batch = i_batch.to(device)
        pred = model(i_batch).detach()
        matrix[i * batch:(i + 1) * batch] = pred
    return matrix

In [37]:
train_tensor = get_feature_vector(train, train_loader, "cuda:0")
test_tensor = get_feature_vector(sample, test_loader, "cuda:0")




0it [00:00, ?it/s][A[A[A


1it [00:00,  1.15it/s][A[A[A


2it [00:01,  1.23it/s][A[A[A


3it [00:02,  1.29it/s][A[A[A


4it [00:02,  1.34it/s][A[A[A


5it [00:03,  1.38it/s][A[A[A


6it [00:04,  1.42it/s][A[A[A


7it [00:04,  1.45it/s][A[A[A


8it [00:05,  1.46it/s][A[A[A


9it [00:06,  1.47it/s][A[A[A


10it [00:06,  1.47it/s][A[A[A


11it [00:07,  1.49it/s][A[A[A


12it [00:08,  1.51it/s][A[A[A


13it [00:08,  1.50it/s][A[A[A


14it [00:09,  1.51it/s][A[A[A


15it [00:10,  1.49it/s][A[A[A


16it [00:10,  1.49it/s][A[A[A


17it [00:11,  1.50it/s][A[A[A


18it [00:12,  1.49it/s][A[A[A


19it [00:12,  1.49it/s][A[A[A


20it [00:13,  1.47it/s][A[A[A


21it [00:14,  1.49it/s][A[A[A


22it [00:15,  1.44it/s][A[A[A


23it [00:15,  1.45it/s][A[A[A


24it [00:16,  1.47it/s][A[A[A


25it [00:17,  1.48it/s][A[A[A


26it [00:17,  1.49it/s][A[A[A


27it [00:18,  1.48it/s][A[A[A


28it [00:19,  1.49it/s][A[A[

230it [02:33,  1.50it/s][A[A[A


231it [02:34,  1.49it/s][A[A[A


232it [02:34,  1.48it/s][A[A[A


233it [02:35,  1.47it/s][A[A[A


234it [02:36,  1.47it/s][A[A[A


235it [02:36,  1.47it/s][A[A[A


236it [02:37,  1.48it/s][A[A[A


237it [02:38,  1.50it/s][A[A[A


238it [02:38,  1.50it/s][A[A[A


239it [02:39,  1.49it/s][A[A[A


240it [02:40,  1.46it/s][A[A[A


241it [02:41,  1.44it/s][A[A[A


242it [02:41,  1.45it/s][A[A[A


243it [02:42,  1.41it/s][A[A[A


244it [02:43,  1.44it/s][A[A[A


245it [02:43,  1.45it/s][A[A[A


246it [02:44,  1.46it/s][A[A[A


247it [02:45,  1.47it/s][A[A[A


248it [02:45,  1.50it/s][A[A[A


249it [02:46,  1.49it/s][A[A[A


250it [02:47,  1.50it/s][A[A[A


251it [02:47,  1.52it/s][A[A[A


252it [02:48,  1.52it/s][A[A[A


253it [02:49,  1.51it/s][A[A[A


254it [02:49,  1.50it/s][A[A[A


255it [02:50,  1.50it/s][A[A[A


256it [02:51,  1.49it/s][A[A[A


257it [02:51,  1.50it/s][A

457it [05:08,  1.46it/s][A[A[A


458it [05:08,  1.46it/s][A[A[A


459it [05:09,  1.47it/s][A[A[A


460it [05:10,  1.45it/s][A[A[A


461it [05:10,  1.40it/s][A[A[A


462it [05:11,  1.38it/s][A[A[A


463it [05:12,  1.34it/s][A[A[A


464it [05:13,  1.34it/s][A[A[A


465it [05:13,  1.36it/s][A[A[A


466it [05:14,  1.37it/s][A[A[A


467it [05:15,  1.37it/s][A[A[A


468it [05:16,  1.39it/s][A[A[A


469it [05:16,  1.40it/s][A[A[A


470it [05:17,  1.42it/s][A[A[A


471it [05:18,  1.41it/s][A[A[A


472it [05:18,  1.44it/s][A[A[A


473it [05:19,  1.45it/s][A[A[A


474it [05:20,  1.46it/s][A[A[A


475it [05:20,  1.47it/s][A[A[A


476it [05:21,  1.45it/s][A[A[A


477it [05:22,  1.45it/s][A[A[A


478it [05:22,  1.47it/s][A[A[A


479it [05:23,  1.47it/s][A[A[A


480it [05:24,  1.49it/s][A[A[A


481it [05:24,  1.50it/s][A[A[A


482it [05:25,  1.49it/s][A[A[A


483it [05:26,  1.49it/s][A[A[A


484it [05:26,  1.50it/s][A

684it [07:41,  1.45it/s][A[A[A


685it [07:42,  1.46it/s][A[A[A


686it [07:43,  1.44it/s][A[A[A


687it [07:43,  1.46it/s][A[A[A


688it [07:44,  1.47it/s][A[A[A


689it [07:45,  1.47it/s][A[A[A


690it [07:45,  1.46it/s][A[A[A


691it [07:46,  1.47it/s][A[A[A


692it [07:47,  1.49it/s][A[A[A


693it [07:47,  1.45it/s][A[A[A


694it [07:48,  1.46it/s][A[A[A


695it [07:49,  1.49it/s][A[A[A


696it [07:49,  1.52it/s][A[A[A


697it [07:50,  1.51it/s][A[A[A


698it [07:51,  1.51it/s][A[A[A


699it [07:51,  1.46it/s][A[A[A


700it [07:52,  1.44it/s][A[A[A


701it [07:53,  1.44it/s][A[A[A


702it [07:54,  1.45it/s][A[A[A


703it [07:54,  1.45it/s][A[A[A


704it [07:55,  1.45it/s][A[A[A


705it [07:56,  1.46it/s][A[A[A


706it [07:56,  1.47it/s][A[A[A


707it [07:57,  1.48it/s][A[A[A


708it [07:58,  1.45it/s][A[A[A


709it [07:58,  1.45it/s][A[A[A


710it [07:59,  1.44it/s][A[A[A


711it [08:00,  1.44it/s][A

911it [10:16,  1.46it/s][A[A[A


912it [10:16,  1.48it/s][A[A[A


913it [10:17,  1.51it/s][A[A[A


914it [10:18,  1.50it/s][A[A[A


915it [10:18,  1.48it/s][A[A[A


916it [10:19,  1.49it/s][A[A[A


917it [10:20,  1.50it/s][A[A[A


918it [10:20,  1.49it/s][A[A[A


919it [10:21,  1.45it/s][A[A[A


920it [10:22,  1.45it/s][A[A[A


921it [10:22,  1.46it/s][A[A[A


922it [10:23,  1.48it/s][A[A[A


923it [10:24,  1.48it/s][A[A[A


924it [10:24,  1.44it/s][A[A[A


925it [10:25,  1.42it/s][A[A[A


926it [10:26,  1.45it/s][A[A[A


927it [10:26,  1.46it/s][A[A[A


928it [10:27,  1.48it/s][A[A[A


929it [10:28,  1.49it/s][A[A[A


930it [10:28,  1.50it/s][A[A[A


931it [10:29,  1.50it/s][A[A[A


932it [10:30,  1.50it/s][A[A[A


933it [10:30,  1.52it/s][A[A[A


934it [10:31,  1.50it/s][A[A[A


935it [10:32,  1.50it/s][A[A[A


936it [10:32,  1.51it/s][A[A[A


937it [10:33,  1.53it/s][A[A[A


938it [10:34,  1.50it/s][A

1134it [12:45,  1.48it/s][A[A[A


1135it [12:46,  1.47it/s][A[A[A


1136it [12:47,  1.49it/s][A[A[A


1137it [12:47,  1.49it/s][A[A[A


1138it [12:48,  1.49it/s][A[A[A


1139it [12:49,  1.48it/s][A[A[A


1140it [12:49,  1.49it/s][A[A[A


1141it [12:50,  1.49it/s][A[A[A


1142it [12:51,  1.48it/s][A[A[A


1143it [12:51,  1.49it/s][A[A[A


1144it [12:52,  1.51it/s][A[A[A


1145it [12:53,  1.51it/s][A[A[A


1146it [12:53,  1.50it/s][A[A[A


1147it [12:54,  1.51it/s][A[A[A


1148it [12:55,  1.50it/s][A[A[A


1149it [12:55,  1.46it/s][A[A[A


1150it [12:56,  1.43it/s][A[A[A


1151it [12:57,  1.43it/s][A[A[A


1152it [12:57,  1.42it/s][A[A[A


1153it [12:58,  1.44it/s][A[A[A


1154it [12:59,  1.44it/s][A[A[A


1155it [12:59,  1.48it/s][A[A[A


1156it [13:00,  1.50it/s][A[A[A


1157it [13:01,  1.51it/s][A[A[A


1158it [13:01,  1.51it/s][A[A[A


1159it [13:02,  1.52it/s][A[A[A


1160it [13:03,  1.52it/s][A[A[A


1

1355it [15:15,  1.47it/s][A[A[A


1356it [15:16,  1.48it/s][A[A[A


1357it [15:17,  1.42it/s][A[A[A


1358it [15:17,  1.43it/s][A[A[A


1359it [15:18,  1.46it/s][A[A[A


1360it [15:19,  1.46it/s][A[A[A


1361it [15:20,  1.46it/s][A[A[A


1362it [15:20,  1.47it/s][A[A[A


1363it [15:21,  1.47it/s][A[A[A


1364it [15:22,  1.48it/s][A[A[A


1365it [15:22,  1.49it/s][A[A[A


1366it [15:23,  1.49it/s][A[A[A


1367it [15:24,  1.47it/s][A[A[A


1368it [15:24,  1.48it/s][A[A[A


1369it [15:25,  1.45it/s][A[A[A


1370it [15:26,  1.47it/s][A[A[A


1371it [15:26,  1.49it/s][A[A[A


1372it [15:27,  1.50it/s][A[A[A


1373it [15:28,  1.49it/s][A[A[A


1374it [15:28,  1.48it/s][A[A[A


1375it [15:29,  1.49it/s][A[A[A


1376it [15:30,  1.51it/s][A[A[A


1377it [15:30,  1.51it/s][A[A[A


1378it [15:31,  1.50it/s][A[A[A


1379it [15:32,  1.52it/s][A[A[A


1380it [15:32,  1.49it/s][A[A[A


1381it [15:33,  1.49it/s][A[A[A


1

1576it [17:46,  1.44it/s][A[A[A


1577it [17:46,  1.44it/s][A[A[A


1578it [17:47,  1.46it/s][A[A[A


1579it [17:48,  1.43it/s][A[A[A


1580it [17:48,  1.43it/s][A[A[A


1581it [17:49,  1.44it/s][A[A[A


1582it [17:50,  1.44it/s][A[A[A


1583it [17:51,  1.44it/s][A[A[A


1584it [17:51,  1.43it/s][A[A[A


1585it [17:52,  1.40it/s][A[A[A


1586it [17:53,  1.41it/s][A[A[A


1587it [17:53,  1.43it/s][A[A[A


1588it [17:54,  1.46it/s][A[A[A


1589it [17:55,  1.45it/s][A[A[A


1590it [17:55,  1.48it/s][A[A[A


1591it [17:56,  1.49it/s][A[A[A


1592it [17:57,  1.49it/s][A[A[A


1593it [17:57,  1.49it/s][A[A[A


1594it [17:58,  1.49it/s][A[A[A


1595it [17:59,  1.48it/s][A[A[A


1596it [17:59,  1.47it/s][A[A[A


1597it [18:00,  1.44it/s][A[A[A


1598it [18:01,  1.44it/s][A[A[A


1599it [18:01,  1.46it/s][A[A[A


1600it [18:02,  1.45it/s][A[A[A


1601it [18:03,  1.45it/s][A[A[A


1602it [18:04,  1.47it/s][A[A[A


1

95it [01:10,  1.27it/s][A[A[A


96it [01:11,  1.28it/s][A[A[A


97it [01:12,  1.30it/s][A[A[A


98it [01:13,  1.27it/s][A[A[A


99it [01:13,  1.29it/s][A[A[A


100it [01:14,  1.29it/s][A[A[A


101it [01:15,  1.31it/s][A[A[A


102it [01:16,  1.28it/s][A[A[A


103it [01:16,  1.28it/s][A[A[A


104it [01:17,  1.29it/s][A[A[A


105it [01:18,  1.32it/s][A[A[A


106it [01:19,  1.35it/s][A[A[A


107it [01:19,  1.36it/s][A[A[A


108it [01:20,  1.37it/s][A[A[A


109it [01:21,  1.37it/s][A[A[A


110it [01:21,  1.39it/s][A[A[A


111it [01:22,  1.38it/s][A[A[A


112it [01:23,  1.39it/s][A[A[A


113it [01:24,  1.37it/s][A[A[A


114it [01:24,  1.39it/s][A[A[A


115it [01:25,  1.38it/s][A[A[A


116it [01:26,  1.36it/s][A[A[A


117it [01:26,  1.69it/s][A[A[A

In [38]:
del train_dataset, train_loader
del test_dataset, test_loader
gc.collect()

8954

In [176]:
from numpy.random import beta
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.5, gamma=2, logits=False, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        if self.logits:
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduce=False)
        else:
            BCE_loss = F.binary_cross_entropy(inputs, targets, reduce=False)
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss

        if self.reduce:
            return torch.sum(F_loss)
        else:
            return F_loss
        
def mixup(input, target, gamma):
    # target is onehot format!
    perm = torch.randperm(input.size(0))
    perm_input = input[perm]
    perm_target = target[perm]
    return input.mul_(gamma).add_(1 - gamma, perm_input), target.mul_(gamma).add_(1 - gamma, perm_target)

    return mixed_x, mixed_y

class Trainer:
    def __init__(self, 
                 model1,
                 model2,
                 logger,
                 n_splits=5,
                 seed=42,
                 device="cuda:0",
                 train_batch=32,
                 valid_batch=128,
                 kwargs={}):
        self.model1 = model1
        self.model2 = model2
        self.logger = logger
        self.device = device
        self.n_splits = n_splits
        self.seed = seed
        self.train_batch = train_batch
        self.valid_batch = valid_batch
        self.kwargs = kwargs
        
        self.best_score = None
        self.tag = dt.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        self.loss_fn = nn.BCELoss(reduction="mean").to(self.device)
        
        path = Path(f"bin1/{self.tag}")
        path.mkdir(exist_ok=True, parents=True)
        self.path1 = path
        path = Path(f"bin2/{self.tag}")
        path.mkdir(exist_ok=True, parents=True)
        self.path2 = path
        
    def fit(self, X, y, n_epochs=10):
        train_preds1 = np.zeros((len(X), num_classes_c))
        train_preds2 = np.zeros((len(X), num_classes_t))
        fold = KFold(n_splits=self.n_splits, random_state=self.seed)
        for i, (trn_idx, val_idx) in enumerate(fold.split(X)):
            self.fold_num = i
            self.logger.info(f"Fold {i + 1}")
            X_train, X_val = X[trn_idx, :], X[val_idx, :]
            y_train, y_val = y[trn_idx], y[val_idx]
            
            valid_preds1, valid_preds2 = self._fit(X_train, y_train, X_val, y_val, n_epochs)
            #print('tp1 ' + str(train_preds1.shape[1]))
            #print('vp1 ' + str(valid_preds1.shape[1]))
            train_preds1[val_idx] = valid_preds1
            train_preds2[val_idx] = valid_preds2
        return train_preds1, train_preds2
    
    def _fit(self, X_train, y_train, X_val, y_val, n_epochs):
        seed_torch(self.seed)
        train_dataset = IMetDataset(X_train, labels=y_train, device=self.device)
        train_loader = data.DataLoader(train_dataset, 
                                       batch_size=self.train_batch,
                                       shuffle=True)

        valid_dataset = IMetDataset(X_val, labels=y_val, device=self.device)
        valid_loader = data.DataLoader(valid_dataset,
                                       batch_size=self.valid_batch,
                                       shuffle=False)
        
        model1 = self.model1(**self.kwargs)
        model1.to(self.device)
        
        model2 = self.model2(**self.kwargs)
        model2.to(self.device)
        
        optimizer1 = optim.Adam(params=model1.parameters(), 
                                lr=0.0001)
        optimizer2 = optim.Adam(params=model2.parameters(), 
                                lr=0.0001)
        scheduler1 = CosineAnnealingLR(optimizer1, T_max=n_epochs)
        scheduler2 = CosineAnnealingLR(optimizer2, T_max=n_epochs)
        best_score1 = np.inf
        best_score2 = np.inf
        mb = master_bar(range(n_epochs))
        for epoch in mb:
            model1.train()
            model2.train()
            avg_loss1 = 0.0
            avg_loss2 = 0.0
            for i_batch, y_batch in progress_bar(train_loader, parent=mb):
                #i_batch, y_batch = mixup(i_batch, y_batch, beta(1.0, 1.0))
                y_pred1 = model1(i_batch)
                y_pred2 = model2(i_batch)
                loss1 = self.loss_fn(y_pred1, y_batch[0])
                loss2 = self.loss_fn(y_pred2, y_batch[1])
                optimizer1.zero_grad()
                optimizer2.zero_grad()
                loss1.backward()
                loss2.backward()
                optimizer1.step()
                optimizer2.step()
                avg_loss1 += loss1.item() / len(train_loader)
                avg_loss2 += loss2.item() / len(train_loader)
            valid_preds1, avg_val_loss1, valid_preds2, avg_val_loss2 = self._val(valid_loader, model1, model2)
            scheduler1.step()
            scheduler2.step()

            self.logger.info("=========================================")
            self.logger.info(f"Epoch {epoch + 1} / {n_epochs}")
            self.logger.info("=========================================")
            self.logger.info(f"avg_loss: {avg_loss1:.8f}")
            self.logger.info(f"avg_val_loss: {avg_val_loss1:.8f}")
            self.logger.info(f"avg_loss: {avg_loss2:.8f}")
            self.logger.info(f"avg_val_loss: {avg_val_loss2:.8f}")
            
            if best_score1 > avg_val_loss1:
                torch.save(model1.state_dict(),
                           self.path1 / f"1best{self.fold_num}.pth")
                self.logger.info(f"Save model at Epoch {epoch + 1}")
                best_score1 = avg_val_loss1
                
            if best_score2 > avg_val_loss2:
                torch.save(model2.state_dict(),
                           self.path2 / f"2best{self.fold_num}.pth")
                self.logger.info(f"Save model at Epoch {epoch + 1}")
                best_score2 = avg_val_loss2
                
        model1.load_state_dict(torch.load(self.path1 / f"1best{self.fold_num}.pth"))
        model2.load_state_dict(torch.load(self.path2 / f"2best{self.fold_num}.pth"))
        
        valid_preds1, avg_val_loss1, valid_preds2, avg_val_loss2 = self._val(valid_loader, model1, model2)
        #print('vpp'+str(valid_preds1.shape[1]))
        #self.logger.info(f"Best Validation Loss: {avg_val_loss:.8f}")
        return valid_preds1, valid_preds2
    
    def _val(self, loader, model1, model2):
        model1.eval()
        model2.eval()
        valid_preds1 = np.zeros((len(loader.dataset), num_classes_c))
        valid_preds2 = np.zeros((len(loader.dataset), num_classes_t))
        avg_val_loss1 = 0.0
        avg_val_loss2 = 0.0
        for i, (i_batch, y_batch) in enumerate(loader):
            with torch.no_grad():
                y_pred1 = model1(i_batch).detach()
                avg_val_loss1 += self.loss_fn(y_pred1, y_batch[0]).item() / len(loader)
                valid_preds1[i * self.valid_batch:(i + 1) * self.valid_batch] = \
                    y_pred1.cpu().numpy()
                y_pred2 = model2(i_batch).detach()
                avg_val_loss2 += self.loss_fn(y_pred2, y_batch[1]).item() / len(loader)
                valid_preds2[i * self.valid_batch:(i + 1) * self.valid_batch] = \
                    y_pred2.cpu().numpy()
        #print('vp1'+str(valid_preds1.shape[1]))
        return valid_preds1, avg_val_loss1, valid_preds2, avg_val_loss2
    
    def predict(self, X):
        #print('pred')
        dataset = IMetDataset(X, labels=None)
        loader = data.DataLoader(dataset, 
                                 batch_size=self.valid_batch, 
                                 shuffle=False)
        model1 = self.model1(**self.kwargs)
        model2 = self.model2(**self.kwargs)
        preds1 = np.zeros((X.size(0), num_classes_c))
        #print(list(self.path1.iterdir()))
        for path in self.path1.iterdir():
            with timer(f"Using {str(path)}", self.logger):
                model1.load_state_dict(torch.load(path))
                model1.to(self.device)
                model1.eval()
                temp1 = np.zeros_like(preds1)
                #print('try')
                for i, (i_batch, ) in enumerate(loader):
                    with torch.no_grad():
                        y_pred1 = model1(i_batch).detach()
                        #print(y_pred1[y_pred1 != 0])
                        temp1[i * self.valid_batch:(i + 1) * self.valid_batch] = \
                            y_pred1.cpu().numpy()
                preds1 += temp1 / self.n_splits
        preds2 = np.zeros((X.size(0), num_classes_t))
        for path in self.path2.iterdir():
            with timer(f"Using {str(path)}", self.logger):
                model2.load_state_dict(torch.load(path))
                model2.to(self.device)
                model2.eval()
                temp2 = np.zeros_like(preds2)
                for i, (i_batch, ) in enumerate(loader):
                    with torch.no_grad():
                        y_pred2 = model2(i_batch).detach()
                        temp2[i * self.valid_batch:(i + 1) * self.valid_batch] = \
                            y_pred2.cpu().numpy()
                preds2 += temp2 / self.n_splits
        return preds1, preds2

In [177]:
trainer = Trainer(MultiLayerPerceptron1, MultiLayerPerceptron2, logger, train_batch=64, kwargs={})

In [171]:
from sklearn.model_selection import train_test_split
y = train.attribute_ids.map(lambda x: x.split()).values
valid_preds1, valid_preds2 = trainer.fit(train_tensor, y, n_epochs=40)

2019-05-19 16:25:28,854 Main INFO Fold 1


2019-05-19 16:25:57,916 Main INFO Epoch 1 / 1
2019-05-19 16:25:57,918 Main INFO avg_loss: 0.02138878
2019-05-19 16:25:57,922 Main INFO avg_val_loss: 0.01234947
2019-05-19 16:25:57,923 Main INFO avg_loss: 0.02634290
2019-05-19 16:25:57,924 Main INFO avg_val_loss: 0.01663493
2019-05-19 16:25:57,941 Main INFO Save model at Epoch 1
2019-05-19 16:25:57,957 Main INFO Save model at Epoch 1
2019-05-19 16:26:02,785 Main INFO Fold 2


2019-05-19 16:26:31,897 Main INFO Epoch 1 / 1
2019-05-19 16:26:31,900 Main INFO avg_loss: 0.02133593
2019-05-19 16:26:31,902 Main INFO avg_val_loss: 0.01238627
2019-05-19 16:26:31,904 Main INFO avg_loss: 0.02629573
2019-05-19 16:26:31,906 Main INFO avg_val_loss: 0.01663687
2019-05-19 16:26:31,920 Main INFO Save model at Epoch 1
2019-05-19 16:26:31,934 Main INFO Save model at Epoch 1
2019-05-19 16:26:36,797 Main INFO Fold 3


2019-05-19 16:27:05,763 Main INFO Epoch 1 / 1
2019-05-19 16:27:05,767 Main INFO avg_loss: 0.02114178
2019-05-19 16:27:05,768 Main INFO avg_val_loss: 0.01261050
2019-05-19 16:27:05,769 Main INFO avg_loss: 0.02610728
2019-05-19 16:27:05,773 Main INFO avg_val_loss: 0.01681555
2019-05-19 16:27:05,789 Main INFO Save model at Epoch 1
2019-05-19 16:27:05,804 Main INFO Save model at Epoch 1
2019-05-19 16:27:10,612 Main INFO Fold 4


2019-05-19 16:27:39,707 Main INFO Epoch 1 / 1
2019-05-19 16:27:39,711 Main INFO avg_loss: 0.02080979
2019-05-19 16:27:39,713 Main INFO avg_val_loss: 0.01306381
2019-05-19 16:27:39,715 Main INFO avg_loss: 0.02573812
2019-05-19 16:27:39,717 Main INFO avg_val_loss: 0.01731276
2019-05-19 16:27:39,733 Main INFO Save model at Epoch 1
2019-05-19 16:27:39,749 Main INFO Save model at Epoch 1
2019-05-19 16:27:44,665 Main INFO Fold 5


2019-05-19 16:28:13,759 Main INFO Epoch 1 / 1
2019-05-19 16:28:13,763 Main INFO avg_loss: 0.02026352
2019-05-19 16:28:13,765 Main INFO avg_val_loss: 0.01399481
2019-05-19 16:28:13,768 Main INFO avg_loss: 0.02508862
2019-05-19 16:28:13,771 Main INFO avg_val_loss: 0.01857798
2019-05-19 16:28:13,786 Main INFO Save model at Epoch 1
2019-05-19 16:28:13,801 Main INFO Save model at Epoch 1


In [172]:
def threshold_search(y_pred, y_true):
    score = []
    candidates = np.arange(0, 1.0, 0.01)
    for th in progress_bar(candidates):
        yp = (y_pred > th).astype(int)
        score.append(fbeta_score(y_pred=yp, y_true=y_true, beta=2, average="samples"))
    score = np.array(score)
    pm = score.argmax()
    best_th, best_score = candidates[pm], score[pm]
    return best_th, best_score

In [173]:
y_true = np.zeros((train.shape[0], 1103)).astype(int)
for i, row in enumerate(y):
    for idx in row:
        y_true[i, int(idx)] = 1

In [174]:
best_threshold1, best_score1 = threshold_search(valid_preds1, targets_c)
best_score1
best_threshold2, best_score2 = threshold_search(valid_preds2, targets_t)
best_score2

  'precision', 'predicted', average, warn_for)


0.2195233321503681

In [178]:
test_preds1, test_preds2  = trainer.predict(test_tensor)

pred
[PosixPath('bin1/2019-05-19-16-25-25/1best3.pth'), PosixPath('bin1/2019-05-19-16-25-25/1best2.pth'), PosixPath('bin1/2019-05-19-16-25-25/1best1.pth'), PosixPath('bin1/2019-05-19-16-25-25/1best0.pth'), PosixPath('bin1/2019-05-19-16-25-25/1best4.pth')]
try
tensor([2.5248e-04, 1.0047e-03, 2.0699e-04,  ..., 2.0171e-05, 1.8681e-04,
        1.1756e-01], device='cuda:0')
tensor([1.0845e-04, 6.5119e-04, 9.8716e-05,  ..., 2.2761e-06, 7.9371e-05,
        2.3472e-01], device='cuda:0')
tensor([3.1960e-04, 1.0539e-03, 3.1949e-04,  ..., 1.4618e-06, 4.1715e-05,
        3.9006e-02], device='cuda:0')
tensor([1.0588e-04, 5.9860e-04, 9.3018e-05,  ..., 1.0097e-05, 1.6525e-04,
        4.9028e-02], device='cuda:0')
tensor([0.0002, 0.0007, 0.0002,  ..., 0.0001, 0.0009, 0.0607], device='cuda:0')
tensor([1.4686e-04, 7.4334e-04, 1.7538e-04,  ..., 2.4546e-06, 6.3998e-05,
        9.9638e-02], device='cuda:0')
tensor([1.5932e-04, 6.3283e-04, 1.7209e-04,  ..., 2.1084e-05, 3.3003e-04,
        2.0034e-01], devic

tensor([1.4523e-04, 7.5082e-04, 1.3365e-04,  ..., 4.0698e-05, 4.5111e-04,
        1.2616e-01], device='cuda:0')
tensor([2.0306e-04, 6.7481e-04, 1.6195e-04,  ..., 1.1499e-05, 1.8538e-04,
        4.0496e-02], device='cuda:0')
tensor([1.7492e-04, 8.4895e-04, 1.5897e-04,  ..., 1.1469e-05, 1.7444e-04,
        7.5672e-02], device='cuda:0')
tensor([1.3471e-04, 5.0354e-04, 1.3491e-04,  ..., 3.3929e-05, 3.3235e-04,
        8.6545e-02], device='cuda:0')
tensor([1.7555e-04, 6.6057e-04, 1.6349e-04,  ..., 7.5237e-06, 9.6806e-05,
        8.3095e-02], device='cuda:0')
tensor([2.2843e-04, 1.0647e-03, 2.2277e-04,  ..., 5.1600e-05, 4.0816e-04,
        9.2395e-02], device='cuda:0')
tensor([7.5698e-05, 2.0190e-04, 6.1976e-05,  ..., 5.4642e-06, 1.1409e-04,
        8.6126e-02], device='cuda:0')
tensor([1.4171e-04, 6.8432e-04, 1.5590e-04,  ..., 1.4096e-05, 1.6877e-04,
        7.7436e-02], device='cuda:0')
tensor([1.6728e-04, 6.6626e-04, 1.9981e-04,  ..., 7.8692e-05, 6.3388e-04,
        7.4540e-02], device='c

try
tensor([1.6519e-04, 4.3715e-04, 7.8689e-05,  ..., 3.2086e-05, 1.5128e-04,
        1.1245e-01], device='cuda:0')
tensor([2.4015e-04, 5.3722e-04, 1.1349e-04,  ..., 3.5127e-05, 1.8444e-04,
        1.0318e-01], device='cuda:0')
tensor([3.5212e-04, 7.2613e-04, 1.9520e-04,  ..., 2.0357e-05, 1.0906e-04,
        1.0010e-01], device='cuda:0')
tensor([1.2023e-04, 3.4939e-04, 5.6232e-05,  ..., 1.7963e-05, 1.0899e-04,
        1.0362e-01], device='cuda:0')
tensor([1.9896e-04, 4.8297e-04, 1.0644e-04,  ..., 4.5298e-05, 2.2285e-04,
        1.1393e-01], device='cuda:0')
tensor([1.9802e-04, 5.1493e-04, 1.0400e-04,  ..., 2.5358e-05, 1.4228e-04,
        1.0459e-01], device='cuda:0')
tensor([3.0758e-04, 7.7911e-04, 1.4876e-04,  ..., 9.6787e-05, 4.3341e-04,
        1.3379e-01], device='cuda:0')
tensor([2.7259e-04, 6.9817e-04, 1.3811e-04,  ..., 8.9808e-05, 4.5641e-04,
        1.3972e-01], device='cuda:0')
tensor([2.4778e-04, 6.7353e-04, 1.3230e-04,  ..., 4.8435e-05, 2.3615e-04,
        1.1443e-01], devic

tensor([1.9163e-04, 8.3461e-04, 2.5252e-04,  ..., 3.3172e-05, 4.5772e-04,
        5.0903e-02], device='cuda:0')
tensor([1.8435e-04, 1.3276e-03, 1.9494e-04,  ..., 3.0256e-07, 1.3414e-05,
        1.2778e-02], device='cuda:0')
tensor([0.0001, 0.0010, 0.0002,  ..., 0.0001, 0.0018, 0.0492], device='cuda:0')
tensor([7.6155e-04, 1.4865e-03, 7.0449e-04,  ..., 1.3889e-05, 2.1724e-04,
        1.3569e-01], device='cuda:0')
tensor([5.1897e-05, 1.2862e-04, 2.4960e-05,  ..., 1.3672e-05, 2.2059e-04,
        1.3430e-01], device='cuda:0')
tensor([2.3301e-04, 6.4679e-04, 1.9782e-04,  ..., 1.3718e-06, 3.7137e-05,
        6.2916e-02], device='cuda:0')
tensor([5.4898e-05, 4.6627e-04, 6.1194e-05,  ..., 2.7543e-06, 9.4776e-05,
        3.4110e-02], device='cuda:0')
tensor([0.0002, 0.0008, 0.0001,  ..., 0.0001, 0.0018, 0.0502], device='cuda:0')
tensor([3.1601e-05, 8.6750e-05, 1.8574e-05,  ..., 1.9647e-05, 2.7204e-04,
        8.4344e-02], device='cuda:0')
tensor([1.7159e-04, 4.3773e-04, 1.2030e-04,  ..., 7.9918

In [179]:
preds1 = (test_preds1 > best_threshold1).astype(int)
preds2 = (test_preds2 > best_threshold2).astype(int)

In [180]:
prediction = []
for i in range(preds1.shape[0]):
    pred1 = [i for i in np.argwhere(preds1[i] == 1.0).reshape(-1).tolist() if i != (num_classes_c - 1)]
    pred2 = [(i + num_classes_c - 1) for i in np.argwhere(preds2[i] == 1.0).reshape(-1).tolist() if i != (num_classes_c + num_classes_t - 2)]
    pred_str = " ".join(list(map(str, pred1 + pred2)))
    prediction.append(pred_str)
#print(test_preds1[test_preds1 != 0])
sample.attribute_ids = prediction
sample.to_csv("submission.csv", index=False)
sample.head()

[2.16447306e-04 8.33356660e-04 1.56778563e-04 ... 4.32497574e-05
 4.06892644e-04 1.23885095e-01]


Unnamed: 0,id,attribute_ids
0,10023b2cc4ed5f68,13 121 147 189 369 671 813 1039 1059 1092
1,100fbe75ed8fd887,13 121 147 671 780 813 1059 1092
2,101b627524a04f19,121 147 189 813 1059 1092
3,10234480c41284c6,13 51 147 189 480 483 501 671 737 738 776 813 ...
4,1023b0e2636dcea8,51 79 147 189 671 780 813 1059 1092
