In [None]:
import copy
import os
import sys
# The vendored timm checkout must be on sys.path before `import timm`.
sys.path.insert(0,'../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SubsetRandomSampler, RandomSampler, SequentialSampler
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
import albumentations as A

## Hyperparameters

In [None]:
# --- data ---
# Root folder that is scanned for one sub-folder per class.
train_path = '../input/issm2020-ai-challenge/semTrain/semTrain'
class_num = 10
# Side length (pixels) that every image is resized to by the transforms.
img_image = 384

# --- model / runtime ---
model_name = 'tf_efficientnet_b4'
device = 'cuda'

# --- training schedule ---
epoch = 10            # also used as T_max of the cosine LR schedule
fold = 5              # number of cross-validation splits
batch_size = 15       # training loader batch size
val_batch_size = 3    # validation loader batch size

# When True, the cross-validation cell only builds the loaders and skips training.
experience = True

## dataset

In [None]:
# Build the training index: one row per image file found under train_path.
# Each sub-folder name is parsed as an integer and shifted by one to get the
# label (presumably folders are named 1..N -- confirm against the data).
train_data = []
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore the unshuffled fold assignment) identical between runs.
for target in sorted(os.listdir(train_path)):
    class_dir = os.path.join(train_path, target)
    if not os.path.isdir(class_dir):
        # A stray file next to the class folders is not a class.
        continue
    for image in sorted(os.listdir(class_dir)):
        train_data.append([os.path.join(class_dir, image), int(target) - 1])
df_train = pd.DataFrame(train_data, columns=['image', 'target'])

In [None]:
# create dataset
class IssmDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame from disk.

    Args:
        df: DataFrame with an ``image`` column of file paths and, unless
            ``mode == 'test'``, a ``target`` column of integer labels.
        mode: ``'test'`` makes ``__getitem__`` return only the image tensor;
            any other value makes it return ``(image, label)``.
        transform: optional callable invoked as ``transform(image=img)``;
            the ``'image'`` entry of its result is used as the new image.
    """
    def __init__(self, df, mode='train',transform=None):
        self.df = df
        self.mode = mode
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self,index):
        """Return the sample at positional ``index``.

        Returns:
            A float32 tensor in channel-first layout, plus an int64 label
            tensor unless ``mode == 'test'``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        row = self.df.iloc[index]
        path = row.image
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the path rather than deep inside cvtColor.
            raise FileNotFoundError('could not read image: {}'.format(path))
        img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        if self.transform:
            img = self.transform(image=img)['image'].astype(np.float32)
        else:
            img = img.astype(np.float32)
        # HWC -> CHW
        img = img.transpose(2,0,1)
        data = torch.tensor(img).float()
        if self.mode == 'test':
            return data
        # The label is only looked up when it is needed, so a test-mode frame
        # does not have to carry a 'target' column.
        return data, torch.tensor(row.target).long()

In [None]:
# #test
# checkset = df_train.reset_index(drop=True)
# cdataset = IssmDataset(checkset)
# from pylab import rcParams
# rcParams['figure.figsize'] = 15,10
# for i in range(3):
#     _,pos = plt.subplots(1,4)
#     for j in range(4):
#         idx = np.random.randint(len(cdataset))
#         img, label = cdataset[idx]
#         # print(img)
#         img /= 255
#         pos[j].imshow(img.transpose(0,1).transpose(1,2))
#         pos[j].set_title(label)
#         pos[j].axis('off')
#         # plt.show()

## augmentation

In [None]:
# Training-time augmentation pipeline; the transforms run in the listed order.
# NOTE(review): RandomBrightness and Cutout are reported as deprecated in newer
# albumentations releases -- confirm against the installed version.
train_transform = A.Compose(
    [
        # random flips
        A.VerticalFlip(p=0.5),
        A.HorizontalFlip(p=0.5),
        # mild photometric / geometric jitter
        A.RandomBrightness(limit=0.05, p=0.5),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, p=0.3),
        # exactly one blur/noise transform, applied half of the time
        A.OneOf(
            [
                A.MotionBlur(blur_limit=5),
                A.MedianBlur(blur_limit=5),
                A.GaussianBlur(blur_limit=5, sigma_limit=0.1),
                A.GaussNoise(var_limit=(5.0, 30.0)),
            ],
            p=0.5,
        ),
        # exactly one warp, applied half of the time
        A.OneOf(
            [
                A.OpticalDistortion(distort_limit=1.0),
                A.GridDistortion(num_steps=5, distort_limit=1.0),
                A.ElasticTransform(alpha=3),
            ],
            p=0.5,
        ),
        A.Resize(height=img_image, width=img_image),
        # two holes, each at most 10% of the image side
        A.Cutout(
            max_h_size=int(img_image * 0.1),
            max_w_size=int(img_image * 0.1),
            num_holes=2,
            p=0.7,
        ),
        A.Normalize(),
    ]
)

# Evaluation-time pipeline: resize and normalise only, no randomness.
test_transform = A.Compose(
    [
        A.Resize(height=img_image, width=img_image),
        A.Normalize(),
    ]
)

In [None]:
# #test
# checkset = df_train.reset_index(drop=True)
# cdataset = IssmDataset(checkset,transform=train_transform)
# from pylab import rcParams
# rcParams['figure.figsize'] = 15,10
# for i in range(3):
#     _,pos = plt.subplots(1,4)
#     for j in range(4):
#         idx = np.random.randint(len(cdataset))
#         img, label = cdataset[idx]
#         # img = cv2.cvtColor(img,cv2.COLOR_RGB2BGR)
#         # print(img)
#         # img /= 255
#         pos[j].imshow(img.transpose(0,1).transpose(1,2))
#         pos[j].set_title(label)
#         pos[j].axis('off')
#         # plt.show()

In [None]:
def get_model(model_name, out_features, drop_rate=0.5):
    """Create a pretrained timm model with a fresh linear classification head.

    Args:
        model_name: architecture name passed to ``timm.create_model``.
        out_features: output size of the new ``classifier`` layer.
        drop_rate: value written to the model's ``drop_rate`` attribute.

    Returns:
        The model; its ``classifier`` is a new ``nn.Linear`` whose input width
        is taken from the classifier it replaces.
    """
    net = timm.create_model(model_name, pretrained=True)
    net.drop_rate = drop_rate
    head_in = net.classifier.in_features
    net.classifier = nn.Linear(head_in, out_features)
    return net

# Size the head from the configured class count instead of a literal 10 so the
# two cannot drift apart.
eff_model = get_model(model_name,class_num)
# Assigning the result keeps the cell from echoing the full module repr.
eff_model = eff_model.to(device)


In [None]:
class LabelSmoothLoss(nn.Module):
    """Cross-entropy against label-smoothed targets.

    The true class gets weight ``1 - smoothing``; every other class gets
    ``smoothing / (num_classes - 1)``. With the default ``smoothing=0.0`` the
    targets are one-hot.
    """

    def __init__(self, smoothing=0.0):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, input, target):
        """Return the mean smoothed loss over all rows.

        Args:
            input: logits with the classes on the last dimension.
            target: integer class indices, one per row of ``input``.
        """
        num_classes = input.size(-1)
        # Start from the off-target weight everywhere ...
        off_target = torch.ones_like(input) * self.smoothing / (num_classes - 1.)
        # ... then overwrite the true-class position of every row.
        soft_target = off_target.scatter(
            -1, target.unsqueeze(-1), (1. - self.smoothing)
        )
        per_row = -(soft_target * F.log_softmax(input, dim=-1)).sum(dim=-1)
        return per_row.mean()

In [None]:
# Loss: LabelSmoothLoss with its default smoothing of 0.0, i.e. one-hot targets.
# (nn.CrossEntropyLoss() was the previously used alternative.)
criterion = LabelSmoothLoss()

# SGD with momentum over every model parameter.
optimizer = optim.SGD(
    params=eff_model.parameters(),
    lr=0.01,
    momentum=0.9,
)

# Cosine learning-rate schedule spanning `epoch` scheduler steps.
# The variable name is kept as spelled so existing references keep working.
schduler = CosineAnnealingLR(optimizer, T_max=epoch)

In [None]:
def train_one_epoch(model,optimizer,criterion,data_loader,epoch_idx=None):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: network to train; switched to train mode.
        optimizer: optimizer stepped once per batch.
        criterion: callable ``criterion(pred, label)`` returning a scalar loss.
        data_loader: iterable of ``(data, label)`` batches.
        epoch_idx: number shown in the header line. When omitted, the
            module-level ``epoch`` (the configured total) is printed, as before.

    Returns:
        Mean of the per-batch losses as a float, or ``nan`` if the loader
        produced no batches.
    """
    shown_epoch = epoch if epoch_idx is None else epoch_idx
    print('run No.{} epoches, lr = {}'.format(shown_epoch,optimizer.param_groups[0]['lr']))
    model.train()
    train_loss = []
    for data,label in data_loader:
        optimizer.zero_grad()
        data,label = data.to(device),label.to(device)
        pred = model(data)
        loss = criterion(pred, label)
        loss.backward()
        optimizer.step()
        # .item() yields a plain float, so the log line no longer shows a
        # tensor repr with device/grad_fn.
        train_loss.append(loss.item())
        # running mean over (at most) the last 100 batches
        recent = train_loss[-100:]
        average_loss = sum(recent)/len(recent)
        print('average loss = {}, loss = {}'.format(average_loss, train_loss[-1]))
    if not train_loss:
        return float('nan')
    return sum(train_loss)/len(train_loss)


In [None]:
def val_epoch(model,optimizer,criterion,data_loader):
    """Evaluate ``model`` on ``data_loader`` without tracking gradients.

    Args:
        model: network to evaluate; switched to eval mode.
        optimizer: unused; kept so the signature mirrors ``train_one_epoch``.
        criterion: callable ``criterion(pred, label)`` returning a scalar loss.
        data_loader: iterable of ``(data, label)`` batches.

    Returns:
        Unweighted mean of the per-batch losses as a float, or ``nan`` if the
        loader produced no batches.
    """
    model.eval()
    loss_sum = []
    with torch.no_grad():
        for data,label in data_loader:
            data,label = data.to(device),label.to(device)
            pred = model(data)
            loss = criterion(pred, label)
            loss_sum.append(loss.item())
    # Computed after the loop so an empty loader reports nan instead of
    # hitting an unbound local.
    average_loss = sum(loss_sum)/len(loss_sum) if loss_sum else float('nan')
    print('average loss = {}'.format(average_loss))
    return average_loss
        
    

## Cross-validation

In [None]:
def preparedata(train_idx,val_idx,df=df_train):
    """Build the train and validation loaders for one split.

    Args:
        train_idx: positional row indices of the training rows in ``df``.
        val_idx: positional row indices of the validation rows in ``df``.
        df: frame to split; defaults to ``df_train``.

    Returns:
        ``(train_loader, val_loader)``. The train loader shuffles and applies
        ``train_transform``; the validation loader keeps order and applies
        ``test_transform``. Both yield ``(data, label)`` batches.
    """
    # Split indices are positions, so select with iloc, and honour the `df`
    # argument instead of always reading the global frame.
    train_ = df.iloc[train_idx].reset_index(drop=True)
    val_ = df.iloc[val_idx].reset_index(drop=True)
    train_ds = IssmDataset(train_,'train',train_transform)
    # Not 'test': that mode drops the labels, and val_epoch unpacks
    # (data, label) from every batch.
    val_ds = IssmDataset(val_,'val',test_transform)
    train_loader = DataLoader(train_ds,batch_size=batch_size,shuffle=True,num_workers=4)
    val_loader = DataLoader(val_ds,batch_size=val_batch_size,shuffle=False,num_workers=4)
    return train_loader,val_loader

In [None]:
# Stratified k-fold cross-validation.
kf = StratifiedKFold(n_splits=fold,shuffle=False)

if not experience:
    # Snapshot the starting state once so every fold begins from the same
    # weights, optimizer state and learning-rate schedule. Without this the
    # model carried over from one fold has already trained on images that the
    # next fold uses for validation.
    # NOTE: the snapshot reflects whatever state the objects have when this
    # cell runs; run it on a freshly created model.
    initial_model_state = copy.deepcopy(eff_model.state_dict())
    initial_optim_state = copy.deepcopy(optimizer.state_dict())
    initial_sched_state = copy.deepcopy(schduler.state_dict())

for train_index , test_index in kf.split(df_train.image,df_train.target):
    train_loader,val_loader = preparedata(train_index,test_index)
    if experience:
        # Dry run: only check that the loaders can be built.
        continue
    eff_model.load_state_dict(initial_model_state)
    optimizer.load_state_dict(initial_optim_state)
    schduler.load_state_dict(initial_sched_state)
    for _ in range(epoch):
        train_one_epoch(eff_model,optimizer,criterion,train_loader)
        val_epoch(eff_model,optimizer,criterion,val_loader)
        # The cosine schedule was built with T_max=epoch: one step per epoch.
        schduler.step()
    