Credit: https://www.kaggle.com/yasufuminakama/ranzcr-resnet200d-3-stage-training-step2 


In [1]:
import os
import sys
sys.path.append('../input/pytorch-images-seresnet')
import math
import time
import random
import numpy as np 
import cv2
from tqdm.auto import tqdm
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingLR
import albumentations
from albumentations import *
from albumentations.pytorch import ToTensorV2
import timm
from torch.cuda.amp import autocast, GradScaler
import glob as glob
import pandas as pd
from contextlib import contextmanager

In [2]:
class CFG:
    device='GPU'
    nprocs=1
    print_freq=100
    num_workers=4
    model_name='resnet200d'
    teacher='../input/resnet200d-public/resnet200d_320_CV9632.pth'
    startpoint = '../input/resnet200d-pretrained-weight/resnet200d_ra2-bdba9bf9.pth'
    size=512
    scheduler='CosineAnnealingLR'
    epochs=1
    T_max=1
    lr=5e-4 
    min_lr=1e-6
    batch_size=16 
    weight_decay=1e-6
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=416
    target_size=11
    n_fold=5
    trn_fold=[0]
    train=True
    

In [3]:
train_files = glob.glob('../input/data/*/*/*.png')
data={'image_path':train_files}
folds = pd.DataFrame(data,columns=['image_path'])
folds

Unnamed: 0,image_path
0,../input/data/images_003/images/00006199_010.png
1,../input/data/images_003/images/00004833_016.png
2,../input/data/images_003/images/00006260_000.png
3,../input/data/images_003/images/00004911_010.png
4,../input/data/images_003/images/00004186_007.png
...,...
112115,../input/data/images_006/images/00012351_000.png
112116,../input/data/images_006/images/00012558_000.png
112117,../input/data/images_006/images/00011572_000.png
112118,../input/data/images_006/images/00011654_000.png


In [4]:
@contextmanager
def timer(name):
    t0 = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    LOGGER.info(f'[{name}] done in {time.time() - t0:.0f} s.')


def init_logger(log_file='train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()
def seed_torch(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(seed=CFG.seed)

In [5]:
class TrainDataset(Dataset):
    def __init__(self, df, use_annot=False, annot_size=50, transform=None):
        self.df = df
        self.file_names = df['image_path'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        file_path = file_name
        image = cv2.imread(file_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image

In [6]:
def get_transforms(*, data):
    
    if data == 'train':
            return Compose([
            Resize(CFG.size, CFG.size),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

In [7]:
class CustomResNet200D(nn.Module):
    def __init__(self, model_name='resnet200d', pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        n_features = self.model.fc.in_features
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, CFG.target_size)

    def forward(self, x):
        bs = x.size(0)
        features = self.model(x)
        pooled_features = self.pooling(features).view(bs, -1)
        output = self.fc(pooled_features)
        return features

In [8]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(train_loader, teacher_model, model, criterion, optimizer, epoch, scheduler, device):
    scaler = GradScaler()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    model.train()
    start = end = time.time()
    global_step = 0
    for step, (images) in enumerate(train_loader):
        data_time.update(time.time() - end)
        images = images.to(device)
        with torch.no_grad():
            teacher_features = teacher_model(images)
        
        batch_size = images.size(0)
        with autocast():
            features = model(images)
            loss = criterion(teacher_features, features)

            losses.update(loss.item(), batch_size)
            if CFG.gradient_accumulation_steps > 1:
                loss = loss / CFG.gradient_accumulation_steps
            scaler.scale(loss).backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
            if (step + 1) % CFG.gradient_accumulation_steps == 0:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
                global_step += 1

        batch_time.update(time.time() - end)
        end = time.time()

        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  .format(
                   epoch+1, step, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(train_loader)),
                   grad_norm=grad_norm,
                   ))
    return losses.avg

In [9]:
def train_loop(folds, fold):
    
    train_folds = folds.reset_index(drop=True)


    train_dataset = TrainDataset(train_folds, use_annot=True,
                                 transform=get_transforms(data='train'))
    
    train_loader = DataLoader(train_dataset, 
                              batch_size=CFG.batch_size, 
                              shuffle=True, 
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
    teacher_model = CustomResNet200D(CFG.model_name, pretrained=False)
    teacher_model.load_state_dict(torch.load(CFG.teacher)['model'])
    for param in teacher_model.parameters():
        param.requires_grad = False
    teacher_model.eval()
    teacher_model.to(device)
    
    model = CustomResNet200D(CFG.model_name, pretrained=True)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)

    train_criterion = nn.MSELoss()

    best_score = 0.
    best_loss = np.inf
    
    for epoch in range(CFG.epochs):
        start_time = time.time()

        avg_loss = train_fn(train_loader, teacher_model, model, train_criterion, optimizer, epoch, scheduler, device)

        scheduler.step()


        elapsed = time.time() - start_time

        if CFG.device == 'GPU':
            LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}   time: {elapsed:.0f}s')
        
           
        if avg_loss < best_loss:
            best_loss = avg_loss      
            LOGGER.info(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
            torch.save({'model': model.state_dict()},
                       f'{CFG.model_name}_fold{fold}_best_loss.pth')

In [10]:
def main():
    if CFG.train:
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                train_loop(folds, fold)

In [11]:
# if __name__ == '__main__':
#     main()