In [3]:
import sys
import os
import cv2
import pdb
import time
import warnings
import random
import math
import warnings

import numpy as np
import pandas as pd

from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset, sampler
from torchcontrib.optim import SWA
import torchvision.models as models

from matplotlib import pyplot as plt
import albumentations as albu
warnings.filterwarnings("ignore")

import nvidia_smi
from apex import amp

In [None]:
from scheduler import CosineAnnealingLR_with_Restart

In [4]:
def get_transforms(phase):
    '''Build the albumentations pipeline for one phase.

    Args:
        phase: "train" adds a random horizontal flip (p=0.5) before the
            tensor conversion; any other value yields tensor conversion only.

    Returns:
        An ``albu.Compose`` wrapping the selected transforms.
    '''
    # The notebook's import cell never brings ToTensor into scope, so this
    # function raised NameError on a fresh kernel. Import it where it is used.
    # NOTE(review): assumes the legacy `albumentations.pytorch.ToTensor`
    # transform is the intended one -- confirm against the installed version.
    from albumentations.pytorch import ToTensor

    list_transforms = []
    if phase == "train":
        list_transforms.append(albu.HorizontalFlip(p=0.5))
    list_transforms.append(ToTensor())
    return albu.Compose(list_transforms)

class SteelDataset(Dataset):
    '''Image-level binary classification dataset (defect / no defect).

    Args:
        df: DataFrame indexed by image file name. The first four columns are
            read positionally as per-class annotations; a row is labelled
            positive when any of them is non-null.
        data_folder: Root folder that contains a ``train_images`` directory.
        phase: Passed to ``get_transforms`` to select the augmentation set.
    '''
    def __init__(self, df, data_folder, phase):
        self.df = df
        self.root = data_folder
        self.phase = phase
        self.transforms = get_transforms(phase)
        self.fnames = self.df.index.tolist()

    def __getitem__(self, idx):
        '''Return ``(image, label)`` for the row at position ``idx``.

        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        '''
        row = self.df.iloc[idx]
        image_id = row.name
        # Explicit positional slice: only the first four columns carry the
        # per-class annotations, whatever their labels are.
        label = float(row.iloc[:4].notnull().values.any())

        image_path = os.path.join(self.root, "train_images", image_id)
        img = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside the transform.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")

        augmented = self.transforms(image=img)
        return augmented['image'], label

    def __len__(self):
        return len(self.fnames)
    
def _init_fn(worker_id):
    np.random.seed(int(SEED))
    
def provider(
    train_df,
    val_df,
    phase,
    batch_size=16,
    num_workers=4,
):
    '''Return the DataLoader for one phase of training.

    Args:
        train_df: Frame used when ``phase == "train"``.
        val_df: Frame used for any other phase.
        phase: "train" or "val"; also forwarded to ``SteelDataset``.
        batch_size: Samples per batch.
        num_workers: Number of DataLoader worker processes.

    Returns:
        A ``DataLoader`` over a ``SteelDataset`` rooted at the module-level
        ``data_folder``.
    '''
    is_train = phase == "train"
    df = train_df if is_train else val_df
    image_dataset = SteelDataset(df, data_folder, phase)
    return DataLoader(
        image_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=False,
        # Only training benefits from shuffling; validation keeps a fixed,
        # repeatable order.
        shuffle=is_train,
        worker_init_fn=_init_fn,
    )

In [5]:
def epoch_log(phase, epoch, epoch_loss, start):
    '''Print the loss of a finished epoch with four decimal places.

    ``phase``, ``epoch`` and ``start`` are accepted for the caller's
    convenience but are not used in the message.
    '''
    message = "Loss: %0.4f" % epoch_loss
    print(message)


In [6]:
class Trainer(object):
    '''Runs mixed-precision training and validation of one model for one fold.

    Args:
        model: Network producing one logit per image.
        train_df: Frame of training rows, forwarded to ``provider``.
        val_df: Frame of validation rows, forwarded to ``provider``.
        fold: Fold number; used in the snapshot directory and checkpoint name.
    '''
    def __init__(self, model, train_df, val_df, fold):

        self.lr = 5e-4
        optimizer = SWA(optim.Adam(model.parameters(), lr=self.lr))

        # Keep the model/optimizer pair returned by amp.initialize; the
        # original code overwrote self.net with the raw `model` afterwards.
        self.net, self.optimizer = amp.initialize(
            model.to('cuda:0'), optimizer, opt_level="O2",
            keep_batchnorm_fp32=True, loss_scale="dynamic")

        self.fold = fold
        self.num_workers = 0
        self.batch_size = {"train": 4, "val": 4}
        # Accumulate gradients so the effective batch size is 32.
        self.accumulation_steps = 32 // self.batch_size['train']
        self.num_epochs = 40
        self.best_loss = float("inf")
        self.phases = ["train", "val"]
        self.device = torch.device("cuda:0")
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
        # Operates on raw logits: numerically more stable than applying
        # sigmoid and then BCELoss.
        self.criterion = nn.BCEWithLogitsLoss()
        self.scheduler = CosineAnnealingLR_with_Restart(
            self.optimizer,
            T_max=6,
            T_mult=1,
            model=model,
            out_dir=f'segmentation_fold_{fold}',
            take_snapshot=True,
            eta_min=1e-6)
        # NOTE(review): this overrides the `cudnn.benchmark = False` set in
        # the notebook's seeding cell -- confirm which one is intended.
        cudnn.benchmark = True
        self.dataloaders = {
            phase: provider(
                train_df=train_df,
                val_df=val_df,
                phase=phase,
                batch_size=self.batch_size[phase],
                num_workers=self.num_workers,
            )
            for phase in self.phases
        }
        self.losses = {phase: [] for phase in self.phases}

    def forward(self, images, targets):
        '''Run the network on one batch and compute the loss.

        Args:
            images: Batch of input tensors.
            targets: Batch of 0/1 labels with as many elements as the
                network produces logits.

        Returns:
            ``(loss, outputs)`` where ``outputs`` are the raw logits.
        '''
        images = images.to(self.device)
        outputs = self.net(images)
        # The collated labels arrive as shape (B,) while the logits are
        # (B, 1); reshape so the loss sees matching shapes.
        labels = targets.to(self.device).float().view_as(outputs)
        # Compute the loss in fp32 regardless of the logits' dtype.
        loss = self.criterion(outputs.float(), labels)
        return loss, outputs

    def iterate(self, epoch, phase):
        '''Run one full pass over the loader of ``phase``.

        In the "train" phase gradients are accumulated over
        ``accumulation_steps`` batches before each optimizer step.

        Returns:
            Mean per-batch loss of the epoch.
        '''
        start = time.strftime("%H:%M:%S")
        print(f"Starting epoch: {epoch} | phase: {phase} | ⏰: {start}")

        is_train = phase == "train"
        self.net.train(is_train)
        dataloader = self.dataloaders[phase]
        running_loss = 0.0
        total_batches = len(dataloader)
        self.optimizer.zero_grad()

        for itr, (images, targets) in enumerate(dataloader):
            loss, _ = self.forward(images, targets)
            loss = loss / self.accumulation_steps
            if is_train:
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()
                # Also step on the final batch so gradients from a trailing,
                # incomplete accumulation window are not thrown away.
                window_full = (itr + 1) % self.accumulation_steps == 0
                last_batch = (itr + 1) == total_batches
                if window_full or last_batch:
                    self.optimizer.step()
                    self.optimizer.zero_grad()
            running_loss += loss.item()

        # Undo the per-batch division by accumulation_steps for reporting.
        epoch_loss = (running_loss * self.accumulation_steps) / total_batches
        epoch_log(phase, epoch, epoch_loss, start)
        self.losses[phase].append(epoch_loss)

        nvidia_smi.nvmlInit()
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
        # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate

        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')

        torch.cuda.empty_cache()

        return epoch_loss

    def start(self):
        '''Train for ``num_epochs`` epochs, checkpointing the best model.

        After each training epoch the model is validated, the scheduler is
        stepped with the validation loss, and the state is saved to
        ``./model{fold}.pth`` whenever the validation loss improves. SWA
        averages are updated on selected epochs and swapped in at the end.
        '''
        for epoch in range(self.num_epochs):
            self.iterate(epoch, "train")
            state = {
                "epoch": epoch,
                "best_loss": self.best_loss,
                "state_dict": self.net.state_dict(),
                "optimizer": self.optimizer.state_dict(),
                'amp': amp.state_dict()
            }

            # to prevent GPU memory from overflowing on validation
            with torch.no_grad():
                val_loss = self.iterate(epoch, "val")
                self.scheduler.step(val_loss)

            if val_loss < self.best_loss:
                print("******** New optimal found, saving state ********")
                state["best_loss"] = self.best_loss = val_loss
                torch.save(state, f"./model{self.fold}.pth")
            print()
            # Fold the current weights into the SWA average every 4th epoch
            # once epoch 10 has passed.
            if epoch > 10 and epoch % 4 == 0:
                self.optimizer.update_swa()
        self.optimizer.swap_swa_sgd()


In [7]:
# Single seed shared by every random number generator used in the notebook.
SEED = 69
# Seed Python's built-in RNG.
random.seed(SEED)
# Exported for Python hash seeding; set here after the interpreter has started.
os.environ["PYTHONHASHSEED"] = str(SEED)
# Seed NumPy's global RNG.
np.random.seed(SEED)
# Seed PyTorch's CPU and CUDA generators.
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Ask cuDNN for deterministic behaviour and switch off its autotuner.
# NOTE(review): Trainer.__init__ later sets cudnn.benchmark = True, which
# overrides the line below.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False 

In [10]:
# Locations of the input files, all relative to the notebook's working directory.
sample_submission_path = './data/sample_submission.csv'
# CSV read by the data-preparation cell.
train_df_path = './data/train.csv'
# Root folder; SteelDataset reads images from its "train_images" subfolder.
data_folder = "./data/"
# Folder passed to ValidationDataset as the image root.
test_data_folder = "./data/test_images"

In [11]:
# Load the annotation CSV.
df = pd.read_csv(train_df_path)

# Split the combined 'ImageId_ClassId' key on '_' into its two parts.
df['ImageId'], df['ClassId'] = zip(*df['ImageId_ClassId'].str.split('_'))
df['ClassId'] = df['ClassId'].astype(int)

# Reshape to one row per image with one column per class id.
df = df.pivot(index='ImageId', columns='ClassId', values='EncodedPixels')

# Binary flag: 1 when the image has at least one non-null class column.
df['defects'] = (df.count(axis=1) >= 1).astype('int64')

# Hold out 10% of the images, stratified on the defect flag.
train_df, validation_df = train_test_split(
    df,
    test_size=0.1,
    stratify=df["defects"],
    random_state=SEED,
)

In [16]:
from sklearn.model_selection import KFold

folds = 6

kf = KFold(n_splits=folds, random_state=SEED, shuffle=True)

# Train one model per fold; fold numbers passed to Trainer start at 1.
for i, (train_index, test_index) in enumerate(kf.split(train_df)):
    # torchvision's ResNet constructor takes `num_classes`; the previous
    # `classes=1` keyword is not accepted and raised TypeError.
    model = models.resnext50_32x4d(num_classes=1)
    model_trainer = Trainer(model, train_df.iloc[train_index], train_df.iloc[test_index], i+1)
    model_trainer.start()
    del model_trainer

In [36]:
class ValidationDataset(Dataset):
    '''Dataset that yields ``(file name, image tensor)`` pairs for prediction.

    Args:
        root: Folder that contains the image files.
        df: Frame with an ``ImageId`` column; each unique id is served once,
            in order of first appearance.
    '''
    def __init__(self, root, df):
        # ToTensor is not imported in the notebook's import cell, so it is
        # imported here to avoid a NameError on a fresh kernel.
        # NOTE(review): assumes the legacy `albumentations.pytorch.ToTensor`
        # transform is the intended one -- confirm against the installed version.
        from albumentations.pytorch import ToTensor

        self.root = root
        self.fnames = df['ImageId'].unique().tolist()
        self.num_samples = len(self.fnames)
        self.transform = albu.Compose(
            [
                ToTensor(),
            ]
        )

    def __getitem__(self, idx):
        '''Return ``(fname, image)`` for position ``idx``.

        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        '''
        fname = self.fnames[idx]
        path = os.path.join(self.root, fname)
        image = cv2.imread(path)
        # cv2.imread returns None on failure instead of raising.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        images = self.transform(image=image)["image"]
        return fname, images

    def __len__(self):
        return self.num_samples

In [37]:
batch_size = 4

# Expose the index (image file names) as an 'ImageId' column. Rebinding via
# assign() avoids writing into a frame produced by train_test_split and keeps
# the cell idempotent.
validation_df = validation_df.assign(ImageId=validation_df.index)

# The hold-out rows were split off train.csv, so their images live next to
# the training images rather than in the test folder.
validationset = DataLoader(
    ValidationDataset(os.path.join(data_folder, "train_images"), validation_df),
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=False
)

In [42]:
import glob
from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix
import operator

# Collect the snapshot archives of every fold. The previous version
# re-assigned `checkpoints` on each iteration, so only the last fold's
# snapshots survived the loop.
checkpoints = []

for fold in range(folds):

    out_dir = f'segmentation_fold_{fold+1}'
    checkpoints.extend(sorted(glob.glob(out_dir + '/*.tar')))

In [23]:
def load_model(path):
    '''Rebuild the classifier and load weights from a checkpoint file.

    The architecture is the one built in the K-fold training cell
    (``resnext50_32x4d`` with a single output). The previous ``resnet34``
    call referenced an undefined name and would not match those weights.

    Args:
        path: Checkpoint file; expected to hold a dict with a
            ``'state_dict'`` entry.

    Returns:
        The model with the checkpoint weights loaded, on the CPU.
    '''
    # Imported locally so this function does not depend on the module-level
    # name `models`, which a later cell rebinds to a list.
    from torchvision.models import resnext50_32x4d

    model = resnext50_32x4d(num_classes=1)
    ch = torch.load(path, map_location={'cuda:0': 'cpu'})
    model.load_state_dict(ch['state_dict'])
    return model

In [24]:
# Load every checkpoint listed in `checkpoints` into its own model instance.
# NOTE(review): this rebinds `models`, which the import cell bound to
# torchvision.models; cells that call `models.<arch>(...)` will fail if they
# are re-run after this one without re-running the import cell.
models = [load_model(ch) for ch in checkpoints]

In [45]:
def predict(validationset, model):
    '''Run ``model`` over a loader and collect sigmoid probabilities.

    Args:
        validationset: Iterable of ``(fnames, images)`` batches.
        model: Network returning one row of logits per image.

    Returns:
        List of ``[fname, probability]`` entries, one per output value of
        each image, in loader order.
    '''
    # Send inputs to wherever the model lives; fall back to CUDA for a
    # parameter-less model, matching the previous hard-coded device.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    predictions = []
    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        for fnames, images in tqdm(validationset):
            batch_preds = torch.sigmoid(model(images.to(device)))
            batch_preds = batch_preds.cpu().numpy()
            for fname, preds in zip(fnames, batch_preds):
                for pred in preds:
                    predictions.append([fname, pred])

    return predictions

In [46]:
def model_results(preds, preds_class, true_labels):
    '''Print classification metrics and search for the best F1 threshold.

    Args:
        preds: NumPy array of predicted probabilities.
        preds_class: Array of 0/1 predictions at the caller's threshold.
        true_labels: Array of ground-truth 0/1 labels.

    Returns:
        ``(preds, best_preds_class)`` where ``best_preds_class`` is ``preds``
        binarised at the threshold that maximised F1.
    '''
    ROC_AUC = roc_auc_score(true_labels, preds)
    F1_score = f1_score(true_labels, preds_class)

    print(f'Default probability ROC AUC: {round(ROC_AUC, 3)}')
    print(f'Default ROC AUC: {round(roc_auc_score(true_labels, preds_class), 3)}')
    print(f'Default F1 score: {round(F1_score, 3)}')
    print()
    print('Confusion matrix: ')
    print(confusion_matrix(true_labels, preds_class))

    # The sweep steps by 0.001 but rounds to two decimals, so each distinct
    # threshold used to be scored about ten times. Deduplicate first: the
    # candidate set and its ascending order are unchanged.
    thresholds = np.unique(np.round(np.arange(0.005, 0.95, 0.001), 2))

    f1_thresholds = {}
    roc_auc_thresholds = {}
    for thresh in thresholds:
        thresholded = (preds > thresh).astype(int)
        f1_thresholds[thresh] = f1_score(true_labels, thresholded)
        roc_auc_thresholds[thresh] = roc_auc_score(true_labels, thresholded)

    # max() keeps the first (lowest) threshold among ties.
    best_f1_thresh, max_f1 = max(f1_thresholds.items(), key=operator.itemgetter(1))
    best_roc_auc_thresh, max_roc_auc = max(
        roc_auc_thresholds.items(), key=operator.itemgetter(1)
    )

    best_preds_class = (preds > best_f1_thresh).astype(int)

    print()
    print(f'Best roc-auc score: {round(max_roc_auc, 3)} with threshold {best_roc_auc_thresh}')
    print(f'Best f1 score: {round(max_f1, 3)} with threshold {best_f1_thresh}')

    print()
    print('Updated Confusion Matrix: ')
    print(confusion_matrix(true_labels, best_preds_class))

    return preds, best_preds_class

In [47]:
def validate(predictions, threshold=0.85):
    '''Score predictions against the hold-out labels in ``validation_df``.

    Args:
        predictions: List of ``[ImageId, probability]`` rows, in the same
            order as the rows of ``validation_df``.
        threshold: Probability at or above which an image is classed as
            defective for the "default" metrics. Defaults to 0.85.

    Returns:
        Whatever ``model_results`` returns for these predictions.
    '''
    df = pd.DataFrame(predictions, columns=['ImageId', 'Defected'])
    df['Class'] = (df['Defected'] >= threshold).astype(int)
    return model_results(
        df['Defected'].values,
        df['Class'].values,
        validation_df['defects'].values,
    )

In [None]:
# The device is the same for every model, so create it once.
device = torch.device("cuda")

for i, model in enumerate(models):
    print(F'Model: {i}')

    model.to(device)
    model.eval()

    # `predictions_df` is a list of [ImageId, probability] rows; the name is
    # kept so later cells that refer to it still work.
    predictions_df = predict(validationset, model)

    validate(predictions_df)

    # Move the model back to the CPU so the loaded models do not pile up in
    # GPU memory while the rest are evaluated.
    model.to("cpu")
    torch.cuda.empty_cache()