In [None]:
!pip install "../input/pretrained-models/pretrained-models.pytorch-master"

In [None]:
# Import libraries
import os
import time
import random
import pandas as pd
import matplotlib.pyplot as plt
import json
import cv2
import albumentations as albu
import numpy as np
from tqdm import tqdm

import torch
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold, train_test_split
import albumentations as albu
from albumentations.pytorch import ToTensorV2


import pretrainedmodels

In [None]:


def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and configures cuDNN for repeatable kernels.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Exported for child processes; it does not affect the hash seed of the
    # interpreter that is already running.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also cover every GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different algorithms between runs.
    torch.backends.cudnn.benchmark = False


seed_everything(42)

In [None]:
# Root folder of the competition data; every path below is derived from it.
DATA_DIR = "../input/ranzcr-clip-catheter-line-classification"

# Label table and submission template.
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
submission = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

# Image folders and the file names found in each of them.
train_path = f"{DATA_DIR}/train"
test_path = f"{DATA_DIR}/test"
train_files = os.listdir(train_path)
test_files = os.listdir(test_path)



In [None]:
# Preview the first rows of the training label table.
train_df.head()

In [None]:
# Hold out 15% of the labelled rows for validation; the fixed random_state
# keeps the split identical between runs. Both parts get a fresh 0..n-1 index.
# NOTE(review): the split is row-wise. If train.csv has a per-patient column,
# images of one patient may end up on both sides - confirm whether a grouped
# split is wanted.
train, valid = (
    subset.reset_index(drop=True)
    for subset in train_test_split(train_df, test_size=0.15, random_state=42)
)

print(train.shape, valid.shape, sep="\n")

In [None]:
# Basic preprocessing only; try more augmentations and other image sizes
# for better results.
img_size = 256


def _build_augs():
    """Return the resize -> normalize -> tensor pipeline used by both splits."""
    return albu.Compose([
        albu.Resize(height=img_size, width=img_size, p=1.0),
        albu.Normalize(mean=[0.5], std=[0.5]),
        ToTensorV2(),
    ])


# Training pipeline. An optional extra augmentation to experiment with:
# albu.Cutout(p=0.1, num_holes=10, max_h_size=8, max_w_size=8)
train_augs = _build_augs()

# Validation pipeline (also reused for the test images).
valid_augs = _build_augs()

In [None]:
class RanzcrDataset(Dataset):
    """Labelled image dataset backed by a DataFrame.

    Each row of ``df`` describes one image: ``StudyInstanceUID`` is the file
    name without the ``.jpg`` extension, and every column between the first
    and the last one is read as a label value.

    Args:
        files_folder_path: Folder that contains the ``<StudyInstanceUID>.jpg`` files.
        df: DataFrame with a ``StudyInstanceUID`` column; columns ``1:-1`` are the labels.
        transfroms: Optional callable invoked as ``transfroms(image=image)['image']``.
            (The misspelled parameter name is kept for backward compatibility.)
    """

    def __init__(self, files_folder_path, df, transfroms = None):
        self.files_folder_path = files_folder_path
        self.df = df
        self.transforms = transfroms
        # Extract ids and labels once. Looking the row up by id inside
        # __getitem__ scanned the whole frame for every single sample.
        self.image_ids = df.StudyInstanceUID.values
        self.labels = df.iloc[:, 1:-1].to_numpy(dtype=np.float32)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for row ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        image_id = self.image_ids[idx]
        image_path = os.path.join(self.files_folder_path, image_id + ".jpg")
        # Flag 0 loads the image as single-channel grayscale.
        image = cv2.imread(image_path, 0)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")

        if self.transforms:
            image = self.transforms(image=image)['image']

        labels = torch.tensor(self.labels[idx], dtype=torch.float32)

        return image, labels
    
        

In [None]:
# Both splits read from the same image folder; only the rows and transforms differ.
trainset = RanzcrDataset(train_path, train, train_augs)
validset = RanzcrDataset(train_path, valid, valid_augs)

# Loader settings shared by both splits - try different values here too.
loader_kwargs = dict(batch_size=32, num_workers=4)

# Only the training batches are shuffled.
trainloader = DataLoader(trainset, shuffle=True, **loader_kwargs)
validloader = DataLoader(validset, shuffle=False, **loader_kwargs)

To work with grayscale images, we need to patch the first conv layer so that it takes 1 input channel instead of 3.

In [None]:
def patch_first_conv(model, in_channels):
    """Change the number of input channels of the model's first convolution.

    The first ``nn.Conv2d`` found by ``model.modules()`` is modified in place:

    * ``in_channels`` equal to the layer's current value -> nothing is changed
      (existing weights are kept).
    * ``in_channels == 1`` -> the original kernels are summed over the input
      channel axis, so the existing weights are reused.
    * ``in_channels == 2`` -> the first two input channels are kept and
      rescaled by ``old_channels / 2`` (1.5 for a 3-channel layer).
    * anything else -> a fresh weight tensor is allocated and the layer's
      default ``reset_parameters()`` initialisation is applied (this also
      re-initialises the bias).

    Args:
        model: Module whose first ``nn.Conv2d`` is patched.
        in_channels: Desired number of input channels (>= 1).

    Raises:
        ValueError: If ``in_channels`` is smaller than 1 or the model contains
            no ``nn.Conv2d`` layer.
    """
    if in_channels < 1:
        raise ValueError(f"in_channels must be >= 1, got {in_channels}")

    # get first conv
    first_conv = next(
        (module for module in model.modules() if isinstance(module, nn.Conv2d)),
        None,
    )
    if first_conv is None:
        raise ValueError("model contains no nn.Conv2d layer to patch")

    if in_channels == first_conv.in_channels:
        # Nothing to change; previously this case wiped the existing weights.
        return

    weight = first_conv.weight.detach()

    # change input channels for first conv
    first_conv.in_channels = in_channels

    if in_channels == 1:
        new_weight = weight.sum(1, keepdim=True)
        first_conv.weight = nn.parameter.Parameter(new_weight)
    elif in_channels == 2 and weight.shape[1] >= 2:
        # Rescale so the summed response stays comparable to the original layer.
        new_weight = weight[:, :2] * (weight.shape[1] / 2.0)
        first_conv.weight = nn.parameter.Parameter(new_weight)
    else:
        # new_empty keeps the dtype and device of the original weights.
        new_weight = weight.new_empty(
            first_conv.out_channels,
            first_conv.in_channels // first_conv.groups,
            *first_conv.kernel_size
        )
        first_conv.weight = nn.parameter.Parameter(new_weight)
        first_conv.reset_parameters()

In [None]:
# Build SE-ResNeXt50 (32x4d) without downloading weights, then load them from
# the local checkpoint file. Try other backbones (e.g. efficientnet or resnest) too.
model = pretrainedmodels.__dict__['se_resnext50_32x4d'](pretrained=None)
# map_location="cpu" keeps the load working on machines without a GPU;
# train_model() moves the model to `device` afterwards.
model.load_state_dict(
    torch.load("../input/seresnext50/se_resnext50_32x4d-a260b3a4.pth", map_location="cpu")
)

# To freeze the pretrained layers, uncomment the next two lines:
# for param in model.parameters():
#     param.requires_grad = False

# Adaptive pooling always produces a 1x1 map.
# NOTE(review): presumably needed because img_size differs from the
# backbone's default input size - confirm.
model.avg_pool = nn.AdaptiveAvgPool2d(output_size=1)

# We have 11 classes; reuse the input width of the existing head instead of
# hard-coding it so other backbones work as well.
model.last_linear = nn.Linear(model.last_linear.in_features, 11)

# Images are loaded as single-channel grayscale, so the first conv must take 1 channel.
patch_first_conv(model, 1)

# Learning parameters
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=2, )
criterion = nn.BCEWithLogitsLoss()
num_epochs=10

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def train_model(model_conv, train_loader, valid_loader, criterion, optimizer, n_epochs=num_epochs, attempt=1,
                scheduler=None):
    """Train ``model_conv`` with per-epoch validation, checkpointing and early stopping.

    After every epoch the mean validation loss is computed. When it is not
    worse than the best value so far, the model's ``state_dict`` is saved to
    ``model_<attempt>.pt``; otherwise a counter is increased and training
    stops once the counter exceeds the patience of 5 epochs.

    Args:
        model_conv: Model to train; it is moved to the notebook-level ``device``.
        train_loader: Iterable of ``(data, target)`` training batches.
        valid_loader: Iterable of ``(data, target)`` validation batches.
        criterion: Loss called as ``criterion(output, target.float())``.
        optimizer: Optimizer that updates the model parameters.
        n_epochs: Maximum number of epochs.
        attempt: Suffix used in the checkpoint file name.
        scheduler: Optional scheduler stepped with the validation loss after
            each epoch. When omitted, the notebook-level ``scheduler`` is used
            if one is defined (the previous behaviour).

    Returns:
        The model with the weights of the last epoch that ran. The weights of
        the best epoch are the ones stored in ``model_<attempt>.pt``.
    """
    if scheduler is None:
        # Backward compatibility: this function used to step the global scheduler.
        scheduler = globals().get('scheduler')

    model_conv.to(device)
    valid_loss_min = np.inf
    patience = 5
    # number of consecutive epochs in which the validation loss did not improve
    p = 0

    for epoch in range(1, n_epochs + 1):
        print(time.ctime(), 'Epoch:', epoch)

        # Re-enable training mode: eval() from the previous epoch's validation
        # would otherwise stay active for all following epochs.
        model_conv.train()
        train_loss = []

        for data, target in tqdm(train_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model_conv(data)
            loss = criterion(output, target.float())
            train_loss.append(loss.item())
            loss.backward()
            optimizer.step()

        model_conv.eval()
        val_loss = []
        # No gradients are needed for validation; this saves memory and time.
        with torch.no_grad():
            for data, target in tqdm(valid_loader):
                data, target = data.to(device), target.to(device)
                output = model_conv(data)
                loss = criterion(output, target.float())
                val_loss.append(loss.item())

        print(f'Epoch {epoch}, train loss: {np.mean(train_loss):.4f}, valid loss: {np.mean(val_loss):.4f}.')

        valid_loss = np.mean(val_loss)
        if scheduler is not None:
            scheduler.step(valid_loss)

        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min,
            valid_loss))
            torch.save(model_conv.state_dict(), 'model_{}.pt'.format(attempt))
            valid_loss_min = valid_loss
            p = 0
        else:
            # No improvement (a NaN loss is counted as no improvement as well).
            p += 1
            print(f'{p} epochs of increasing val loss')
            if p > patience:
                print('Stopping training')
                break

    return model_conv

In [None]:
# Train the model.
# Feel free to change the optimizer, scheduler and other parameters.
# n_epochs=2 is only a short run - increase the number of epochs for real training.
model_seresnext = train_model(model, trainloader, validloader, criterion = criterion, 
                              optimizer = optimizer, n_epochs=2, attempt=1)

In [None]:
class RanzcrTestDataset(Dataset):
    """Unlabelled image dataset built from a list of file names.

    Args:
        files_folder_path: Folder that contains the image files.
        test_files: Sequence of file names (including extension) inside that folder.
        transfroms: Optional callable invoked as ``transfroms(image=image)['image']``.
            (The misspelled parameter name is kept for backward compatibility.)
    """

    def __init__(self, files_folder_path, test_files, transfroms = None):
        self.files_folder_path = files_folder_path
        self.test_files = test_files
        self.transforms = transfroms

    def __len__(self):
        return len(self.test_files)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image at position ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        image_path = os.path.join(self.files_folder_path, self.test_files[idx])
        # Flag 0 loads the image as single-channel grayscale.
        image = cv2.imread(image_path, 0)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")

        if self.transforms:
            image = self.transforms(image=image)['image']

        return image
    

In [None]:
# Feed the test images in the row order of the sample submission.
# os.listdir() returns names in arbitrary order, while the predictions are
# later written into `submission` by row position - with the listdir order,
# predictions could be attached to the wrong ids.
# NOTE(review): assumes the submission id column is called StudyInstanceUID
# (as in train.csv) and that test images use the .jpg extension - confirm.
submission_files = [image_id + ".jpg" for image_id in submission["StudyInstanceUID"]]

missing_files = sorted(set(submission_files) - set(test_files))
if missing_files:
    raise FileNotFoundError(
        f"{len(missing_files)} submission images not found in {test_path}, e.g. {missing_files[0]}"
    )

testset = RanzcrTestDataset(test_path, submission_files, valid_augs)
testloader = DataLoader(testset, batch_size = 64, num_workers = 0, shuffle = False)


In [None]:
# Put the trained model into inference mode and collect one array of
# sigmoid probabilities per test batch.
model_seresnext.eval()
preds = []
tk0 = tqdm(enumerate(testloader), total=len(testloader))

# Gradients are not needed anywhere in this loop.
with torch.no_grad():
    for i, images in tk0:
        y_preds = model_seresnext(images.to(device))
        preds.append(torch.sigmoid(y_preds).cpu().numpy())

In [None]:
# Stack the per-batch arrays into one (n_images, n_labels) array.
predictions = np.concatenate(preds)
# Label columns: everything between the first and the last column of the training frame.
target_cols = list(train.columns[1:-1])

# Predictions are matched to submission rows by position, so the counts must agree.
if len(predictions) != len(submission):
    raise ValueError(
        f"Got {len(predictions)} predictions for {len(submission)} submission rows"
    )

# One vectorised assignment instead of a Python loop with a .loc write per row.
submission[target_cols] = predictions

submission.to_csv('submission.csv', index=False)
submission.head()