In [None]:
# Add the attached pytorch-image-models checkout to the module search path so that
# packages inside it can be imported (`timm` is imported further down — presumably from here).
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

In [None]:
import numpy as np
import pandas as pd 
import os
import time 
import albumentations as A
from albumentations.pytorch import ToTensorV2
from collections import defaultdict
from tqdm import tqdm
from sklearn import model_selection, preprocessing 
from sklearn.model_selection import KFold, GroupKFold
import cv2
from matplotlib import pyplot as plt
from PIL import Image
import copy

import torch 
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

from torch.utils.data import Dataset, DataLoader 
import torchvision
from torchvision import models, transforms 
from torch.cuda.amp import autocast, GradScaler



In [None]:
# Central run configuration; every later cell reads its hyper-parameters from here.
params = dict(
    model="efficientnet_b3",  # timm architecture name
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    lr=0.0001,                # initial Adam learning rate
    batch_size=2,
    num_workers=16,           # DataLoader worker processes
    num_epochs=5,             # epochs per fold
    T_0=6,                    # CosineAnnealingWarmRestarts: steps until the first restart
    min_lr=1e-6,              # CosineAnnealingWarmRestarts: eta_min
)

In [None]:
# Root of the competition data; the training JPEGs live in its train/ sub-directory.
ROOT_DIR = "../input/ranzcr-clip-catheter-line-classification"
# Trailing slash is kept so code that builds paths by plain string concatenation still works.
train_dir = f"{ROOT_DIR}/train/"

In [None]:
# Load the training annotations (train.csv under ROOT_DIR) into a DataFrame.
df = pd.read_csv(os.path.join(ROOT_DIR, "train.csv"))

In [None]:
# Preview the first five rows of the annotation table.
df.head(5)

In [None]:
# Number of distinct StudyInstanceUID values (missing values, if any, count as one value).
df['StudyInstanceUID'].nunique(dropna=False)

**Plot some Data**

In [None]:
import random 
# Sanity check: show a 3x3 grid of randomly chosen training images.
img_list = os.listdir(os.path.join(ROOT_DIR, "train/"))
fig, ax = plt.subplots(3, 3, figsize = (12, 12))

for row in range(3):
    for col in range(3):
        rand_idx = np.random.randint(len(img_list))
        # The second argument of cv2.imread is an IMREAD_* flag, not a colour-conversion
        # code, so load the file first (BGR) and convert to RGB explicitly for matplotlib.
        img = cv2.imread(os.path.join(ROOT_DIR, "train", img_list[rand_idx]))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        ax[row, col].imshow(img)

# plt.show() matches the other plotting cells; fig.show() warns under non-GUI backends.
plt.show()

In [None]:
# Load sample_submission.csv from ROOT_DIR; its columns are used below to pick the label names.
sample_df = pd.read_csv(os.path.join(ROOT_DIR, "sample_submission.csv"))

In [None]:
# Preview the first rows of the sample submission.
sample_df.head()

In [None]:
# Take every sample-submission column except the first as the label names.
classes = sample_df.columns[1:]

In [None]:
# Number of label columns selected above.
len(classes)

In [None]:
# Label columns of the training table, selected by the names in `classes`.
labels = df[classes]

In [None]:
# Display the label table.
labels

In [None]:
#img_list = os.list_dir(os.path.join(ROOT_DIR, df["StudyInstanceUID"]+".jpg"))

In [None]:
# Rebind `classes` to every column of df except the first, as a NumPy array.
# NOTE(review): df also has a "PatientID" column (used for fold grouping later); unless it is
# the first column it is included here — confirm. A later cell overwrites `classes` with an
# explicit list of label names.
classes = df.columns[1:].values

In [None]:
# Show the sample-submission columns after the first one.
sample_df.columns[1:]

In [None]:
# Explicit list of target columns; this is the definition the dataset and model below use.
classes = [
    # endotracheal tube
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    # nasogastric tube
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    # central venous catheter
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
]

In [None]:
# Label columns as a NumPy array (one row per training record, one column per class).
df[classes].values

In [None]:
# Bar chart of how often each distinct combination of values in the first five label
# columns occurs.
df[classes[:5]].value_counts().plot.bar(figsize=(12, 8))
plt.show()

**Defining Datasets and visualization**

In [None]:
# Peek at one entry of the first column of df (the element indexed by 1).
df.iloc[: ,0][1]

In [None]:
class RANZRDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs for the catheter images.

    Args:
        data_dir: Directory that contains one ``<StudyInstanceUID>.jpg`` file per row.
            A trailing slash is optional.
        df: DataFrame with a ``"StudyInstanceUID"`` column and the label columns.
        transform: Optional callable invoked as ``transform(image=ndarray)`` that returns
            a mapping with an ``"image"`` entry (the albumentations calling convention).
        label_cols: Names of the label columns. Defaults to the notebook-level
            ``classes`` sequence, which must then be defined before construction.
    """

    def __init__(self, data_dir, df, transform=None, label_cols=None):
        self.data_dir = data_dir
        self.df = df
        self.files = df["StudyInstanceUID"].values
        # Fall back to the global `classes` so existing three-argument calls keep working.
        self.label_cols = list(classes) if label_cols is None else list(label_cols)
        self.labels = df[self.label_cols].values
        self.transform = transform

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, label)`` where ``image`` is the RGB image (the transform's
            ``"image"`` output when a transform is set, otherwise the raw ndarray) and
            ``label`` is a float tensor holding the row's label values.

        Raises:
            FileNotFoundError: the expected JPEG does not exist.
            OSError: the file exists but OpenCV could not decode it.
        """
        file_path = os.path.join(self.data_dir, f"{self.files[idx]}.jpg")
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"Image file not found: {file_path}")

        # cv2.imread signals failure by returning None rather than raising.
        image = cv2.imread(file_path)
        if image is None:
            raise OSError(f"Could not decode image: {file_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR

        if self.transform:
            image = self.transform(image=image)["image"]

        label = torch.tensor(self.labels[idx]).float()
        return image, label

    def __len__(self):
        """Number of rows (samples) in the backing DataFrame."""
        return len(self.files)
        
        

In [None]:
# Smoke test: build the dataset without a transform, then show the first image and its labels.
dataset = RANZRDataset(train_dir, df)
img, label = dataset[0]
plt.imshow(img)
plt.show()
print(label)

In [None]:
def get_train_transform():
    """Build the albumentations pipeline applied to training images.

    The pipeline takes a random 300x300 resized crop, applies random flips and
    90-degree rotations, optionally one geometric distortion and one pixel-level
    adjustment, a hue/saturation/value shift, and finally converts to a tensor.

    Returns:
        An ``A.Compose`` instance, called as ``pipeline(image=ndarray)``.
    """
    # At most one of these warps is applied, and only 20% of the time.
    geometric_warp = A.OneOf(
        [
            A.OpticalDistortion(p=0.3),
            A.GridDistortion(p=.1),
            A.IAAPiecewiseAffine(p=0.3),
        ],
        p=0.2,
    )
    # At most one of these contrast/sharpness tweaks is applied, 30% of the time.
    pixel_tweak = A.OneOf(
        [
            A.CLAHE(clip_limit=2),
            A.IAASharpen(),
            A.IAAEmboss(),
            A.RandomBrightnessContrast(),
        ],
        p=0.3,
    )
    steps = [
        A.RandomResizedCrop(300, 300, p=1),
        A.Flip(p=0.5),
        A.RandomRotate90(),
        geometric_warp,
        pixel_tweak,
        A.HueSaturationValue(p=0.3),
        # Disabled experiments, kept for reference:
        #A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.50, rotate_limit=45, p=.25),
        #A.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps)
        
def get_valid_transform():
    """Build the deterministic albumentations pipeline applied to validation images.

    Validation must see the same pixels on every pass, otherwise the validation loss
    (used to pick the best checkpoint) is noisy and parts of the image may be cropped
    away. The image is therefore resized to 300x300 instead of randomly cropped.

    Returns:
        An ``A.Compose`` instance, called as ``pipeline(image=ndarray)``.
    """
    return A.Compose([
        A.Resize(300, 300),
        #A.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

In [None]:
# Preview four augmented training samples with their label vectors as titles.
dataset = RANZRDataset(train_dir, df, get_train_transform())
#img , label = dataset[0]
# `img` here is still the image left over from the previous cell.
print(np.array(img).shape)
fig, ax = plt.subplots(1, 4, figsize = (12, 10))
for i in range(0, 4):
    img , label = dataset[i]
    # ToTensorV2 yields (C, H, W); matplotlib wants (H, W, C). permute(1, 2, 0) does
    # that without swapping height and width (permute(2, 1, 0) transposed the image).
    ax[i].imshow(img.permute(1, 2, 0))
    ax[i].set_title(np.array(label))
plt.show()


In [None]:
# Datasets and loaders over the complete table. Both views cover every row and differ
# only in the transform; the cross-validation cell later rebinds all four names per fold.
train_dataset = RANZRDataset(train_dir, df, transform=get_train_transform())
valid_dataset = RANZRDataset(train_dir, df, transform=get_valid_transform())

# Earlier hold-out experiment (disabled):
#indices = torch.randperm(len(train_dataset)).tolist()
# train_dataset = torch.utils.data.Subset(train_dataset, indices=indices[:-100] )
# valid_dataset = torch.utils.data.Subset(valid_dataset, indices=indices[-100:] )

train_loader = DataLoader(
    train_dataset,
    shuffle=True,  # reshuffle the training samples every epoch
    batch_size=params["batch_size"],
    num_workers=params["num_workers"],
    pin_memory=True,
)
valid_loader = DataLoader(
    valid_dataset,
    shuffle=False,  # keep a fixed order so predictions line up with rows
    batch_size=params["batch_size"],
    num_workers=params["num_workers"],
    pin_memory=True,
)

In [None]:
# Report the number of samples in each dataset.
print(len(train_dataset), len(valid_dataset))

**Defining models**

In [None]:
# Number of label columns; this is the size of the model's output layer.
len(classes)

In [None]:
import timm


class RANZRModel(nn.Module):
    """timm backbone with its classification head replaced for this task.

    The backbone's ``classifier`` layer is swapped for a fresh ``nn.Linear`` with one
    output per entry in the notebook-level ``classes`` list. The architecture must
    therefore expose its head as ``.classifier``. Outputs are raw logits.

    Args:
        model_name: timm architecture name; defaults to ``params["model"]``.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name=params["model"], pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Keep the backbone's feature width; only the number of outputs changes.
        backbone.classifier = nn.Linear(backbone.classifier.in_features, len(classes))
        self.model = backbone

    def forward(self, x):
        """Return the backbone's output for batch ``x``, one value per class."""
        return self.model(x)

In [None]:
def train_one_epoch(epoch, model, criterion, optimizer, scheduler):
    """Run one optimisation pass over the module-level ``train_loader``.

    Args:
        epoch: Zero-based epoch index, used only for log messages.
        model: Network to train; switched to training mode here.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose gradients are zeroed and stepped once per batch.
        scheduler: Kept for signature compatibility and intentionally not stepped here.
            The calling loop already advances it once per epoch; stepping it after
            every batch as well made the warm-restart cycle restart every few batches
            and advanced the schedule twice at each epoch boundary.

    Returns:
        None. The mean batch loss for the epoch is printed.
    """
    model.train()  # Set model to training mode
    print('Epoch {}/{}'.format(epoch, params["num_epochs"] - 1))
    print('-' * 10)

    running_loss = 0.0
    for inputs, labels in train_loader:
        # Images arrive as integer tensors from ToTensorV2; the model needs floats.
        inputs = inputs.to(params["device"]).float()
        labels = labels.to(params["device"]).float()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # statistics
        running_loss += loss.item()

    print("Epoch {} - Training loss: {:.4f}".format(epoch, running_loss/len(train_loader)))
        
        
def valid_one_epoch(epoch, model, criterion, optimizer, scheduler):
    """Evaluate ``model`` on the module-level ``valid_loader``.

    Args:
        epoch: Epoch index (unused; kept for a signature parallel to training).
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Unused; kept for signature compatibility.
        scheduler: Unused; kept for signature compatibility.

    Returns:
        ``(valid_loss, y_pred)``: the mean batch loss, and a NumPy array of
        sigmoid-activated predictions concatenated over all batches in loader order.
    """
    preds = []
    model.eval()
    valid_loss = 0.0

    # Disable autograd here so the function is safe even when the caller forgets to
    # wrap it; nesting inside an outer torch.no_grad() block is harmless.
    with torch.no_grad():
        # Iterate over data.
        for inputs, labels in valid_loader:
            inputs = inputs.to(params["device"]).float()
            labels = labels.to(params["device"]).float()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Outputs are logits; convert to probabilities before collecting.
            preds.append(outputs.sigmoid().detach().cpu().numpy())
            valid_loss += loss.item()

    valid_loss = valid_loss/len(valid_loader)
    y_pred = np.concatenate(preds)
    print("val_loss: {:0.4f} ".format(valid_loss))
    return valid_loss, y_pred

In [None]:
# Build the network, loss, optimizer and LR schedule from the shared `params`.
model = RANZRModel(model_name=params["model"], pretrained=True).to(params["device"])

# Multi-label targets: independent sigmoid + binary cross-entropy per class, on raw logits.
criterion = nn.BCEWithLogitsLoss().to(params["device"])

optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])

# Cosine annealing from `lr` down to `min_lr`, restarting every T_0 scheduler steps.
scheduler = CosineAnnealingWarmRestarts(
    optimizer,
    T_0=params["T_0"],
    T_mult=1,
    eta_min=params["min_lr"],
    last_epoch=-1,
)


In [None]:
# 3-fold cross-validation, grouped by patient so that one patient's images never end up
# on both sides of a split. For each fold the checkpoint with the lowest validation loss
# is written to ./fold_<n>_<model>_best.pth.
kf = GroupKFold(n_splits=3)
groups = df["PatientID"].values
X = df["StudyInstanceUID"]
y = df[classes]
fold_var = 1
for train_index, test_index in kf.split(X, y, groups):
    train_dataset = RANZRDataset(train_dir, df.iloc[train_index], get_train_transform())
    valid_dataset = RANZRDataset(train_dir, df.iloc[test_index], get_valid_transform())

    # train_one_epoch / valid_one_epoch read these module-level loaders.
    train_loader = DataLoader(train_dataset,
                              batch_size = params["batch_size"],
                              num_workers = params["num_workers"],
                              pin_memory=True,
                              shuffle = True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size = params["batch_size"],
                              num_workers = params["num_workers"],
                              pin_memory= True,
                              shuffle = False)

    # Start every fold from fresh pretrained weights, optimizer state and LR schedule.
    # Reusing one model across folds means folds 2 and 3 validate on images the network
    # was already trained on in an earlier fold, which invalidates their validation loss
    # and the "best" checkpoints chosen from it.
    model = RANZRModel(model_name=params["model"], pretrained=True)
    model.to(params["device"])
    criterion = nn.BCEWithLogitsLoss().to(params["device"])
    optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=params["T_0"], T_mult=1,
                                            eta_min=params["min_lr"], last_epoch=-1)

    print(f"{'-'*10} fold {fold_var} result {'-'*10}")
    best_loss = float("inf")
    for epochs in range(params["num_epochs"]):
        train_one_epoch(epochs, model, criterion, optimizer, scheduler)
        with torch.no_grad():
            val_loss, preds = valid_one_epoch(epochs, model, criterion, optimizer, scheduler)
        # Advance the LR schedule after each validation pass.
        scheduler.step()
        if best_loss > val_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), f'./fold_{fold_var}_{params["model"]}_best.pth')
    fold_var += 1

The companion inference notebook can be pulled with `kaggle kernels pull razatabish/ranzr-clip-inference`.