In [None]:
import numpy as np
import pandas as pd
import copy
import time
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torch.optim import lr_scheduler

# Show what the competition input directory contains.
os.listdir(path='/kaggle/input/ranzcr-clip-catheter-line-classification')

In [None]:
# Competition data root plus the train/ and test/ image folders beneath it.
path = '/kaggle/input/ranzcr-clip-catheter-line-classification/'
train_image, test_image = path + 'train/', path + 'test/'

df = pd.read_csv(path + 'train.csv')

# Hold out 20% of the rows for validation (fixed seed) and give each split
# a fresh 0..n-1 index so positional lookups line up with row order.
train_df, val_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.2, random_state=42)
)

print(train_df.shape, val_df.shape, sep='\n')

train_df.head()

In [None]:
# Label columns pulled from the training DataFrame; their count sets the
# width of the classifier's output layer.
label_cols = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
]
out_features = len(label_cols)

In [None]:
class XrayData(Dataset):
    """Multi-label X-ray image dataset driven by a DataFrame of study IDs.

    Each item is an ``(image, label)`` pair. The image is read from
    ``img_paths + '<StudyInstanceUID>.jpg'`` and converted from BGR to RGB;
    the label is a float tensor built from the module-level ``label_cols``
    columns of the corresponding row.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column and every column
            named in ``label_cols``.
        img_paths: Directory prefix that is string-concatenated with each
            file name, so it must already end with a path separator.
        transform: Optional callable invoked as ``transform(image=image)``
            that returns a mapping with an ``'image'`` entry.
        verbose: When true, print the path and label and display the image
            on every item access.
    """

    def __init__(self, df, img_paths, transform=None, verbose=False):
        self.df = df
        self.imgs = (self.df['StudyInstanceUID'] + '.jpg').values
        self.labels = self.df[label_cols].values
        self.img_paths = img_paths
        self.transform = transform
        self.verbose = verbose

    def __len__(self):
        """Return the number of rows (studies) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, optionally transform, and return ``(image, label)`` for ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or unreadable.
        """
        if torch.is_tensor(idx):
            # tolist() is a Tensor method; torch.tolist() does not exist.
            idx = idx.tolist()

        image_path = self.img_paths + self.imgs[idx]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = torch.tensor(self.labels[idx]).float()

        if self.transform:
            aug = self.transform(image=image)
            image = aug['image']

        if self.verbose:
            print(image_path)
            print(label)

            # A tensor image is channel-first and needs transposing for
            # imshow; an untransformed ndarray is already height x width x C.
            if torch.is_tensor(image):
                shown = image.numpy().transpose(1, 2, 0)
            else:
                shown = image
            plt.imshow(shown)
            plt.axis('off')
            plt.show()

        return (image, label)
    
    
# Training pipeline: random 224x224 crop and horizontal flip for augmentation,
# then normalisation and conversion to a tensor.
train_data = XrayData(
    df=train_df,
    img_paths=train_image,
    transform=A.Compose([
        A.RandomResizedCrop(224, 224),
        A.HorizontalFlip(),
        A.Normalize(),
        ToTensorV2(),
    ]),
)

# Validation pipeline: deterministic resize + centre crop, no augmentation.
# Validation images come from the same folder as training images.
val_data = XrayData(
    df=val_df,
    img_paths=train_image,
    transform=A.Compose([
        A.Resize(height=256, width=256),
        A.CenterCrop(height=224, width=224),
        A.Normalize(),
        ToTensorV2(),
    ]),
)

# Only the training loader shuffles.
dataloaders = {
    'train': DataLoader(dataset=train_data, batch_size=32, shuffle=True, num_workers=4),
    'val': DataLoader(dataset=val_data, batch_size=32, shuffle=False, num_workers=4),
}

# Number of samples per phase, used to average the summed batch losses.
dataset_sizes = {
    phase: len(loader.dataset) for phase, loader in dataloaders.items()
}

In [None]:
# # For testing only (kept disabled): single-batch forward pass to sanity-check the model's outputs

# model = models.resnet50(pretrained=True)
# in_features = model.fc.in_features
# model.fc = nn.Linear(in_features, out_features)

# device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
# model = model.to(device)

# criterion = nn.BCEWithLogitsLoss()

# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# sample_image, sample_label = next(iter(dataloaders['train']))
# model.eval()
# outputs = model(sample_image)
# sample_label = sample_label
# # print(outputs)
# # print(sample_label)
# # print(criterion(outputs, sample_label))
# temp = []
# temp.append(outputs.detach().numpy())
# temp.append(outputs.detach().numpy())
# temp = np.array(temp)
# temp = temp.reshape(-1, 11)
# pd.DataFrame(temp)

In [None]:
def train_model(model, criterion, optimizer, scheduler, epochs=25):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a training pass followed by a validation pass over the
    module-level ``dataloaders`` dict (keys ``'train'`` and ``'val'``), moving
    each batch to the module-level ``device``. The weights with the lowest
    validation loss seen so far are snapshotted and restored before returning.

    Args:
        model: Network to train; updated in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch, after the
            training pass.
        epochs: Number of epochs to run.

    Returns:
        The same ``model`` object with the best-validation weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    # Start at +inf so the first validation pass always becomes the best.
    best_loss = float('inf')

    for epoch in range(epochs):
        print(f"Epoch {epoch}/{epochs-1}")
        print("-"*10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            # Reset per phase: otherwise the validation loss would also
            # contain the whole training-loss sum of the same epoch.
            running_loss = 0.0
            samples_seen = 0

            for images, labels in dataloaders[phase]:
                images = images.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(images)
                    loss = criterion(outputs, labels)

                if is_train:
                    loss.backward()
                    optimizer.step()

                # criterion returns a batch mean; weight it by the batch size
                # so a smaller final batch is averaged correctly.
                batch_size = images.size(0)
                running_loss += loss.item() * batch_size
                samples_seen += batch_size

            if is_train:
                scheduler.step()

            # Divide by the samples actually processed (guarding an empty loader).
            epoch_loss = running_loss / max(samples_seen, 1)

            print(f'{phase} loss: {epoch_loss:.4f}')

            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60}m {time_elapsed % 60}s')
    print(f'Best val loss: {best_loss:.4f}')

    model.load_state_dict(best_model_wts)

    return model


# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

# Pretrained ResNet-50 whose final fully connected layer is replaced by one
# with a single output per label column. The head is swapped before the
# model is moved so the new layer lands on the target device too.
model = models.resnet50(pretrained=True)
in_features = model.fc.in_features
model.fc = nn.Linear(in_features=in_features, out_features=out_features)
model = model.to(device)

# Loss that takes raw logits.
criterion = nn.BCEWithLogitsLoss()

# SGD with momentum over all model parameters.
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

# Multiply the learning rate by 0.1 every 7 epochs.
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

# Train, then keep the model carrying its best-validation weights.
model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=25,
)

In [None]:
# Serialize the whole trained model object (not just its state_dict) to disk.
torch.save(obj=model, f='model_ranzcr.pth')