In [None]:
import numpy as np 
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
from torch.optim import lr_scheduler
import torch.nn.functional as F
import torchvision.transforms
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.cuda.amp import autocast, GradScaler

import shutil
import pandas as pd
from PIL import Image, ImageDraw
import os
import shutil
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from sklearn import preprocessing
from contextlib import contextmanager
from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip,
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout,
    IAAAdditiveGaussianNoise, Transpose
    )
from albumentations.pytorch import ToTensorV2, ToTensor
import copy
import time
from collections import defaultdict
import cv2
import matplotlib.pyplot as plt
from glob import glob
import sys
import math

In [None]:

class CFG:
    """Static hyper-parameters shared by the notebook cells."""

    # Worker processes handed to each DataLoader.
    num_workers = 4
    # Side length (pixels) passed to Resize for both images and masks.
    size = 512
    # Scheduler name; the training cell visible in this notebook builds a StepLR
    # directly and does not read this value.
    scheduler = 'CosineAnnealingLR'
    # Epoch count and batch size for the segmentation model.
    segmentation_epochs = 15
    segmentation_batch_size = 16
    # Label column names, grouped by device type.
    target_cols = [
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]
    
# Where the training log is written; created on first run if absent.
OUTPUT_DIR = './'
# Checkpoint directory name and the file the best weights are saved to.
model_dir = "weights"
best_model_path = "/".join([model_dir, "best.pth"])

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

def init_logger(log_file=OUTPUT_DIR+'train.log'):
    """Configure and return this module's logger.

    INFO-level messages are emitted, message text only, through a
    ``StreamHandler`` (its default stream) and appended to ``log_file``.

    The function is idempotent: handlers left on the logger by an earlier call
    are removed and closed before the new pair is attached.

    Args:
        log_file: Path of the log file to write to.

    Returns:
        The configured ``logging.Logger`` for ``__name__``.
    """
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler

    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger hands back the same object on every call, so re-running this
    # cell would otherwise stack a second handler pair and print (and write)
    # every message twice. Closing also releases the previous log file handle.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger


LOGGER = init_logger()

In [None]:
class MyMiniUNet(nn.Module):
    """Small U-Net style network: a stem conv, three down stages, three up stages.

    Each decoder stage receives the previous decoder output together with the
    encoder feature map of matching depth (skip connection). The final 1x1
    convolution maps to ``n_classes`` channels and applies no activation, so
    the output is raw logits.

    Args:
        n_channels: Number of channels of the input image.
        n_classes: Number of output channels.
    """

    def __init__(self, n_channels, n_classes):
        super().__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes

        # Stem: 3x3 conv with padding 1, spatial size unchanged.
        self.inc = nn.Conv2d(n_channels, 32, kernel_size=3, padding=1)

        # Encoder.
        self.down1 = MyDown(32, 64)
        self.down2 = MyDown(64, 128)
        self.down3 = MyDown(128, 256)

        # Decoder: each input width is (deeper feature channels + skip channels).
        self.up1 = MyUp(256 + 128, 128)
        self.up2 = MyUp(128 + 64, 64)
        self.up3 = MyUp(64 + 32, 32)

        # Per-pixel projection to the requested number of output channels.
        self.outc = nn.Conv2d(32, n_classes, kernel_size=1)

    def forward(self, x):
        """Run the encoder, then the decoder with skip connections; return logits."""
        stem = self.inc(x)
        enc1 = self.down1(stem)
        enc2 = self.down2(enc1)
        bottleneck = self.down3(enc2)

        dec = self.up1(bottleneck, enc2)
        dec = self.up2(dec, enc1)
        dec = self.up3(dec, stem)
        return self.outc(dec)


class MyDown(nn.Module):
    """Encoder stage: stride-2 3x3 convolution, instance norm, LeakyReLU(0.01).

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = [
            # stride=2 is what reduces the spatial resolution.
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1),
            nn.InstanceNorm2d(out_channels),
            nn.LeakyReLU(0.01),
        ]
        self.my_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply conv -> norm -> activation to ``x``."""
        features = self.my_conv(x)
        return features


class MyUp(nn.Module):
    """Decoder stage: upsample, align to the skip tensor, concatenate, convolve.

    Args:
        in_channels: Channels of the concatenated tensor, i.e. the channels of
            the upsampled input plus the channels of the skip tensor.
        out_channels: Channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.up = nn.Upsample(scale_factor=2, mode='nearest')

        self.my_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.InstanceNorm2d(out_channels),
            nn.LeakyReLU(0.01)
        )

    def forward(self, x1, x2):
        """Fuse the deeper feature map ``x1`` with the skip feature map ``x2``.

        Args:
            x1: Feature map from the deeper stage; it is upsampled by 2.
            x2: Skip feature map whose height/width the result is aligned to.

        Returns:
            The convolved concatenation ``[x2, upsampled x1]`` along dim 1.
        """
        x1 = self.up(x1)

        # Size gap left when the skip tensor had an odd height/width before
        # downsampling. Kept as plain Python ints: F.pad expects integer pad
        # amounts, and wrapping them in tensors allocates on every forward pass
        # and is rejected by F.pad on some PyTorch versions.
        diff_y = x2.size(2) - x1.size(2)
        diff_x = x2.size(3) - x1.size(3)

        # Pad order is (left, right, top, bottom); any odd remainder goes to
        # the right/bottom edge.
        x1 = F.pad(x1, [diff_x // 2, diff_x - diff_x // 2,
                        diff_y // 2, diff_y - diff_y // 2])

        fused = torch.cat([x2, x1], dim=1)
        return self.my_conv(fused)

In [None]:
class TrainSegmentationDataset(Dataset):
    """Dataset of (image, mask) pairs, one per unique ``StudyInstanceUID`` in ``df``.

    Images are read from ``<train_path>/<uid>.jpg`` and masks from
    ``<mask_path>/masks/<uid>.jpg``.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column.
        train_path: Directory holding the input JPEG images.
        mask_path: Directory whose ``masks`` sub-directory holds the mask JPEGs.
    """

    def __init__(self, df, train_path, mask_path):
        self.df = df
        # De-duplicate: df may list the same study on several rows.
        self.file_names = np.unique(df['StudyInstanceUID'].values)
        # Image pipeline: resize, normalise with the given per-channel
        # mean/std, convert to a tensor.
        self.transform = Compose([
            Resize(CFG.size, CFG.size),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])
        # Mask pipeline: resize and convert to a tensor, no mean/std
        # normalisation. NOTE(review): ToTensor presumably rescales 8-bit masks
        # to [0, 1]; confirm against the installed albumentations version.
        self.mask_transform = Compose([
            Resize(CFG.size, CFG.size),
            ToTensor(),
        ])
        self.train_path = train_path
        self.mask_path = mask_path

    def __len__(self):
        """Number of unique studies."""
        return len(self.file_names)

    def __getitem__(self, idx):
        """Load, transform and return the ``(image, label)`` pair at ``idx``.

        Returns:
            ``image``: the transformed RGB image tensor.
            ``label``: the transformed mask tensor with a leading axis of
            size 1 prepended.

        Raises:
            FileNotFoundError: If the image or the mask file cannot be read.
        """
        file_name = self.file_names[idx]
        file_path = self.train_path + "/" + file_name + ".jpg"
        mfile_path = self.mask_path + "/masks/" + file_name + ".jpg"

        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising; fail here with the offending path rather than
        # with an opaque error further down the pipeline.
        image = cv2.imread(file_path)
        if image is None:
            raise FileNotFoundError("could not read image file: " + file_path)
        mask = cv2.imread(mfile_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError("could not read mask file: " + mfile_path)

        # OpenCV decodes to BGR; the pipeline is fed RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = self.mask_transform(image=mask)
        image = self.transform(image=image)
        image = image['image']
        label = label['image']
        # Prepend a channel axis so the mask is (1, ...) like a 1-class output.
        label = label.reshape((1,) + label.shape)
        return image, label

In [None]:

def dice_loss(pred, target, smooth=1.):
    """Soft Dice loss, averaged over every (sample, channel) pair.

    For each sample and channel the loss is
    ``1 - (2 * sum(pred * target) + smooth) / (sum(pred) + sum(target) + smooth)``
    where the sums run over dims 2 and 3.

    Args:
        pred: 4-D tensor of predictions (sums are taken over dims 2 and 3).
        target: Tensor of the same shape as ``pred``.
        smooth: Constant added to numerator and denominator.

    Returns:
        Scalar tensor holding the mean loss.
    """
    def _sum_spatial(tensor):
        # Reduce dim 2 twice: after the first sum the old dim 3 becomes dim 2.
        return tensor.sum(dim=2).sum(dim=2)

    pred = pred.contiguous()
    target = target.contiguous()

    overlap = _sum_spatial(pred * target)
    numerator = 2. * overlap + smooth
    denominator = _sum_spatial(pred) + _sum_spatial(target) + smooth

    per_channel = 1 - (numerator / denominator)
    return per_channel.mean()

def calc_loss(pred, target, metrics, bce_weight=0.5):
    """Weighted sum of BCE-with-logits and Dice loss; also accumulates metrics.

    Args:
        pred: Raw logits from the model.
        target: Ground-truth tensor of the same shape as ``pred``.
        metrics: Mapping updated in place; the keys ``'bce'``, ``'dice'`` and
            ``'loss'`` each grow by the batch value times the batch size.
        bce_weight: Weight of the BCE term; the Dice term gets ``1 - bce_weight``.

    Returns:
        The combined loss tensor (still attached to the graph).
    """
    # BCE consumes logits directly; Dice is computed on probabilities.
    bce = F.binary_cross_entropy_with_logits(pred, target)
    dice = dice_loss(torch.sigmoid(pred), target)

    loss = bce * bce_weight + dice * (1 - bce_weight)

    # Weight by batch size so dividing by the sample count later gives a
    # per-sample average.
    batch_size = target.size(0)
    for key, value in (('bce', bce), ('dice', dice), ('loss', loss)):
        metrics[key] += value.data.cpu().numpy() * batch_size

    return loss

def print_metrics(metrics, epoch_samples, phase):
    """Log each accumulated metric divided by ``epoch_samples`` on one line.

    Args:
        metrics: Mapping of metric name to its accumulated sum.
        epoch_samples: Number of samples the sums were accumulated over.
        phase: Label prefixed to the log line (e.g. ``'train'`` or ``'val'``).
    """
    summary = ", ".join(
        "{}: {:4f}".format(name, total / epoch_samples)
        for name, total in metrics.items()
    )
    LOGGER.info("{}: {}".format(phase, summary))

    
def train_model(model, optimizer, scheduler, dataloaders, num_epochs=25):
    """Train ``model`` and return it loaded with the best validation weights.

    Each epoch runs a ``'train'`` phase followed by a ``'val'`` phase. Whenever
    the validation loss improves, the weights are copied in memory and saved to
    the module-level ``best_model_path``. Batches are moved to the module-level
    ``device``; progress is reported through ``LOGGER``.

    Args:
        model: Network to optimise (expected to already live on ``device``).
        optimizer: Optimizer bound to ``model``'s parameters.
        scheduler: Learning-rate scheduler; stepped once per epoch, after the
            training phase.
        dataloaders: Mapping with ``'train'`` and ``'val'`` iterables that
            yield ``(inputs, labels)`` batches.
        num_epochs: Number of epochs to run.

    Returns:
        ``model`` with the best-scoring weights loaded.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1e10

    for epoch in range(num_epochs):
        LOGGER.info('Epoch {}/{}'.format(epoch, num_epochs - 1))
        LOGGER.info('-' * 10)

        since = time.time()

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                # Report the learning rate this epoch will actually train with.
                for param_group in optimizer.param_groups:
                    LOGGER.info("LR: " + str(param_group['lr']))

                model.train()
            else:
                model.eval()

            metrics = defaultdict(float)
            epoch_samples = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Build the autograd graph only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = calc_loss(outputs, labels, metrics)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                epoch_samples += inputs.size(0)

            if is_train:
                # Step the schedule only after this epoch's optimizer updates.
                # Stepping first (the pre-1.1 PyTorch convention) skips the
                # first value of the schedule and triggers a UserWarning.
                scheduler.step()

            print_metrics(metrics, epoch_samples, phase)
            epoch_loss = metrics['loss'] / epoch_samples

            if phase == 'val' and epoch_loss < best_loss:
                LOGGER.info("saving best model")
                best_loss = epoch_loss
                # Deep copy: state_dict() returns references to live tensors.
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(best_model_wts, best_model_path)

        time_elapsed = time.time() - since
        LOGGER.info('{:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    LOGGER.info('Best val loss: {:4f}'.format(best_loss))

    model.load_state_dict(best_model_wts)
    return model


In [None]:
# Make sure the checkpoint directory exists before training saves into it.
os.makedirs(model_dir, exist_ok=True)

train_annotations = pd.read_csv("../input/ranzcr-clip-catheter-line-classification/train_annotations.csv")
images_path = "../input/ranzcr-clip-catheter-line-classification/train"
mask_path = "../input/generate-masks-ranzr"

# Split on unique study ids rather than on annotation rows. The dataset
# de-duplicates by StudyInstanceUID, so a row-level split can put the same
# study (and therefore the same image) in both the training and validation
# sets, which makes the validation loss optimistic.
# The seed is fixed so the split, and hence the "best" checkpoint, is reproducible.
SPLIT_SEED = 42
study_uids = train_annotations['StudyInstanceUID'].unique()
train_uids, valid_uids = train_test_split(study_uids, test_size=0.2, random_state=SPLIT_SEED)
training = train_annotations[train_annotations['StudyInstanceUID'].isin(train_uids)].reset_index()
validation = train_annotations[train_annotations['StudyInstanceUID'].isin(valid_uids)].reset_index()

train_dataset = TrainSegmentationDataset(training, images_path, mask_path)
valid_dataset = TrainSegmentationDataset(validation, images_path, mask_path)
# Training batches are shuffled and the ragged last batch dropped; validation
# keeps every sample in a fixed order.
train_loader = DataLoader(train_dataset,batch_size=CFG.segmentation_batch_size,shuffle=True,num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
valid_loader = DataLoader(valid_dataset,batch_size=CFG.segmentation_batch_size,shuffle=False,num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
dataloaders = {"train": train_loader, "val": valid_loader}

# 3 input channels (RGB), 1 output channel (mask logits).
model = MyMiniUNet(3, 1)
optimizer_ft = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)
# LR is multiplied by 0.1 every 30 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=30, gamma=0.1)
model = model.to(device)

LOGGER.info("****** starting training of segmentation model ******")
trained_model = train_model(model, optimizer_ft, exp_lr_scheduler,dataloaders, CFG.segmentation_epochs)
LOGGER.info("****** ended training of segmentation model ******")