# In [None]:
import sys
sys.path.append('../input/pretrainedmodels/pretrainedmodels-0.7.4')
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter
import scipy as sp
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from tqdm.auto import tqdm
from functools import partial
import cv2
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR, CosineAnnealingWarmRestarts
from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip,
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout,
    IAAAdditiveGaussianNoise, Transpose
)
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform
import timm
from torch.cuda.amp import autocast, GradScaler
from matplotlib import pyplot as plt
import warnings
import pretrainedmodels

# Suppress every warning category for the remainder of the run.
warnings.filterwarnings('ignore')

# Root folder of the competition data (note the trailing slash).
path = "/kaggle/input/ranzcr-clip-catheter-line-classification/"
# Not referenced by any code visible in this file.
pretrained_path = ''

# Prefer the first CUDA device; fall back to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Training metadata table, read once at import time.
train = pd.read_csv(os.path.join(path, "train.csv"))


class PARAMETER:
    """Static run configuration, read everywhere through class attributes."""

    # --- run control ---
    debug = False        # True -> the module-level debug branch shrinks the run
    debug_size = 15000   # rows sampled from `train` in debug mode
    seed = 242           # random_state of the debug sample
    n_fold = 2           # number of GroupKFold splits
    epochs = 2

    # --- data loading ---
    batch_size = 32
    num_workers = 4
    size = 512           # side length passed to the crop/resize transforms

    # --- model / optimiser ---
    model_name = 'resnext50_32x4d'  # alternative: 'resnet200d_320'
    lr = 1e-5
    weight_decay = 1e-6

    # --- learning-rate schedule ---
    scheduler = 'ReduceLROnPlateau'  # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    factor = 0.2     # ReduceLROnPlateau
    patience = 4     # ReduceLROnPlateau
    eps = 1e-6       # ReduceLROnPlateau
    T_max = 6        # CosineAnnealingLR
    T_0 = 6          # CosineAnnealingWarmRestarts

    # --- targets ---
    target_cols = ['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
                   'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal',
                   'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
                   'Swan Ganz Catheter Present']
    # Derived from the column list so the two can never disagree.
    target_size = len(target_cols)


# Debug mode: a single epoch on a random subset of the training table.
if PARAMETER.debug:
    PARAMETER.epochs = 1
    # Cap the sample at the table length: DataFrame.sample raises ValueError
    # when n exceeds the number of rows and replace is left at its default.
    train = train.sample(
        n=min(PARAMETER.debug_size, len(train)),
        random_state=PARAMETER.seed,
    ).reset_index(drop=True)


def get_transforms(*, data):
    """Build the albumentations pipeline for one data split.

    Args:
        data: ``'train'`` for the augmenting pipeline (random resized crop
            followed by a horizontal flip) or ``'valid'`` for the
            resize-only pipeline.

    Returns:
        A ``Compose`` whose last two steps are ``Normalize`` and
        ``ToTensorV2``.

    Raises:
        ValueError: if ``data`` is neither ``'train'`` nor ``'valid'``.
    """
    # Fail loudly: silently returning None here would produce a dataset that
    # applies no transform at all.
    if data not in ('train', 'valid'):
        raise ValueError(f"data must be 'train' or 'valid', got {data!r}")

    if data == 'train':
        spatial = [
            RandomResizedCrop(PARAMETER.size, PARAMETER.size, scale=(0.85, 1.0)),
            HorizontalFlip(p=0.5),
        ]
    else:
        spatial = [Resize(PARAMETER.size, PARAMETER.size)]

    # Both splits share the same normalisation and tensor conversion.
    return Compose(spatial + [
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ])


class Pre_trained_model(torch.nn.Module):
    """A ``pretrainedmodels`` backbone followed by a linear output layer.

    The backbone is created with 1000 outputs; ``self.linear`` maps those to
    ``PARAMETER.target_size`` values.
    """

    def __init__(self, model_name='inceptionv4', pretrained='imagenet'):
        super().__init__()
        # Look the constructor up by name in the pretrainedmodels namespace.
        build_backbone = pretrainedmodels.__dict__[model_name]
        self.model = build_backbone(num_classes=1000, pretrained=pretrained)
        self.linear = nn.Linear(1000, PARAMETER.target_size)

    def forward(self, input):
        """Apply the backbone and then the linear layer to ``input``."""
        return self.linear(self.model(input))

class MyModel(torch.nn.Module):
    """A ``pretrainedmodels`` backbone with 1001 outputs plus a linear head.

    Args:
        model_name: key of the constructor in ``pretrainedmodels.__dict__``.
        pretrained: forwarded unchanged to that constructor.
    """

    def __init__(self, model_name='inceptionv4', pretrained=False):
        super().__init__()
        self.model = pretrainedmodels.__dict__[model_name](num_classes=1001, pretrained=pretrained)
        # Head width comes from the config (currently 11) so it always matches
        # the label columns the datasets produce.
        self.linear = nn.Linear(1001, PARAMETER.target_size)

    def forward(self, input):
        """Apply the backbone and then the linear head to ``input``."""
        x = self.model(input)
        x = self.linear(x)
        return x


def get_scheduler(optimiser):
    """Create the learning-rate scheduler named by ``PARAMETER.scheduler``.

    Args:
        optimiser: the optimiser whose learning rate the scheduler drives.

    Returns:
        A ``ReduceLROnPlateau``, ``CosineAnnealingLR`` or
        ``CosineAnnealingWarmRestarts`` instance.

    Raises:
        ValueError: if ``PARAMETER.scheduler`` is not one of those three names.
    """
    name = PARAMETER.scheduler
    if name == 'ReduceLROnPlateau':
        # NOTE(review): PARAMETER.eps is defined, but the literal 1e-08 is what
        # gets passed here -- confirm which value is intended.
        return ReduceLROnPlateau(optimiser, mode='min', factor=PARAMETER.factor, patience=PARAMETER.patience,
                                 threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0,
                                 eps=1e-08, verbose=True)
    if name == 'CosineAnnealingLR':
        return CosineAnnealingLR(optimiser, T_max=PARAMETER.T_max, eta_min=0, last_epoch=-1)
    if name == 'CosineAnnealingWarmRestarts':
        return CosineAnnealingWarmRestarts(optimiser, T_0=PARAMETER.T_0, T_mult=1, eta_min=0, last_epoch=-1)
    # No module-level state is kept, so an unknown name cannot hand back a
    # scheduler left over from an earlier call.
    raise ValueError(f'Unknown scheduler: {name!r}')


class InputModel(Dataset):
    """Dataset of ``(image, label)`` pairs read from the ``train`` image folder.

    Args:
        df: table with a ``StudyInstanceUID`` column and every column named
            in ``PARAMETER.target_cols``.
        transform: optional callable invoked as ``transform(image=...)`` that
            returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.labels = df[PARAMETER.target_cols].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the backing table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, transform it, and pair it with its label.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        file_name = self.file_names[idx]
        file_path = os.path.join(path, 'train', f'{file_name}.jpg')
        image = cv2.imread(file_path)
        # cv2.imread reports failure by returning None rather than raising.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        label = torch.tensor(self.labels[idx]).float()
        return image, label


class OutputModel(Dataset):
    """Dataset of images (no labels) read from the ``test`` image folder.

    Args:
        df: table with a ``StudyInstanceUID`` column.
        transform: optional callable invoked as ``transform(image=...)`` that
            returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the backing table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and apply the transform, if any.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        file_name = self.file_names[idx]
        file_path = os.path.join(path, 'test', f'{file_name}.jpg')
        image = cv2.imread(file_path)
        # cv2.imread reports failure by returning None rather than raising.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image


def get_score(y_true, y_pred):
    """Score predictions column by column with ROC AUC.

    Args:
        y_true: 2-D array of ground-truth values, one column per target.
        y_pred: 2-D array of predicted scores, indexed the same way.

    Returns:
        ``(mean_auc, per_column_auc)`` where the second item is a list with
        one ``roc_auc_score`` result per column of ``y_true``.
    """
    column_count = y_true.shape[1]
    per_column = [
        roc_auc_score(y_true[:, col], y_pred[:, col])
        for col in range(column_count)
    ]
    return np.mean(per_column), per_column


def train_fn(train_loader, net, criterion, optimiser, epoch, device):
    """Train ``net`` for one pass over ``train_loader`` using AMP.

    Each batch is moved to ``device``, the forward pass and loss run under
    ``autocast``, and the backward pass and optimiser step go through a
    ``GradScaler`` created for this call. After every 32nd batch the mean
    loss of the last 32 batches is printed and the accumulator is reset.

    Args:
        train_loader: iterable of ``(images, labels)`` batches.
        net: model to optimise; switched to training mode.
        criterion: loss callable taking ``(outputs, labels)``.
        optimiser: optimiser stepping ``net``'s parameters.
        epoch: zero-based epoch index, used only in the progress message.
        device: device the batches are moved to.

    Returns:
        None.
    """
    net.train()
    grad_scaler = GradScaler()
    window_loss = 0
    for step, (images, labels) in enumerate(train_loader, start=1):
        images, labels = images.to(device), labels.to(device)
        optimiser.zero_grad()
        with autocast():
            loss = criterion(net(images), labels)
        grad_scaler.scale(loss).backward()
        grad_scaler.step(optimiser)
        grad_scaler.update()
        window_loss += loss.item()
        # Only report on every 32nd batch.
        if step % 32:
            continue
        print(f'train：{step * 100 / len(train_loader):.2f}', '% ',end='')
        print("Epoch: %2d, Batch: %4d, Loss: %.3f"
              % (epoch + 1, step, window_loss / 32))
        window_loss = 0


def valid_fn(valid_loader, net, criterion, device, valid_labels):
    """Evaluate ``net`` on ``valid_loader``, save a checkpoint, return the loss.

    The model is put in eval mode and each batch is run without gradients.
    Sigmoid outputs are gathered on the CPU and scored against
    ``valid_labels`` with ``get_score``. The model's ``state_dict`` is then
    saved to a file named after the mean score.

    Args:
        valid_loader: iterable of ``(images, labels)`` batches.
        net: model to evaluate.
        criterion: loss callable taking ``(outputs, labels)``.
        device: device the batches are moved to.
        valid_labels: ground-truth array passed to ``get_score``.

    Returns:
        The mean of the per-batch loss values.
    """
    net.eval()
    batch_probs = []
    loss_sum = 0
    for step, (images, labels) in enumerate(valid_loader, start=1):
        images = images.to(device)
        labels = labels.to(device)
        with torch.no_grad():
            logits = net(images)
            batch_loss = criterion(logits, labels)
        batch_probs.append(logits.sigmoid().to('cpu').numpy())
        loss_sum += batch_loss.item()
        # Progress message every 10th batch (the text means "evaluating").
        if step % 10 == 0:
            print(f'正在评估：{step * 100 / len(valid_loader):.2f}', '%')
    predictions = np.concatenate(batch_probs)
    # One list entry per batch, so its length is the batch count.
    mean_loss = loss_sum / len(batch_probs)
    score, scores = get_score(valid_labels, predictions)
    print(score)
    print(scores)
    # The checkpoint file is named after the mean score.
    PATH = str(score) + '.pth'
    print('save model: ',PATH)
    torch.save(net.state_dict(), PATH)
    return mean_loss


def train_loop(folds, fold, net):
    """Train and validate ``net`` on one cross-validation fold.

    Rows whose ``fold`` column equals ``fold`` form the validation set and
    all other rows the training set. For ``PARAMETER.epochs`` epochs the
    function runs ``train_fn`` and then ``valid_fn``, and steps the
    scheduler (with the validation loss for ``ReduceLROnPlateau``).

    Args:
        folds: table with a ``fold`` column, ``StudyInstanceUID`` and the
            target columns.
        fold: fold id to hold out for validation.
        net: model to train in place.
    """
    print("Using device: {}\n".format(str(device)))

    fold_column = folds['fold']
    train_index = folds[fold_column != fold].index
    valid_index = folds[fold_column == fold].index

    train_folds = folds.loc[train_index].reset_index(drop=True)
    valid_folds = folds.loc[valid_index].reset_index(drop=True)
    valid_labels = valid_folds[PARAMETER.target_cols].values

    train_dataset = InputModel(train_folds, transform=get_transforms(data='train'))
    valid_dataset = InputModel(valid_folds, transform=get_transforms(data='valid'))

    # Settings shared by both loaders.
    loader_kwargs = dict(
        batch_size=PARAMETER.batch_size,
        num_workers=PARAMETER.num_workers,
        pin_memory=True,
    )
    train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True, **loader_kwargs)
    valid_loader = DataLoader(valid_dataset, shuffle=False, drop_last=False, **loader_kwargs)

    optimiser = Adam(net.parameters(), lr=PARAMETER.lr, weight_decay=PARAMETER.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimiser)
    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(PARAMETER.epochs):
        print(f"\n========== fold: {fold} training ==========")
        train_fn(train_loader, net, criterion, optimiser, epoch, device)
        print('Result processing')
        val_loss = valid_fn(valid_loader, net, criterion, device, valid_labels)
        # Only the plateau scheduler takes the monitored metric.
        if PARAMETER.scheduler != 'ReduceLROnPlateau':
            scheduler.step()
            continue
        print(val_loss)
        scheduler.step(val_loss)


def main():
    """Assign CV folds, load the saved checkpoint and write ``submission.csv``.

    Steps:
      1. Label every row of ``train`` with a ``GroupKFold`` fold id, grouped
         by ``PatientID``.
      2. Build ``MyModel`` and load the checkpoint weights onto ``device``.
      3. Run the model over the rows of ``sample_submission.csv`` and write
         the sigmoid outputs to ``submission.csv``.
    """
    # GroupKFold
    folds = train.copy()
    gkf = GroupKFold(n_splits=PARAMETER.n_fold)
    groups = folds['PatientID'].values
    for n, (_, val_index) in enumerate(gkf.split(folds, folds[PARAMETER.target_cols], groups)):
        folds.loc[val_index, 'fold'] = n
    folds['fold'] = folds['fold'].astype(int)

    # Load the checkpoint onto whichever device was selected at import time;
    # a hard-coded 'cuda:0' would fail on a CPU-only machine.
    net = MyModel().to(device)
    state_dict = torch.load('../input/complet/0.9554569214239389.pth', map_location=device)
    net.load_state_dict(state_dict)

    # Training is currently switched off; uncomment to train on the folds
    # computed above.
    #     for fold in range(PARAMETER.n_fold):
    #         train_loop(folds, fold, net)

    # predict
    test = pd.read_csv(os.path.join(path, "sample_submission.csv"))
    test_dataset = OutputModel(test, transform=get_transforms(data='valid'))
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=PARAMETER.batch_size,
                                              shuffle=False,
                                              num_workers=PARAMETER.num_workers, pin_memory=True, drop_last=False)
    print('\nPredict processing')
    net.eval()
    preds = []
    # No gradients are needed for inference.
    with torch.no_grad():
        for batch in test_loader:
            outputs = net(batch.to(device))
            preds.append(outputs.sigmoid().to('cpu').numpy())
    predictions = np.concatenate(preds)

    print('Save prediction')
    test[PARAMETER.target_cols] = predictions
    test[['StudyInstanceUID'] + PARAMETER.target_cols].to_csv('submission.csv', index=False)
    print('Finish')


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()