In [None]:
!pip install timm --quiet
!pip install -U albumentations --quiet
!pip install transformers --quiet
!pip install adamp --quiet
import gc
import os
import psutil
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm 

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import f1_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import accuracy_score,roc_auc_score

from adamp import AdamP

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
import gc
from tqdm.auto import tqdm
import cv2
import warnings
warnings.simplefilter('ignore')
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
class config:
    """Static settings for the notebook; used as a plain namespace, never instantiated."""

    # Reproducibility: value handed to set_seed().
    SEED = 42

    # Directory the per-image PNG paths are built from.
    INPUT_ROOT = "../input/siim-covid19-resized-to-512px-png/train"

    # Architecture name handed to the model wrapper.
    MODEL_NAME = "tf_efficientnetv2_m_in21ft1k"

    # Optimisation settings.
    NUM_EPOCHS = 25
    LEARNING_RATE = 1e-3
    train_BATCH_SIZE = 12
    val_BATCH_SIZE = 64

In [None]:
def set_seed(seed):
    """Seed every random number generator the notebook touches.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU generator plus
    the current and all CUDA devices), exports ``PYTHONHASHSEED`` and puts
    cuDNN into deterministic mode with benchmarking switched off.

    Args:
        seed: integer seed shared by all generators.
    """
    # Environment and pure-Python / NumPy generators.
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU first, then the CUDA ones.
    for seeder in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)

    # cuDNN: no autotuner, deterministic kernels only.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
# Seed all generators once up front, before any data or model code below runs.
set_seed(config.SEED)

In [None]:
# Study-level table; later code reads its `id` column and derives StudyInstanceUID from it.
df=pd.read_csv('../input/siim-covid19-detection/train_study_level.csv')
# Image-level table; later code reads its `id` and `StudyInstanceUID` columns.
ann_df=pd.read_csv('../input/siim-covid19-detection/train_image_level.csv')

In [None]:
# The study key is the part of `id` in front of the first underscore.
df['StudyInstanceUID'] = df['id'].str.split('_').str[0]
# Drop `id` before merging so it cannot collide with ann_df['id'].
df = df.drop(columns=['id'])

# Attach the study-level columns to every image row of the same study.
ann_df = ann_df.merge(df, on='StudyInstanceUID')

# Same prefix extraction for the image ids. The whole column is assigned at
# once: element-wise writes through ann_df['id'].iloc[...] are chained
# assignment and are not guaranteed to reach ann_df.
ann_df['id'] = ann_df['id'].str.split('_').str[0]

# One PNG per image id under the configured input directory.
path = [os.path.join(config.INPUT_ROOT, image_id + '.png') for image_id in ann_df['id']]
ann_df['path'] = path

In [None]:
# Keep a single row per study: the first row seen for each StudyInstanceUID,
# in order of first appearance. drop_duplicates does this in one pass instead
# of re-scanning the whole frame once per study.
ann_df = ann_df.drop_duplicates(subset='StudyInstanceUID', keep='first').reset_index(drop=True)

In [None]:
# Stack the four class columns into an (n_rows, 4) array; the column order
# here fixes which integer each class gets.
# NOTE(review): presumably the columns are one-hot, so argmax recovers the class — confirm.
label_array = ann_df.loc[:, ['Negative for Pneumonia', 'Typical Appearance',
                             'Indeterminate Appearance', 'Atypical Appearance']].to_numpy()
# Position of the largest entry in each row becomes the integer target.
label = label_array.argmax(axis=1)
ann_df['target'] = label

In [None]:
# From here on `df` is the per-study training table (it no longer refers to
# the raw study-level CSV): file path, the four class columns, the study id
# and the integer target.
df = ann_df.loc[:, ['path',
                    'Negative for Pneumonia', 'Typical Appearance',
                    'Indeterminate Appearance', 'Atypical Appearance',
                    'StudyInstanceUID', 'target']]
print(df.shape[0])

In [None]:
class SIIM_Train_Dataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs image files with their label rows.

    Args:
        X: indexable collection of image file paths.
        y: indexable collection of labels aligned with ``X``; its length
            defines the dataset size.
        transforms: callable invoked as ``transforms(image=array)`` whose
            result is indexed with ``"image"``; a falsy value skips it.
    """

    def __init__(self, X, y,transforms):
        self.X, self.y, self.transforms = X, y, transforms

    def __len__(self):
        """Number of samples, taken from the label collection."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The file is opened with PIL, converted to RGB and turned into a NumPy
        array; the transform, when set, is applied to that array. The label
        is returned as a float32 tensor.
        """
        image_path, raw_label = self.X[idx], self.y[idx]
        pixels = np.array(Image.open(image_path).convert('RGB'))
        if self.transforms:
            pixels = self.transforms(image=pixels)["image"]
        target = torch.tensor(raw_label, dtype=torch.float32)
        return pixels, target

In [None]:
def train_tfms(img_size=384):
    """Build the augmentation pipeline used for training images.

    Args:
        img_size: side length, in pixels, of the square image produced by the
            resize step. Defaults to 384, the value that used to be hard-coded.

    Returns:
        An ``A.Compose`` that resizes, then randomly flips, changes
        brightness, shifts/scales/rotates and blanks one patch, and finally
        normalises the image and converts it to a tensor.
    """
    # Side of the blanked patch: 40% of the image side.
    hole = int(img_size * 0.4)
    return A.Compose([
        # p=1.0 already makes a step unconditional, so `always_apply` is not passed.
        A.Resize(img_size, img_size, p=1.0),
        A.HorizontalFlip(p=0.5),
        # RandomBrightness is deprecated; this is its documented replacement,
        # with the contrast change disabled so only brightness varies.
        A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.0, p=0.75),
        # border_mode=0 is cv2.BORDER_CONSTANT.
        A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=30, border_mode=0, p=0.75),
        # NOTE(review): Cutout is deprecated in favour of CoarseDropout, whose
        # arguments differ between albumentations releases — port it once the
        # installed version is pinned.
        A.Cutout(max_h_size=hole, max_w_size=hole, num_holes=1, p=0.75),
        # Per-channel mean/std (the commonly used ImageNet statistics).
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            p=1.0,
        ),
        ToTensorV2(p=1.0),
    ])
    
def valid_tfms(img_size=384):
    """Build the deterministic pipeline used for validation images.

    Args:
        img_size: side length, in pixels, of the square image produced by the
            resize step. Defaults to 384, the value that used to be hard-coded.

    Returns:
        An ``A.Compose`` that resizes, normalises and converts to a tensor,
        with no random augmentation.
    """
    return A.Compose([
        # p=1.0 already makes a step unconditional, so `always_apply` is not passed.
        A.Resize(img_size, img_size, p=1.0),
        # Per-channel mean/std (the commonly used ImageNet statistics).
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            p=1.0,
        ),
        ToTensorV2(p=1.0),
    ])

In [None]:
def train_loop(train_data_loader, model, optimizer,criterion):
    """Run one optimisation pass over ``train_data_loader``.

    The model is switched to training mode and the optimizer takes one step
    per batch. Batches are moved to the module-level ``device``.

    Args:
        train_data_loader: iterable of ``(inputs, targets)`` batches that
            also supports ``len()``.
        model: module being trained.
        optimizer: optimizer bound to ``model``'s parameters.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.

    Returns:
        list of float: the loss of every batch, in iteration order.
    """
    model.train()
    optimizer.zero_grad()  # start the pass with clean gradients
    batch_losses = []
    progress = tqdm(enumerate(train_data_loader), total=len(train_data_loader))
    for _, (inputs, targets) in progress:
        inputs = inputs.to(device).float()
        targets = targets.to(device)
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        batch_losses.append(loss.item())
        # Drop the batch references before the next one is fetched.
        del inputs, targets
    return batch_losses

def valid_loop(valid_dataloader,model,criterion):
    """Evaluate ``model`` on ``valid_dataloader`` without tracking gradients.

    The model is switched to eval mode. Batches are moved to the module-level
    ``device``.

    Args:
        valid_dataloader: iterable of ``(inputs, targets)`` batches that also
            supports ``len()``.
        model: module being evaluated.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.

    Returns:
        tuple: ``(losses, predicts)`` where ``losses`` holds one float per
        batch and ``predicts`` holds one NumPy row of raw model outputs per
        sample, in iteration order.
    """
    model.eval()
    batch_losses, collected = [], []
    with torch.no_grad():
        for _, (inputs, targets) in tqdm(enumerate(valid_dataloader), total=len(valid_dataloader)):
            inputs = inputs.to(device).float()
            targets = targets.to(device)
            batch_out = model(inputs)
            batch_losses.append(criterion(batch_out, targets).item())
            # extend() adds the batch row by row, so the list is per-sample.
            collected.extend(batch_out.detach().cpu().numpy())
            del inputs, targets
    return batch_losses, collected

In [None]:
class SIIMNet(nn.Module):
    """Thin wrapper around a timm model created with four outputs.

    Args:
        model_name: architecture identifier passed to ``timm.create_model``;
            pretrained weights are requested.
    """

    def __init__(self, model_name):
        super().__init__()
        self.model_name = model_name
        self.base_model = timm.create_model(model_name, num_classes=4, pretrained=True)

    def forward(self, x):
        """Return the wrapped model's output for ``x`` unchanged."""
        return self.base_model(x)

In [None]:
from sklearn.model_selection import GroupKFold

# The four study-level class columns; used for the training targets and for
# the validation ground truth.
LABEL_COLUMNS = ['Negative for Pneumonia', 'Typical Appearance',
                 'Indeterminate Appearance', 'Atypical Appearance']
# Index of the single fold (out of 5) that this cell trains.
TRAIN_FOLD = 3

# Group-aware split: rows sharing a StudyInstanceUID never straddle train/valid.
skf = GroupKFold(n_splits=5)
for fold, (train_index, valid_index) in enumerate(skf.split(df,groups=df['StudyInstanceUID'].values)):
    if fold != TRAIN_FOLD:
        continue

    print(f"### FOLD-{fold} ###")
    set_seed(config.SEED)

    # --- data -------------------------------------------------------------
    train_df=df.iloc[train_index].reset_index(drop=True)
    valid_df=df.iloc[valid_index].reset_index(drop=True)
    Id_train = train_df['path'].values
    label_train = train_df[LABEL_COLUMNS].values
    Id_valid = valid_df['path'].values
    label_valid = valid_df[LABEL_COLUMNS].values
    # Ground truth for the AUC does not change between epochs; take it once.
    y_true = label_valid

    train_dset = SIIM_Train_Dataset(Id_train,label_train,train_tfms())
    train_data_loader = torch.utils.data.DataLoader(train_dset, batch_size=config.train_BATCH_SIZE, shuffle=True, num_workers=2,drop_last=True,pin_memory=True)
    valid_dset = SIIM_Train_Dataset(Id_valid,label_valid,valid_tfms())
    valid_dataloader = torch.utils.data.DataLoader(valid_dset, batch_size=config.val_BATCH_SIZE, shuffle=False, num_workers=2,pin_memory=True)

    # --- model / optimisation ----------------------------------------------
    model = SIIMNet(config.MODEL_NAME).to(device)
    optimizer = AdamP(model.parameters(), lr = config.LEARNING_RATE)
    # The scheduler is stepped on the mean validation loss (mode='min').
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=2, threshold=0.00004, min_lr=1e-5, verbose=True) # (0.75,0.00004,1e-5) (0.7)
    best_score = 0
    results = []
    criterion_train =  nn.BCEWithLogitsLoss().to(device)
    criterion_val = nn.BCEWithLogitsLoss().to(device)

    # NOTE(review): the output file names below say "512_resolution" although
    # the transforms resize to 384 by default; names are kept unchanged in
    # case other code loads these files — confirm before renaming.
    for epoch in range(config.NUM_EPOCHS):
        print(f"epoch={epoch}")
        train_losses = train_loop(train_data_loader, model, optimizer,criterion_train)
        valid_losses, valid_predicts = valid_loop(valid_dataloader, model,criterion_val)

        # AUC is computed on the raw model outputs collected by valid_loop.
        roc_auc = roc_auc_score(y_true, valid_predicts)

        t_loss, v_loss = np.array(train_losses).mean(), np.array(valid_losses).mean()
        scheduler.step(v_loss)
        res = {"t_loss": t_loss, "v_loss": v_loss, "roc_auc_score":roc_auc}

        # "Best" checkpoint: weights only, overwritten whenever the AUC improves.
        if best_score<roc_auc:
            best_score=roc_auc
            torch.save(model.state_dict(), f"SIIMnet_f{fold}_best_1model_{config.MODEL_NAME}_512_resolution.pth")
        print(res)
        results.append(res)

        # "Last" checkpoint: full training state, overwritten every epoch.
        torch.save({'model_state_dict':model.state_dict(),'optimizer_state_dict':optimizer.state_dict(),
                      'scheduler_state_dict':scheduler.state_dict(),'train_loss':t_loss,'valid_loss':v_loss,
                      'roc_auc_score':roc_auc}
                      , f"SIIMnet_f{fold}_last_model_{config.MODEL_NAME}_512_resolution.pth")
        # Append-mode text log, one line per epoch.
        with open(f'file_f{fold}_{config.MODEL_NAME}SIIM_512_resolution.txt','a') as file1:
            file1.write(f'Epoch:{epoch} Train_loss:{t_loss} Valid_loss:{v_loss} roc_auc_score:{roc_auc}\n')
        gc.collect()
        torch.cuda.empty_cache()

    # Release the fold's objects and cached GPU memory.
    del model,optimizer,scheduler,train_data_loader,valid_dataloader,criterion_train,criterion_val,train_losses,valid_losses,valid_predicts
    torch.cuda.empty_cache()