In [1]:
import numpy as np
from pytorch_lightning import seed_everything, LightningModule, Trainer
from sklearn.utils import class_weight
import torch.nn as nn
import torch
from torch.utils.data.dataloader import DataLoader
from pytorch_lightning.callbacks import EarlyStopping,ModelCheckpoint,LearningRateMonitor
from torch.optim.lr_scheduler import  ReduceLROnPlateau
import torchvision
from sklearn.metrics import classification_report,f1_score,accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from glob import glob
from PIL import Image
import cv2
from torch.utils.data import DataLoader, Dataset,ConcatDataset

2021-12-12 01:29:59.251427: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [2]:
img_size=512
import albumentations as A
from albumentations.pytorch import ToTensorV2
aug= A.Compose([
            A.Resize(img_size+32,img_size+32),
            A.RandomCrop(img_size,img_size),
            A.HorizontalFlip(0.5),
            A.VerticalFlip(0.5),
            A.ShiftScaleRotate(rotate_limit=3),
            A.Blur(),A.RandomGamma(),
            A.Sharpen(), A.GaussNoise(),
            A.CoarseDropout(8,64,64),
            A.CLAHE(0.5),
            A.Normalize(mean=(0), std=(1)),
            ToTensorV2(p=1.0),
        ], p=1.0)

In [3]:
class DataReader(Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __getitem__(self, index):
        x=self.dataset[index][0]
        y=self.dataset[index][1]
        if self.transform:
            x=np.array(x)
            x=cv2.cvtColor(src=x, code=cv2.COLOR_RGB2GRAY)
            x=self.transform(image=x)['image']
        return x, y
    
    def __len__(self):
        return len(self.dataset)

In [4]:
import timm
import torchmetrics
import torchvision.models as models
from torch.utils.data import random_split
import torchxrayvision as xrv

In [5]:
class OurModel(LightningModule):
    def __init__(self,combined,train_subsampler,test_subsampler):
        super(OurModel,self).__init__()
        #architecute
        self.model = xrv.models.ResNet(weights="resnet50-res512-all")
        self.model.model.fc=nn.Sequential(nn.Linear(2048,512),nn.ReLU(),
                              nn.Linear(in_features=512, out_features=3),
                              )
        
        self.model=self.model.model
        self.train_subsampler=train_subsampler
        self.test_subsampler=test_subsampler
        self.combined=combined
        
        #parameters
        self.lr=1e-3
        self.batch_size=24
        self.numworker=4
        self.acc = torchmetrics.Accuracy()
        self.criterion=nn.CrossEntropyLoss()
        
        self.trainacc,self.valacc=[],[]
        self.trainloss,self.valloss=[],[]
        
        
    def forward(self,x):
        x= self.model(x)
        return x

    def configure_optimizers(self):
        opt=torch.optim.AdamW(params=self.parameters(),lr=self.lr )
        scheduler=ReduceLROnPlateau(opt,mode='min', factor=0.75, patience=5)
        return {'optimizer': opt,'lr_scheduler':scheduler,'monitor':'val_loss'}
        
 
    def train_dataloader(self):
        return DataLoader(DataReader(self.combined,aug), batch_size = self.batch_size, 
                          num_workers=self.numworker,sampler=self.train_subsampler,pin_memory=True,shuffle=False)

    def training_step(self,batch,batch_idx):
        image,label=batch
        out = self(image)
        loss=self.criterion(out,label)
        acc=self.acc(out,label)
        return {'loss':loss,'acc':acc}

    def training_epoch_end(self, outputs):
        loss=torch.stack([x["loss"] for x in outputs]).mean().detach().cpu().numpy().round(2)
        acc=torch.stack([x["acc"] for x in outputs]).mean().detach().cpu().numpy().round(2)
        self.trainacc.append(acc)
        self.trainloss.append(loss)
        print('training loss accuracy ',self.current_epoch,loss, acc)
        self.log('train_loss', loss)
        self.log('train_acc', acc)
        
    def val_dataloader(self):
        ds=DataLoader(DataReader(self.combined,aug), batch_size = self.batch_size,
                      num_workers=self.numworker,pin_memory=True,sampler=self.test_subsampler, shuffle=False)
        return ds

    def validation_step(self,batch,batch_idx):
        image,label=batch
        out=self(image)
        loss=self.criterion(out,label)
        acc=self.acc(out,label)
        return {'loss':loss,'acc':acc}

    def validation_epoch_end(self, outputs):
        loss=torch.stack([x["loss"] for x in outputs]).mean().detach().cpu().numpy().round(2)
        acc=torch.stack([x["acc"] for x in outputs]).mean().detach().cpu().numpy().round(2)
        self.valacc.append(acc)
        self.valloss.append(loss)
        print('validation loss accuracy ',self.current_epoch,loss, acc)
        self.log('val_loss', loss)
        self.log('val_acc', acc)

In [6]:
!ls ..

Binary	code  intro.odp  multiclass


In [7]:
train_split=torchvision.datasets.ImageFolder('../multiclass/train/')
val_split=torchvision.datasets.ImageFolder('../multiclass/validation/')
combined=ConcatDataset([train_split,val_split])
print(len(combined))

21390


In [8]:
from sklearn.model_selection import KFold
kfold = KFold(n_splits=5,random_state=21,shuffle=True)

In [9]:
for fold,(train_idx,val_idx) in enumerate(kfold.split(combined)):
    print('------------fold no---------{}----------------------'.format(fold))
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsampler = torch.utils.data.SubsetRandomSampler(val_idx)


    lr_monitor = LearningRateMonitor(logging_interval='epoch')

    model=OurModel(combined,train_subsampler,val_subsampler)
    
    trainer = Trainer(max_epochs=30,
                    deterministic=True,
                    gpus=-1,precision=16,
                    accumulate_grad_batches=4,
                    progress_bar_refresh_rate=0,
                    callbacks=[lr_monitor],
                    num_sanity_val_steps=0,
                    )
    #trainer.fit(model)
    #torch.save(model.state_dict(), '../multiclass/last_{}.pth'.format(fold))
    
    model.load_state_dict(torch.load('../multiclass/last_{}.pth'.format(fold)))
    trainer.validate(model)
    break

------------fold no---------0----------------------


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


validation loss accuracy  0 0.32 0.91
--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'val_acc': 0.9100000262260437, 'val_loss': 0.3199999928474426}
--------------------------------------------------------------------------------


In [11]:
# test leaderboard

```
"Accuracy_score": 0.9358333333333333,
"Sensitivity_score": 0.9358333333333334,
"Specificity_score": 0.9382374634526824
```