In [1]:
# Install the notebook's dependencies. torchxrayvision is installed straight from
# GitHub over HTTPS: the unauthenticated git:// protocol is no longer served by
# GitHub, so a git+git:// URL cannot be cloned.
# NOTE(review): versions are unpinned; the captured log below shows catalyst 20.4.1,
# consider pinning the versions this notebook was written against.
! pip install -U catalyst tensorflow albumentations timm git+https://github.com/mlmed/torchxrayvision.git#egg=torchxrayvision

Collecting catalyst
[?25l  Downloading https://files.pythonhosted.org/packages/32/cb/10860c15a9226728f14a5243e99e8399c6a5735b29f465bc91f3a2a29d34/catalyst-20.4.1-py2.py3-none-any.whl (326kB)
[K     |█                               | 10kB 25.6MB/s eta 0:00:01[K     |██                              | 20kB 33.2MB/s eta 0:00:01[K     |███                             | 30kB 33.6MB/s eta 0:00:01[K     |████                            | 40kB 22.0MB/s eta 0:00:01[K     |█████                           | 51kB 13.0MB/s eta 0:00:01[K     |██████                          | 61kB 12.4MB/s eta 0:00:01[K     |███████                         | 71kB 12.2MB/s eta 0:00:01[K     |████████                        | 81kB 13.0MB/s eta 0:00:01[K     |█████████                       | 92kB 12.8MB/s eta 0:00:01[K     |██████████                      | 102kB 12.5MB/s eta 0:00:01[K     |███████████                     | 112kB 12.5MB/s eta 0:00:01[K     |████████████                    | 122

In [2]:
import os
import sys
import random
import subprocess

import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

import torchxrayvision as xrv

ImportError: ignored

## Data from [Covid-19 chestxray dataset](https://github.com/ieee8023/covid-chestxray-dataset)

The dataset repository is updated regularly with new cases, so rerunning this notebook on a fresh clone should give better results.

In [0]:
def run_cmd(cmd, stderr=subprocess.STDOUT):
  """Run a shell command and return everything it printed as text.

  Args:
    cmd: Command line, as a single string, executed through the shell.
    stderr: Where the command's stderr goes. The default,
      ``subprocess.STDOUT``, merges it into the returned output; pass e.g.
      ``subprocess.DEVNULL`` to discard it.

  Returns:
    The captured output decoded as text.

  Raises:
    subprocess.CalledProcessError: If the command exits with a non-zero
      status. The return code, command and captured output are printed to
      ``sys.stderr`` before the exception propagates.
  """
  try:
    # With shell=True the command must be passed as a plain string; the
    # caller-supplied `stderr` is forwarded instead of being overridden.
    return subprocess.check_output(
      cmd,
      shell=True,
      stderr=stderr,
      universal_newlines=True,
    )
  except subprocess.CalledProcessError as e:
    print(f'ERROR {e.returncode}: {cmd}\n\t{e.output}', flush=True, file=sys.stderr)
    # Bare raise keeps the original traceback intact.
    raise

def clone_data(data_root):
  """Make sure the covid-chestxray dataset repository is present at `data_root`.

  If nothing exists at `data_root`, the repository is cloned there with
  `git clone`. If the path already exists it is left untouched, but it must
  be a directory; otherwise the assertion below fails.
  """
  repo_url = 'https://github.com/ieee8023/covid-chestxray-dataset.git'

  if not os.path.exists(data_root):
    print(
      'Cloning the covid chestxray dataset. It may take a while\n...\n',
      flush=True,
    )
    run_cmd(f'git clone {repo_url} {data_root}')
    return

  # Something is already there: it has to be a directory to be a usable checkout.
  assert os.path.isdir(data_root), \
    f'{data_root} should be cloned from {repo_url}'

In [4]:
# Local directory for the dataset checkout; clone_data only clones when this
# path does not exist yet, so re-running the cell is cheap.
data_root = "./data"
clone_data(data_root)

Cloning the covid chestxray dataset. It may take a while
...



In [5]:
from catalyst.dl import utils

# Build the dataset from the cloned repository's image folder and metadata CSV.
# No transform is applied here; augmentation is attached later via the loaders.
dataset = xrv.datasets.COVID19_Dataset(
    imgpath=f'{data_root}/images',
    csvpath=f'{data_root}/metadata.csv',
    transform=None,
)
print(f'Covid Chest x-ray stats dataset stats:\n{dataset}\n\n', flush=True)

# 80/20 train/validation split; the validation size is the remainder so the
# two lengths always add up to len(dataset).
n_train = int(0.8 * len(dataset))
n_valid = len(dataset) - n_train

# Seed before splitting so the random partition is repeatable
# (presumably set_global_seed also seeds torch's RNG — confirm against catalyst).
utils.set_global_seed(42)
train_data, valid_data = torch.utils.data.random_split(
    dataset, [n_train, n_valid])


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject

alchemy not available, to install alchemy, run `pip install alchemy-catalyst`.


NameError: ignored

In [6]:
# Sanity check: show the first training sample, the shapes of its 'PA' and 'lab'
# entries, and the sizes of both splits.
train_data[0], train_data[0]['PA'].shape, train_data[0]['lab'].shape, \
  len(train_data), len(valid_data)

NameError: ignored

### Augmentations with [Albumentations](https://github.com/albumentations-team/albumentations)

In [0]:
from catalyst.data import Augmentor
import albumentations as albu
from albumentations.pytorch import ToTensor

# Numeric border-mode codes passed to albumentations
# (presumably OpenCV's cv2.BORDER_CONSTANT / cv2.BORDER_REFLECT values — confirm).
BORDER_CONSTANT = 0
BORDER_REFLECT = 2
# Side length of the square patch fed to the network.
crop_size = 224
# Images are first resized/padded to a square 4x larger than the final crop.
scale_size = crop_size * 4


# Training pipeline: resize + pad to scale_size, take a random crop_size patch,
# apply geometric / compression augmentations, then convert to a tensor.
train_transforms = albu.Compose([
  albu.LongestMaxSize(max_size=scale_size),
  albu.PadIfNeeded(scale_size, scale_size, border_mode=BORDER_CONSTANT),
  albu.RandomCrop(crop_size, crop_size),
  albu.OneOf([
    # Random shifts, stretches and turns with a 50% probability
    # NOTE(review): inside OneOf the per-transform `p` values presumably act as
    # selection weights rather than independent probabilities — confirm.
    albu.ShiftScaleRotate( 
      shift_limit=0.1,
      scale_limit=0.1,
      rotate_limit=15,
      border_mode=BORDER_REFLECT,
      p=0.5
    ),
    albu.Flip(p=0.5),
    albu.RandomRotate90(p=0.5),     
  ]),
  albu.IAAPerspective(scale=(0.02, 0.05), p=0.3),
  albu.JpegCompression(quality_lower=80),
  ToTensor()
])

# Validation pipeline: same resize + pad, but a deterministic center crop and
# no random augmentation.
valid_transforms = albu.Compose([
  albu.LongestMaxSize(max_size=scale_size),
  albu.PadIfNeeded(scale_size, scale_size, border_mode=BORDER_CONSTANT),
  albu.CenterCrop(crop_size, crop_size),
  ToTensor()
])


# Takes an image from the input dictionary by the key `dict_key` 
# and performs `train_transforms` on it.
train_transforms_fn = Augmentor(
    dict_key="PA",
    # due to dataset structure from https://github.com/ieee8023/covid-chestxray-dataset
    # we need some indices tricks:
    # x[0] takes the first entry along the leading axis and [:, :, None] appends a
    # trailing axis (assumes x is (1, H, W), giving (H, W, 1) — TODO confirm).
    augment_fn=lambda x: train_transforms(image=x[0][:, :, None])["image"]
)


# Similarly for the validation part of the dataset. 
# we only perform scaling and center crop
valid_transforms_fn = Augmentor(
    dict_key="PA",
    augment_fn=lambda x: valid_transforms(image=x[0][:, :, None])["image"]
)

### PyTorch Loaders

In [0]:
batch_size = 8
num_workers = 4
# Materialize both splits into plain lists so every sample is read from the
# underlying dataset once, up front, rather than on each access.
train_data = list(train_data)
valid_data = list(valid_data)

# Training loader: shuffled, with the random augmentation pipeline applied to
# each sample dict. The last incomplete batch is dropped so every training
# step sees a full batch.
train_loader = utils.get_loader(
  train_data,
  open_fn=lambda x: x,  # samples are already loaded dicts; nothing to open
  dict_transform=train_transforms_fn,
  batch_size=batch_size,
  num_workers=num_workers,
  shuffle=True,
  drop_last=True,
)

# Validation loader: fixed order, deterministic transforms.
valid_loader = utils.get_loader(
  valid_data,
  open_fn=lambda x: x,
  dict_transform=valid_transforms_fn,
  batch_size=batch_size,
  num_workers=num_workers,
  shuffle=False,
  # Keep the final partial batch: dropping it would silently exclude up to
  # batch_size - 1 validation samples from every metric on a small dataset.
  drop_last=False,
)

# based on https://github.com/ieee8023/covid-chestxray-dataset#view-current-images-and-metadata
# Human-readable names for the 9 label classes; handed to AUCCallback in the
# training cells, where the monitored metric is "auc/class_COVID-19".
# NOTE(review): the order presumably has to match the column order of the
# dataset's 'lab' vector — confirm against the dataset's pathology list.
class_names = [
    'ARDS', 
    'Bacterial Pneumonia', 
    'COVID-19', 
    'MERS', 
    'No Finding', 
    'Pneumonia', 
    'SARS', 
    'Streptococcus', 
    'Viral Pneumonia'
]

## Monitoring with Tensorboard

In [0]:
# Stop any TensorBoard process left from a previous run and delete old logs so
# the dashboard below starts from a clean ./logs directory.
! pkill -9 tensorboard
! rm -rf ./logs
%load_ext tensorboard
%reload_ext tensorboard
# NOTE(review): only ./logs is wiped and watched; the baseline training cell
# logs to ./logs_normal/, which this dashboard will not display — confirm intent.
%tensorboard --logdir ./logs

## Training with [Catalyst](https://github.com/catalyst-team/catalyst)

In [0]:
import warnings
# Suppress all Python warnings from here on to keep the training output readable.
# NOTE(review): this also hides deprecation and numerical warnings from the
# libraries used below; consider narrowing the filter.
warnings.simplefilter("ignore")

## Metric learning models and Loss function

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from timm import create_model


class DenseCrossEntropy(nn.Module):
    """Cross-entropy between logits and dense (one-hot or soft) target rows."""

    def forward(self, x, target):
        """Return the batch mean of ``sum(-log_softmax(x) * target)`` over the last axis.

        Both inputs are cast to float before the computation; `target` is
        expected to have the same shape as `x`.
        """
        log_probs = F.log_softmax(x.float(), dim=-1)
        per_sample = torch.sum(-log_probs * target.float(), dim=-1)
        return torch.mean(per_sample)


class ArcFaceLoss(nn.modules.Module):
    """Additive angular margin (ArcFace-style) loss on cosine logits.

    Args:
        s: Scale applied to the margin-adjusted cosines before the
            cross-entropy.
        m: Angular margin (radians) added to the target-class angle.
    """

    def __init__(self, s=30.0, m=0.5):
        super().__init__()
        self.crit = DenseCrossEntropy()
        self.s = s
        # Precomputed terms of cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m).
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Below this cosine, theta + m would exceed pi; the linear fallback
        # `cosine - mm` is used there instead.
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, logits, labels):
        """Compute the loss.

        Args:
            logits: Cosine similarities per class (as produced by
                ArcMarginProduct), one row per sample.
            labels: Dense target rows with the same shape as `logits`
                (used as multiplicative masks, e.g. one-hot).

        Returns:
            Scalar tensor: half of the dense cross-entropy of the scaled,
            margin-adjusted cosines.
        """
        cosine = logits.float()
        # Float rounding can push |cosine| marginally above 1, which makes
        # 1 - cosine^2 negative and sqrt() return NaN; clamp the radicand.
        sine = torch.sqrt(torch.clamp(1.0 - torch.pow(cosine, 2), min=0.0, max=1.0))
        phi = cosine * self.cos_m - sine * self.sin_m
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        # Apply the margin only where the label mask is set.
        output = (labels * phi) + ((1.0 - labels) * cosine)
        output *= self.s
        loss = self.crit(output, labels)
        return loss / 2


class ArcMarginProduct(nn.Module):
    """Linear layer without bias whose outputs are cosine similarities.

    Holds one weight row per class (`out_features` x `in_features`); the
    forward pass L2-normalizes both the inputs and the weight rows before the
    matrix product.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill the weights uniformly in [-1/sqrt(in_features), 1/sqrt(in_features)]."""
        fan_in = self.weight.shape[1]
        bound = 1. / math.sqrt(fan_in)
        nn.init.uniform_(self.weight, -bound, bound)

    def forward(self, features):
        """Return the cosine between each feature row and each weight row."""
        unit_features = F.normalize(features)
        unit_centers = F.normalize(self.weight)
        return F.linear(unit_features, unit_centers)


class TIMMetricLearningMModels(nn.Module):
    """timm backbone with an embedding neck, a linear head and a cosine (ArcFace) head.

    The forward pass returns plain classification logits, cosine logits for
    the metric-learning loss, and the ArcFaceLoss module itself so that the
    criterion can apply it.
    """

    def __init__(self, model_name, num_classes):
        super().__init__()
        self.model = create_model(
            model_name=model_name,
            pretrained=True,
            num_classes=num_classes,
            in_chans=3,
        )

        embedding_size = 512
        backbone_width = self.model.num_features

        # Pooled backbone features -> 512-d embedding.
        neck_layers = [
            nn.BatchNorm1d(backbone_width),
            nn.Linear(backbone_width, embedding_size, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(embedding_size),
        ]
        self.neck = nn.Sequential(*neck_layers)
        self.arc_margin_product = ArcMarginProduct(embedding_size, num_classes)
        self.arc_loss = ArcFaceLoss()
        self.head = nn.Linear(embedding_size, num_classes)

    def _set_backbone_trainable(self, trainable):
        """Set `requires_grad` on every backbone parameter (neck and heads untouched)."""
        for weight in self.model.parameters():
            weight.requires_grad = trainable

    def freeze(self):
        """Stop gradient updates for the backbone."""
        self._set_backbone_trainable(False)

    def unfreeze(self):
        """Re-enable gradient updates for the backbone."""
        self._set_backbone_trainable(True)

    def embed(self, x):
        """Map an image batch to embeddings: backbone features, global average pool, neck."""
        feature_map = self.model.forward_features(x)
        pooled = F.adaptive_avg_pool2d(feature_map, (1, 1))
        flat = pooled.view(pooled.size(0), -1)
        return self.neck(flat)

    def metric_classify(self, embedding):
        """Cosine logits of the embedding against the class weight rows."""
        return self.arc_margin_product(embedding)

    def classify(self, embedding):
        """Ordinary linear classification logits."""
        return self.head(embedding)

    def forward(self, x):
        """Return ``(logits, logits_ml, arc_loss_module)`` for a batch.

        The input is tiled 3x along dim 1 to match the backbone's `in_chans=3`
        (assumes a single-channel (N, 1, H, W) batch — TODO confirm).
        """
        rgb_like = x.repeat(1, 3, 1, 1)
        embedding = self.embed(rgb_like)
        logits = self.classify(embedding)
        logits_ml = self.metric_classify(embedding)
        return logits, logits_ml, self.arc_loss

class MetricLearningLoss(nn.Module):
    """Weighted sum of a binary focal classification loss and a metric-learning loss."""

    def __init__(self, ratio=0.5):
        """
        :param ratio: weight of the classification loss; the metric-learning
            loss is weighted by ``1 - ratio``
        """
        super().__init__()
        self.ratio = ratio
        self.classification_loss_fn = FocalLossBinary()

    def forward(self, logits, logits_ml, loss_fn, lab):
        """
        :param logits: classification logits, scored with the focal loss
        :param logits_ml: metric-learning logits, scored with ``loss_fn``
        :param loss_fn: callable ``loss_fn(logits_ml, lab)`` returning the
            metric-learning loss
        :param lab: target labels, shared by both loss terms
        :return: ``ratio * classification + (1 - ratio) * metric`` loss
        """
        focal_term = self.classification_loss_fn(logits, lab)
        metric_term = loss_fn(logits_ml, lab)
        return self.ratio * focal_term + (1 - self.ratio) * metric_term

## Normal transfer learning model

In [0]:
class TIMMModels(nn.Module):
    """Plain transfer-learning classifier: a pretrained timm model used end to end."""

    def __init__(self, model_name, num_classes):
        super().__init__()
        self.model = create_model(
            model_name=model_name,
            pretrained=True,
            num_classes=num_classes,
            in_chans=3,
        )

    def freeze(self):
        """Disable gradients for every parameter of the wrapped model."""
        for weight in self.model.parameters():
            weight.requires_grad_(False)

    def unfreeze(self):
        """Enable gradients for every parameter of the wrapped model."""
        for weight in self.model.parameters():
            weight.requires_grad_(True)

    def forward(self, x):
        """Tile the input 3x along dim 1 (to match `in_chans=3`) and run the model."""
        rgb_like = x.repeat(1, 3, 1, 1)
        return self.model(rgb_like)

In [0]:
import torch
from catalyst.dl import SupervisedRunner, CriterionCallback, AUCCallback
from catalyst.contrib.nn import FocalLossBinary

# experiment setup
logdir = "./logs"
num_epochs = 20
# Must equal the number of entries in class_names (9).
num_classes = 9

# data
loaders = {"train": train_loader, "valid": valid_loader}

# model, criterion, optimizer, scheduler
model = TIMMetricLearningMModels(model_name="resnet34", num_classes=num_classes)
# MetricLearningLoss instantiates FocalLossBinary, which is imported at the top
# of this cell, so this cell must run after the class definition cell.
criterion = MetricLearningLoss()
optimizer = torch.optim.Adam(model.parameters())
# Learning rate is multiplied by 0.3 at epochs 8 and 16.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[8, 16], gamma=0.3)

# model runner
# "PA" is the image key of each batch dict; the three output keys name, in order,
# the tuple returned by TIMMetricLearningMModels.forward.
runner = SupervisedRunner(
  input_key = "PA",
  output_key = ["logits", "logits_ml", "loss_fn"],
  input_target_key = ["lab"],
)

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=False,
    callbacks=[
        # Feeds the listed outputs plus the "lab" target to the criterion
        # (presumably matched to MetricLearningLoss.forward's parameters by
        # name — confirm against the catalyst version in use).
        CriterionCallback(
            input_key=["lab"],
            output_key=["logits", "logits_ml", "loss_fn"],
        ),
        # Per-class AUC computed from the plain classification logits.
        AUCCallback(
            input_key="lab",
            output_key="logits",
            prefix="auc",
            num_classes=num_classes,
            class_names=class_names,
        )
    ],
    # let's maximize AUC for COVID-19 prediction
    main_metric="auc/class_COVID-19",
    # AUC needs to be maximized.
    minimize_metric=False,
)

In [0]:
import torch
from catalyst.dl import SupervisedRunner, CriterionCallback, AUCCallback
from catalyst.contrib.nn import FocalLossBinary

# experiment setup
# NOTE(review): this run logs to ./logs_normal/, outside the ./logs directory that
# the TensorBoard cell watches — confirm that is intended.
logdir = "./logs_normal/"
num_epochs = 20
# Must equal the number of entries in class_names (9).
num_classes = 9

# data
loaders = {"train": train_loader, "valid": valid_loader}

# model, criterion, optimizer, scheduler
# model = xrv.models.DenseNet(num_classes=num_classes)
# Baseline: plain transfer learning with the same backbone as the metric-learning run.
model = TIMMModels(model_name="resnet34", num_classes=num_classes)
criterion = FocalLossBinary()
optimizer = torch.optim.Adam(model.parameters())
# Learning rate is multiplied by 0.3 at epochs 8 and 16.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[8, 16], gamma=0.3)

# model runner
# "PA" is the image key and "lab" the target key of each batch dict; the model's
# single output is stored under "logits".
runner = SupervisedRunner(
  input_key = "PA",
  output_key = "logits",
  input_target_key = "lab",
)

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=False,
    callbacks=[
        # Loss: criterion applied to the "logits" output and the "lab" target.
        CriterionCallback(
            input_key="lab",
            output_key="logits",
        ),
        # Per-class AUC computed from the same logits.
        AUCCallback(
            input_key="lab",
            output_key="logits",
            prefix="auc",
            num_classes=num_classes,
            class_names=class_names,
        )
    ],
    # let's maximize AUC for COVID-19 prediction
    main_metric="auc/class_COVID-19",
    # AUC needs to be maximized.
    minimize_metric=False,
)

In [7]:
#mostafa samy
# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import cv2
import os


numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192


can't resolve package from __spec__ or __package__, falling back on __name__ and __path__



In [0]:
# Absolute path of the image directory inside the dataset checkout, derived
# from `data_root` rather than a hard-coded Colab path. With the working
# directory at /content this still resolves to '/content/data/images'.
# NOTE(review): this rebinds `dataset`, which earlier held the torchxrayvision
# dataset object; cells above must be re-run before using that object again.
dataset = os.path.abspath(os.path.join(data_root, "images"))

In [9]:
# Echo the configured image directory path.
dataset

'/content/data/images'