# Preparations

## Data Mounting

In [1]:
# Detect the runtime: the project-local `library_check` module is importable
# on a local checkout; when the import fails we treat the runtime as Colab
# and mount Google Drive.

try:
    import library_check  # local runtime
    colaboratory = False
except ImportError:
    # colab runtime: keep the name bound so later cells can re-import it
    library_check = None
    from google.colab import drive  # NOQA
    drive.mount('/content/drive')  # NOQA
    colaboratory = True

## Runtime Check

In [2]:
# Report the platform and interpreter this notebook is running on.

import platform
import sys

print(f"OS version: \t\t{platform.platform()}")
print(f"Python version:\t\t{sys.version}")

OS version: 		macOS-11.5.2-arm64-arm-64bit
Python version:		3.8.10 | packaged by conda-forge | (default, May 11 2021, 06:27:18) 
[Clang 11.1.0 ]


## Library Installation

In [3]:
# Install required libraries

if colaboratory:  # colab runtime
    # !pip install pydicom albumentations==0.4.6 efficientnet_pytorch effdet
    # !pip install mxnet-cu101==1.7.0 d2l==0.16.6
    !git clone https://github.com/kdha0727/lung-opacity-and-covid-chest-x-ray-detection/
    %cd lung-opacity-and-covid-chest-x-ray-detection
    !pip install -r requirements.txt
    import library_check
    library_check.check()
    import data_prep_utils
    root = "/content/drive/Shareddrives/2021 하계 SAT/"
    data_prep_utils.set_root(root)
else:  # local runtime
    library_check.check()
    import data_prep_utils
data_prep_utils.init()

All required libraries are installed.


In [4]:
# After all installation, import all libraries used.

import inspect
import random
import pydicom as dcm
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import seaborn as sns

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torch
import torchvision
import torchsummary

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2


from timm.models.efficientnet import tf_efficientnet_b4
from effdet import get_efficientdet_config, DetBenchTrain
from effdet.efficientdet import HeadNet, BiFpn, _init_weight

from skimage import io, transform

In [5]:
# And, import custom-defined Lazy Data Wrappers and Utilities

from data_prep_utils import covid_19_radiography_dataset
from data_prep_utils import rsna_pneumonia_detection_challenge

from data_prep_utils.dataset import pil_loader, dicom_loader
import train_utils

# Data Analysis and Processing

* Note: All preprocessing steps are modularized in the "Data Wrapper" package.

**Class Information**
* Normal: 0
* Lung Opacity: 1
* COVID-19: 2
* Viral Pneumonia: 3

In [6]:
# Print the class-name -> index mapping, one (name, index) pair per line.
print(
    '\n\t'.join(
        ['Labels: ['] + [str(pair) for pair in covid_19_radiography_dataset.class_to_idx.items()]
    ),
    '\n]',
)


Labels: [
	('Normal', 0)
	('Lung_Opacity', 1)
	('COVID', 2)
	('Viral Pneumonia', 3) 
]


# Modeling via PyTorch


In [7]:
# Dataset Class Source Code
# Print the source of the two dataset classes defined in
# `data_prep_utils.dataset`, for reference inside the notebook.

print(inspect.getsource(data_prep_utils.dataset.ImageWithPandas))
print(inspect.getsource(data_prep_utils.dataset.ImageFolder))

class ImageWithPandas(VisionDataset):
    """A generic data loader where the image path and label is given as pandas DataFrame.

    Args:
        dataframe (pandas.DataFrame): A data table that contains image path, target class,
            and extra outputs.
        label_id (string): Data frame`s image path label string.
        label_target (string): Data frame`s target class label string.
        label_extras (tuple[string] or string, optional): Data frame`s label that will
            be used for extra outputs.
        root (string, optional): Root directory path. Use unless data frame`s column
            contains file folders.
        extension (string, optional): An extension that will be concatenated after
            image file name. Use unless data frame`s column contains extension.
        class_to_idx (dict[str, int], optional): A mapping table that converts class
            label string into integer value. If not given, sorted index value will
            be used as cla

In [8]:
# torchvision pipeline handed to the classification datasets:
# random 256-pixel crop, random horizontal flip, tensor conversion, then
# normalisation with a single mean/std value.
data_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.RandomResizedCrop(size=256),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.45], std=[0.225]),
    ]
)

In [9]:

def get_train_transforms():
    """Build the albumentations pipeline used for detection training.

    The pipeline applies random brightness/contrast, random horizontal and
    vertical flips, a resize to 256x256 and conversion to a tensor. Bounding
    boxes are declared in ``pascal_voc`` format with their classes supplied
    through the ``labels`` field.

    Returns:
        A.Compose: The composed, bbox-aware transform.
    """
    # Previously tried and currently disabled: RandomSizedCrop, ToGray, Cutout.
    augmentations = [
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Resize(height=256, width=256, p=1),
        ToTensorV2(p=1.0),
    ]
    box_spec = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(augmentations, p=1.0, bbox_params=box_spec)

def get_valid_transforms():
    """Build the albumentations pipeline used for validation.

    Only a resize to 256x256 and tensor conversion are applied (no random
    augmentation). Bounding boxes are declared in ``pascal_voc`` format with
    their classes supplied through the ``labels`` field.

    Returns:
        A.Compose: The composed, bbox-aware transform.
    """
    steps = [
        A.Resize(height=256, width=256, p=1.0),
        ToTensorV2(p=1.0),
    ]
    box_spec = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(steps, p=1.0, bbox_params=box_spec)

In [10]:
# Classification datasets: both sources receive the same torchvision
# `data_transform`.
classification_dataset_1 = rsna_pneumonia_detection_challenge.torch_classification_dataset(data_transform)

classification_dataset_2 = covid_19_radiography_dataset.torch_classification_dataset(data_transform)

# Detection dataset, built with the albumentations training pipeline.
detection_dataset = rsna_pneumonia_detection_challenge.torch_detection_dataset(get_train_transforms())


# Single dataset that chains the two classification sources back to back.
cls_dset = torch.utils.data.ConcatDataset([
    classification_dataset_1,
    classification_dataset_2
])

In [11]:
# Build the data loaders from the datasets.

batch_size = 64  # used by every classification loader below

# Classification: the combined dataset first, then one loader per source.
train_loader_cls = torch.utils.data.DataLoader(
    cls_dset, batch_size=batch_size, shuffle=True, num_workers=2,
)
train_loader_cls_1 = torch.utils.data.DataLoader(
    classification_dataset_1, batch_size=batch_size, shuffle=True, num_workers=2,
)
train_loader_cls_2 = torch.utils.data.DataLoader(
    classification_dataset_2, batch_size=batch_size, shuffle=True, num_workers=2,
)

# Detection: small batches; `pin_memory` is left at its default.
train_loader_det_1 = torch.utils.data.DataLoader(
    detection_dataset, batch_size=2, shuffle=True, num_workers=2,
)

# Here the loader options go straight to the project helper.
# NOTE(review): presumably it returns a loader when given these kwargs - confirm
# in data_prep_utils. `pin_memory` is again left at its default.
train_loader_det_1_prime = rsna_pneumonia_detection_challenge.torch_detection_dataset(
    get_train_transforms(), batch_size=4, shuffle=True, num_workers=2,
)

# Only the detection loader is active; train_loader_cls_1 / train_loader_cls_2
# are currently left out of training.
train_loaders = [train_loader_det_1_prime]

## Design Model Architecture
* Base Model: EfficientNet

In [12]:
from efficientnet_pytorch import EfficientNet
from efficientnet_pytorch.utils import get_model_params

# Shared Feature Extractor
# `feature_extractor_depth` selects the 'efficientnet-b{N}' variant by name.
feature_extractor_depth = 4
# FIXME -> from pretrained
# NOTE(review): `feature_extracor_model_name` is misspelled ("extracor"); the
# name is kept as-is because it is a notebook-level variable.
feature_extracor_model_name = f'efficientnet-b{feature_extractor_depth}'
feature_extractor = EfficientNet.from_name(feature_extracor_model_name, include_top=False, image_size=256)
# Private efficientnet_pytorch helper; presumably switches the stem to
# 1 input channel (grayscale) - TODO confirm against the library version in use.
feature_extractor._change_in_channels(1)
# Expose the channel count of `_bn1` as `out_channels`, which `Classifier`
# reads to size its linear layer when no explicit `out_channels` is given.
feature_extractor.out_channels = feature_extractor._bn1.num_features

# # Get stem static or dynamic convolution depending on image size
# blocks_args, global_params = get_model_params(feature_extracor_model_name, {})
# global_params.image_size = 256
#
# # Stem
# in_channels = 3  # rgb
# out_channels = round_filters(32, global_params)  # number of output channels
# self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)


In [13]:
class Classifier(nn.Module):
    """Linear classification head on top of a feature-extractor backbone.

    Args:
        backbone (nn.Module): Module applied first to the input. Its output is
            flattened to ``(batch, features)`` before the linear layer, so
            ``features`` must equal ``out_channels``.
        num_classes (int): Number of output logits.
        out_channels (int, optional): Input width of the linear layer. When
            omitted (or falsy), ``backbone.out_channels`` is used.
        dropout_rate (float): Dropout probability applied to the backbone
            output. Defaults to 0.2.
    """

    def __init__(self, backbone, num_classes, out_channels=None, dropout_rate=0.2):
        super().__init__()
        out_channels = out_channels or backbone.out_channels
        self.feature_extractor = backbone
        self.dropout = nn.Dropout(dropout_rate)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(out_channels, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for input ``x``."""
        x = self.feature_extractor(x)
        x = self.dropout(x)
        x = self.flatten(x)
        # No shape printing here: forward runs once per batch, so a debug
        # print would flood the training output.
        x = self.fc(x)
        return x


# model = Classifier(feature_extractor, 4)

from models import get_full_network

# Build the full network from the project's `models` module and cast it to
# float32. The pretrained-backbone flag follows the `colaboratory` flag, i.e.
# it is only True on the Colab runtime.
model = get_full_network(
    pretrained_backbone=colaboratory  # FIXME: remove this line
).float()

# Training hyper-parameters.
num_epochs = 3

lr = 0.0002

# AdamW over every parameter of the model.
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

# Bare expression: shows the model repr as the cell output.
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): EfficientNet(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
    )
    (_bn0): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          48, 48, kernel_size=(3, 3), stride=[1, 1], groups=48, bias=False
          (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
        )
        (_bn1): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          48, 12, kernel_size=(1, 1), stride=(1, 1)
          (static_padding):

In [14]:
from torch.utils.data import ConcatDataset

# Chain both classification datasets into a single dataset.
dset = ConcatDataset([classification_dataset_1, classification_dataset_2])


In [None]:
from train_utils import AdvancedFitter

# Train `model` with the project's AdvancedFitter for `num_epochs` epochs.
# `snapshot_dir` is passed as 'snapshots'; presumably that is where snapshots
# are written - confirm in train_utils.
# NOTE(review): `val_iter` receives the same loaders as `train_iter`, so any
# validation pass runs on the training data - confirm this is intended.
fitter = AdvancedFitter(
    model, optimizer,
    num_epochs,
    train_iter = train_loaders,
    val_iter = train_loaders,
    snapshot_dir = 'snapshots',
    verbose=True,
    timer=True,
    log_interval=1,
)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
fitter.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
# The fitter is used as a context manager around `run()`; what it sets up and
# tears down is defined in train_utils.
with fitter:
    fitter.run()


Advanced Fitter Initialized.

<Start Learning> 				Total 3 epochs

Epoch 1
[Train]	 Progress: 11/1505 (00.73%), 	Loss: 11.366343 

Exception ignored in: <Finalize object, dead>
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/python-dl/lib/python3.8/multiprocessing/util.py", line 213, in __call__
    try:
  File "/opt/homebrew/Caskroom/miniforge/base/envs/python-dl/lib/python3.8/site-packages/torch/utils/data/_utils/signal_handling.py", line 66, in handler
    _error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 3967) is killed by signal: Interrupt: 2. 
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x120730160>
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/python-dl/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/opt/homebrew/Caskroom/miniforge/base/envs/python-dl/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1297, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/opt/homebre

In [None]:
def get_next(loader=None, max_retries=10):
    """Fetch one batch, retrying when the loader raises ``RuntimeError``.

    A fresh iterator is created for every attempt. Retries are bounded so a
    persistent failure surfaces as the real ``RuntimeError`` instead of
    recursing until ``RecursionError``.

    Args:
        loader: Iterable to draw the batch from. Defaults to
            ``train_loaders[0]``.
        max_retries (int): Number of additional attempts after the first
            failure. Negative values are treated as 0.

    Returns:
        The first item produced by a fresh iterator over ``loader``.

    Raises:
        RuntimeError: The last error raised by the loader once all retries
            are used up.
    """
    if loader is None:
        loader = train_loaders[0]
    retries_left = max(int(max_retries), 0)
    while True:
        try:
            return next(iter(loader))
        except RuntimeError:  # keep drawing until no error occurs (bounded)
            if retries_left == 0:
                raise
            retries_left -= 1

    
# Pull one batch and print the shape of every entry in it.
nxt = get_next()
print({name: entry.shape for name, entry in nxt.items()})

In [None]:
# Draw one batch from the first training loader and display the shape of its
# 'image' entry as the cell output.
next(iter(train_loaders[0]))['image'].shape