In [None]:
import pandas as pd
import os
import numpy as np 
import pandas as pd 
from datetime import datetime
import time
import random
from tqdm import tqdm_notebook as tqdm # progress bar
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import cv2
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from glob import glob
import numba
import re
from numba import jit
from PIL import Image
import gc
import warnings
warnings.filterwarnings('ignore')

# Data from this competition (SIIM COVID-19 Detection)

In [None]:
def get_bbox(row):
    """Parse ``row.label`` into a list of four-float boxes.

    The label is a space-separated string read in groups of six tokens.
    The first two tokens of every group are skipped and the remaining four
    are converted to floats and kept as one box. A trailing group with fewer
    than six tokens is discarded (its numeric tokens are still parsed).
    """
    tokens = row.label.split(' ')
    group_size = 6

    bboxes = []
    for start in range(0, len(tokens), group_size):
        # Tokens 2..5 of the group hold the four coordinates.
        coords = [float(tok) for tok in tokens[start + 2:start + group_size]]
        if len(coords) == 4:
            bboxes.append(coords)

    return bboxes
    

def scale_box(row, size=256):
    """Rescale the boxes of an 'opacity' row to a ``size`` x ``size`` image.

    Parameters
    ----------
    row : mapping with attribute access (e.g. a DataFrame row)
        Must provide ``class``, ``dim0``, ``dim1`` and ``xyxy``. x coordinates
        are scaled by ``size / dim1`` and y coordinates by ``size / dim0``.
    size : int, default 256
        Side length of the resized image the boxes should refer to.

    Returns
    -------
    list of [x_min, y_min, x_max, y_max] integer boxes, or None when the row's
    class is not 'opacity'.
    """
    if row['class'] != 'opacity':
        # Non-opacity rows carry no usable boxes; make the None result explicit.
        return None

    scale_x = size/row.dim1
    scale_y = size/row.dim0

    scaled_boxes = []
    for box in row.xyxy:
        # int() truncates toward zero after rounding to 4 decimals, so the
        # scaled coordinates are floored for non-negative values.
        x_min = int(np.round(box[0]*scale_x, 4))
        y_min = int(np.round(box[1]*scale_y, 4))
        x_max = int(np.round(box[2]*scale_x, 4))
        y_max = int(np.round(box[3]*scale_y, 4))
        scaled_boxes.append([x_min, y_min, x_max, y_max])

    return scaled_boxes

df = pd.read_csv('../input/siim-covid19-detection/train_image_level.csv')
# The class is the first token of the label string.
df['class'] = df['label'].map(lambda label: label.split(' ')[0])
# Dropping the last six characters of the id gives the image file stem.
df['filename'] = df['id'].map(lambda image_id: image_id[:-6])

# Per-image original dimensions of the resized PNGs.
meta = pd.read_csv('../input/siim-covid19-resized-to-256px-png/meta.csv')
meta.columns = ['filename', 'dim0', 'dim1', 'split']

df = df.merge(meta, on='filename', how='left')

# First parse the raw boxes, then rescale them (scale_box reads the parsed column).
df['xyxy'] = df.apply(get_bbox, axis=1)
df['xyxy'] = df.apply(scale_box, axis=1)
# df.drop(columns=['split'], inplace=True)

df.head(3)

In [None]:
# opacity: filename -> list of scaled boxes; none: filenames of every other row.
opacity = {}
none = []

for name, label_class, boxes in zip(df['filename'], df['class'], df['xyxy']):
    if label_class == 'opacity':
        opacity[name] = boxes
    else:
        none.append(name)

len(opacity), len(none)

# Data from RSNA Pneumonia Detection Challenge

In [None]:
old_competition_df = pd.read_csv('../input/rsna-pneumonia-detection-challenge/stage_2_train_labels.csv')


def extract_box(row):
    """Turn a row's (x, y, width, height) into an xyxy box scaled by 256/1024."""
    corners = [row['x'], row['y'], row['x']+row['width'], row['y']+row['height']]
    return [i*256/1024 for i in corners]


# Only rows with Target == 1 contribute boxes, so skip the rest up front.
positive_rows = old_competition_df[old_competition_df.Target == 1]

for index, row in positive_rows.iterrows():
    pid = row['patientId']
    # A patient can appear on several rows; collect all of their boxes.
    opacity.setdefault(pid, []).append(extract_box(row))

# Negative RSNA rows are deliberately not added ("want less negative samples").
# To include them, run for every row with Target != 1:
#     if none[-1] != pid:
#         none.append(pid)

len(opacity), len(none)

# Split

In [None]:
# Shallow copy so that adding entries here does not alter `opacity`.
data = opacity.copy()

# Negative samples are currently left out of the split. To include them,
# map each name to None (LungDataset treats None as "no boxes"):
# for name in none:
#     data[name] = None

# 80/20 split with a fixed seed; each part is converted back to a name -> boxes dict.
train, valid = (part.to_dict() for part in train_test_split(pd.Series(data), train_size=0.8, random_state=42))

len(train), len(valid)

# Dataset

In [None]:
class LungDataset(Dataset):
    """Detection dataset of chest X-ray PNGs with optional box annotations.

    Parameters
    ----------
    data : dict
        Maps an image name to its list of pascal_voc boxes
        ``[x_min, y_min, x_max, y_max]``, or to None for an image without boxes.
        Names containing '-' are read from the RSNA directory, every other
        name from the SIIM train directory.
    augment : bool, default True
        When True the random augmentations are applied before tensor
        conversion. Pass False (e.g. for validation data) to only convert the
        image to a tensor.

    Each item is ``(image, target)``: the image tensor divided by 255 and a
    dict with the keys ``boxes``, ``labels``, ``image_id``, ``area`` and
    ``iscrowd``.
    """

    def __init__(self, data, augment=True):
        super().__init__()
        # keys() and values() iterate in matching order and, unlike
        # zip(*data.items()), also work for an empty dict.
        self.all_names = tuple(data.keys())
        self.all_boxes = tuple(data.values())
        self.augment = augment
        # Build both pipelines once instead of on every __getitem__ call.
        self._box_transform = self._build_transform(with_boxes=True)
        self._plain_transform = self._build_transform(with_boxes=False)

    def _build_transform(self, with_boxes):
        """Compose the optional augmentations followed by tensor conversion."""
        steps = []
        if self.augment:
            steps.extend([
                A.HorizontalFlip(p=.5),
                A.RandomGamma(p=1),
                A.ShiftScaleRotate(rotate_limit=10, p=.5),
                A.Cutout(p=.3),
                A.RandomBrightness(p=.5),
            ])
        steps.append(ToTensorV2())
        if with_boxes:
            return A.Compose(steps, bbox_params=A.BboxParams(format='pascal_voc', label_fields=[]))
        return A.Compose(steps)

    @staticmethod
    def _build_target(bboxes, index):
        """Build the target dict from the boxes that survived the transform.

        All per-box fields are sized from ``bboxes`` itself, so they stay
        consistent when the transform returned fewer boxes than it was given,
        including none at all (which yields a (0, 4) ``boxes`` tensor).
        """
        boxes = np.asarray(bboxes, dtype=np.float64).reshape(-1, 4)

        assert np.all(boxes[:, 3] > boxes[:, 1]) and np.all(boxes[:, 2] > boxes[:, 0]), \
            'bounding box with non-positive width or height'

        count = len(boxes)
        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

        return {"boxes": torch.as_tensor(boxes, dtype=torch.float32),
                "labels": torch.ones(count, dtype=torch.int64),
                "image_id": torch.tensor([index]),
                "area": torch.as_tensor(area, dtype=torch.float32),
                "iscrowd": torch.zeros(count, dtype=torch.int64)}

    def __getitem__(self, index: int):
        name = self.all_names[index]
        boxes = self.all_boxes[index]

        if '-' in name:
            path = f'../input/rsna-256/{name}.png'
        else:
            path = f'../input/siim-covid19-resized-to-256px-png/train/{name}.png'

        img = cv2.imread(path, 0)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(f'could not read image: {path}')

        if boxes is not None:
            sample = self._box_transform(image=img, bboxes=boxes)
            target = self._build_target(sample['bboxes'], index)
        else:
            sample = self._plain_transform(image=img)
            target = self._build_target([], index)

        return sample['image']/255, target

    def __len__(self) -> int:
        return len(self.all_names)

# Show One Image Using Dataset

In [None]:
def plot_box(img, boxes, ax=None): # box format: xyxy
    """Draw ``img`` in grayscale with one red rectangle outline per box.

    ``boxes`` holds (x_min, y_min, x_max, y_max) entries. When ``ax`` is not
    given, the current matplotlib axes are used.
    """
    if ax is None:
        ax = plt.gca()

    for box in boxes:
        left, top = box[0], box[1]
        width = box[2] - left
        height = box[3] - top
        outline = patches.Rectangle((left, top), width, height,
                                    linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(outline)

    ax.imshow(img, cmap='gray')
    
train_dataset, valid_dataset = LungDataset(train), LungDataset(valid)

# Fetch one sample and draw its boxes on top of the image.
image, target = train_dataset[31]

# Drop the channel axis so the image can be shown as a 256x256 picture.
image = image.reshape(256, 256)
boxes = target['boxes'].tolist()

plot_box(image, boxes)

In [None]:
# Display the target dict that came with the sample fetched from train_dataset.
target

# Show Multiple Images Using DataLoader

In [None]:
def collate_fn(batch):
    """Transpose a list of (image, target) samples into (images, targets) tuples."""
    return tuple(zip(*batch))


def _make_data_loader(dataset, batch_size, shuffle, num_workers):
    """Shared DataLoader construction for the train and valid helpers."""
    return DataLoader(
        dataset,
        batch_size = batch_size,
        shuffle = shuffle,
        num_workers = num_workers,
        collate_fn = collate_fn
    )


def get_train_data_loader(train_dataset, batch_size=16, num_workers=4, shuffle=True):
    """Return a DataLoader over ``train_dataset`` that batches with ``collate_fn``.

    ``num_workers`` and ``shuffle`` default to the values that used to be
    hard-coded (4 worker processes, shuffling enabled).
    """
    return _make_data_loader(train_dataset, batch_size, shuffle, num_workers)


def get_valid_data_loader(valid_dataset, batch_size=16, num_workers=4, shuffle=True):
    """Return a DataLoader over ``valid_dataset`` that batches with ``collate_fn``.

    Shuffling stays enabled by default to keep the previous behaviour; pass
    ``shuffle=False`` for a fixed iteration order.
    """
    return _make_data_loader(valid_dataset, batch_size, shuffle, num_workers)

# One batch of 16 fills the 4x4 preview grid drawn from `data` further down.
train_data_loader, valid_data_loader = (
    get_train_data_loader(train_dataset, batch_size=16),
    get_valid_data_loader(valid_dataset, batch_size=16),
)
# Iterator over validation batches, consumed with next(data).
data = iter(valid_data_loader)

In [None]:
images, targets = next(data)

fig, ax = plt.subplots(figsize=(20, 20), nrows=4, ncols=4)

# ax.flat walks the 4x4 grid row by row, i.e. panel i is ax[i // 4][i % 4].
for i, panel in enumerate(ax.flat):
    image = images[i].reshape(256, 256)
    boxes = targets[i]['boxes'].cpu().numpy().astype(np.int32)

    plot_box(image, boxes, panel)

plt.savefig('lungs.png')

# Train

In [None]:
%%capture
# Clone torchvision and copy its detection reference scripts next to the
# notebook; the training cell imports train_one_epoch/evaluate from engine.py.
# %%capture keeps the output of these commands out of the notebook.
# NOTE(review): the clone and the pip install are unpinned, so the copied
# helpers follow whatever is current upstream — confirm they match the
# installed torchvision version.
!git clone https://github.com/pytorch/vision.git
!cp vision/references/detection/utils.py .
!cp vision/references/detection/transforms.py .
!cp vision/references/detection/coco_eval.py .
!cp vision/references/detection/engine.py .
!cp vision/references/detection/coco_utils.py .
# presumably provides pycocotools for the copied coco_* helpers — TODO confirm
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

In [None]:
num_classes = 2 # opacity + none

# Start from the pretrained detector and swap its box predictor for a
# num_classes-way head with the same input width.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
box_head = model.roi_heads.box_predictor
in_features = box_head.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# model.load_state_dict(torch.load('../input/siim-packages/weight/epoch4.pth'))

# Use the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Optimise only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.Adam(params)

In [None]:
# engine.py only exists after the setup cell has copied it, so the import stays here.
from engine import train_one_epoch, evaluate

num_epochs = 2

for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)

    # Save a checkpoint after every epoch, before evaluating on the validation loader.
    checkpoint_path = f'epoch{epoch}.pth'
    torch.save(model.state_dict(), checkpoint_path)

    evaluate(model, valid_data_loader, device=device)