In [None]:
import pandas as pd
import numpy as np
import cv2
import os

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision
from torchvision.ops import box_iou, MultiScaleRoIAlign
from torchvision import models

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl
from pytorch_lightning.metrics import AveragePrecision

# Global seed used for every random number generator Lightning knows about.
SEED = 2484
# Prefer the GPU, but fall back to the CPU so cells that move tensors to
# DEVICE still run on a machine without CUDA.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Top-level Lightning helper; same function as pl.utilities.seed.seed_everything.
pl.seed_everything(SEED)

In [None]:
class VBDDataset(Dataset):
    """Detection dataset that yields one PNG image per unique ``image_id``.

    Args:
        dataframe: table with an ``image_id`` column; outside the test phase it
            must also contain ``class_id``, ``x_min``, ``y_min``, ``x_max`` and
            ``y_max`` columns (one row per annotated box).
        image_dir: directory containing ``<image_id>.png`` files.
        transforms: optional callable invoked with keyword arguments
            (``image`` and, outside the test phase, ``bboxes`` and ``labels``)
            that returns a dict with the same keys.
        phase: ``'test'`` returns ``(image, image_id)``; any other value
            returns ``(image, target)``.
    """

    _BOX_COLUMNS = ['x_min', 'y_min', 'x_max', 'y_max']

    def __init__(self, dataframe, image_dir, transforms=None, phase='train'):
        super().__init__()

        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
        self.phase = phase

    def _load_image(self, image_id):
        """Read ``<image_dir>/<image_id>.png`` as float32 RGB scaled to [0, 1]."""
        path = f'{self.image_dir}/{image_id}.png'
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None; fail with a clear
            # message instead of a cryptic error inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

    def __getitem__(self, idx):
        """Return ``(image, image_id)`` in the test phase, else ``(image, target)``.

        ``target`` is a dict with ``boxes`` (float32, shape ``(N, 4)``),
        ``labels`` (int64, shape ``(N,)``), ``area`` (float32, ``(N,)``),
        ``image_id`` (tensor holding ``idx``) and ``iscrowd`` (int64 zeros).

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_id = self.image_ids[idx]
        image = self._load_image(image_id)

        if self.phase == 'test':
            if self.transforms:
                image = self.transforms(image=image)['image']
            return image, image_id

        # Annotation rows are only needed outside the test phase.
        records = self.df[self.df['image_id'] == image_id]
        boxes = records[self._BOX_COLUMNS].values
        # All labels are shifted by 1 to accommodate the background class (0).
        labels = records['class_id'].values.astype(np.int64) + 1

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']
            # Take boxes AND labels from the transform output so they stay
            # aligned even if the transform drops or reorders boxes.
            boxes = sample['bboxes']
            labels = sample['labels']

        # Always hand back tensors with a fixed dtype and rank: (N, 4) boxes and
        # (N,) labels, including the N == 1 and N == 0 cases.
        boxes = torch.as_tensor(np.asarray(boxes, dtype=np.float32)).reshape(-1, 4)
        labels = torch.as_tensor(np.asarray(labels, dtype=np.int64)).reshape(-1)

        target = {
            'boxes': boxes,
            'labels': labels,
            # Area is derived from the final boxes so it matches what is returned.
            'area': (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            'image_id': torch.tensor([idx]),
            'iscrowd': torch.zeros((boxes.shape[0],), dtype=torch.int64),
        }
        return image, target

    def __len__(self):
        """Number of unique image ids in the dataframe."""
        return len(self.image_ids)

In [None]:
def get_train_transform():
    """Build the training augmentation pipeline.

    Applies a random flip with probability 0.5 and converts the image to a
    tensor. Boxes are expected in ``pascal_voc`` format with their classes
    passed under the ``labels`` key.
    """
    return A.Compose(
        [
            # The probability must be passed by keyword: the first positional
            # parameter of the transform is `always_apply`, so `A.Flip(0.5)`
            # flipped every single image.
            A.Flip(p=0.5),
            ToTensorV2(p=1.0),
        ],
        bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']),
    )

def get_valid_transform():
    """Build the inference pipeline: tensor conversion only, no augmentation."""
    # No Normalize step here: VBDDataset already divides pixel values by 255.
    pipeline = [ToTensorV2(p=1.0)]
    return A.Compose(pipeline)

def collate_fn(batch):
    """Transpose a list of samples into a tuple of per-field tuples.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``,
    leaving the individual items unstacked.
    """
    columns = zip(*batch)
    return tuple(columns)

def format_prediction_string(labels, boxes, scores):
    """Serialise detections as ``label score x0 y0 x1 y1`` groups joined by spaces.

    The score is printed with four decimals; the first four entries of each
    box are printed as-is. Returns an empty string when there are no detections.
    """
    template = "{0} {1:.4f} {2} {3} {4} {5}"
    return " ".join(
        template.format(label, score, box[0], box[1], box[2], box[3])
        for label, score, box in zip(labels, scores, boxes)
    )

In [None]:
class VinDetector(pl.LightningModule):
    """Lightning wrapper around torchvision's Faster R-CNN (ResNet-50 FPN).

    Keyword Args (all optional; defaults reproduce the original hard-coded values):
        num_classes: size of the box-predictor head, default 15. The training
            labels are ``class_id + 1`` with ``class_id == 14`` rows dropped,
            so index 0 is left for the background.
        learning_rate: SGD learning rate, default ``1e-3``.
        batch_size: training batch size, default 4.
        num_workers: DataLoader worker processes, default 4.
        train_csv: path of the training annotation CSV.
        train_image_dir: directory holding the training PNG images.

    Unknown keyword arguments are ignored, as before.
    """

    def __init__(self, **kwargs):
        super().__init__()

        # Previously every keyword argument was silently discarded; honour the
        # documented ones so e.g. VinDetector(learning_rate=1e-4) takes effect.
        num_classes = kwargs.get('num_classes', 15)
        self.learning_rate = kwargs.get('learning_rate', 1e-3)
        self.batch_size = kwargs.get('batch_size', 4)
        self.num_workers = kwargs.get('num_workers', 4)
        self.train_csv = kwargs.get(
            'train_csv',
            '../input/vinbigdata-chest-xray-abnormalities-detection/train.csv',
        )
        self.train_image_dir = kwargs.get(
            'train_image_dir',
            '../input/vinbigdata-chest-xray-original-png/train',
        )

        self.model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        # Swap the pretrained classification head for one sized to our classes.
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    def forward(self, x):
        """Delegate to the wrapped detection model."""
        return self.model(x)

    def prepare_data(self):
        """Load the annotation CSV and build the training dataset.

        Rows with ``class_id == 14`` are removed before the dataset is built.
        """
        # NOTE(review): Lightning's docs advise against assigning state in
        # prepare_data for multi-process training; fine for the single-GPU
        # runs in this notebook, consider `setup()` otherwise.
        df = pd.read_csv(self.train_csv)
        df = df[df['class_id'] != 14].reset_index(drop=True)
        self.train_dataset = VBDDataset(df, self.train_image_dir, get_train_transform())

    def train_dataloader(self):
        """Shuffled training loader that keeps samples unstacked via ``collate_fn``."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            pin_memory=True,
            num_workers=self.num_workers,
            collate_fn=collate_fn,
        )

    def training_step(self, batch, batch_idx):
        """Sum the model's loss components, log the total and return it."""
        images, targets = batch
        # Shallow-copy each target dict so the batch's own dicts are not shared
        # with the model call.
        targets = [dict(t) for t in targets]
        loss_dict = self.model(images, targets)
        loss = sum(loss_dict.values())
        self.log('Loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        return {"loss": loss}

    def configure_optimizers(self):
        """Nesterov SGD with a cosine-annealing learning-rate schedule."""
        optimizer = torch.optim.SGD(
            self.model.parameters(),
            lr=self.learning_rate,
            momentum=0.95,
            weight_decay=1e-5,
            nesterov=True,
        )
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=6, eta_min=0, verbose=True
        )
        return [optimizer], [scheduler]

In [None]:
# First training run: build the detector and fit it for 5 epochs on one GPU.
net = VinDetector()
trainer = pl.Trainer(
    max_epochs=5,
    gpus=1,
    progress_bar_refresh_rate=100,
)
trainer.fit(net)

In [None]:
# Mark every parameter of the wrapped model as trainable before the second run.
for param in net.model.parameters():
    param.requires_grad = True

# Second training run: 10 more epochs with 16-bit precision on a fresh Trainer.
trainer = pl.Trainer(
    max_epochs=10,
    gpus=1,
    progress_bar_refresh_rate=100,
    precision=16,
)
trainer.fit(net)

In [None]:
# The sample submission supplies the image ids to predict on.
df = pd.read_csv('../input/vinbigdata-chest-xray-abnormalities-detection/sample_submission.csv')
test_dataset = VBDDataset(
    df,
    '../input/vinbigdata-chest-xray-original-png/test',
    get_valid_transform(),
    phase='test',
)
# Keep the submission order: no shuffling at inference time.
test_data_loader = DataLoader(
    test_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
)

In [None]:
# Minimum score a detection needs to be written to the submission.
detection_threshold = 0.5
results = []
net.model.to(DEVICE)
net.model.eval()

with torch.no_grad():
    for images, image_ids in test_data_loader:
        images = [img.to(DEVICE) for img in images]
        outputs = net.model(images)
        for image_id, output in zip(image_ids, outputs):
            pred_boxes = output['boxes'].data.cpu().numpy()
            pred_labels = output['labels'].data.cpu().numpy()
            pred_scores = output['scores'].data.cpu().numpy()

            # Undo the +1 label shift applied for training; a model label of 0
            # (would become -1) is mapped to class 14.
            pred_labels = pred_labels - 1
            pred_labels[pred_labels == -1] = 14

            keep = pred_scores >= detection_threshold
            if keep.any():
                prediction_string = format_prediction_string(
                    pred_labels[keep],
                    pred_boxes[keep].astype(np.int32),
                    pred_scores[keep],
                )
            else:
                # Fallback row used when nothing passes the threshold.
                prediction_string = '14 1.0 0 0 1 1'

            results.append({
                'image_id': image_id,
                'PredictionString': prediction_string,
            })

In [None]:
# Collect the per-image predictions into the two-column submission table.
test_df = pd.DataFrame(
    data=results,
    columns=['image_id', 'PredictionString'],
)
test_df.head(5)

In [None]:
# Write the submission file without the DataFrame index column.
test_df.to_csv(path_or_buf='./fasterrcnn.csv', index=False)