In [None]:
# Install detectron2 and the packages it needs from local files under
# /kaggle/input rather than from a package index. The dependencies are
# installed first and the detectron2 wheel last.
!pip install /kaggle/input/detectron2/omegaconf-2.0.6-py3-none-any.whl
!pip install /kaggle/input/detectron2/iopath-0.1.8-py3-none-any.whl
!pip install /kaggle/input/detectron2/fvcore-0.1.3.post20210317/fvcore-0.1.3.post20210317/
!pip install /kaggle/input/detectron2/pycocotools-2.0.2/dist/pycocotools-2.0.2.tar
!pip install /kaggle/input/detectron2/detectron2-0.4cu110-cp37-cp37m-linux_x86_64.whl

In [None]:
import pandas as pd
import numpy as np
import os
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from tqdm import tqdm
from typing import Any, Dict, List
from PIL import Image

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.data import transforms as T
from detectron2.modeling import build_model

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import models, transforms

In [None]:
# Build the detector configuration: start from detectron2's defaults, merge
# the model-zoo Faster R-CNN (ResNet-101 + FPN, 3x schedule) config on top,
# then override the fields that differ for this notebook's checkpoint.
cfg = get_cfg()
config_name = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml" 
cfg.merge_from_file(model_zoo.get_config_file(config_name))
# NOTE(review): presumably matches the number of classes the checkpoint below
# was trained with -- confirm against the training notebook.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.WEIGHTS = '../input/covid-detectron2-training-resnet101/output/model_final.pth'
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set the testing threshold for this model

# The inference loop further down uses `predictor.model` directly.
predictor = DefaultPredictor(cfg)

In [None]:
def get_image_to_study_dict():
    """Map every image id found under the competition test folder to its study id.

    The image id is the file name with '.dcm' removed. The study id is the
    second-to-last '/'-separated component of the directory holding the file
    (assumes a test/<study>/<series>/<image>.dcm layout -- TODO confirm).

    Returns:
        dict: image id -> study id.
    """
    test_root = '../input/siim-covid19-detection/test'
    lookup = {}

    for folder, _, names in tqdm(os.walk(test_root)):
        # One dict update per directory; later duplicates overwrite earlier ones.
        lookup.update({
            name.replace('.dcm', ''): folder.split('/')[-2]
            for name in names
        })

    return lookup

def load_image_batch(test_dir, image_path_batch):
    """Load a batch of .npy images and package them as model input dicts.

    Each array is loaded with np.load, resized with T.Resize((800, 800)),
    cast to float32, given a leading axis and converted to a torch tensor.

    Args:
        test_dir: Directory containing the .npy files.
        image_path_batch: File names (relative to test_dir) to load.

    Returns:
        list of dicts with keys 'image' (tensor), 'height' and 'width'.
        The height/width are those of the array *before* resizing
        (presumably used by the model to rescale boxes -- verify).

    Raises:
        ValueError: if a loaded array is not two-dimensional (shape unpacking).
    """
    resize_steps = [T.Resize((800, 800))]
    batch = []

    for file_name in image_path_batch:
        raw = np.load(os.path.join(test_dir, file_name))
        height, width = raw.shape
        # apply_transform_gens also returns the applied transforms; not needed here.
        resized, _ = T.apply_transform_gens(resize_steps, raw)
        tensor = torch.from_numpy(np.expand_dims(resized.astype('float32'), axis=0))
        batch.append({'image': tensor, 'height': height, 'width': width})

    return batch

In [None]:
def format_outputs(outputs):
    """Turn per-image detector outputs into submission prediction strings.

    Args:
        outputs: Iterable of dicts, each with an 'instances' entry whose
            get_fields() exposes 'pred_boxes' (with a .tensor), 'scores'
            and 'pred_classes'.

    Returns:
        list of str, one per image. Every detection is written as
        'opacity <score> <xmin> <ymin> <xmax> <ymax>' (box coordinates cast to
        int32, predicted class not used in the text); an image with no
        detections yields 'none 1 0 0 1 1'.
    """
    formatted = []
    for result in outputs:
        fields = result['instances'].get_fields()
        boxes = fields['pred_boxes'].tensor.cpu().numpy()
        confidences = fields['scores'].cpu().numpy()
        labels = fields['pred_classes'].cpu().numpy()

        detections = []
        # labels take part in the zip only so that its length bounds the iteration.
        for box, score, _label in zip(boxes, confidences, labels):
            xmin, ymin, xmax, ymax = box.astype(np.int32)
            detections.append(f'opacity {score:.6f} {xmin} {ymin} {xmax} {ymax}')

        formatted.append(' '.join(detections) if detections else 'none 1 0 0 1 1')

    return formatted

# def record_scores(outputs, image_path_batch, image_to_study, study_scores):
#     for i, pred in enumerate(outputs):
#         instances = pred['instances']
#         fields: Dict[str, Any] = instances.get_fields()
#         scores = fields['scores'].cpu().numpy()
#         pred_classes = fields['pred_classes'].cpu().numpy()
        
#         image_id = image_path_batch[i][:-4]
#         study_id = image_to_study[image_id]
#         if study_id not in study_scores:
#             study_scores[study_id] = [[] for _ in range(3)]
        
#         for score, class_id in zip(scores, pred_classes):
#             study_scores[study_id][class_id].append(score)
            
# def format_study_scores(study_scores):
#     thing_classes = ["typical", "indeterminate", "atypical"]
#     study_preds = {'id': [], 'PredictionString': []}
#     for study_id, scores in study_scores.items():
#         predString = []
#         for i, score in enumerate(scores):
#             if len(score) != 0:
#                 sc = np.mean(score)
#                 class_name = thing_classes[i]
#                 s = f'{class_name} {sc:.6f} 0 0 1 1'
#                 predString.append(s)
        
#         if len(predString) == 0:
#             predString = 'negative 1 0 0 1 1'
#         else:
#             predString = ' '.join(predString)
        
#         study_preds['id'].append(f'{study_id}_study')
#         study_preds['PredictionString'].append(predString)
    
#     return study_preds

In [None]:
def add_private_entries(image_preds, study_preds, path='../input/siim-covid19-detection/test'):
    """Merge image and study predictions and add filler rows for unseen ids.

    Walks the test folder and, for every image or study id that has no entry
    in the supplied predictions, appends a default row ('none 1 0 0 1 1' for
    images, 'negative 1 0 0 1 1' for studies).

    Args:
        image_preds: dict with list-valued 'id' (ending in '_image') and
            'PredictionString'.
        study_preds: dict with list-valued 'id' (ending in '_study') and
            'PredictionString'.
        path: Root of the test folder to scan. The study id is taken as the
            second-to-last '/'-separated component of each file's directory.

    Returns:
        dict with 'id' and 'PredictionString' lists ordered as: image rows,
        study rows, filler image rows, filler study rows. Filler rows are
        sorted by id so the output is the same on every run.
    """
    known_images = {_id.replace('_image', '') for _id in image_preds['id']}
    known_studies = {_id.replace('_study', '') for _id in study_preds['id']}

    missing_studies = set()
    missing_images = set()
    for dirname, _, filenames in tqdm(os.walk(path)):
        if not filenames:
            continue
        # The study id depends only on the directory, so compute it once per folder.
        study_id = dirname.split('/')[-2]
        if study_id not in known_studies:
            missing_studies.add(study_id)
        for file in filenames:
            # splitext strips whatever extension is present instead of assuming 4 chars.
            image_id = os.path.splitext(file)[0]
            if image_id not in known_images:
                missing_images.add(image_id)

    # Sets iterate in an arbitrary order; sort so the filler rows are reproducible.
    filler_image_ids = [f'{image_id}_image' for image_id in sorted(missing_images)]
    filler_study_ids = [f'{study_id}_study' for study_id in sorted(missing_studies)]
    filler_image_strings = ['none 1 0 0 1 1'] * len(filler_image_ids)
    filler_study_strings = ['negative 1 0 0 1 1'] * len(filler_study_ids)

    return {
        'id': image_preds['id'] + study_preds['id'] + filler_image_ids + filler_study_ids,
        'PredictionString': (
            image_preds['PredictionString']
            + study_preds['PredictionString']
            + filler_image_strings
            + filler_study_strings
        ),
    }

In [None]:
# Inputs for the detector pass: the directory of preprocessed test arrays
# (loaded later with np.load), the list of files in it, the batch size, and
# the accumulator that collects one row per image.
test_dir = '../input/covid-detectron2-test-set-preprocessing/test-npy'
image_paths = os.listdir(test_dir)
batch_size = 4  # also reused as the DataLoader batch size for the classifier
image_preds = {'id': [], 'PredictionString': []}

In [None]:
# Run the detector over the test files in mini-batches and accumulate one
# (id, PredictionString) row per image in `image_preds`.
for i in tqdm(range(0, len(image_paths), batch_size)):
    # A slice that runs past the end of the list is simply truncated, so the
    # final (possibly shorter) batch needs no special case.
    image_path_batch = image_paths[i:i + batch_size]
    image_batch = load_image_batch(test_dir, image_path_batch)

    with torch.no_grad():
        outputs = predictor.model(image_batch)

    # Row id = file name minus its last four characters, plus '_image'.
    image_ids = [f'{p[:-4]}_image' for p in image_path_batch]
    predStrings = format_outputs(outputs)
    # record_scores(outputs, image_path_batch, image_to_study, study_scores)
    image_preds['id'] += image_ids
    image_preds['PredictionString'] += predStrings

# study_preds = format_study_scores(study_scores)

In [None]:
class Covid19Dataset(Dataset):
    """Dataset that serves .npy images as transformed 3-channel images.

    Args:
        root_dir: Directory containing the .npy files.
        image_paths: File names relative to root_dir.
        transform: Callable applied to the PIL image built from each array.
    """

    def __init__(self, root_dir, image_paths, transform):
        self.root_dir = root_dir
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        """Number of files in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return (transformed image, image id) for the file at position idx.

        The image id is the file name up to its first '.'.
        """
        file_name = self.image_paths[idx]
        pixels = np.load(os.path.join(self.root_dir, file_name))
        # Append a channel axis and copy the array three times along it.
        # NOTE(review): Image.fromarray presumably needs uint8 here -- confirm
        # the dtype written by the preprocessing notebook.
        stacked = np.repeat(np.expand_dims(pixels, -1), 3, axis=-1)
        transformed = self.transform(Image.fromarray(stacked))

        return transformed, file_name.split('.')[0]

In [None]:
def initialize_model(num_classes):
    """Build a ResNet-50 without pretrained weights and a fresh classifier head.

    Args:
        num_classes: Output size of the replacement fully connected layer.

    Returns:
        The model with `fc` swapped for nn.Linear(in_features, num_classes).
    """
    net = models.resnet50(pretrained=False)
    # Keep the backbone's feature width, change only the number of outputs.
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    return net

def record_scores(outputs, image_ids, image_to_study, study_scores):
    """Append per-image class probabilities to the per-study score lists.

    Args:
        outputs: Tensor of logits with one row per image.
        image_ids: Image ids aligned with the rows of `outputs`.
        image_to_study: dict mapping image id -> study id.
        study_scores: dict mutated in place; study id -> list with one list of
            probabilities per class (one value appended per image).

    Raises:
        KeyError: if an image id is missing from `image_to_study`.
    """

    def softmax(x):
        # Shift by the max logit before exponentiating: the result is
        # mathematically identical, but np.exp can no longer overflow to inf
        # (which would turn the probabilities into NaN).
        exps = np.exp(x - np.max(x))
        return exps / np.sum(exps, axis=0)

    logits = outputs.detach().cpu().numpy()
    for row, image_id in zip(logits, image_ids):
        probs = softmax(row)
        study_id = image_to_study[image_id]
        if study_id not in study_scores:
            # One bucket per class, sized from the model output itself.
            study_scores[study_id] = [[] for _ in range(len(probs))]
        for class_idx, prob in enumerate(probs):
            study_scores[study_id][class_idx].append(prob)
            
def format_study_scores(study_scores, threshold=0.5):
    """Convert accumulated per-study class scores into submission rows.

    Args:
        study_scores: dict of study id -> list of per-class score lists.
        threshold: A class is reported when its mean score exceeds this value.

    Returns:
        dict with 'id' ('<study>_study') and 'PredictionString' lists. Each
        string holds '<class> <mean score> 0 0 1 1' for every class above the
        threshold, or for the single highest-scoring class when none is.
    """
    thing_classes = ["negative", "typical", "indeterminate", "atypical"]
    row_ids, row_strings = [], []

    for study_id, per_class in study_scores.items():
        row_id = study_id + '_study'
        # Average each class's scores over all images of the study.
        scores = np.mean(per_class, axis=1)
        parts = [
            f'{thing_classes[i]} {score:.6f} 0 0 1 1'
            for i, score in enumerate(scores)
            if score > threshold
        ]

        if not parts:
            # Nothing cleared the threshold: fall back to the best class.
            idx = np.argmax(scores)
            parts = [f'{thing_classes[idx]} {scores[idx]:.6f} 0 0 1 1']

        row_ids.append(row_id)
        row_strings.append(' '.join(parts))

    return {'id': row_ids, 'PredictionString': row_strings}

In [None]:
# Classifier pass setup: device, image->study lookup, preprocessing, data
# loader and the ResNet-50 with its trained weights.
num_classes = 4
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
image_to_study = get_image_to_study_dict()
transform = transforms.Compose([
    transforms.Resize((800, 800)),
    transforms.ToTensor(),
])

test_dir = '../input/covid-detectron2-test-set-preprocessing/test-npy'
image_paths = os.listdir(test_dir)
test_set = Covid19Dataset(test_dir, image_paths, transform)
# Shuffling brings nothing at inference time and makes the order in which
# studies are recorded (and therefore the submission row order) vary per run.
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
model = initialize_model(num_classes)
# map_location lets a checkpoint saved from a GPU load on the CPU fallback
# that `device` already allows for.
model.load_state_dict(
    torch.load('../input/covid19-classification/resnet50.pt', map_location=device)
)
model = model.to(device)

In [None]:
# Classify every test image and pool the class probabilities per study.
model.eval()
study_scores = {}
# Inference only: disable autograd so no graph is built for each batch,
# matching how the detector loop is run.
with torch.no_grad():
    for images, image_ids in tqdm(test_loader):
        images = images.to(device)
        outputs = model(images)
        record_scores(outputs, image_ids, image_to_study, study_scores)
study_preds = format_study_scores(study_scores)

In [None]:
# Combine image-level and study-level rows (plus filler rows for ids found in
# the test folder but missing from the predictions) into one table.
preds = add_private_entries(image_preds, study_preds)
sub = pd.DataFrame(data=preds)
# Last expression of the cell: displays the submission frame.
sub

In [None]:
# Write the submission file; index=False keeps only the 'id' and
# 'PredictionString' columns.
sub.to_csv('submission.csv', index=False)