In [None]:
%%capture
# Install two packages from files attached under /kaggle/input; %%capture hides the installer output.
# GDCM conda package (presumably needed by pydicom to decode compressed pixel data - TODO confirm).
!conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
# ensemble_boxes wheel, installed with --no-index/--no-deps so pip does not look anything up online.
!pip install '/kaggle/input/ensembleboxes-106/ensemble_boxes-1.0.6-py3-none-any.whl' -f ./ --no-index --no-deps

In [None]:
import pydicom
import os
from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
from pydicom.pixel_data_handlers.util import apply_voi_lut
from fastai.vision.all import *
import albumentations as A
import cv2
from pathlib import Path
from joblib import Parallel, delayed
import shutil
from ensemble_boxes import *

In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm

In [None]:
def dicom2np(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixels as a uint8 array scaled to 0-255.

    Adapted from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: Location of the DICOM file, handed to ``pydicom.dcmread``.
        voi_lut: If True, pass the raw pixels through ``apply_voi_lut``.
        fix_monochrome: If True, invert images whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        ``np.uint8`` array shaped like the pixel data, min-max scaled to
        0-255. An image holding a single constant value is returned as all
        zeros.
    """
    # `read_file` is a deprecated alias of `dcmread`.
    dicom = pydicom.dcmread(path)

    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing by the peak would be 0/0 (NaN) and the
        # uint8 cast of NaN is undefined, so return black explicitly.
        return np.zeros(np.shape(data), dtype=np.uint8)

    data = data / peak
    return (data * 255).astype(np.uint8)

def resize(img, max_size, keep_ratio=True, interpolation=cv2.INTER_LANCZOS4):
    """Resize an image with a single albumentations transform.

    Args:
        img: Image array passed to the transform as ``image=``.
        max_size: Target length of the longest side (``keep_ratio=True``) or
            of both sides (``keep_ratio=False``).
        keep_ratio: Pick ``A.LongestMaxSize`` when True, ``A.Resize`` to a
            ``max_size`` x ``max_size`` square when False.
        interpolation: OpenCV interpolation flag forwarded to the transform.

    Returns:
        The dict returned by the albumentations pipeline; the resized image
        is stored under the ``'image'`` key.
    """
    if keep_ratio:
        step = A.LongestMaxSize(max_size = max_size, interpolation = interpolation)
    else:
        step = A.Resize(height = max_size, width = max_size, interpolation = interpolation)

    pipeline = A.Compose([step])
    return pipeline(image=img)

def process_item(path):
    """Convert one DICOM file to a JPEG whose longest side is 1024 px.

    The image is written to ``test/<stem>.jpg``, relative to the current
    working directory.

    Args:
        path: ``pathlib.Path`` (or path string) of the DICOM file.

    Raises:
        IOError: If OpenCV reports that the JPEG could not be written.
    """
    tfmd = resize(dicom2np(path), 1024)

    # Only the file name ends up in the output path, so derive it directly
    # from the input instead of slicing the parent directories first.
    out_file = 'test/' + Path(path).with_suffix('.jpg').name

    # cv2.imwrite reports failure by returning False rather than raising;
    # surface it so a missing image cannot go unnoticed downstream.
    if not cv2.imwrite(out_file, tfmd['image']):
        raise IOError(f'Could not write {out_file}')

In [None]:
# True when the sample submission has exactly 2477 rows (presumably the
# publicly visible test set - TODO confirm); used below to process only a
# small subset of the files.
fast_sub = len(pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')) == 2477

In [None]:
# List every test DICOM once; the fast path keeps only the first 32 files.
dicom_files = get_files('../input/siim-covid19-detection/test', extensions=['.dcm', '.dicom'])
if fast_sub:
    dicom_files = dicom_files[:32]

# Make sure the output folder exists, then convert the files with 4 workers.
os.makedirs('/kaggle/working/test', exist_ok=True)
o = Parallel(n_jobs=4)(delayed(process_item)(f) for f in tqdm(dicom_files))

In [None]:
# Lookup tables keyed by the DICOM file stem (the image id).
# img2path  -> the DICOM path itself
# img2study -> name of the directory two levels above the file
img2path = dict((dcm_file.stem, dcm_file) for dcm_file in dicom_files)
img2study = dict((dcm_file.stem, dcm_file.parent.parent.name) for dcm_file in dicom_files)

## Detection

In [None]:
# Copy the YOLOv5 source tree into the working directory (skipped when the
# copy already exists), then make it the current directory so the relative
# paths used later (detect.py, runs/detect/...) resolve inside it.
if not os.path.isdir('/kaggle/working/yolov5'):
    shutil.copytree('/kaggle/input/yolov5-official-v31-dataset/yolov5', '/kaggle/working/yolov5')
os.chdir('/kaggle/working/yolov5')

In [None]:
def read_results(file):
    """Parse one detection label file and attach the source image size.

    The file is read as rows of six numbers, ``label cx cy w h conf``. Boxes
    are converted from centre/size form to corner form ``x1 y1 x2 y2`` in the
    same units as the file. An empty file yields zero rows.

    Args:
        file: ``pathlib.Path`` of the ``.txt`` label file; its stem is the
            key looked up in ``img2path``.

    Returns:
        dict with keys ``ids`` (file stem), ``label``, ``conf``, ``bbox``
        (float32 arrays, one row per detection) and ``width`` / ``height``
        (``Columns`` / ``Rows`` of the DICOM header).
    """
    # img2path holds paths whose first component is replaced by '/kaggle'
    # here, giving an absolute path independent of the working directory.
    dicom_path = Path('/kaggle', *img2path[file.stem].parts[1:])

    with open(file, 'r') as f:
        # split() with no separator tolerates repeated or trailing
        # whitespace and returns [] for an empty file.
        tokens = f.read().split()
    data = np.array([float(tok) for tok in tokens], dtype=np.float32).reshape(-1, 6)

    bbox = data[:, 1:5]
    conf = data[:, 5]
    label = data[:, 0]

    # The header is enough for the image size; skip decoding the pixels.
    dicom = pydicom.filereader.dcmread(dicom_path, stop_before_pixels=True)
    width, height = dicom.Columns, dicom.Rows

    # centre -> top-left corner, then top-left + size -> bottom-right corner.
    bbox[:, [0, 1]] = bbox[:, [0, 1]] - bbox[:, [2, 3]] / 2
    bbox[:, [2, 3]] = bbox[:, [0, 1]] + bbox[:, [2, 3]]

    ids = file.stem

    return {'ids':ids, 'label':label, 'conf':conf, 'bbox':bbox, 'width': width, 'height':height}

In [None]:
%%capture

test_dir = '/kaggle/working/test'
#img_size = 640

weights_list = [
    [' '.join([
    '/kaggle/input/siimcovid19-models/yolov5s_10_640_BL1-CV0.pt',
    '/kaggle/input/siimcovid19-models/yolov5s_10_640_BL1-CV1.pt',
    '/kaggle/input/siimcovid19-models/yolov5s_10_640_BL1-CV2.pt',
    '/kaggle/input/siimcovid19-models/yolov5s_10_640_BL1-CV3.pt']), 640, 0.001, 0.5],
    [' '.join([
    '/kaggle/input/siimcovid19modelsv2/yolov5m_30_768_BL2-CV0.pth',
    '/kaggle/input/siimcovid19modelsv2/yolov5m_30_768_BL2-CV1.pth',
    '/kaggle/input/siimcovid19modelsv2/yolov5m_30_768_BL2-CV2.pth',   
    '/kaggle/input/siimcovid19modelsv2/yolov5m_30_768_BL2-CV3.pth']), 786, 0.001, 0.5],   
]

results = []

for weights, img_size, conf, iou in weights_list:
    !python detect.py --weights $weights\
    --img $img_size\
    --augment\
    --conf $conf\
    --iou $iou\
    --source $test_dir\
    --device 0\
    --save-txt --save-conf --exist-ok

    txt_files = get_files('runs/detect/exp/labels', extensions=['.txt'])
    results.append([read_results(txt_file) for txt_file in txt_files])
    shutil.rmtree('/kaggle/working/yolov5/runs/detect/exp/labels')
    
results = list(zip(*results))

In [None]:
def fuse_and_scale(result):
    """Fuse one image's boxes from several detector runs and scale them.

    Args:
        result: Non-empty sequence of dicts with the keys ``ids``, ``label``,
            ``conf``, ``bbox``, ``width`` and ``height`` (the layout returned
            by ``read_results``). All entries are expected to describe the
            same image; id and size are taken from the first one.

    Returns:
        dict with keys ``id`` (image id + ``'_image'``), ``scores``,
        ``bboxes`` (fused boxes multiplied by ``[width, height, width,
        height]``) and ``labels``.
    """
    # Read each field by key so the function does not depend on the order in
    # which the dict keys were inserted.
    bboxes = [entry['bbox'] for entry in result]
    scores = [entry['conf'] for entry in result]
    labels = [entry['label'] for entry in result]
    ref = result[0]
    width, height = ref['width'], ref['height']

    iou_thr = 0.45
    skip_box_thr = 0.0001
    weights = [1] * len(result)  # every run contributes equally

    bboxes, scores, labels = weighted_boxes_fusion(
        bboxes, scores, labels,
        weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    bboxes = bboxes * np.array([[width, height, width, height]])

    return {'id':ref['ids']+'_image', 'scores':scores, 'bboxes':bboxes, 'labels':labels}

In [None]:
%%capture
fused_results = pd.DataFrame([fuse_and_scale(r) for r in results])
#fused_results = results[0]

## Classification

In [None]:
def get_dls(df, presize, size, bs):
    """Build fastai dataloaders for the multi-label study classifier.

    Args:
        df: DataFrame with a ``path`` column (image files) and a ``label``
            column (list of labels per row).
        presize: Size used by the item-level ``Resize`` (``method='squish'``).
        size: Size passed to ``aug_transforms`` for the batch transforms.
        bs: Batch size.

    Returns:
        The ``DataLoaders`` created by ``DataBlock.dataloaders`` with 4 workers.
    """
    blocks = (ImageBlock(cls=PILImageBW), MultiCategoryBlock)
    splitter = RandomSplitter()
    getters = [ColReader('path'), ColReader('label')]
    item_tfms = Resize(presize, method='squish')
    # Augmentations first, normalisation last.
    batch_tfms = [*aug_transforms(size = size, mult=0.5), Normalize.from_stats(mean=0.53, std=0.23)]

    covid19 = DataBlock(
        blocks=blocks,
        splitter=splitter,
        getters=getters,
        item_tfms=item_tfms,
        batch_tfms=batch_tfms,
    )
    return covid19.dataloaders(df, bs = bs, workers = 4)

In [None]:
# Table of the converted test images with their image-level and study-level ids.
img_files = get_image_files('/kaggle/working/test')
df = pd.DataFrame({'path': img_files})
df['ImageUID'] = df['path'].map(lambda img: img.stem + '_image')
df['StudyUID'] = df['path'].map(lambda img: img2study[img.stem] + '_study')
# Every row gets the full list of labels.
df['label'] = [["negative", "typical", "indeterminate", "atypical"]] * len(df)
df.head()

In [None]:
# Build dataloaders once and show a batch of the test images as a visual check.
dls = get_dls(df, presize = 768, size = 384, bs = 64)
test_dl = dls.test_dl(img_files)
test_dl.show_batch()

In [None]:
class MultiHeadModel(Module):
    """Shared body with two heads; the forward pass returns only ``head4``.

    ``head1`` is stored on the module but is not called in ``forward``.
    """
    def __init__(self, body, head1, head4):
        self.body, self.head1, self.head4 = body, head1, head4

    def forward(self, x):
        return self.head4(self.body(x))

In [None]:
class TransformerModel(Module):
    """Wrap a model and return all of its output columns except the last."""
    def __init__(self, model):
        self.model = model

    def forward(self, x):
        return self.model(x)[:, :-1]
    
class TransformerModelAux(Module):
    """Wrap a model and return only its last output column (kept 2-D)."""
    def __init__(self, model):
        self.model = model

    def forward(self, x):
        return self.model(x)[:, -1:]

In [None]:
# One row per checkpoint: [weights path, timm architecture, cut, presize, size, batch size].
# Three architectures x four CV folds, all sharing the same settings.
model_metadata = [
    [f'/kaggle/input/siimcovid19modelsv2/{arch_name}-CV{fold}.pth', arch_name, 1, 512, 384, 32]
    for arch_name in (
        'deit_base_patch16_384',
        'swin_base_patch4_window12_384',
        'twins_pcpvt_base',
    )
    for fold in range(4)
]

In [None]:
%%capture

# Run every checkpoint listed in model_metadata over the test images with
# test-time augmentation and average the resulting prediction tensors.
preds_acc = []

for weights, arch, cut, presize, size, bs in model_metadata:
    
    # Fresh dataloaders per checkpoint, since presize/size/bs come from the row.
    dls = get_dls(df, presize, size, bs)
    test_dl = dls.test_dl(img_files)
    
    if cut < 0:
        # Truncated timm body plus two fastai heads. Every row of
        # model_metadata visible in this notebook has cut == 1, so this
        # branch is not taken with the current list.
        body = timm.create_model(arch, pretrained=False, num_classes = 0, in_chans = 1)
        body = nn.Sequential(*list(body.children())[:cut])
        head1 = create_head(num_features_model(body), 1, concat_pool=True)
        head4 = create_head(num_features_model(body), 4, concat_pool=True)
        model = MultiHeadModel(body, head1, head4)
    
    else:
        # Plain timm model with 5 outputs and single-channel input.
        model = timm.create_model(arch, pretrained=False, num_classes = 5, in_chans = 1)
    
    # Without CUDA the checkpoint tensors are remapped to the CPU while loading.
    if torch.cuda.is_available():
        model.load_state_dict(torch.load(weights))
    else:
        model.load_state_dict(torch.load(weights, map_location=torch.device('cpu')))
        
    learn = Learner(dls, model, loss_func = BCEWithLogitsLossFlat())
    # tta returns a tuple; element 0 holds the predictions.
    preds = learn.tta(dl = test_dl)[0]
    preds_acc += [preds]
    
    
# Average over checkpoints (dim 0 of the stacked tensor).
preds_acc = torch.stack(preds_acc).mean(dim=0)
# Earlier variant, kept for reference: split into the first 4 columns and the rest.
# preds_acc, preds_acc_aux = torch.split(preds_acc, 4, dim=1)
# First four columns are kept as the class scores. The auxiliary score is
# column 3, i.e. the last of those four, not column 4.
# NOTE(review): confirm that reusing column 3 (instead of the 5th output) is intended.
preds_acc, preds_acc_aux = preds_acc[:,:4], preds_acc[:,3:4]
# Summary means; not displayed because of %%capture.
preds_acc.mean(dim = 0), preds_acc_aux.mean()

In [None]:
# Column names given to the four class-score columns, in column order.
# NOTE(review): presumably this must match the class order used when the
# checkpoints were trained - confirm.
vocab = [
    "atypical",
    "indeterminate",
    "typical",
    "negative",
]

In [None]:
# One row per image: the four class columns followed by the auxiliary column.
# The tensor is converted to a NumPy array explicitly so the frame gets plain
# numeric columns instead of depending on how pandas ingests a torch tensor.
image_scores = torch.cat([preds_acc, preds_acc_aux], dim = 1).detach().cpu().numpy()
preds_df = pd.DataFrame(image_scores, columns = vocab + ['is_none'])
preds_df['id'] = df['StudyUID']
# Average the image-level rows belonging to the same study.
preds_df = preds_df.groupby('id').agg('mean').reset_index()
# Correlate the score columns only: 'id' holds strings, and DataFrame.corr
# raises on non-numeric columns from pandas 2.0 on.
preds_df.drop(columns = 'id').corr()

In [None]:
# Keep only the study-level class columns. errors='ignore' lets this cell be
# re-run after the column has already been removed.
preds_df = preds_df.drop(columns = 'is_none', errors = 'ignore')

In [None]:
# Image-level auxiliary score, one row per image in the order of `df`.
# Converted to a NumPy array explicitly so the column is a plain float column
# rather than whatever pandas infers from a torch tensor.
preds_df_aux = pd.DataFrame({
    'id': df['ImageUID'],
    'is_none': preds_acc_aux.flatten().detach().cpu().numpy()
})

## Building the prediction strings

### Study level prediction

In [None]:
# Study rows: "<class> <score> 0 0 1 1" for every class column, joined by spaces.
class_names = list(preds_df.columns)[1:]          # everything after the leading 'id' column
score_rows = preds_df.drop('id', axis = 1).values
prediction_string = [
    ' '.join(f'{name} {prob:.6f} 0 0 1 1' for name, prob in zip(class_names, row))
    for row in score_rows
]
submission_study = (
    pd.DataFrame({'id': preds_df['id'], 'PredictionString': prediction_string})
    .sort_values('id')
    .reset_index(drop=True)
)
submission_study.head()

### Image level prediction

In [None]:
# Image rows start with a "none <score> 0 0 1 1" entry built from the auxiliary score.
submission_dummy = preds_df_aux[['id']].copy()
submission_dummy['PredictionString'] = preds_df_aux['is_none'].map(
    lambda none_score: f'none {none_score:.6f} 0 0 1 1'
)
submission_dummy.head()

In [None]:
def _opacity_string(row):
    """Format one image's fused boxes as space-joined 'opacity <score> <box>' groups.

    Each score is multiplied by (1 - is_none) and the box coordinates are rounded.
    """
    pieces = []
    for score, bbox, is_none in zip(row.scores, row.bboxes, row.is_none):
        coords = ' '.join(map(str, map(round, bbox)))
        pieces.append(f"opacity {(score*(1-is_none)):.6f} {coords}")
    return ' '.join(pieces)

# Attach the image-level auxiliary score to every fused detection row.
fused_results = fused_results.merge(preds_df_aux, on = 'id')
# Repeat the score once per box so it can be zipped with scores and boxes.
fused_results['is_none'] = fused_results.apply(lambda row: [row['is_none']] * len(row.labels), axis = 1)
fused_results['PredictionString'] = fused_results.apply(_opacity_string, axis = 1)

In [None]:
submission_image = fused_results[['id', 'PredictionString']]

# Append each image's box string to its "none ..." string. The box string is
# looked up by image id, so the pairing depends neither on the row order of a
# merge result nor on positional indexing into a row. Images without any
# boxes get an empty suffix.
boxes_by_id = submission_image.set_index('id')['PredictionString']
submission_dummy['PredictionString'] = (
    submission_dummy['PredictionString']
    + ' '
    + submission_dummy['id'].map(boxes_by_id).fillna('')
)

### Final prediction

In [None]:
# Study-level rows first, image-level rows after; the CSV is written without the index.
submission = pd.concat([submission_study, submission_dummy], axis = 0)
submission.to_csv('/kaggle/working/submission.csv', index = False)
submission

In [None]:
# Remove the scratch directories so only submission.csv is left in the working dir.
for scratch_dir in ('/kaggle/working/yolov5', '/kaggle/working/test'):
    shutil.rmtree(scratch_dir)