In [None]:
#https://www.kaggle.com/richardepstein/load-gdcm-in-notebook-without-internet
# Install GDCM from a conda package shipped as a Kaggle input dataset; the
# `--offline` flag keeps conda from trying to reach the network.
# NOTE(review): presumably GDCM is needed so pydicom can decode compressed
# DICOM pixel data - confirm against the competition files.
# Copy the archive into the working directory.
!cp ../input/gdcm-conda-install/gdcm.tar .
# Unpack it; the install line below expects a ./gdcm/ directory.
!tar -xvzf gdcm.tar
# The package file name carries a `py37` build tag (see the interpreter
# version recorded in the string below).
!conda install --offline ./gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2


'''
> import sys
> sys.version
'3.7.10 | packaged by conda-forge | (default, Feb 19 2021, 16:07:37) \n[GCC 9.3.0]'

'''

In [None]:
# Root of the competition data; the test DICOM files are globbed from
# data_dir + '/test/' further down.
data_dir  = '/kaggle/input/siim-covid19-detection'
# Root of the trained checkpoints; the study and image model paths are built
# from it in the model-loading cell.
model_dir = '../input/finalsiimcovid2021/trained_model'



# common ---
import numpy as np
import pandas as pd
import glob
import sys
import cv2

from timeit import default_timer as timer

import collections
from collections import defaultdict


# pytorch ---
import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torch.utils.data.sampler import *

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.parallel.data_parallel import data_parallel

from torch.nn.utils.rnn import *


#-- siim ------
import pydicom 
from pydicom.pixel_data_handlers.util import apply_voi_lut


#-- other ------
#https://www.kaggle.com/kozodoi/local-installation-for-timm-pytorch-image-models
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm
print('timm', timm.__version__)

#https://www.kaggle.com/shonenkov/wbf-approach-for-ensemble
sys.path.append('../input/weightedboxesfusion')
from ensemble_boxes import *

#https://www.kaggle.com/shonenkov/omegaconf
sys.path.append('../input/omegaconf')
import omegaconf


#-- helper -- 
def time_to_str(t, mode='min'):
    """Format an elapsed time given in seconds as a short string.

    Args:
        t: elapsed time in seconds (truncated with ``int`` first).
        mode: 'min' gives '%2d hr %02d min', 'sec' gives '%2d min %02d sec'.

    Raises:
        NotImplementedError: for any other ``mode``.
    """
    if mode not in ('min', 'sec'):
        raise NotImplementedError

    if mode == 'sec':
        minutes, seconds = divmod(int(t), 60)
        return '%2d min %02d sec' % (minutes, seconds)

    # mode == 'min': whole seconds -> (fractional) minutes -> hours + minutes.
    # '%d' truncates the fractional part when formatting.
    hours, minutes = divmod(int(t) / 60, 60)
    return '%2d hr %02d min' % (hours, minutes)

In [None]:
#dataset.py

def dicom_to_image(dicom, voi_lut=True, fix_monochrome=True):
    """Convert a DICOM dataset into an 8-bit grayscale numpy image.

    Args:
        dicom: object exposing ``pixel_array`` and ``PhotometricInterpretation``
            (a pydicom dataset in this notebook).
        voi_lut: if True, pass the pixels through ``apply_voi_lut`` first.
        fix_monochrome: if True and the photometric interpretation is
            "MONOCHROME1", invert the intensities (``max - data``).

    Returns:
        ``np.uint8`` array with the same shape as the pixel data, min-max
        rescaled to 0..255. A constant image is returned as all zeros.
    """
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.max(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    else:
        # Constant image: after the min shift everything is 0. Dividing by the
        # (zero) maximum would give NaN and an undefined uint8 cast, so emit a
        # black image explicitly instead.
        data = np.zeros(data.shape, dtype=np.float64)
    data = (data * 255).astype(np.uint8)
    return data


 
# Side length (pixels) every image is resized to before inference.
dcm_size = 640

# Every file ending in 'dcm' below <data_dir>/test, at any depth.
dcm_file = glob.glob(data_dir + '/test/**/*dcm', recursive=True)
df_meta = pd.DataFrame({'dcm_file': dcm_file})

# image id: file name without '.dcm', plus the '_image' suffix.
df_meta['image_id'] = df_meta['dcm_file'].map(
    lambda path: path.rsplit('/', 1)[-1].replace('.dcm', '') + '_image'
)
# study id: third-from-last path component, plus the '_study' suffix.
df_meta['study_id'] = df_meta['dcm_file'].map(
    lambda path: path.rsplit('/', 3)[-3].replace('.dcm', '') + '_study'
)

# Placeholders; the real sizes are written by SiimDataset.__getitem__.
df_meta.loc[:, 'width' ] = 0
df_meta.loc[:, 'height'] = 0



class SiimDataset(Dataset):
    """Dataset that reads the test DICOM files listed in a metadata frame.

    Args:
        df: DataFrame with at least a ``dcm_file`` column. It is kept by
            reference (not copied): ``__getitem__`` writes the original image
            ``width`` / ``height`` back into it so that later code can scale
            normalised boxes to pixel coordinates.

    Each item is a dict with:
        'index': the positional index that was requested,
        'd':     the metadata row (re-read after width/height were stored),
        'image': 2-D uint8 array resized to (dcm_size, dcm_size).
    """

    def __init__(self, df=df_meta):
        super().__init__()
        self.df = df
        self.length = len(df)

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        d = self.df.iloc[index]

        # `dcmread` is the current name of the deprecated `read_file` alias.
        dicom = pydicom.dcmread(d.dcm_file)
        image = dicom_to_image(dicom, voi_lut=True)
        height, width = image.shape

        # `index` is positional (the row was read with iloc), so translate it
        # to the row label before writing with loc. Using loc[index] directly
        # would touch the wrong row - or append a new one - whenever the
        # frame's index is not exactly 0..n-1 (e.g. a filtered frame).
        label = self.df.index[index]
        self.df.loc[label, 'width' ] = width
        self.df.loc[label, 'height'] = height

        image = cv2.resize(image, dsize=(dcm_size, dcm_size), interpolation=cv2.INTER_AREA)

        #---
        d = self.df.iloc[index] #reload for width,height
        r = {
            'index': index,
            'd': d,
            'image': image,
        }
        return r


def null_collate(batch):
    """Collate a list of sample dicts into one dict of per-key lists.

    Every key of the samples is gathered into a list, in batch order. The
    'image' entry is then replaced by a float32 tensor: the images are
    stacked, reshaped to (-1, 1, dcm_size, dcm_size), repeated 3 times along
    axis 1 and divided by 255.

    Returns:
        defaultdict(list) with the gathered values and the 'image' tensor.
    """
    collate = defaultdict(list)
    for record in batch:
        for key, value in record.items():
            collate[key].append(value)

    # ---
    stacked = np.stack(collate['image'])
    single_channel = stacked.reshape(-1, 1, dcm_size, dcm_size)
    three_channel = np.repeat(single_channel, 3, axis=1)
    three_channel = np.ascontiguousarray(three_channel)
    scaled = three_channel.astype(np.float32) / 255
    collate['image'] = torch.from_numpy(scaled)

    return collate



In [None]:
#study model

from timm.models.efficientnet import *

class StudyNet(nn.Module):
    """Study-level classifier with an auxiliary mask head.

    The backbone stages are taken from timm's `tf_efficientnetv2_m_in21ft1k`
    (constructed with pretrained=False). `forward` returns a 4-way logit per
    image and a 1-channel mask map predicted from the output of stage `b5`.

    The attribute names (b0..b8, logit, mask) and the order of modules inside
    each nn.Sequential determine the state_dict keys. Checkpoints are loaded
    with strict=True elsewhere in this notebook, so do not rename or reorder
    them.
    """

    def __init__(self):
        super(StudyNet, self).__init__()

        # Only used as a donor of layers; `e` itself is not kept.
        e = tf_efficientnetv2_m_in21ft1k(pretrained=False)
        # Stem: conv + norm + activation.
        self.b0 = nn.Sequential(
            e.conv_stem,
            e.bn1,
            e.act1,
        )
        # The seven block groups of the backbone, in order.
        self.b1 = e.blocks[0]
        self.b2 = e.blocks[1]
        self.b3 = e.blocks[2]
        self.b4 = e.blocks[3]
        self.b5 = e.blocks[4]
        self.b6 = e.blocks[5]
        self.b7 = e.blocks[6]
        # Head: conv + norm + activation.
        # NOTE(review): the inline "384, 1536" note disagrees with the
        # 1280 input features of `self.logit` below - it looks copied from
        # another backbone; confirm and update.
        self.b8 = nn.Sequential(
            e.conv_head, #384, 1536
            e.bn2,
            e.act2,
        )
        # Classifier on the globally pooled head features; 4 outputs
        # (consumed elsewhere as negative/typical/indeterminate/atypical).
        self.logit = nn.Linear(1280, 4)

        # Auxiliary head applied to the output of `b5` (expects 176 input
        # channels) producing a 1-channel map at that stage's resolution.
        self.mask = nn.Sequential(
            nn.Conv2d(176, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1, padding=0),
        )

    # The whole forward pass runs under CUDA automatic mixed precision.
    @torch.cuda.amp.autocast()
    def forward(self, image):
        """Return ``(logit, mask)`` for a batch of images.

        Args:
            image: batch tensor; assumed to be (batch, 3, H, W) with values
                in [0, 1], as produced by null_collate - TODO confirm.

        Returns:
            logit: (batch, 4) unnormalised class scores.
            mask: 1-channel map computed from the `b5` features.
        """
        batch_size = len(image)
        x = 2*image-1     # rescale: [0, 1] -> [-1, 1] when the input is in [0, 1]

        # NOTE(review): the torch.Size notes below appear stale (e.g. they
        # list 136 channels after b5 while `self.mask` expects 176, and 1536
        # after b8 while `self.logit` expects 1280). Re-measure before
        # relying on them.
        x = self.b0(x) #; print (x.shape)  # torch.Size([2, 40, 256, 256])
        x = self.b1(x) #; print (x.shape)  # torch.Size([2, 24, 256, 256])
        x = self.b2(x) #; print (x.shape)  # torch.Size([2, 32, 128, 128])
        x = self.b3(x) #; print (x.shape)  # torch.Size([2, 48, 64, 64])
        x = self.b4(x) #; print (x.shape)  # torch.Size([2, 96, 32, 32])
        x = self.b5(x) #; print (x.shape)  # torch.Size([2, 136, 32, 32])
        #------------
        # Auxiliary mask branches off here; it does not feed the classifier.
        mask = self.mask(x)
        #-------------
        x = self.b6(x) #; print (x.shape)  # torch.Size([2, 232, 16, 16])
        x = self.b7(x) #; print (x.shape)  # torch.Size([2, 384, 16, 16])
        x = self.b8(x) #; print (x.shape)  # torch.Size([2, 1536, 16, 16])
        # Global average pool to one vector per image.
        x = F.adaptive_avg_pool2d(x,1).reshape(batch_size,-1)
        #x = F.dropout(x, 0.5, training=self.training)
        logit = self.logit(x)

        return logit, mask

    
#image model
sys.path.append('../input/finalsiimcovid2021') 
from image_model.model import ImageNet
from image_model.model import *

In [None]:

#load study model ------------------------------------------------------------------------------
# One checkpoint per cross-validation fold.
study_net_checkpoint=[
    model_dir + '/study/eff2m-512-lovasz/fold0_00005600_model.pth',
    model_dir + '/study/eff2m-512-lovasz/fold1_00006600_model.pth',
    model_dir + '/study/eff2m-512-lovasz/fold2_00006000_model.pth',
    model_dir + '/study/eff2m-512-lovasz/fold3_00005800_model.pth',
    model_dir + '/study/eff2m-512-lovasz/fold4_00006800_model.pth',
]

# Build one StudyNet per listed checkpoint. Iterating the list keeps the
# number of models in sync with the number of files.
study_net = []
for checkpoint_file in study_net_checkpoint:
    net = StudyNet()
    # map_location='cpu': deserialise onto the CPU regardless of the device
    # the checkpoint was saved from; the models are moved to the GPU later.
    checkpoint = torch.load(checkpoint_file, map_location='cpu')
    net.load_state_dict(checkpoint['state_dict'], strict=True)
    study_net.append(net)
print('load study_net ok!')




#load image model ------------------------------------------------------------------------------

# One checkpoint per cross-validation fold.
image_net_checkpoint=[
    model_dir + '/image/effdet-d3-640-s3.0/fold0_0000-swa_model.pth',
    model_dir + '/image/effdet-d3-640-s3.0/fold1_0000-swa_model.pth',
    model_dir + '/image/effdet-d3-640-s3.0/fold2_0000-swa_model.pth',
    model_dir + '/image/effdet-d3-640-s3.0/fold3_0000-swa_model.pth',
    model_dir + '/image/effdet-d3-640-s3.0/fold4_0000-swa_model.pth',
]

# Build one ImageNet detector per listed checkpoint. Iterating the list keeps
# the number of models in sync with the number of files.
image_net = []
for checkpoint_file in image_net_checkpoint:
    net = ImageNet()
    # map_location='cpu': deserialise onto the CPU regardless of the device
    # the checkpoint was saved from; the models are moved to the GPU later.
    checkpoint = torch.load(checkpoint_file, map_location='cpu')
    net.load_state_dict(checkpoint['state_dict'], strict=True)
    image_net.append(net)
print('load image_net ok!')


 
    
 

In [None]:
#csv processing

def probability_to_df_study(df, probability):
    """Build the study-level submission rows.

    Args:
        df: metadata frame with a ``study_id`` column; row i belongs to
            ``probability[i]``. Only the first ``len(probability)`` rows are
            used.
        probability: 2-D array whose 4 columns are, in order, the scores for
            negative / typical / indeterminate / atypical.

    Returns:
        DataFrame with columns ['id', 'PredictionString'], one row per study
        id, where the scores of all images of a study are averaged and each
        class is written as '<name> <score %0.6f> 0 0 1 1'.
    """
    df = df[:len(probability)] #<debug>

    class_name = ['negative', 'typical', 'indeterminate', 'atypical']

    df_study = pd.DataFrame()
    df_study.loc[:, 'id'] = df.study_id
    for column, name in enumerate(class_name):
        df_study.loc[:, name] = probability[:, column]

    # One row per study: mean score over the study's images.
    df_study = df_study.groupby('id', as_index=False).mean()

    def as_text(value):
        return '%0.6f' % value

    prediction = None
    for name in class_name:
        term = df_study[name].apply(as_text) + ' 0 0 1 1'
        if prediction is None:
            prediction = name + ' ' + term
        else:
            prediction = prediction + ' ' + name + ' ' + term
    df_study.loc[:, 'PredictionString'] = prediction

    return df_study[['id', 'PredictionString']]



def detection_to_df_image(df, detection):
    """Build the image-level submission rows from per-image detections.

    Args:
        df: metadata frame with ``image_id``, ``width`` and ``height``
            columns; row i belongs to ``detection[i]``. Only the first
            ``len(detection)`` rows are used.
        detection: sequence with one entry per image; each entry iterates as
            rows of (x0, y0, x1, y1, confidence). The coordinates are
            multiplied by the image width / height and truncated to int.

    Returns:
        DataFrame with columns 'id' and 'PredictionString'. Each box is
        written as ' opacity <conf> <x0> <y0> <x1> <y1>' (note the leading
        space); an image without boxes gets an empty string.
    """
    df = df[:len(detection)] #<debug>

    df_image = pd.DataFrame()
    df_image.loc[:, 'id'] = df.image_id

    box_format = ' opacity %0.5f %4d %4d %4d %4d'

    predict_string = []
    for row, boxes in enumerate(detection):
        meta = df.iloc[row]
        predict_string.append(''.join(
            box_format % (
                c,
                int(x0*meta.width ),
                int(y0*meta.height),
                int(x1*meta.width ),
                int(y1*meta.height),
            )
            for x0, y0, x1, y1, c in boxes
        ))

    df_image.loc[:, 'PredictionString'] = predict_string
    return df_image

def make_fake_opacity_prediction(df_image, df_study, df_meta):
    """Prepend a 'none' entry, derived from the study's 'negative' score, to
    every image prediction string.

    For each image the owning study is looked up in ``df_meta``; the leading
    'negative <score> 0 0 1 1' entry of that study's PredictionString is
    rewritten to 'none <score> 0 0 1 1' and placed, followed by one space, in
    front of the image's own prediction string.

    Args:
        df_image: frame with 'id' and 'PredictionString' columns; its
            'PredictionString' column is overwritten in place.
        df_study: frame with 'id' and 'PredictionString' columns.
        df_meta: frame with 'image_id' and 'study_id' columns.

    Returns:
        The same ``df_image`` object, updated.

    Raises:
        KeyError: if an image id is missing from ``df_meta`` or its study id
            is missing from ``df_study``.
    """
    # Build both lookups once instead of filtering the frames for every row.
    # setdefault keeps the first occurrence, matching a `.values[0]` lookup.
    study_of_image = {}
    for image_id, study_id in zip(df_meta['image_id'], df_meta['study_id']):
        study_of_image.setdefault(image_id, study_id)

    none_of_study = {}
    for study_id, study_string in zip(df_study['id'], df_study['PredictionString']):
        if study_id in none_of_study:
            continue
        # The first entry is six whitespace-separated fields:
        # label, score and the four box numbers. Taking fields rather than a
        # fixed number of characters stays correct if the score is printed
        # with a different width.
        leading_entry = ' '.join(study_string.split()[:6])
        none_of_study[study_id] = leading_entry.replace('negative', 'none')

    predict_string = [
        none_of_study[study_of_image[image_id]] + ' ' + opacity
        for image_id, opacity in zip(df_image['id'], df_image['PredictionString'])
    ]

    df_image.loc[:,'PredictionString']=predict_string
    return df_image

In [None]:
#inference and make submission csv here!!!!

def _predict_flat(net, image):
    """Run one detector on `image` and return its (probability, box) outputs
    after `infer_prediction` and `pyramid_to_flat`, using that detector's own
    anchors."""
    logit, delta = net(image)
    probability, box = infer_prediction(logit, delta, net.anchor)
    return pyramid_to_flat(probability), pyramid_to_flat(box)


def process_one_batch_for_image(image_640):
    """Detect opacities in one batch with every model in `image_net`.

    For each model three test-time-augmentation passes are run (identity,
    horizontal flip, 10% up-scale + centre crop); their boxes are mapped back
    to the coordinates of the input image, concatenated and reduced with
    `do_non_max_suppression`. The per-model results are then merged per image
    with `weighted_boxes_fusion`.

    Args:
        image_640: batch tensor on the models' device. It is assumed to be
            (batch, 3, 640, 640), matching the hard-coded `image_size` -
            TODO confirm.

    Returns:
        List with one array per image; rows are (x0, y0, x1, y1, score), at
        most 100 rows. Box coordinates are clipped to [0, 1] before fusion
        (the NMS step is noted by the original author to return boxes
        normalised to that range).
    """
    image_size = 640

    detection = []
    with torch.no_grad():
        for net in image_net:
            probability_flat = []
            box_flat = []

            # identity pass
            p, b = _predict_flat(net, image_640)
            probability_flat.append(p)
            box_flat.append(b)

            # --------------------------------------------------
            # flip in tta
            p, b = _predict_flat(net, torch.flip(image_640, dims=(3,)))
            # mirror the x coordinates back (and swap x0/x1 so x0 <= x1)
            b[..., [0, 2]] = image_size - b[..., [2, 0]]
            probability_flat.append(p)
            box_flat.append(b)

            #'scale+crop' in tta:
            s = int(0.10 * image_size)
            m = F.interpolate(image_640, size=(image_size + 2 * s, image_size + 2 * s), mode='bilinear', align_corners=False)
            m = m[:, :, s:s + image_size, s:s + image_size]

            p, b = _predict_flat(net, m)
            # crop coordinates -> up-scaled image -> original image
            b[..., :4] = b[..., :4] + s
            b[..., :4] = b[..., :4] / (image_size + 2 * s) * image_size
            probability_flat.append(p)
            box_flat.append(b)

            # --------------------------------------------------
            probability_flat = torch.cat(probability_flat, 1)
            box_flat = torch.cat(box_flat, 1)
            det = do_non_max_suppression(
                probability_flat,
                box_flat,
                nms_objectness_threshold=0.01,
                nms_iou_threshold=0.5,
                nms_pre_max_num=1000,
                nms_post_max_num=50,
            )
            #box normalised to 0,1 in do_non_max_suppression !!!
            detection.append(det)

    #------------
    batch_size = len(image_640)
    num_model = len(detection)

    ensemble = []
    for b in range(batch_size):
        box   = []
        score = []
        label = []
        for i in range(num_model):
            det = detection[i][b]
            x = det[:, :4]
            x = np.clip(x, 0, 1)  # clip to image size
            s = det[:, 4]
            l = [1] * len(s)  # single class: every box gets the same label
            box.append(x.tolist())
            score.append(s.tolist())
            label.append(l)

        box, score, label = weighted_boxes_fusion(box, score, label, weights=None, iou_thr=0.60)
        # box, score, label = nms(box, score, label, weights=None,iou_thr=0.60)

        score = score.reshape(-1, 1)
        e = np.concatenate([box, score], 1)[:100]
        ensemble.append(e)

    return ensemble



def process_one_batch_for_study(image_512, image_640):
    """Score one batch with every model in `study_net`.

    For each model the softmax outputs of three test-time-augmentation passes
    (the 512 input, its horizontal flip, and the 640 input) are averaged; the
    square root of that average is then summed over the models.

    Args:
        image_512: batch tensor, the 512x512 version of the images.
        image_640: batch tensor, the same images at 640x640.

    Returns:
        Tensor with 4 scores per image. It is a sum over models, not a mean,
        so rows do not add up to 1. If `study_net` is empty the int 0 is
        returned.
    """
    ensemble = 0

    # Iterate the model list itself so the ensemble size always matches the
    # number of loaded models.
    for net in study_net:
        with torch.no_grad():
            prob = []

            logit, _ = net(image_512)
            prob.append(F.softmax(logit, -1))

            # ----
            # 'flip' in tta:
            logit, _ = net(torch.flip(image_512, dims=(3,)))
            prob.append(F.softmax(logit, -1))

            # 'scale' in tta:
            logit, _ = net(image_640)
            prob.append(F.softmax(logit, -1))
            # ----

        prob = torch.stack(prob, 0).mean(0)
        ensemble += prob ** 0.5

    return ensemble


#------




# Main inference driver: run both ensembles over the test set and write
# ./submission.csv.
# NOTE(review): keep the module-level names `image` and `net` assigned below;
# helper functions defined earlier in the notebook may read them as globals -
# confirm before renaming.
if 1:

    dataset = SiimDataset()  # null_augment
    # Sequential order matters: prediction i is later paired with row i of
    # dataset.df. num_workers=0 keeps __getitem__ in this process, so the
    # width/height it writes into dataset.df are visible afterwards.
    loader = DataLoader(
        dataset,
        sampler=SequentialSampler(dataset),
        batch_size=3,
        drop_last=False,
        num_workers=0,
        pin_memory=True,
        collate_fn=null_collate,
    )
    print('len(loader)', len(loader))

    #-----------------------------------------------------

    # Put every model in eval mode and move it to the GPU.
    for net in study_net:
        net.eval()
        net.cuda()

    for net in image_net:
        net.eval()
        net.cuda()

    start_timer = timer()
    study_probability = []  # one (batch, 4) array per batch
    image_detection = []    # one detection array per image
    for t, batch in enumerate(loader):
        # if t==5 :break  #<debug>

        image = batch['image'].cuda()
        # The study models get a 512x512 down-scaled copy; the 640 input is
        # the batch as collated.
        image_512 = F.interpolate(image, size=(512,512), mode='bilinear', align_corners=False)
        image_640 = image

        ensemble = process_one_batch_for_study(image_512, image_640)
        study_probability.append(ensemble.float().data.cpu().numpy())

        ensemble = process_one_batch_for_image(image_640)
        image_detection.extend(ensemble)

        #-----
        # '\r' rewrites the same output line with the elapsed time.
        print('\r\tbatch t=%d of loader : %s' % (t, time_to_str(timer() - start_timer, 'sec')),end='', flush=True)
    print('')
    # -----

    study_probability = np.concatenate(study_probability)
    print('study_probability',study_probability.shape)

    # Per-image scores -> one row per study.
    df_study = probability_to_df_study(dataset.df, study_probability)
    print('df_study', df_study.shape)
    print(df_study)
    print('')

    #----

    print('len(image_detection)',len(image_detection))

    # Normalised boxes -> pixel coordinates, using the width/height stored in
    # dataset.df while the images were loaded.
    df_image = detection_to_df_image(dataset.df, image_detection)
    print('df_image', df_image.shape)
    print(df_image)
    print('')

    #----
    # we use study 'negative' for image 'none' prediction
    # this is more accurate ????
    df_image = make_fake_opacity_prediction(df_image, df_study, dataset.df)
    print('df_image', df_image.shape)
    print(df_image)
    print('')


    #----
    #submit :
    # Study rows first, then image rows, in a single file.
    df_submit = pd.concat([df_study, df_image]).reset_index(drop=True)
    print(df_submit)
    df_submit[['id', 'PredictionString']].to_csv('./submission.csv', index=False)

print('*** sucess !!!! ***')