# [SIIM-FISABIO-RSNA COVID-19 Detection](https://www.kaggle.com/c/siim-covid19-detection)
> Identify and localize COVID-19 abnormalities on chest radiographs

![](https://storage.googleapis.com/kaggle-competitions/kaggle/26680/logos/header.png)

# COMMIT or not

In [None]:
import pandas as pd
from glob import glob

# Recursively gather every DICOM file found under the test directory.
filepaths = glob('/kaggle/input/siim-covid19-detection/test/**/*dcm', recursive=True)
test_df = pd.DataFrame({'filepath': filepaths})

# The file name (last path component) becomes the image id and the
# third-from-last path component becomes the study id.
path_parts = test_df.filepath.map(lambda path: path.split('/'))
test_df['image_id'] = path_parts.map(lambda parts: parts[-1].replace('.dcm', '') + '_image')
test_df['study_id'] = path_parts.map(lambda parts: parts[-3].replace('.dcm', '') + '_study')

# Keep a raw copy of the listing on disk and preview it.
test_df.to_csv('test-raw.csv', index=False)
test_df.head()

# Submission Mode

In [None]:
# 1263 is presumably the number of images in the public test set, i.e. an
# interactive (non-submission) run -- TODO confirm. Only then stay in debug mode.
debug = test_df.shape[0] == 1263
commit = not debug

# Manual override: always run the full pipeline, whatever the check above said.
commit = True

# debug  = True
# commit = True

# Install **gdcm** & **libjpeg** without internet

In [None]:
# Install the extra Python packages from local archives/wheels shipped in the
# attached `scd-repo-dataset` input (the notebook runs without internet).
# `-q` keeps pip's output quiet.
if commit:
    !pip install /kaggle/input/scd-repo-dataset/pycocotools-2.0.2/dist/pycocotools-2.0.2.tar -q
    !pip install /kaggle/input/scd-repo-dataset/thop-0.0.31.post2005241907-py3-none-any.whl -q
    !pip install /kaggle/input/scd-repo-dataset/seaborn-0.11.1-py3-none-any.whl -q
    !pip install /kaggle/input/scd-repo-dataset/albumentations-1.0.0-py3-none-any.whl -q
    !pip install /kaggle/input/scd-repo-dataset/addict-2.4.0-py3-none-any.whl -q
    !pip install /kaggle/input/scd-repo-dataset/timm-0.4.12-py3-none-any.whl -q

In [None]:
# Install gdcm, libjpeg-turbo and their supporting packages from local conda
# archives in the `pydicom-conda-helper` input. `-y` answers the confirmation
# prompt automatically.
if commit:
    !conda install '/kaggle/input/pydicom-conda-helper/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
    !conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
    !conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
    !conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
    !conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
    !conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

# Install **WBF**

In [None]:
# Install the weighted-boxes-fusion package from the local copy in the scripts
# dataset; the fusion cell further down imports `ensemble_boxes`, presumably
# provided by this install.
if commit:
    !pip install -q /kaggle/input/siimcovid19detection-scripts-dataset/wbf

# Packages

In [None]:
import os
from glob import glob
import shutil
from tqdm.notebook import tqdm
tqdm.pandas()
import numpy as np
import pandas as pd
import math
import cv2
import matplotlib.pyplot as plt

# Args

In [None]:
# Global settings for the notebook.
TTA = 1 # cls (classification TTA setting; not referenced elsewhere in this notebook)
dim = 512 # det: passed to detect.py as --img and used to pick /tmp/Dataset/test/{dim}
aspect_ratio = False # not referenced elsewhere in this notebook

# Exponents of the weighted geometric means used to blend model outputs.
## bbox score = det**ALPHA * (1 - p["0"])**BETA * p["opacity"]**GAMMA
ALPHA = 0.7 # det (detector confidence)
BETA  = 0.2 # 4cls (study-level classifier)
GAMMA = 0.1 # 2cls (opacity classifier)

## image-level `none` score = p["0"]**BETA2 * (1 - p["opacity"])**GAMMA2
BETA2  = 0.8 # 4cls
GAMMA2 = 0.2 # 2cls

## study-level class scores: 4cls probability**BETA3 * 2cls term**GAMMA3
BETA3  = 1.0 # 4cls
GAMMA3 = 0.0 # 2cls (an exponent of 0 disables the 2cls contribution)

## bbox-filter: enables the cells that run bbox_filter.py on the image-level output
BBOX_FILTER = True

## detection args forwarded to detect.py
NMS_CONF = 0.001 # --conf threshold (0.001 - chris)
NMS_IOU  = 0.5 # --iou threshold
MAX_DET  = 1000 # --max-det: max bbox per img

# Writing Images

In [None]:
# Run the DICOM-to-image conversion script from the scripts dataset. `debug` is
# forwarded as 0/1 and three sizes (768, 640, 512) are requested via --img.
# Later cells read the 512px images from /tmp/Dataset/test/{dim}; presumably
# this script writes them there -- TODO confirm.
if commit:
    !python /kaggle/input/siimcovid19detection-scripts-dataset/dicom2image.py --debug {int(debug)}\
                                                                              --img 768 640 512

# Inference **Study-Level**

In [None]:
# Run the study-level inference script, forwarding `debug` as 0/1. Later cells
# read /kaggle/working/image_cls.csv, presumably produced by this script --
# TODO confirm.
if commit:
    !python /kaggle/input/siimcovid19detection-scripts-dataset/infer_study.py --debug {int(debug)}

In [None]:
if commit:
    # Load the per-image classifier outputs and show the first two rows.
    img_cls_df = pd.read_csv('/kaggle/working/image_cls.csv')
    display(img_cls_df.head(2))

    # Quick sanity check: mean of each probability column over the first 100 rows.
    prob_columns = ["0","1","2","3","opacity"]
    display(img_cls_df.loc[:, prob_columns].iloc[:100].mean(axis=0))

# YOLOv5 Repo

In [None]:
# !rm -r /kaggle/working/yolov5
# %cd /kaggle/working

In [None]:
# Copy the YOLOv5 repo out of the input dataset into the working directory and
# change into it.
if commit:
    %cp -r /kaggle/input/siimcovid19detection-scripts-dataset/yolov5 /kaggle/working/yolov5
    %cd /kaggle/working/yolov5

# Inference **Image-Level**

## WBF

In [None]:
# Run detect.py once per (backbone, fold) pair. `backbones` is the sorted list
# of checkpoint directories under det/, and `folds` the fold numbers to use.
# Each run writes into its own project directory runs/detect_b{k}_f{fold};
# --save-txt/--save-conf request label files with confidences, which the
# following cells read from .../exp/labels/.
backbones = sorted(glob('/kaggle/input/siimcovid19detection-checkpoint-dataset/det/*/'))
folds = [0,1,2,3,4] 
%cd /kaggle/working/yolov5
for k,backbone in enumerate(backbones):
    
    for fold in folds:
        
        print('#'*25)
        print('### Backbone =',k,', Fold =',fold)
        print('#'*25)
        
        # Directory holding this backbone's weights for the current fold.
        model = backbone + f'/fold-{fold}'

        if commit:
            !python detect.py --weights_dirs $model\
            --img $dim\
            --conf $NMS_CONF\
            --iou $NMS_IOU\
            --source /tmp/Dataset/test/{dim}\
            --save-txt\
            --save-conf\
            --exist-ok\
            --save-img 0\
            --augment\
            --project runs/detect_b{k}_f{fold}\
            --max-det $MAX_DET

In [None]:
# Leave the YOLOv5 repo and return to the working directory.
%cd /kaggle/working

# Detection Helper

In [None]:
def voc2yolo(image_height, image_width, bboxes):
    """
    Convert Pascal-VOC boxes to normalized YOLO boxes.

    voc  => [x1, y1, x2, y2] (pixels)
    yolo => [xmid, ymid, w, h] (normalized)

    The input array is left untouched; a float copy is returned.
    """
    # Work on a float copy so integer pixel inputs are not truncated to 0.
    scaled = bboxes.copy().astype(float)

    # Normalize x coordinates by the width and y coordinates by the height.
    scaled[..., [0, 2]] /= image_width
    scaled[..., [1, 3]] /= image_height

    box_w = scaled[..., 2] - scaled[..., 0]
    box_h = scaled[..., 3] - scaled[..., 1]

    # Shift the top-left corner to the box centre, then store the extent.
    scaled[..., 0] += box_w / 2
    scaled[..., 1] += box_h / 2
    scaled[..., 2] = box_w
    scaled[..., 3] = box_h

    return scaled

def yolo2voc(image_height, image_width, bboxes):
    """
    Convert normalized YOLO boxes to Pascal-VOC pixel boxes.

    yolo => [xmid, ymid, w, h] (normalized)
    voc  => [x1, y1, x2, y2] (pixels)

    The input array is left untouched; a float copy is returned.
    """
    pixels = bboxes.copy().astype(float)

    # Back to pixel units: x/w scale with the width, y/h with the height.
    pixels[..., [0, 2]] *= image_width
    pixels[..., [1, 3]] *= image_height

    # Centre -> top-left corner, then corner + extent -> bottom-right corner.
    pixels[..., [0, 1]] -= pixels[..., [2, 3]] / 2
    pixels[..., [2, 3]] += pixels[..., [0, 1]]

    return pixels

def coco2yolo(image_height, image_width, bboxes):
    """
    Convert COCO pixel boxes to normalized YOLO boxes.

    coco => [xmin, ymin, w, h] (pixels)
    yolo => [xmid, ymid, w, h] (normalized)

    The input array is left untouched; a float copy is returned.
    """
    # Float copy so integer pixel inputs are not truncated to 0.
    scaled = bboxes.copy().astype(float)

    # Normalize: x and w by the image width, y and h by the image height.
    scaled[..., [0, 2]] /= image_width
    scaled[..., [1, 3]] /= image_height

    # Move the anchor from the top-left corner to the box centre.
    scaled[..., [0, 1]] += scaled[..., [2, 3]] / 2

    return scaled

def yolo2coco(image_height, image_width, bboxes):
    """
    Convert normalized YOLO boxes to COCO pixel boxes.

    yolo => [xmid, ymid, w, h] (normalized)
    coco => [xmin, ymin, w, h] (pixels)

    The input array is left untouched; a float copy is returned.
    """
    pixels = bboxes.copy().astype(float)

    # Denormalize: x and w by the image width, y and h by the image height.
    pixels[..., [0, 2]] *= image_width
    pixels[..., [1, 3]] *= image_height

    # Move the anchor from the box centre to the top-left corner.
    pixels[..., [0, 1]] -= pixels[..., [2, 3]] / 2

    return pixels

def load_image(image_path):
    """Read an image from disk and return it with RGB channel order.

    Args:
        image_path: Path of an image file to read with ``cv2.imread``.

    Returns:
        The decoded image converted from BGR to RGB.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read the file.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise show up as an opaque error inside cvtColor.
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw one bounding box, and optionally a label, on ``img`` in place.

    Args:
        x: Box corners as ``[x1, y1, x2, y2]``; values are cast to ``int``.
        img: Image array to draw on (modified in place).
        color: Colour of the box. A random colour is used when falsy.
        label: Optional text drawn in a filled rectangle above the top-left
            corner.
        line_thickness: Line thickness; derived from the image size when falsy.
    """
    # Local import: `random` is not imported by the visible notebook cells, so
    # the default-colour path would otherwise raise NameError.
    import random

    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        # Background rectangle sized to the text, placed just above the box corner.
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

def draw_bboxes(img, bboxes, classes, class_ids, confs, colors = None, show_classes = None, bbox_format = 'yolo', class_name = False, line_thickness = 2):
    """Return a copy of ``img`` with the given boxes and labels drawn on it.

    Args:
        img: Image array; it is copied, never modified.
        bboxes: One box per detection, laid out according to ``bbox_format``.
        classes: Class name of each box.
        class_ids: Integer class id of each box.
        confs: Confidence of each box; appended to the label as ``-0.00``.
        colors: A single colour, or a list of colours indexed by class id.
            Defaults to green.
        show_classes: Only boxes whose class is in this collection are drawn.
            Defaults to every class in ``classes``.
        bbox_format: ``'yolo'`` ([xmid, ymid, w, h], normalized),
            ``'coco'`` ([xmin, ymin, w, h], pixels) or
            ``'voc_pascal'`` ([x1, y1, x2, y2], pixels).
        class_name: Label with the class name if true, else with the class id.
        line_thickness: Thickness passed on to ``plot_one_box``.

    Returns:
        The annotated copy of ``img``.

    Raises:
        ValueError: If ``bbox_format`` is not one of the supported formats.
    """
    if bbox_format not in ('yolo', 'coco', 'voc_pascal'):
        raise ValueError('wrong bbox format')

    image = img.copy()
    show_classes = classes if show_classes is None else show_classes
    colors = (0, 255 ,0) if colors is None else colors

    for idx, bbox in enumerate(bboxes):
        cls    = classes[idx]
        cls_id = class_ids[idx]
        # Read the confidence for every format; previously only the 'yolo'
        # branch did, so the other branches failed on an undefined `conf`.
        conf   = confs[idx]
        color  = colors[cls_id] if isinstance(colors, list) else colors

        if cls not in show_classes:
            continue

        if bbox_format == 'yolo':
            # Normalized centre/size -> pixel corners.
            xmid   = round(float(bbox[0])*image.shape[1])
            ymid   = round(float(bbox[1])*image.shape[0])
            half_w = round(float(bbox[2])*image.shape[1]/2)
            half_h = round(float(bbox[3])*image.shape[0]/2)
            voc_bbox = (xmid-half_w, ymid-half_h, xmid+half_w, ymid+half_h)
        elif bbox_format == 'coco':
            x1 = int(round(bbox[0]))
            y1 = int(round(bbox[1]))
            w  = int(round(bbox[2]))
            h  = int(round(bbox[3]))
            voc_bbox = (x1, y1, x1+w, y1+h)
        else:  # 'voc_pascal'
            voc_bbox = (int(round(bbox[0])), int(round(bbox[1])),
                        int(round(bbox[2])), int(round(bbox[3])))

        # All formats label by class id when class_name is false. The 'yolo'
        # branch used to call `get_label`, which is not defined in this notebook.
        label = cls if class_name else str(cls_id)
        label += f'-{conf:0.2f}'
        plot_one_box(voc_bbox,
                     image,
                     color = color,
                     label = label,
                     line_thickness = line_thickness)

    return image

# Random Color (seeded, so the same colour is picked on every run)
np.random.seed(10)
colors = [tuple(np.random.randint(255) for _ in range(3))]

# Sanity Check

## WBF

In [None]:
# Visual sanity check: for every (backbone, fold) run, draw the predicted boxes
# of one test image.
if commit:
    for bb in range(len(backbones)):
        for ff in range(len(folds)):
            print('#'*25)
            print('### Backbone =',bb,'Fold =',ff)
            print('#'*25)

            # NOTE: `test_df` is rebound on purpose. Later cells read the
            # `width`/`height` columns from this frame, which the earlier
            # test-raw listing does not have.
            test_df = pd.read_csv('test.csv')
            # The detection cell names run directories after the fold value, so
            # look the value up instead of using the loop index.
            test_df['label_path'] = f'/kaggle/working/yolov5/runs/detect_b{bb}_f{folds[ff]}/exp/labels/'+test_df.image_id.map(lambda x: x.replace('_image',''))+'.txt'
            test_df['image_path'] = f'/tmp/Dataset/test/{dim}/'+test_df.image_id.map(lambda x: x.replace('_image',''))+'.png'

            def viz(idx=10):
                """Plot row `idx` of `test_df` with its predicted YOLO boxes."""
                row = test_df.iloc[idx]
                img = load_image(row.image_path)
                print('image shape:', img.shape)

                bboxes_yolo = []
                confs = []
                # Each label line is split on spaces: the first value is dropped,
                # the last is the confidence, the ones in between are the box.
                with open(row.label_path, 'r') as f:
                    for line in f:
                        line = line.strip(' \n')
                        if not line:
                            break
                        values = np.array(line.split(' ')).astype(np.float32)
                        bboxes_yolo.append(list(values[1:-1]))
                        confs.append(values[-1])
                clsses        = ['opacity']*len(bboxes_yolo)
                class_ids     = [0]*len(bboxes_yolo)

                plt.figure(figsize = (5, 5))
                plt.imshow(draw_bboxes(img = img,
                               bboxes = bboxes_yolo,
                               classes = clsses,
                               class_ids = class_ids,
                               confs=confs,
                               class_name = True,
                               colors = colors,
                               bbox_format = 'yolo',
                               line_thickness = 2))
                plt.show()
            viz(23);

# 2cls Model

## Detection

In [None]:
POST_PROCESS = False
# WE DON'T DO DETECTION POST PROCESS HERE. WE DO BELOW AFTER WBF
files_to_remove = []

# Turn each run's YOLO label files into one CSV of prediction strings
# ("opacity <conf> <x1> <y1> <x2> <y2> ..." in original-image pixels) per
# (backbone, fold) pair: test_b{bb}_f{ff}.csv.
if commit:
    if POST_PROCESS:
        # Loaded once; the file does not change between folds.
        img_cls_df = pd.read_csv('/kaggle/working/image_cls.csv')
    for bb in range(len(backbones)):
        for ff in range(len(folds)):
            total = 0
            image_ids = []
            PredictionStrings = []
            # Run directories are named after the fold value, not its index.
            label_dir = f'/kaggle/working/yolov5/runs/detect_b{bb}_f{folds[ff]}/exp/labels'
            for file_path in tqdm(glob(f'{label_dir}/*txt')):
                image_id = file_path.split('/')[-1].split('.')[0]+'_image'
                w, h = test_df.loc[test_df.image_id==image_id,['width', 'height']].values[0]
                # Close the file promptly; split() also tolerates an empty file.
                with open(file_path, 'r') as f:
                    tokens = f.read().split()
                # Six values per box: class, xmid, ymid, w, h, conf.
                data = np.array(tokens).astype(np.float32).reshape(-1, 6)
                total += data.shape[0]
                boxes_voc = np.round(yolo2voc(h, w, data[:, 1:5]))
                scores = data[:, 5].astype(float)
                if POST_PROCESS:
                    prob_b = 1-img_cls_df.query("image_id==@image_id")["0"].values[0]
                    prob_g = img_cls_df.query("image_id==@image_id")["opacity"].values[0]
                parts = []
                for score, box in zip(scores, boxes_voc):
                    if POST_PROCESS:
                        # geometric mean (x^alpha)*(y^beta)
                        conf_str = str(np.power(float(score), ALPHA)*np.power(prob_b, BETA)*np.power(prob_g, GAMMA))
                    else:
                        conf_str = f'{float(score):.9}'
                    parts.append('opacity')
                    parts.append(conf_str)
                    parts.extend(str(int(v)) for v in box)
                image_ids.append(image_id)
                PredictionStrings.append(' '.join(parts))
            print('Total BBox:',total)
            pred_img_df = pd.DataFrame({'image_id':image_ids,
                                       'PredictionString':PredictionStrings})
            pred_img_df.to_csv(f'test_b{bb}_f{ff}.csv',index=False)
            files_to_remove.append(f'test_b{bb}_f{ff}.csv')

In [None]:
if commit:
    # Show the first five rows of the most recently built prediction table.
    print('Example pred_img_df dataframe')
    display(pred_img_df.head(5))

In [None]:
if commit:
    # IoU threshold used by the box fusion below.
    iou_thr = 0.625

    # One group of per-fold CSVs per backbone ...
    wbf_files = [
        [f'test_b{bb}_f{ff}.csv' for ff in range(len(folds))]
        for bb in range(len(backbones))
    ]
    # ... followed by a final group that fuses the per-backbone results.
    final = [f'test_b{bb}.csv' for bb in range(len(backbones))]
    wbf_files.append(final)
    display(wbf_files)

## Fusing Boxes

In [None]:
import warnings
warnings.filterwarnings("ignore") #wbf


def _drop_none_boxes(prediction_string):
    """Return `prediction_string` without the 6-token groups whose class is 'none'."""
    s = prediction_string.split(' ')
    if len(s)%6!=0: print('ERROR')
    kept = []
    for k in range(len(s)//6):
        if s[k*6]=='none': continue
        kept.append(' '.join(s[k*6:k*6+6]))
    return ' '.join(kept)


# Fuse boxes with WBF in two stages: every group of `wbf_files` except the last
# fuses the folds of one backbone into test_b{n}.csv; the last group fuses
# those per-backbone files into test_wbf.csv.
if commit:
    from ensemble_boxes import weighted_boxes_fusion, non_maximum_weighted, soft_nms, nms

    for wbf_num,files in enumerate(wbf_files):
        print('#'*25)
        print('### Backbone',wbf_num)
        print('#'*25)
        print()

        # FILLNA, REMOVE WORD OPACITY, REMOVE WORD IMAGE FROM ID, REMOVE NONE PREDICTIONS
        preds = [pd.read_csv(file).fillna('').rename({'id':'image_id'},axis=1) for file in files]
        for p in preds:
            # Assign whole columns instead of writing through iterrows() rows,
            # which pandas does not guarantee to reach the DataFrame.
            p['PredictionString'] = p.PredictionString.str.replace('opacity','0').map(_drop_none_boxes)
            p['image_id'] = p.image_id.map(lambda x: x.split('_')[0])

        # GET TEST WIDTHS AND HEIGHTS
        df_test = test_df.copy()
        df_test['image_id'] = df_test['image_id'].map(lambda x: x.split('_')[0])
        df_test = df_test[['image_id','width','height']].set_index('image_id')
        if debug:
            df_test = df_test.iloc[:100]
        df_test.width = df_test.width.astype('int32')
        df_test.height = df_test.height.astype('int32')

        # CONVERT PREDS AS STRING TO DATAFRAME
        print('Converting',len(preds),'dataframe of string to dataframe of numbers...')
        for i, pred in enumerate(preds):
            print(i,', ',end='')
            new_pred = []
            for index, row in pred.iterrows():
                if row.PredictionString == '': continue
                try:
                    data_flat = np.array(row.PredictionString.split(' '))
                except AttributeError:
                    # Not a string: report it and skip the row rather than
                    # carrying on with the previous row's data.
                    print('###',row.PredictionString,'###')
                    continue
                data_matrix = data_flat[:len(data_flat) // 6 * 6].reshape(-1, 6)

                df = pd.DataFrame( {
                    'image_id' : np.repeat(row.image_id,len(data_matrix)),
                    'score' : data_matrix[:,1].astype(float),
                    'x_min' : data_matrix[:,2].astype(int),
                    'y_min' : data_matrix[:,3].astype(int),
                    'x_max' : data_matrix[:,4].astype(int),
                    'y_max' : data_matrix[:,5].astype(int),
                    'class_id' : data_matrix[:,0].astype(int)})
                new_pred.append(df)
            preds[i] = pd.concat(new_pred).join(df_test, on=['image_id'])
        print(); print()

        for i, sub_name in enumerate(files):
            print(sub_name,'has box count', len(preds[i]))
        print()

        # NORMALIZE BOXES
        for i, pred in enumerate(preds):
            pred['x_min'] = pred['x_min'] / pred['width']
            pred['y_min'] = pred['y_min'] / pred['height']
            pred['x_max'] = pred['x_max'] / pred['width']
            pred['y_max'] = pred['y_max'] / pred['height']

        print('Here are preds',files[0],'preds...')
        display( preds[0].sort_values(['score']).head() )
        print()

        sub_results = []
        label_dict = {0: 'opacity'}

        # processing of other classes
        print('Processing boxes with WBF...')
        # Fuse every image seen by at least one model, not only those present
        # in the first prediction file.
        all_image_ids = pd.concat([pred.image_id for pred in preds], ignore_index=True).unique()
        for jj,image_id in enumerate(all_image_ids):
            if jj%100==0: print(jj,', ',end='')

            boxes_list, labels_list, scores_list = [], [], []
            img_width = img_height = None

            for i in range(len(preds)):
                sub_df = preds[i][preds[i].image_id == image_id].sort_values(['score'])
                boxes_list.append(sub_df[['x_min', 'y_min', 'x_max', 'y_max']].values)
                labels_list.append(sub_df['class_id'].values)
                scores_list.append(sub_df['score'].values)
                # Take the image size from the first model that has boxes, so a
                # model without boxes for this image cannot discard the fusion.
                if img_width is None and len(sub_df)>0:
                    img_width = sub_df.width.values[0]
                    img_height = sub_df.height.values[0]

            boxes, scores, labels = weighted_boxes_fusion(boxes_list, scores_list, labels_list, iou_thr=iou_thr)
            if len(boxes)==0:
                continue

            # Back from normalized coordinates to original-image pixels.
            sub_df_weighted = pd.DataFrame(boxes, columns=['x_min', 'y_min', 'x_max', 'y_max'])
            sub_df_weighted['image_id'] = image_id
            sub_df_weighted['x_min'] = (sub_df_weighted['x_min'] * img_width).astype(int)
            sub_df_weighted['x_max'] = (sub_df_weighted['x_max'] * img_width).astype(int)
            sub_df_weighted['y_min'] = (sub_df_weighted['y_min'] * img_height).astype(int)
            sub_df_weighted['y_max'] = (sub_df_weighted['y_max'] * img_height).astype(int)
            sub_df_weighted['height'] = int(img_height)
            sub_df_weighted['width'] = int(img_width)
            sub_df_weighted['score'] = scores
            sub_df_weighted['class_id'] = labels.astype(int)
            sub_df_weighted['class_name'] = sub_df_weighted['class_id'].apply(lambda s : label_dict[s])

            sub_results.append(sub_df_weighted.copy(deep=True))

        preds_test_weight = pd.concat(sub_results)
        preds_test_weight = preds_test_weight[preds[0].columns]
        print(); print()

        print('Done. We now have',len(preds_test_weight),'bboxes\n')

        print('Converting dataframe of numbers to dataframe of strings...')
        sub_rows = []
        for jj,(image_id, sub_df) in enumerate(preds_test_weight.groupby('image_id')):
            if jj%50==0: print(jj,', ',end='')

            predsxx = ' '.join(
                f'{int(row.class_id)} {row.score} {int(row.x_min)} {int(row.y_min)} {int(row.x_max)} {int(row.y_max)}'
                for index, row in sub_df.iterrows())
            sub_rows.append((image_id, predsxx))

        subs = pd.DataFrame(sub_rows, columns=['image_id','PredictionString'])

        display( subs.head() )

        if (wbf_num+1) != len(wbf_files):
            subs.to_csv(f'test_b{wbf_num}.csv',index=False)
            print('Wrote to',f'test_b{wbf_num}.csv\n')
            files_to_remove.append(f'test_b{wbf_num}.csv')
        else:
            subs.to_csv('test_wbf.csv',index=False)
            print('Wrote to','test_wbf.csv\n')
            files_to_remove.append('test_wbf.csv')

In [None]:
# CREATE DETECTION PREDICTION STRINGS
# Re-attach the '_image' suffix to each fused image id and blend every box
# score with the two classifier outputs:
#   det**ALPHA * (1 - p["0"])**BETA * p["opacity"]**GAMMA
if commit:
    pred_img_df = pd.read_csv('test_wbf.csv')
    for f in files_to_remove: os.system(f'rm {f}')
    img_cls_df = pd.read_csv('/kaggle/working/image_cls.csv')

    # Collect the new values and assign whole columns afterwards. Writing to the
    # rows yielded by iterrows() is not guaranteed to update the DataFrame, and
    # a silent no-op would leave the ids without '_image', so the merge in the
    # next cell would match nothing.
    full_image_ids = []
    rescored_strings = []
    for row in pred_img_df.itertuples(index=False):
        image_id = row.image_id+'_image'

        # DETECTION SCORE POST PROCESS
        cls_row = img_cls_df.query("image_id==@image_id")
        prob_b = 1-cls_row["0"].values[0]
        prob_g = cls_row["opacity"].values[0]

        # WRITE DETECTION PREDICTION STRINGS
        p = row.PredictionString.split(' ')
        if len(p)%6!=0: print('ERROR')
        boxes = []
        for k in range(len(p)//6):
            pr = str(np.power(float(p[k*6+1]), ALPHA)*np.power(prob_b, BETA)*np.power(prob_g, GAMMA)) # geometric mean (x^alpha)*(y^beta)
            boxes.append(f'opacity {pr} {p[k*6+2]} {p[k*6+3]} {p[k*6+4]} {p[k*6+5]}')

        full_image_ids.append(image_id)
        rescored_strings.append(' '.join(boxes))

    pred_img_df['image_id'] = full_image_ids
    pred_img_df['PredictionString'] = rescored_strings

    # A bare expression inside `if` is not rendered by the notebook.
    display(pred_img_df.head())

In [None]:
if commit:
#     pred_img_df = pd.read_csv('test_wbf.csv') # dummy
    # Left-join the predictions onto every test image; images without a
    # prediction row get the default "none 1 0 0 1 1" string.
    image_df = (
        test_df[['image_id']]
        .merge(pred_img_df, on='image_id', how='left')
        .fillna("none 1 0 0 1 1")
        .rename(columns={'image_id':'id'})
    )
    print(image_df.PredictionString.value_counts().iloc[0:1])
    display(image_df.head())

# Utilize 4cls & 2cls for `none`

In [None]:
if commit:
    img_cls_df = pd.read_csv('/kaggle/working/image_cls.csv')
    def fix_labels(row):
        """Append a scored `none` entry to a row that has box predictions.

        The `none` score is p["0"]**BETA2 * (1 - p["opacity"])**GAMMA2, taken
        from the classifier row of the same image. Rows whose prediction is
        already exactly "none 1 0 0 1 1" are returned unchanged.
        """
        cls_row = img_cls_df.loc[img_cls_df.image_id == row['id']]
        prob_b = cls_row["0"].values[0] # study-level classifier
        prob_g = 1-cls_row["opacity"].values[0] # 2cls opacity classifier
        prob   = np.power(prob_b, BETA2)*np.power(prob_g, GAMMA2)
        if row['PredictionString']=="none 1 0 0 1 1":
            return row
        row['PredictionString'] = (row['PredictionString']+' '+f"none {prob} 0 0 1 1").strip(' ')
        return row
    image_df = image_df.progress_apply(fix_labels, axis=1)
    print('max: ',image_df.PredictionString.value_counts()[:1])

# BBox Filter

In [None]:
if BBOX_FILTER and commit:
    # Attach each image's size under the column names 'Width'/'Height' and
    # write the unfiltered image-level predictions to disk.
    size_df = test_df.rename(columns={'image_id':'id',
                                      'width':'Width',
                                      'height':'Height'})[['id', 'Width', 'Height']]
    image_df = image_df.merge(size_df)
    image_df.to_csv('/kaggle/working/image-lvl-nofilter.csv',index=False)
    print(image_df.head(2))

In [None]:
# Copy the bbox filter scripts into the working directory and change into them.
if BBOX_FILTER & commit:
    %cd /kaggle/working
    %cp -r /kaggle/input/siimcovid19detection-scripts-dataset/bbox /kaggle/working
    %cd /kaggle/working/bbox

In [None]:
# Run the external bbox filter: it is given the unfiltered image-level CSV and
# the path where the filtered CSV should be saved.
if BBOX_FILTER & commit:
    !python3 bbox_filter.py --sub-csv /kaggle/working/image-lvl-nofilter.csv\
    --save-csv /kaggle/working/image-lvl-filter.csv

In [None]:
# Replace `image_df` with the filtered predictions and return to the working
# directory.
if BBOX_FILTER & commit:
    image_df = pd.read_csv('/kaggle/working/image-lvl-filter.csv')
    %cd /kaggle/working

# Utilize **2cls** for `negative`

In [None]:
if commit:
    CLASS_LABELS  = ['0', '1', '2', '3']
    img_cls_df = pd.read_csv('/kaggle/working/image_cls.csv')

    none_cols, other_cols = CLASS_LABELS[:1], CLASS_LABELS[1:]

    # Complement of the 2cls opacity output, as a column vector so it
    # broadcasts against the class columns.
    prob_g = 1-img_cls_df["opacity"].values[:, None] # 2cls-opacity

    # Class '0' is blended with (1 - opacity) ...
    prob_0  = np.power(img_cls_df[none_cols].values, BETA3)*np.power(prob_g, GAMMA3)

    # ... and classes '1'..'3' with the complement of that term.
    prob_1  = np.power(img_cls_df[other_cols].values, BETA3)*np.power(1-prob_g, GAMMA3)

    # Write the blended values back over the four class columns.
    img_cls_df.loc[:, CLASS_LABELS] = np.hstack((prob_0, prob_1)).tolist()

In [None]:
if commit:
    # Study-level class name <-> classifier column index.
    name2label = {'negative': 0, 'indeterminate': 1, 'atypical': 2, 'typical': 3}
    label2name = dict((label, name) for name, label in name2label.items())

    # One row per study: the per-class maximum over that study's images.
    study_df = (
        img_cls_df.groupby(['study_id'])[CLASS_LABELS]
        .max()
        .reset_index()
        .rename(columns={'study_id':'id'})
    )

    def get_PredictionString(row, thr=0):
        """Build the study-level prediction string for one row.

        Every class whose confidence exceeds `thr` contributes
        "<name> <conf> 0 0 1 1"; if none does, the string is
        'negative 1.0 0 0 1 1'.
        """
        entries = []
        for idx in range(4):
            conf = row[str(idx)]
            if conf>thr:
                entries.append(f'{label2name[idx]} {conf} 0 0 1 1')
        if not entries:
            return 'negative 1.0 0 0 1 1'
        return ' '.join(entries)

    #------------------------
    # Submission csv
    #------------------------
    study_df['PredictionString'] = study_df.progress_apply(get_PredictionString, axis=1)
    study_df = study_df.drop(columns=CLASS_LABELS)

    print('study head:\n',study_df.head())
    print('study max:\n', study_df.value_counts()[0:1])

    print('study_df size:',study_df.shape[0])

# Image + Study

In [None]:
# Start from the sample submission so the ids and their order match it.
sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
if commit:
    # Stack image-level and study-level predictions, then replace the sample's
    # placeholder strings by joining on `id`.
    pred_df = pd.concat([image_df, study_df])
    sub_df  = sub_df.drop(columns=['PredictionString']).merge(pred_df, on='id', how='left')
sub_df.to_csv('/kaggle/working/submission.csv',index=False)
sub_df.head()

# Check None

In [None]:
# Show the two most frequent prediction strings, to spot an unexpectedly large
# share of identical (e.g. default `none`) predictions.
sub_df.PredictionString.value_counts()[0:2]

# Remove Unnecessary Files

In [None]:
# Delete intermediate artifacts from the working directory. submission.csv is
# not among the removed paths.
!rm -r /kaggle/working/image_cls.csv
!rm -r /kaggle/working/yolov5
if BBOX_FILTER:
    # Files and scripts that only exist when the bbox filter cells ran.
    !rm -r /kaggle/working/image-lvl-filter.csv
    !rm -r /kaggle/working/image-lvl-nofilter.csv
    !rm -r /kaggle/working/bbox
!rm -r /kaggle/working/test.csv
!rm -r /kaggle/working/test-raw.csv