## PyTorch + TensorFlow ensemble model

#### studylevel : effnetv2m(torch) + effnetv2l(tf)
#### 2class: effnetv2m+b3(torch) + effnetb5(tf)
#### imagelevel : yolov5(3model) + effdet + resdet (wbf ensemble)

### Only the inference notebook is published

## reference

### thanks to https://www.kaggle.com/sreevishnudamodaran/siim-effnetv2-l-cascadercnn-mmdetection-infer
### thanks to https://www.kaggle.com/micheomaano/siim-cov19-efnb7-yolov5-infer
### thanks to https://www.kaggle.com/h053473666/siim-cov19-efnb7-yolov5-infer
### thanks to https://www.kaggle.com/shangweichen/siim-efnb7-train-pytorch-xla-tpu


## my public notebook 

### [step1 get_imageinformation](https://www.kaggle.com/kunihikofurugori/siim-step1-get-imginfo).

### [step2 make_dataframe](https://www.kaggle.com/kunihikofurugori/step2-make-dataframe/edit/run/69201903).

### [step3-1 renew-imglev_ds](https://www.kaggle.com/kunihikofurugori/siim-step3-1-renew-imglev-ds)

### [step3-2 renew-studylev_ds](https://www.kaggle.com/kunihikofurugori/siim-step3-1-renew-studylev-ds)



In [None]:
# Install conda packages (libjpeg-turbo, libgcc-ng, gdcm, conda, certifi, openssl) from the
# local archives shipped in the attached pydicom-conda-helper dataset.
# NOTE(review): presumably these are needed so compressed DICOM pixel data can be decoded — confirm.
!conda install '/kaggle/input/pydicom-conda-helper/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

In [None]:
import os

from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
import cv2
import sys
sys.path.append("/kaggle/usr/lib/siim_infer_helper_func/")
from siim_infer_helper_func import *

In [None]:
# The row count of the sample submission selects the run mode.
df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')

# Exactly 2477 rows switches on the fast path, which runs the whole pipeline on two
# hand-picked training DICOMs instead of walking the test directory.
fast_sub = df.shape[0] == 2477
if fast_sub:
    tp1 = "/kaggle/input/siim-covid19-detection/train/00086460a852/9e8302230c91/65761e66de9f.dcm"
    tp2 = "/kaggle/input/siim-covid19-detection/train/000c9c05fd14/e555410bd2cd/51759b5579bc.dcm"
    # Stand-in submission frame: study rows first, image rows after.
    fast_df = pd.DataFrame(
        [
            ['00086460a852_study', 'negative 1 0 0 1 1'],
            ['000c9c05fd14_study', 'negative 1 0 0 1 1'],
            ['65761e66de9f_image', 'none 1 0 0 1 1'],
            ['51759b5579bc_image', 'none 1 0 0 1 1'],
        ],
        columns=['id', 'PredictionString'],
    )

In [None]:
# Output sizes for the study-level and image-level PNG exports.
study_size = (768, 768)
image_size = (640, 640)

split = 'test'

save_dir = f'/kaggle/tmp/{split}/'
os.makedirs(save_dir, exist_ok=True)

# Study-level PNGs go to their own sub-directory.
save_dir = f'/kaggle/tmp/{split}/study/'
os.makedirs(save_dir, exist_ok=True)

if not fast_sub:
    for dirname, _, filenames in tqdm(os.walk(f'../input/siim-covid19-detection/{split}')):
        for file in filenames:
            xray = read_xray(os.path.join(dirname, file))
            im = resize(xray, size1=study_size[0], size2=study_size[1])
            # Name the PNG after the second-to-last directory component of the walk path;
            # a later file sharing that component overwrites the earlier one.
            study = dirname.split('/')[-2] + '_study.png'
            im.save(os.path.join(save_dir, study))
else:
    # Fast path: the two fixed DICOMs, each written under its known study id.
    for dcm_path, study_id in ((tp1, '00086460a852'), (tp2, '000c9c05fd14')):
        xray = read_xray(dcm_path)
        im = resize(xray, size1=study_size[0], size2=study_size[1])
        study = study_id + '_study.png'
        im.save(os.path.join(save_dir, study))
        # Sanity check that the hard-coded id matches the path.
        assert(dcm_path.split("/")[-3] == study_id)

In [None]:
# Per-image bookkeeping: id, original height (dim0) and width (dim1), and the split name.
image_id = []
dim0 = []
dim1 = []
splits = []

save_dir = f'/kaggle/tmp/{split}/image/'
os.makedirs(save_dir, exist_ok=True)

if fast_sub:
    # Fast path: only the two fixed DICOMs, named after their own file stems.
    for dcm_path in (tp1, tp2):
        file = os.path.basename(dcm_path)
        xray = read_xray(dcm_path)
        im = cv2.resize(xray, image_size, interpolation=cv2.INTER_AREA)
        cv2.imwrite(os.path.join(save_dir, file.replace('.dcm', '_image.png')), im)
        image_id.append(file.replace('.dcm', ''))
        dim0.append(xray.shape[0])
        dim1.append(xray.shape[1])
        splits.append(split)
else:
    for dirname, _, filenames in tqdm(os.walk(f'../input/siim-covid19-detection/{split}')):
        for file in filenames:
            xray = read_xray(os.path.join(dirname, file))
            # Resized without preserving aspect ratio; the original shape is recorded so
            # boxes can be scaled back later.
            im = cv2.resize(xray, image_size, interpolation=cv2.INTER_AREA)
            cv2.imwrite(os.path.join(save_dir, file.replace('.dcm', '_image.png')), im)
            image_id.append(file.replace('.dcm', ''))
            dim0.append(xray.shape[0])
            dim1.append(xray.shape[1])
            splits.append(split)

meta = pd.DataFrame({'image_id': image_id, 'dim0': dim0, 'dim1': dim1, 'split': splits})

# study level and 2-class

## model and weight paths

In [None]:
# TensorFlow study-level (4-class) checkpoints.
# NOTE(review): the first two entries are the same file, so that model is counted twice
# in the average — confirm this is intended and not a typo for another fold.
study_tf_mpaths = [
    "/kaggle/input/bestmodeldataset/fold0v2l_0.831.h5",
    "/kaggle/input/bestmodeldataset/fold0v2l_0.831.h5",
    "/kaggle/input/bestmodeldataset/fold0_v2l_0.828.h5",
    "/kaggle/input/bestmodeldataset/fold1_v2l_0.828.h5",
]

# TensorFlow 2-class checkpoints, model0 .. model4.
binaryclass_tf_mpaths = [f"/kaggle/input/2class-tf/model{k}.h5" for k in range(5)]

# PyTorch study-level checkpoints and the matching timm architecture names
# (one architecture entry per checkpoint).
weightpath = [
    "../input/covid19-effneteffdet/blackout_effv2m_image600/pretrained_model_0_0.848.bin",
    "../input/covid19-effneteffdet/blackout_effv2m_image600/pretrained_model_2_0.841.bin",
]
modelpath = ["tf_efficientnetv2_m"] * 2

# PyTorch 2-class checkpoints and the matching timm architecture names.
nonewpath = [
    "../input/covid19-effneteffdet/none_effv2m_image600/model_0_0.903.bin",
    "../input/covid19-effneteffdet/effv2mnone_image640model/pretrained_model_1_0.925_imagesize640.bin",
    "../input/covid19-effneteffdet/none_effv2m_image600/model_2_0.925.bin",
    "../input/covid19-effneteffdet/effv2mnone_image640model/pretrained_model_3_0.908_imagesize640.bin",
    "../input/covid19-effneteffdet/effv2mnone_image640model/pretrained_model_4_0.901_centralcrop640to600.bin",
]
nonemodelpath = ["tf_efficientnetv2_m"] * 5

# tensorflow

In [None]:
import tensorflow as tf
import tensorflow_hub as tfhub

# Location of the TF-Hub EfficientNetV2 export inside the attached dataset.
MODEL_ARCH = 'efficientnetv2-l-21k-ft1k'
hub_type = 'feature_vector'  # one of: 'classification', 'feature_vector'
MODEL_ARCH_PATH = (
    '/kaggle/input/efficientnetv2-tfhub-weight-files/tfhub_models/'
    f'{MODEL_ARCH}/{hub_type}'
)


class KerasLayerWrapper(tfhub.KerasLayer):
    """KerasLayer that always builds itself from the local MODEL_ARCH_PATH export.

    The ``handle`` argument is accepted for signature compatibility but ignored: it is
    replaced by a layer loaded from the local directory.
    """

    def __init__(self, handle, **kwargs):
        local_layer = tfhub.KerasLayer(tfhub.load(MODEL_ARCH_PATH))
        super().__init__(local_layer, **kwargs)

In [None]:
import numpy as np 
import pandas as pd
# Submission frame for this run: the stub in fast mode, the real sample file otherwise.
df = fast_df.copy() if fast_sub else pd.read_csv('../input/siim-covid19-detection/sample_submission.csv')

# Last character of every id; ids ending in 'y' are the "..._study" rows.
id_laststr_list = [df.loc[row, 'id'][-1] for row in range(df.shape[0])]
df['id_last_str'] = id_laststr_list

# Number of study rows. Later cells slice [:study_len] and [study_len:], which relies on
# all study rows coming before the image rows.
study_len = int((df['id_last_str'] == 'y').sum())

In [None]:
# Prepare the TensorFlow-side modules: install keras-applications and the EfficientNet
# Keras source from the attached datasets (quiet mode; the second without resolving deps).

!pip install /kaggle/input/kerasapplications -q
!pip install /kaggle/input/efficientnet-keras-source-code/ -q --no-deps

import os

import efficientnet.tfkeras as efn
import tensorflow as tf

In [None]:
# TensorFlow study-level (4-class) inference: average the predictions of every checkpoint
# in study_tf_mpaths over the study PNGs.
if fast_sub:
    sub_df = fast_df.copy()
else:
    sub_df = pd.read_csv('../input/siim-covid19-detection/sample_submission.csv')
# Explicit copy: columns are added below, and writing into a bare slice of another frame
# triggers SettingWithCopyWarning.
sub_df = sub_df[:study_len].copy()
test_paths = f'/kaggle/tmp/{split}/study/' + sub_df['id'] + '.png'

# Placeholder columns for the four class probabilities.
sub_df['negative'] = 0
sub_df['typical'] = 0
sub_df['indeterminate'] = 0
sub_df['atypical'] = 0

strategy = auto_select_accelerator()
BATCH_SIZE = strategy.num_replicas_in_sync * 16
IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600, 768)

# Everything after 'id' and 'PredictionString' is a label column.
label_cols = sub_df.columns[2:]

# IMSIZE[8] == 768, the size the study PNGs were exported at.
test_decoder = build_decoder(with_labels=False, target_size=(IMSIZE[8], IMSIZE[8]), ext='png')
dtest = build_dataset(
    test_paths, bsize=BATCH_SIZE, repeat=False,
    shuffle=False, augment=False, cache=False,
    decode_fn=test_decoder
)

with strategy.scope():
    # The comprehension leaves no stray module-level reference to a loaded model,
    # so `del models` below releases all of them.
    models = [
        tf.keras.models.load_model(mpath, custom_objects={'KerasLayer': KerasLayerWrapper})
        for mpath in study_tf_mpaths
    ]

# Unweighted mean over the checkpoints.
sub_df[label_cols] = sum([model.predict(dtest, verbose=1) for model in models]) / len(models)
sub_df_tf = sub_df.copy()
del models

In [None]:
# TensorFlow 2-class ("none") inference: average the predictions of every checkpoint in
# binaryclass_tf_mpaths over the image PNGs.
if fast_sub:
    sub_df = fast_df.copy()
else:
    sub_df = pd.read_csv('../input/siim-covid19-detection/sample_submission.csv')
# Explicit copy: a column is added below, and writing into a bare slice of another frame
# triggers SettingWithCopyWarning.
sub_df = sub_df[study_len:].copy()
test_paths = f'/kaggle/tmp/{split}/image/' + sub_df['id'] + '.png'
sub_df['none'] = 0

strategy = auto_select_accelerator()
BATCH_SIZE = strategy.num_replicas_in_sync * 16
IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600, 512)

# Single label column: 'none'.
label_cols = sub_df.columns[2]

# IMSIZE[8] == 512 is the decoder target size used for these models.
test_decoder = build_decoder(with_labels=False, target_size=(IMSIZE[8], IMSIZE[8]), ext='png')
dtest = build_dataset(
    test_paths, bsize=BATCH_SIZE, repeat=False,
    shuffle=False, augment=False, cache=False,
    decode_fn=test_decoder
)

with strategy.scope():
    # The comprehension leaves no stray module-level reference to a loaded model,
    # so `del models` below releases all of them.
    models = [tf.keras.models.load_model(mpath) for mpath in binaryclass_tf_mpaths]

# Unweighted mean over the checkpoints.
sub_df[label_cols] = sum([model.predict(dtest, verbose=1) for model in models]) / len(models)
# Reset to a 0-based index so this frame lines up with the PyTorch 2-class frame.
df_2class_tf = sub_df.copy().reset_index(drop=True)
del models

In [None]:
# Reset the current CUDA device through numba once the TensorFlow models are done.
# NOTE(review): presumably this frees the GPU memory held by TensorFlow before the
# PyTorch models load — confirm.
from numba import cuda 
device = cuda.get_current_device()
device.reset()

# Pytorch

In [None]:
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import cv2
import torch
import torch.nn as nn

import sys
sys.path.insert(0, "../input/timm-pytorch-image-models/pytorch-image-models-master/")
import timm

device = 'cuda'

# Pytorch studylevel

In [None]:
# Frame for PyTorch study-level predictions: the study rows of the submission.
if fast_sub:
    sub_df = fast_df.copy()
else:
    sub_df = pd.read_csv('../input/siim-covid19-detection/sample_submission.csv')
# Explicit copy: columns are added below, and writing into a bare slice of another frame
# triggers SettingWithCopyWarning.
sub_df = sub_df[:study_len].copy()
test_paths = f'/kaggle/tmp/{split}/study/' + sub_df['id'] + '.png'

# Placeholder columns for the four class probabilities.
sub_df['negative'] = 0
sub_df['typical'] = 0
sub_df['indeterminate'] = 0
sub_df['atypical'] = 0

# Everything after 'id' and 'PredictionString' is a label column.
label_cols = sub_df.columns[2:]
sub_df

In [None]:
def get_valid_transforms():
    """Return the study-level inference pipeline: resize to 600x600, then convert to a tensor."""
    steps = [
        A.Resize(height=600, width=600, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)

DATA_ROOT_PATH = '/kaggle/tmp/test/study'

class DatasetRetriever(Dataset):
    """Study-level inference dataset.

    Args:
        df: frame with an ``id`` column, indexed so that ``df.id[index]`` works for
            ``index`` in ``range(len(df))``.
        path: image paths, indexable the same way as ``df``.
        transforms: optional callable taking ``image=...`` and returning a dict with an
            ``'image'`` entry.
    """

    def __init__(self, df, path, transforms=None):
        super().__init__()
        self.df = df
        self.path = path
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return ``(image, histimage, image_id)`` for one row.

        ``image`` is the [0, 1]-scaled picture after ``transforms``; ``histimage`` is the
        raw picture as a tensor, histogram-equalized first when its standard deviation is
        below 20.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        path = self.path[index]
        image_id = self.df.id[index]
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'could not read image: {path}')
        # Low-contrast pictures get histogram equalization for the auxiliary copy.
        if image.std() < 20.0:
            histimage = A.Equalize(p=1.0)(image=image)['image']
        else:
            histimage = image
        histimage = ToTensorV2(p=1.0)(image=histimage)["image"]
        image = image.astype(np.float32)
        image /= 255.0
        if self.transforms:
            image = self.transforms(image=image)['image']

        # The downstream models expect channel-first 600x600 input.
        assert(image.shape == (3,600,600))
        # Return `image` itself: the previous `sample['image']` was unbound whenever
        # no transforms were supplied.
        return image, histimage, image_id

    def __len__(self) -> int:
        """Number of rows in the underlying frame."""
        return self.df.shape[0]

In [None]:
# Study-level dataset over the frame and PNG paths prepared above.
dataset = DatasetRetriever(df=sub_df, path=test_paths, transforms=get_valid_transforms())

# Sequential, complete pass: no shuffling and the last partial batch is kept, so the
# outputs stay in frame order.
data_loader = DataLoader(dataset, batch_size=16, shuffle=False, num_workers=4, drop_last=False)

In [None]:
class EfficientNetModel(nn.Module):
    """timm model whose ``classifier`` is replaced by a new linear layer with ``num_classes`` outputs."""

    def __init__(self, model_name, num_classes=4, pretrained=True):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained, in_chans=3)
        # Swap the stock classifier for one sized to this task.
        backbone.classifier = nn.Linear(backbone.classifier.in_features, num_classes)
        self.model = backbone

    def forward(self, x):
        """Return the wrapped model's output for ``x``."""
        return self.model(x)

# Build one study-level classifier per (checkpoint, architecture) pair, in eval mode.
models = []
mmodels = []

for wpath, mpath in zip(weightpath, modelpath):
    print(wpath, mpath)
    model = EfficientNetModel(mpath, pretrained=False)
    model = model.to(device)
    state = torch.load(wpath)
    model.load_state_dict(state)
    model.eval()
    models.append(model)

In [None]:
# Run the PyTorch study-level ensemble; rows accumulate in loader order.
image_ids = []
outputs = np.empty((0, 4))

with torch.no_grad():
    for image, histimage, image_id in data_loader:
        image = image.cuda().float()
        histimage = histimage.cuda().float()

        # Sum the per-model softmax probabilities in list order, then average.
        output = None
        for model in models:
            probs = model(image).softmax(dim=1)
            output = probs if output is None else output + probs
        output = output / len(models)

        batch_probs = output.cpu().detach().numpy()
        outputs = np.concatenate((outputs, batch_probs), axis=0)
        image_ids = np.append(image_ids, image_id)

print(outputs.shape)
print(image_ids.shape)

# Write ids and probabilities back and keep a copy for blending with TensorFlow.
sub_df.id = image_ids
sub_df[label_cols] = outputs
sub_df_torch = sub_df.copy()

In [None]:
# Blend both frameworks with equal weight.
display(sub_df_torch, sub_df_tf)
blended = sub_df_tf[label_cols].add(sub_df_torch[label_cols]).div(2)
sub_df[label_cols] = blended
display(sub_df)
df_study = sub_df.copy()

In [None]:
# Encode each study row as "<class> <prob> 0 0 1 1" for the four classes, in this order.
class_order = ['negative', 'typical', 'indeterminate', 'atypical']
for i in range(study_len):
    pieces = [f'{name} {df_study.loc[i, name]} 0 0 1 1' for name in class_order]
    df_study.loc[i, 'PredictionString'] = ' '.join(pieces)

# Only the submission columns are kept.
df_study = df_study[["id", "PredictionString"]]
display(df_study)

In [None]:
# Drop the references to the study-level models.
del model, models

# Pytorch 2-class

In [None]:
# Frame for PyTorch 2-class predictions: the image rows of the submission, re-indexed
# from 0 so the dataset can look rows up by position.
sub_df = fast_df.copy() if fast_sub else pd.read_csv('../input/siim-covid19-detection/sample_submission.csv')
sub_df = sub_df.iloc[study_len:].reset_index(drop=True)
test_paths = f'/kaggle/tmp/{split}/image/' + sub_df['id'] + '.png'

# Placeholder for the "none" probability; it is the only label column.
sub_df['none'] = 0
label_cols = sub_df.columns[2]
sub_df

In [None]:
def get_valid_transforms():
    """Return the 2-class inference pipeline: tensor conversion only, no resizing."""
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps, p=1.0)

DATA_ROOT_PATH = '/kaggle/tmp/test/study'

class DatasetRetriever(Dataset):
    """Image-level dataset for the 2-class models, yielding three variants of each image.

    Args:
        df: frame with an ``id`` column, indexed so that ``df.id[index]`` works for
            ``index`` in ``range(len(df))``.
        path: image paths, indexable the same way as ``df``.
        transforms: stored but not used by ``__getitem__``.
    """

    def __init__(self, df, path, transforms=None):
        super().__init__()
        self.df = df
        self.path = path
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return ``(image, cimage, rimage, image_id)`` for one row.

        All three pictures are [0, 1]-scaled tensors: ``image`` at stored size,
        ``cimage`` a 600x600 center crop, ``rimage`` resized to 600x600.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        path = self.path[index]
        image_id = self.df.id[index]
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'could not read image: {path}')
        image = image.astype(np.float32)
        image /= 255.0
        cimage = A.CenterCrop(height=600, width=600, p=1.0)(image=image)["image"]
        rimage = cv2.resize(image, (600, 600))
        image = ToTensorV2(p=1.0)(image=image)["image"]
        cimage = ToTensorV2(p=1.0)(image=cimage)["image"]
        rimage = ToTensorV2(p=1.0)(image=rimage)["image"]

        return image, cimage, rimage, image_id

    def __len__(self) -> int:
        """Number of rows in the underlying frame."""
        return self.df.shape[0]

In [None]:
# Build one single-output classifier per (checkpoint, architecture) pair, in eval mode.
nonemodels = []
for wpath, mpath in zip(nonewpath, nonemodelpath):
    model = EfficientNetModel(mpath, pretrained=False, num_classes=1)
    model = model.to(device)
    state = torch.load(wpath)
    model.load_state_dict(state)
    model.eval()
    nonemodels.append(model)

In [None]:
# Image-level dataset over the frame and PNG paths prepared above.
dataset = DatasetRetriever(df=sub_df, path=test_paths, transforms=get_valid_transforms())

# Sequential, complete pass: no shuffling and the last partial batch is kept, so the
# outputs stay in frame order.
data_loader = DataLoader(dataset, batch_size=16, shuffle=False, num_workers=4, drop_last=False)

In [None]:
# Run the PyTorch 2-class ensemble; probabilities accumulate in loader order.
image_ids = []
noneout = []

with torch.no_grad():
    for image, cimage, rimage, image_id in data_loader:
        image = image.cuda().float()
        cimage = cimage.cuda().float()
        rimage = rimage.cuda().float()

        # Input variant per model, by position in nonemodels:
        # 0 and 2 take the resized copy, 1 and 3 the stored-size image, 4 the center crop.
        model_inputs = (rimage, image, rimage, image, cimage)
        member_probs = [nonemodels[k](batch).sigmoid() for k, batch in enumerate(model_inputs)]

        # Left-to-right sum of the five outputs, then the mean.
        outputn = member_probs[0]
        for prob in member_probs[1:]:
            outputn = outputn + prob
        outputn = outputn / 5

        noneout = np.append(noneout, outputn.cpu().detach().numpy())
        image_ids = np.append(image_ids, image_id)

print(noneout.shape)
print(image_ids.shape)
sub_df[label_cols] = noneout
df_2class_torch = sub_df.reset_index(drop=True).copy()

In [None]:
# Blend both frameworks' "none" probabilities with equal weight.
display(df_2class_tf, df_2class_torch)
blended_none = df_2class_tf[label_cols].add(df_2class_torch[label_cols]).div(2.0)
sub_df[label_cols] = blended_none
display(sub_df)
df_2class = sub_df.copy()

In [None]:
# Drop the references to the 2-class models.
del model, nonemodels

# image classification end 

# image level start

## image level path list

In [None]:
# YOLOv5 checkpoints for folds 0-2.
weights_dir = [f'/kaggle/input/yolo-bestresult/best_fold{fold}.pt' for fold in range(3)]

# effdet checkpoints, with the matching config name at the same position in model_names.
paths = [
    "/kaggle/input/covid19-effneteffdet/d3_image640/fold4-d3_0.660.bin",
    "/kaggle/input/covid19-effneteffdet/fold0-cspresdext50pan_0.650.bin",
]
model_names = [
    "tf_efficientdet_d3",
    "cspresdext50pan",
]

# Box-fusion weights: between the two effdet models, then between that result and YOLO.
det_weights = [1, 1]
weight_detyolo = [1, 1]

# yolo

In [None]:
import numpy as np, pandas as pd
from glob import glob
import shutil, os
import matplotlib.pyplot as plt
from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm
import seaborn as sns
import torch


dim = 640  # 1024, 256, 'original'
test_dir = f'/kaggle/tmp/{split}/image'

# Copy the YOLOv5 sources into the writable working directory and run from there.
# shutil.copytree raises if the destination already exists, so the copy is skipped when
# this cell is re-run.
yolo_src = '/kaggle/input/yolov5-official-v31-dataset/yolov5'
yolo_dst = '/kaggle/working/yolov5'
if not os.path.isdir(yolo_dst):
    shutil.copytree(yolo_src, yolo_dst)
os.chdir(yolo_dst)


In [None]:
# Run YOLOv5's detect.py on the exported image PNGs with the three fold checkpoints,
# at image size 640, confidence threshold 0.001 and IoU threshold 0.5.
# --save-txt/--save-conf write one label file per image (with confidences) under the
# run named infer_fold; --exist-ok allows reusing that run directory.
# NOTE(review): passing several --weights presumably makes detect.py ensemble them — confirm.
!python detect.py \
--weights {weights_dir[0]} {weights_dir[1]} {weights_dir[2]} \
--img 640\
--conf 0.001\
--iou 0.5\
--source $test_dir\
--name infer_fold \
--save-txt --save-conf --exist-ok

# Label files written by the run above (path is relative to the current directory).
labelpaths = glob("runs/detect/infer_fold/labels/*")

In [None]:
# Attach the original image dimensions to the image rows of the submission.
# Copy the filtered view: its columns are edited below, and writing into a slice of the
# original frame triggers SettingWithCopyWarning.
meta = meta[meta['split'] == 'test'].copy()
if fast_sub:
    test_df = fast_df.copy()
else:
    test_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
# Slice the frame that was just loaded (previously it was discarded in favour of `df`).
test_df = test_df[study_len:].reset_index(drop=True)

# Turn image ids into submission ids ("<id>_image") and rename the key column.
# Guarded so that re-running the cell does not append the suffix twice or fail on the
# already-renamed column.
if 'image_id' in meta.columns:
    meta['image_id'] = meta['image_id'] + '_image'
    meta.columns = ['id', 'dim0', 'dim1', 'split']
test_df = pd.merge(test_df, meta, on='id', how='left')

test_df

In [None]:
# Parse the YOLO label files into per-image score and box arrays.
predbox_ens = []
image_ids = []
score = []
box = []
for file_path in tqdm(labelpaths):
    image_id = file_path.split('/')[-1].split('.')[0]
    w, h = test_df.loc[test_df.id == image_id, ['dim1', 'dim0']].values[0]
    # Context manager so the handle is closed; split() on any whitespace also copes
    # with an empty file or repeated separators.
    with open(file_path, 'r') as f:
        tokens = f.read().split()
    # Six values per detection.
    data = np.array(tokens).astype(np.float32).reshape(-1, 6)
    # Move the last column (written by --save-conf) next to the first one, so the four
    # box values become columns 2..5.
    data = data[:, [0, 5, 1, 2, 3, 4]]
    data = np.concatenate([data[:, :2], yolo2a(data[:, 2:])], axis=1)

    if fast_sub:
        # Pixel-space boxes are only needed for the fast-path plots.
        predbox_ens.append(norm2hw(h, w, data[:, 2:]))

    image_ids.append(image_id)
    score.append(data[:, 1])
    box.append(data[:, 2:])

pred_df_yolo = pd.DataFrame({'id': image_ids, 'score': score, 'label': 1, 'box': box})

In [None]:
# Display the parsed YOLO predictions (one row per label file).
pred_df_yolo

In [None]:
# Fast path only: draw the YOLO boxes on the two sample DICOMs.
if fast_sub:
    for k, dcm_path in enumerate((tp1, tp2)):
        box_plot(dcm_path, predbox_ens[k])

# effdet

In [None]:
# Install pycocotools (without resolving dependencies) and omegaconf from local wheels
# in the attached datasets.
!pip install --no-deps '/kaggle/input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl'
!pip install "/kaggle/input/effdet-latestvinbigdata-wbf-fused/omegaconf-2.0.6-py3-none-any.whl"

In [None]:
import sys
sys.path.insert(0, "/kaggle/input/effdet-latestvinbigdata-wbf-fused/efficientdet-pytorch/")
sys.path.insert(0, "/kaggle/input/timm-pytorch-image-models/pytorch-image-models-master/")
sys.path.insert(0, "/kaggle/input/weightedboxesfusion")

import torch
import os

import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from effdet import get_efficientdet_config, EfficientDet, DetBenchPredict
from effdet.efficientdet import HeadNet
from glob import glob
import gc
from ensemble_boxes import *

In [None]:
# Rebuild the image-row frame with dimensions for the effdet dataset.
# `meta` is used as-is here: the YOLO section has already filtered it and renamed its
# key column to 'id'.
if fast_sub:
    test_df = fast_df.copy()
else:
    test_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
# Slice the frame that was just loaded (previously it was discarded in favour of `df`).
test_df = test_df[study_len:].reset_index(drop=True)
test_df = pd.merge(test_df, meta, on='id', how='left')

test_df

In [None]:
def get_valid_transforms():
    """Return the detector inference pipeline: tensor conversion only, no resizing."""
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps, p=1.0)

DATA_ROOT_PATH = '/kaggle/tmp/test/image'

class DatasetRetriever(Dataset):
    """Image-level dataset for the effdet models.

    Args:
        df: frame with ``id``, ``dim0`` and ``dim1`` columns; images are read from
            ``DATA_ROOT_PATH/<id>.png``.
        transforms: optional callable taking ``image=...`` and returning a dict with an
            ``'image'`` entry.
    """

    def __init__(self, df, transforms=None):
        super().__init__()
        self.df = df
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return ``(image, image_id, dim0, dim1)`` for the row at position ``index``.

        ``image`` is the [0, 1]-scaled picture after ``transforms`` (if any); ``dim0``
        and ``dim1`` are passed through from the frame.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_id = self.df.id.values[index]
        image_path = f'{DATA_ROOT_PATH}/{image_id}.png'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'could not read image: {image_path}')
        dim0 = self.df.dim0.values[index]
        dim1 = self.df.dim1.values[index]
        image = image.astype(np.float32)
        image /= 255.0
        if self.transforms:
            image = self.transforms(image=image)['image']

        # Return `image` itself: the previous `sample['image']` raised NameError whenever
        # no transforms were supplied.
        return image, image_id, dim0, dim1

    def __len__(self) -> int:
        """Number of rows in the underlying frame."""
        return self.df.shape[0]

In [None]:
# Detector dataset over the image rows merged with their dimensions.
dataset = DatasetRetriever(df=test_df, transforms=get_valid_transforms())

# Sequential, complete pass: no shuffling and the last partial batch is kept.
data_loader = DataLoader(dataset, batch_size=16, shuffle=False, num_workers=4, drop_last=False)

In [None]:
def load_net(checkpoint_path, model_name, image_size=(640, 640)):
    """Build an effdet prediction bench from a checkpoint and move it to the GPU.

    Args:
        checkpoint_path: file whose ``'model_state_dict'`` entry holds the weights.
        model_name: effdet config name passed to ``get_efficientdet_config``.
        image_size: input size written into the config; defaults to the 640x640 used
            by the rest of this notebook.

    Returns:
        A ``DetBenchPredict`` wrapper in eval mode, on CUDA.
    """
    config = get_efficientdet_config(model_name)
    config.image_size = list(image_size)
    config.norm_kwargs = dict(eps=.001, momentum=.01)
    net = EfficientDet(config, pretrained_backbone=False)
    # Single-class head.
    net.reset_head(num_classes=1)
    net.class_net = HeadNet(config, num_outputs=config.num_classes)

    # `net` lives on the CPU at this point, so load the checkpoint there too; this works
    # regardless of the device it was saved from and avoids a temporary GPU copy.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    net.load_state_dict(checkpoint['model_state_dict'])
    del checkpoint
    gc.collect()

    evalnet = DetBenchPredict(net)
    evalnet.eval()
    return evalnet.cuda()



In [None]:
# Load every effdet checkpoint with its matching config name.
nets = []
for checkpoint_file, arch in zip(paths, model_names):
    print(checkpoint_file)
    nets.append(load_net(checkpoint_file, arch))

In [None]:
def make_predictions(images, nets, score_threshold=0.001):
    """Run every detector on a batch and fuse their boxes per image.

    Args:
        images: batch tensor; moved to CUDA and cast to float here.
        nets: detection models, each called as ``model(images)``.
        score_threshold: fused boxes scoring at or below this are dropped.

    Returns:
        One dict per image with ``'boxes'``, ``'scores'`` and ``'label'`` arrays.
        Box coordinates are divided by 640, i.e. relative to the 640-pixel input.
    """
    images = images.cuda().float()
    predictions = []
    with torch.no_grad():
        dets = [model(images) for model in nets]
        for i in range(images.shape[0]):
            # One device-to-host copy per (model, image) instead of three.
            per_model = [det[i].detach().cpu().numpy() for det in dets]
            # Columns 0-3 are the box (pairs swapped via [1, 0, 3, 2]), 4 the score,
            # 5 the class.
            boxes = [d[:, [1, 0, 3, 2]] / 640.0 for d in per_model]
            scores = [d[:, 4] for d in per_model]
            label = [d[:, 5] for d in per_model]
            boxes, scores, label = weighted_boxes_fusion(boxes,
                                                         scores,
                                                         label,
                                                         weights=det_weights,
                                                         iou_thr=0.6,
                                                         conf_type='avg',
                                                         skip_box_thr=0.01)

            keep = np.where(scores > score_threshold)[0]
            predictions.append({
                'boxes': boxes[keep],
                'scores': scores[keep],
                'label': label[keep]
            })
    return predictions

def format_prediction_string(boxes, scores):
    """Return "opacity <score> <b0> <b1> <b2> <b3>" entries joined by single spaces.

    Scores are printed with four decimals; box values are printed as given. Pairs are
    formed with ``zip``, so extra boxes or scores beyond the shorter input are ignored.
    """
    return " ".join(
        f"opacity {score:.4f} {coords[0]} {coords[1]} {coords[2]} {coords[3]}"
        for score, coords in zip(scores, boxes)
    )

In [None]:
# Fuse the effdet predictions with the YOLO predictions per image, scale the fused boxes
# back to the original image size and format them for the submission.
results = []
boxess = []
for j, (images, image_ids, dim0, dim1) in enumerate(data_loader):
    predictions = make_predictions(images, nets)
    for i, prediction in enumerate(predictions):
        boxes = prediction['boxes']
        scores = prediction['scores']
        label = prediction['label']
        image_id = image_ids[i]

        yolorow = pred_df_yolo[pred_df_yolo.id == image_id]
        if len(yolorow):
            boxes_y = yolorow.box.values[0]
            scores_y = yolorow.score.values[0]
            # The YOLO label array must have one entry per YOLO box. Previously the
            # effdet label array was reused, whose length generally differs. Reuse the
            # effdet class value when there is one, so both sources share a label and
            # can be fused together; otherwise fall back to the label stored with the
            # YOLO predictions.
            class_value = label[0] if len(label) else yolorow.label.values[0]
            labels_y = np.full(len(boxes_y), class_value, dtype=np.float32)
        else:
            # No YOLO label file for this image: contribute an empty prediction set
            # instead of failing, keeping the two-source weighting unchanged.
            boxes_y = np.zeros((0, 4), dtype=np.float32)
            scores_y = np.zeros((0,), dtype=np.float32)
            labels_y = np.zeros((0,), dtype=np.float32)

        ens_box = [boxes, boxes_y]
        ens_score = [scores, scores_y]
        ens_label = [label, labels_y]

        boxes, scores, label = weighted_boxes_fusion(ens_box,
                                                     ens_score,
                                                     ens_label,
                                                     weights=weight_detyolo,
                                                     iou_thr=0.6,
                                                     conf_type='avg',
                                                     skip_box_thr=0.01)

        # Columns 0/2 scale with dim1 and columns 1/3 with dim0 (original image shape).
        boxes[:, 0], boxes[:, 2] = boxes[:, 0] * dim1[i].item(), boxes[:, 2] * dim1[i].item()
        boxes[:, 1], boxes[:, 3] = boxes[:, 1] * dim0[i].item(), boxes[:, 3] * dim0[i].item()
        boxes = boxes.astype(np.int32)
        if fast_sub:
            # Kept only for the fast-path plots.
            boxess.append(boxes)
        result = {
            'id': image_id,
            'PredictionString': format_prediction_string(boxes, scores)
        }
        results.append(result)
pred_df = pd.DataFrame(results, columns=['id', 'PredictionString'])
pred_df

In [None]:
# Attach the blended "none" probability to each image row (aligned on the row index).
pred_df = pred_df.assign(none=df_2class['none'])
pred_df

In [None]:
# Combine the opacity boxes with the "none" prediction.
# Iterate over pred_df's own rows; the loop was previously bounded by a different frame.
for i in range(pred_df.shape[0]):
    none_prob = pred_df.loc[i, 'none']
    if none_prob < 0.95:
        # Keep the boxes and append the "none" entry with its probability.
        opacity_part = pred_df.loc[i, 'PredictionString']
        none_part = 'none ' + str(none_prob) + ' 0 0 1 1'
        # Without any boxes, emit just the "none" entry (no leading space).
        pred_df.loc[i, 'PredictionString'] = opacity_part + ' ' + none_part if opacity_part else none_part
    else:
        # Confident "none": drop the boxes altogether.
        pred_df.loc[i, 'PredictionString'] = 'none 1 0 0 1 1'
pred_df = pred_df[['id', 'PredictionString']]
pred_df

In [None]:
# Fast path only: draw the fused boxes on the two sample DICOMs.
if fast_sub:
    for k, dcm_path in enumerate((tp1, tp2)):
        box_plot(dcm_path, boxess[k])

In [None]:
# Stack study rows and image rows into the final submission.
# pd.concat replaces DataFrame.append, which is deprecated and removed in pandas 2.0.
df_study = pd.concat([df_study, pred_df]).reset_index(drop=True)
df_study = df_study[['id', 'PredictionString']]
df_study.to_csv('/kaggle/working/submission.csv', index=False)
df_study