# **EfficientDet Pytorch Starter**

Hi Everyone,

I couldn't see an existing one around, so I thought I would create a quick starter for an EfficientDet pipeline using PyTorch. This kernel uses the package by Ross Wightman, available on GitHub [here](https://github.com/rwightman/efficientdet-pytorch) (albeit a rather old version, for compatibility).

Thanks to Alex Shonenkov [here](https://www.kaggle.com/shonenkov/training-efficientdet), as this is loosely based on his starter for the GWD competition. I had to make some changes to the timm-efficientdet package to get it to work correctly.

For those who haven't seen it before, EfficientDet is a one-stage object detection model originally published by Mingxing Tan, Ruoming Pang and Quoc V. Le from Google Research. The paper is available [here](https://arxiv.org/abs/1911.09070). Its performance compared to other current object detection models is shown below:

![image.png](https://github.com/google/automl/blob/master/efficientdet/g3doc/flops.png?raw=true)

[Source](https://github.com/google/automl/blob/master/efficientdet/g3doc/flops.png)

Right, let's start the code. Let's do our installs:

In [None]:
#Need to install these - Only in the submission version for DICOM processing. Not needed here
#!conda install -c conda-forge pillow -y
#!conda install -c conda-forge pydicom -y
#!conda install -c conda-forge gdcm -y
#!pip install pylibjpeg pylibjpeg-libjpeg
# Install timm and pycocotools from wheel files under ../input. --no-deps stops
# pip from installing their dependencies; pip's stdout is discarded.
!pip install --no-deps '../input/timm-package/timm-0.1.26-py3-none-any.whl' > /dev/null
!pip install --no-deps '../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl' > /dev/null

Now we build our training bench for EfficientDet-D1 (`tf_efficientdet_d1`; just replace it with whichever EfficientDet level you wish to use). Thanks to user @mathurinache for the pretrained weights [here](https://www.kaggle.com/mathurinache/efficientdet).

In [None]:
import sys
sys.path.insert(0, "../input/timmefficienctdetpytorchstable/archive")
sys.path.insert(0, "../input/omegaconf")
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)  
import torch
import torch.nn.functional as F
import torchvision
import traceback
from tqdm import tqdm
from torch.utils.data import Dataset
import torchvision.models as models
from PIL import Image
from torch import optim
from effdet import *
from effdet.efficientdet import HeadNet
from effdet.anchors import Anchors, AnchorLabeler, generate_detections
from effdet.loss import DetectionLoss

class EfficientDetTrainer(torch.nn.Module):
    """Training wrapper that turns a detector forward pass into a scalar loss.

    The wrapper builds the anchors from ``config``, labels them against the
    ground-truth boxes of every image in the batch, and passes the network
    outputs together with those anchor targets to ``DetectionLoss``.
    """

    def __init__(self, model, config, device):
        """Store the model and build the anchors, anchor labeler and loss.

        Args:
            model: Callable network returning ``(class_out, box_out)``.
            config: Detector config; supplies the anchor settings read below
                and is handed to ``DetectionLoss``.
            device: Device passed through to ``Anchors``.
        """
        super().__init__()
        self.model = model
        self.config = config
        self.my_anchors = Anchors(
            config.min_level, config.max_level,
            config.num_scales, config.aspect_ratios,
            config.anchor_scale, config.image_size, device)
        # NOTE(review): the literal 1 is presumably the class count (it matches
        # config.num_classes as set in get_efficientDet) - confirm against effdet.
        self.a = AnchorLabeler(self.my_anchors, 1, match_threshold=0.5)
        self.loss_fn = DetectionLoss(config)

    def forward(self, x, boxes, classes):
        """Compute the detection loss for one batch.

        Args:
            x: Batched input; its first dimension is the batch size.
            boxes: Per-image ground-truth boxes, indexable by batch position.
            classes: Per-image ground-truth classes, indexable by batch position.

        Returns:
            The first element (total loss) of what ``self.loss_fn`` returns.
        """
        class_out, box_out = self.model(x)
        cls_targets = []
        box_targets = []
        num_positives = []

        # Take the batch size from the argument itself so forward() does not
        # depend on any name defined outside this class.
        for i in range(x.shape[0]):
            gt_class_out, gt_box_out, num_positive = self.a.label_anchors(boxes[i], classes[i])
            cls_targets.append(gt_class_out)
            box_targets.append(gt_box_out)
            num_positives.append(num_positive)

        # The loss also returns its class and box components; only the total is used.
        loss, _class_loss, _box_loss = self.loss_fn(
            class_out, box_out, cls_targets, box_targets, num_positives)
        return loss

def get_efficientDet():
    """Create the D1 detector and give it a fresh single-class head.

    Returns:
        tuple: ``(net, config)`` - the ``EfficientDet`` instance whose
        ``class_net`` has been replaced, and the config after ``num_classes``
        and ``image_size`` were overwritten.
    """
    cfg = get_efficientdet_config('tf_efficientdet_d1')
    # The network is constructed from the config as returned above; the
    # overrides below are applied only afterwards and feed the new head.
    detector = EfficientDet(cfg, pretrained_backbone=False)

    cfg.num_classes = 1
    cfg.image_size = 512

    head_norm = {'eps': .001, 'momentum': .01}
    detector.class_net = HeadNet(
        cfg,
        num_outputs=cfg.num_classes,
        norm_kwargs=head_norm,
    )
    return detector, cfg

# Build the network once at module scope; `model` and `config` are both used by the training cell.
model, config = get_efficientDet()



Next, we create our dataset class and our dataloaders. Thanks to xhlulu [here](https://www.kaggle.com/xhlulu/siim-covid-19-convert-to-jpg-256px) for the converted images (DICOM to JPG, resized to 512px). I have only used images with bounding boxes for training (i.e. no background-only images). I suspect this may not be a good strategy and needs to be investigated.

In [None]:
import os

# Only images with bounding boxes are used for training; this is the label
# list carried by an image that has none.
error_list = ['none 1 0 0 1 1']

# Every file found anywhere under the competition's train directory.
files = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input/siim-covid19-detection/train/')
    for name in names
]

# PIL image -> tensor, then per-channel normalisation (the commonly used ImageNet values).
Normalizer = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])


# Image-level labels; the 'id' and 'label' columns are read later on.
csv_image_trainer = pd.read_csv("/kaggle/input/siim-covid19-detection/train_image_level.csv")
# Study-level labels; looked up by 'id', with the columns at positions 1-4 used as the target.
csv_study_trainer = pd.read_csv("/kaggle/input/siim-covid19-detection/train_study_level.csv")
# Metadata of the resized images; 'image_id', 'dim0' and 'dim1' are read by the dataset.
meta_csv = pd.read_csv('/kaggle/input/resize512px/meta.csv')


# Index the image-level labels by id once, so each file needs a dict lookup
# instead of a scan over the whole DataFrame.
labels_by_id = {}
for row_id, row_label in zip(csv_image_trainer['id'], csv_image_trainer['label']):
    labels_by_id.setdefault(row_id, []).append(row_label)

localizer_files = []
#Remove all the ones with only background for training regression model
for path in files:
    # Entries are kept as the '/'-split path because the dataset class indexes
    # into the parts. Part 7 is the file name for paths of the form
    # /kaggle/input/siim-covid19-detection/train/<dir>/<dir>/<file>.
    path_parts = path.split('/')
    image_file = path_parts[7].split('.')[0] + '_image'
    # An id with no row yields an empty list, which differs from error_list, so the file is kept.
    if labels_by_id.get(image_file, []) != error_list:
        localizer_files.append(path_parts)


print(len(files))
print(len(localizer_files))

# ======================
# ======================
# Params
BATCH_SIZE = 2  # batch_size given to both DataLoaders
N_WORKERS = 4  # num_workers given to both DataLoaders
N_EPOCHS = 3 # You will obviously want to train it for more....

def collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field.

    ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``; nothing is
    stacked, so fields of differing sizes are allowed.
    """
    per_field = zip(*batch)
    return tuple(per_field)

class COVIDXRay_Dataset(Dataset):
    """Resized chest X-ray JPEGs with bounding-box and study-level targets.

    Every entry of ``dcm_file_list`` is a source file path already split on
    '/': part 5 is used as the study id and part 7 as the image file name.
    """

    def __init__(self, dcm_file_list, transform, csv_image_trainer, csv_study_trainer, meta_csv):
        """
        Args:
            dcm_file_list: List of '/'-split file paths (see class docstring).
            transform: Callable applied to the loaded RGB PIL image.
            csv_image_trainer: DataFrame with 'id' and 'label' columns.
            csv_study_trainer: DataFrame with an 'id' column; columns at
                positions 1-4 form the study-level target.
            meta_csv: DataFrame with 'image_id', 'dim0' and 'dim1' columns.
        """
        self.dcm_file_list = dcm_file_list
        self.transform = transform
        self.csv_image_trainer = csv_image_trainer
        self.csv_study_trainer = csv_study_trainer
        self.meta_csv = meta_csv

    def __len__(self):
        """Return the number of files in the dataset."""
        return len(self.dcm_file_list)

    @staticmethod
    def _parse_boxes(label, w_factor, h_factor):
        """Convert a label string into an ``(n, 4)`` array of scaled yxyx boxes.

        The label is a space-separated sequence of 6-token groups; tokens 2-5
        of each group are read as x1, y1, x2, y2. Trailing tokens that do not
        fill a whole group are ignored.
        """
        tokens = str(label).split(' ')
        n_boxes = len(tokens) // 6
        boxes = np.zeros((n_boxes, 4))
        for i in range(n_boxes):
            start = 6 * i + 2
            x1, y1, x2, y2 = (float(tok) for tok in tokens[start:start + 4])
            # yxyx order, due to the original tensorflow implementation
            boxes[i, 0] = y1 * h_factor
            boxes[i, 1] = x1 * w_factor
            boxes[i, 2] = y2 * h_factor
            boxes[i, 3] = x2 * w_factor
        return boxes

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            tuple: ``(img, target, study_tgt)`` where ``img`` is the transformed
            image, ``target`` is ``{"bbox": Tensor(n, 4), "cls": Tensor(n)}``
            with boxes in yxyx order scaled to 512px and all classes set to 1,
            and ``study_tgt`` is a 4-element tensor.
        """
        file = self.dcm_file_list[idx]
        image_id = file[7].split('.')[0]
        image_dir = '/kaggle/input/resize512px/train/' + image_id + '.jpg'
        img = Image.open(image_dir).convert('RGB')

        # Original image size from the metadata table: dim1 is used as the
        # width and dim0 as the height. Take plain scalars so the scale
        # factors are floats rather than one-element Series.
        metadata_record = self.meta_csv.loc[self.meta_csv['image_id'] == image_id]
        width = float(metadata_record['dim1'].values[0])
        height = float(metadata_record['dim0'].values[0])
        w_factor = 512 / width
        h_factor = 512 / height
        img = self.transform(img)

        # Study-level ground truth, looked up in the table given to this instance.
        study_file = file[5] + '_study'
        study_row = self.csv_study_trainer.loc[self.csv_study_trainer['id'] == study_file]
        study_tgt = study_row.values[0][1:5].astype(float)

        # Image-level ground truth.
        image_file = image_id + '_image'
        image_row = self.csv_image_trainer.loc[self.csv_image_trainer['id'] == image_file]
        boxes = self._parse_boxes(image_row['label'].values[0], w_factor, h_factor)
        classes = np.ones(len(boxes))

        target = {"bbox": torch.Tensor(boxes), "cls": torch.Tensor(classes)}
        study_tgt = torch.Tensor(study_tgt)
        return img, target, study_tgt
    
    
train_dataset = COVIDXRay_Dataset(
        localizer_files, Normalizer, csv_image_trainer, csv_study_trainer, meta_csv
)


# 80/20 train/validation split. The validation length is whatever training
# leaves over, so the two lengths always sum to the dataset size, which
# random_split requires.
overall_length = len(train_dataset)
train_length = int(overall_length * 0.8)
validation_length = overall_length - train_length

train_dataset, validation_dataset = torch.utils.data.random_split(train_dataset, [train_length, validation_length])

# Samples are regrouped per field by collate_fn rather than stacked.
data_loader_train = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=N_WORKERS,
    shuffle=True,
    collate_fn=collate_fn
)

data_loader_test = torch.utils.data.DataLoader(
    validation_dataset,
    batch_size=BATCH_SIZE,
    num_workers=N_WORKERS,
    shuffle=True,
    collate_fn=collate_fn
)



Ok, we are nearly ready to go. We create our instance for the trainer and add any callbacks we need. Then we start our training/validation loop for the required number of epochs. We save our weights file when the testing loss decreases.

In [None]:
# Use the first GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#Move it to the device
model = model.to(device)

#Optimizer
my_trainer = EfficientDetTrainer(model, config, device)
optimizer = optim.Adam(model.parameters(), lr=.00005)
# Cut the learning rate by 10x when the validation loss stops improving.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1,
                                           patience=3, threshold=0.0001, threshold_mode='rel',
                                           cooldown=0, min_lr=0, eps=1e-08)
best_loss = float('inf')


def _batch_to_device(images, targets):
    """Stack the per-sample images and move images, boxes and classes to ``device``."""
    boxes = [torch.Tensor(t['bbox']).to(device).float() for t in targets]
    classes = [torch.Tensor(t['cls']).to(device).float() for t in targets]
    stacked = torch.stack(images).to(device).float()
    return stacked, boxes, classes


try:
    for epoch in range(N_EPOCHS):

        print('Epoch {}/{}'.format(epoch, N_EPOCHS - 1))
        print('-' * 10)

        model.train()
        tr_loss = 0
        tst_loss = 0

        tk0 = tqdm(data_loader_train, desc="Iteration")

        for inputs, target, study_tgt in tk0:
            # `inputs` is rebound to the stacked batch tensor for the rest of the iteration.
            inputs, boxes, classes = _batch_to_device(inputs, target)
            loss = my_trainer(inputs, boxes, classes)
            loss.backward()
            tr_loss += loss.item()
            optimizer.step()
            optimizer.zero_grad()

        epoch_loss = tr_loss / len(data_loader_train)

        print('Training Localization Loss: {:.4f}'.format(epoch_loss))
        #Leave in training mode for now - unless we need to make the switch for Evaluation mode
        #model.eval()

        # No backward pass happens here, so skip building autograd graphs.
        with torch.no_grad():
            for inputs, target, study_tgt in data_loader_test:
                inputs, boxes, classes = _batch_to_device(inputs, target)
                loss = my_trainer(inputs, boxes, classes)
                tst_loss += loss.item()

        epoch_loss = tst_loss / len(data_loader_test)
        scheduler.step(epoch_loss)

        print('Testing Loss: {:.4f}'.format(epoch_loss))
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            model.eval()
            torch.save(model.state_dict(), './weights.pth')
            print('Test Loss Improved....Saving Model')

except Exception:
    # Best effort: report the failure without aborting the notebook.
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    traceback.print_exc(file=sys.stdout)

Thanks everyone for reading. I hope it's useful. Let me know if any of the steps need to be described in more detail.