# DETR Object Detection for localization and classification of thoracic abnormalities 
Authors: Haris Mpournas & Elena Stamatelou


## 1. Select mode
Set `mode = 'train'` for training or `mode = 'predict'` for predictions.
Uncomment the one you want to enable and comment out the one you want to disable.

In [1]:
# Execution mode of the notebook: 'train' or 'predict'.
# Swap the commented line to switch modes.
mode = "train"
# mode = "predict"

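#### For **mode = 'train'**,
1) Add data --> Competitions Data --> Search for "VinBigData Chest X-ray Abnormalities Detection" <br>
2) Add data --> Datasets --> Search for "vinbigdata-chest-xray-original-png"<br>
3) Enable the GPU in the Settings --> Accelerator --> GPU<br>
The output of this mode is "detr_model.pth"<br>
Note: the code below reads `train.csv` and the PNG images from `../input/vinbigdata-512-image-dataset/`, so make sure the attached image dataset is mounted under that name.<br>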

#### For **mode = 'predict'**,
1) Go to the outputs of the previous mode (train mode "detr_model.pth"), select "Add new version" and keep the created URL<br>
2) Go back to Kaggle's notebook --> Add data --> Datasets --> Search by URL with the saved URL from the last step<br>
3) Enable the CPU in the Settings --> Accelerator --> CPU<br>
 

## 2. Import libraries

In [2]:
# clone github repo of detr
!git clone https://github.com/facebookresearch/detr.git   

# general libraries
import os
import numpy as np 
import pandas as pd 
from datetime import datetime
import time
import random
from tqdm.autonotebook import tqdm
import re
import pydicom
import warnings
warnings.filterwarnings("ignore")
from matplotlib import pyplot as plt
from PIL import Image

# torch.
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import DataLoader, Dataset

# sklearn
from sklearn.model_selection import StratifiedKFold
from sklearn import model_selection

# CV
import cv2

# DETR FUNCTIONS FOR LOSS
import sys
sys.path.append('./detr/')

from detr.models.matcher import HungarianMatcher
from detr.models.detr import SetCriterion

# albumentations
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2

# Glob
from glob import glob

# ensembling 
!pip install ensemble_boxes
from tqdm import tqdm
from ensemble_boxes import *

# mAP
!pip install map_boxes 
from map_boxes import mean_average_precision_for_boxes

Cloning into 'detr'...
remote: Enumerating objects: 243, done.[K
remote: Total 243 (delta 0), reused 0 (delta 0), pack-reused 243[K
Receiving objects: 100% (243/243), 12.84 MiB | 13.66 MiB/s, done.
Resolving deltas: 100% (127/127), done.


  # This is added back by InteractiveShellApp.init_path()


Collecting ensemble_boxes
  Downloading ensemble_boxes-1.0.6-py3-none-any.whl (20 kB)
Installing collected packages: ensemble-boxes
Successfully installed ensemble-boxes-1.0.6
Collecting map_boxes
  Downloading map_boxes-1.0.5-py3-none-any.whl (5.1 kB)
Installing collected packages: map-boxes
Successfully installed map-boxes-1.0.5


In [3]:
# Thoracic abnormality names; a label's class id is its position in this list.
# The last entry (id 14) stands for images without any finding.
CLASSES = [
    'Aortic enlargement',
    'Atelectasis',
    'Calcification',
    'Cardiomegaly',
    'Consolidation',
    'ILD',
    'Infiltration',
    'Lung Opacity',
    'Nodule/Mass',
    'Other lesion',
    'Pleural effusion',
    'Pleural thickening',
    'Pneumothorax',
    'Pulmonary fibrosis',
    'No Finding',
]

# Colour triplets used for visualization.
COLORS = [
    [0.000, 0.447, 0.741],
    [0.850, 0.325, 0.098],
    [0.929, 0.694, 0.125],
    [0.494, 0.184, 0.556],
    [0.466, 0.674, 0.188],
    [0.301, 0.745, 0.933],
]

## 3. Preprocessing image metadata

In [4]:
def read_images(csv_path='../input/vinbigdata-512-image-dataset/vinbigdata/train.csv'):
    """Load the annotation table of the training images.

    Args:
        csv_path: Location of the annotation CSV. Defaults to the file of the
            512x512 image dataset that has to be attached in the data section.

    Returns:
        DataFrame with the CSV content in which every missing value has been
        replaced by 0.
    """
    return pd.read_csv(csv_path).fillna(0)


### 3.1 Scale images

In [5]:
def scale_images(train_df):
    """Rescale the annotated boxes to the 512x512 images.

    The frame is modified in place and also returned. Rows of class 14
    (no finding) get the placeholder box [xmin=0, ymin=0, xmax=1, ymax=1],
    both in the raw columns and in the rescaled ones.
    """
    IMG_SIZE = 512
    no_finding = train_df["class_id"] == 14

    # placeholder box in the original coordinate columns
    train_df.loc[no_finding, ['x_max', 'y_max']] = 1.0
    train_df.loc[no_finding, ['x_min', 'y_min']] = 0

    # map every coordinate from the original image extent to IMG_SIZE pixels
    rescale_plan = (
        ('xmin', 'x_min', 'width'),
        ('ymin', 'y_min', 'height'),
        ('xmax', 'x_max', 'width'),
        ('ymax', 'y_max', 'height'),
    )
    for scaled_col, raw_col, extent_col in rescale_plan:
        train_df[scaled_col] = (train_df[raw_col]/train_df[extent_col])*IMG_SIZE

    # the placeholder box must not be rescaled, so it is written again
    train_df.loc[no_finding, ['xmax', 'ymax']] = 1.0
    train_df.loc[no_finding, ['xmin', 'ymin']] = 0
    return train_df

### 3.2 Define folds

In [6]:
def define_folds(train_df):
    """Assign every annotation row to one of 5 cross-validation folds.

    Folds are drawn per image, so all annotated boxes of one image end up in
    the same fold (during training an image with its boxes is one input).

    Args:
        train_df: Annotation table with an ``image_id`` column. It is modified
            in place: a ``kfold`` column is added and ``image_id`` becomes the
            index.

    Returns:
        The same ``train_df`` object.
    """
    unique_images = train_df["image_id"].unique()
    df_split = pd.DataFrame(unique_images, columns=['unique_images'])

    # one fold number per unique image (for the k-fold cross validation)
    df_split["kfold"] = -1
    df_split = df_split.sample(frac=1).reset_index(drop=True)
    y = df_split.unique_images.values
    kf = model_selection.GroupKFold(n_splits=5)
    for f, (t_, v_) in enumerate(kf.split(X=df_split, y=y, groups=y)):
        df_split.loc[v_, "kfold"] = f

    # Look the fold of every row up in one vectorised pass. The previous
    # row-by-row chained assignment wrote a Series into a scalar cell and
    # rescanned the whole split table for each annotation.
    fold_of_image = df_split.set_index("unique_images")["kfold"]
    train_df["kfold"] = train_df["image_id"].map(fold_of_image).astype(int)

    train_df.set_index('image_id', inplace=True)
    return train_df

### 3.3 Weighted boxes fusion

In [7]:
def boxes_fusion(df):
    """Fuse the overlapping annotations of every image into single boxes.

    The boxes of each radiologist (``rad_id``) are treated as one equally
    weighted "model" and passed to ``weighted_boxes_fusion``.

    Args:
        df: Annotation table indexed by image id with the columns
            ``class_id``, ``rad_id``, ``kfold``, ``xmin``, ``ymin``, ``xmax``
            and ``ymax``.

    Returns:
        DataFrame with one row per fused box and the columns ``image_id``,
        ``class_id``, ``rad_id`` (always ``"wbf"``), ``xmin``, ``ymin``,
        ``xmax``, ``ymax`` and ``kfold``. An image whose first annotation is
        class 14 (no finding) contributes exactly one row with the
        placeholder box [0, 0, 1, 1].
    """
    # Default WBF config
    iou_thr = 0.75
    skip_box_thr = 0.0001
    box_columns = ["xmin", "ymin", "xmax", "ymax"]
    results = []

    # One group per image, in order of first appearance; avoids filtering the
    # whole frame once per image.
    per_image = df.groupby(level=0, sort=False)
    for image_id, data in tqdm(per_image, total=per_image.ngroups):
        data = data.reset_index(drop=True)
        kfold = data['kfold'].iloc[0]
        first_class = data['class_id'].iloc[0]

        # if class is nothing then have it once (instead of once per radiologist)
        if first_class == 14:
            results.append({
                "image_id": image_id,
                "class_id": first_class,
                "rad_id": "wbf",
                "xmin": 0,
                "ymin": 0,
                "xmax": 1,
                "ymax": 1,
                "kfold": kfold,
            })
            continue

        # WBF expects the coordinates in 0-1 range. The scale is taken from the
        # box columns by name so it does not depend on the column layout.
        max_value = data[box_columns].values.max()
        if max_value <= 0:
            max_value = 1.0  # degenerate boxes only: avoid dividing by zero

        boxes_list = []
        scores_list = []
        labels_list = []
        for _, rows in data.groupby("rad_id", sort=False):
            boxes_list.append((rows[box_columns].values / max_value).tolist())
            scores_list.append([1.0] * len(rows))
            labels_list.append(rows["class_id"].tolist())
        # We consider all of the radiologists as equal.
        weights = [1.0] * len(boxes_list)

        # Calculate WBF
        boxes, scores, labels = weighted_boxes_fusion(
            boxes_list,
            scores_list,
            labels_list,
            weights=weights,
            iou_thr=iou_thr,
            skip_box_thr=skip_box_thr,
        )
        for idx, box in enumerate(boxes):
            results.append({
                "image_id": image_id,
                "class_id": int(labels[idx]),
                "rad_id": "wbf",
                # back from the 0-1 range to pixel coordinates
                "xmin": box[0] * max_value,
                "ymin": box[1] * max_value,
                "xmax": box[2] * max_value,
                "ymax": box[3] * max_value,
                "kfold": kfold,
            })

    return pd.DataFrame(results)

### 3.4 Pascal to coco

In [8]:
def pascal_to_coco(train_df):
    """Add ``coco_x``, ``coco_y``, ``coco_w`` and ``coco_h`` columns in place.

    ``coco_x``/``coco_y`` hold the box centre (not the top-left corner, despite
    the column names) and ``coco_w``/``coco_h`` the box width and height, all
    derived from ``xmin``, ``ymin``, ``xmax`` and ``ymax``. Rows of class 14
    get the fixed values x=1, y=1, w=0.5, h=0.5.

    Good explanation of coco, pascal etc:
    https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation/
    """
    box_width = train_df['xmax'] - train_df['xmin']
    box_height = train_df['ymax'] - train_df['ymin']

    train_df['coco_x'] = train_df['xmin'] + box_width/2
    train_df['coco_y'] = train_df['ymin'] + box_height/2
    train_df['coco_w'] = box_width
    train_df['coco_h'] = box_height

    # fixed placeholder for the "no finding" rows
    no_finding = train_df['class_id'] == 14
    placeholder = (('coco_x', 1), ('coco_y', 1), ('coco_w', 0.5), ('coco_h', 0.5))
    for column, value in placeholder:
        train_df.loc[no_finding, column] = value

    return train_df

### 3.5 Main preprocessing function

In [9]:
def preprocessing():
    """Run the whole annotation pipeline and return the training table.

    Steps: read the CSV, rescale the boxes, assign folds, fuse overlapping
    boxes, index the result by image id and add the ``coco_*`` columns.
    """
    annotations = scale_images(read_images())
    annotations = define_folds(annotations)

    fused = boxes_fusion(annotations)
    fused.set_index('image_id', inplace=True)
    return pascal_to_coco(fused)


## 4. Creating Image Dataset class


In [10]:
def get_train_transforms():
    """Build the augmentation pipeline for the training set."""
    augmentations = [
        A.ToGray(p=0.01),
        A.Cutout(num_holes=10, max_h_size=32, max_w_size=32, fill_value=0, p=0.5),
        ToTensorV2(p=1.0),
    ]
    # boxes are declared as 'coco' and travel together with the 'labels' field
    box_settings = A.BboxParams(
        format='coco',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(augmentations, p=1.0, bbox_params=box_settings)

def get_valid_transforms():
    """Build the pipeline for the validation set (tensor conversion only)."""
    steps = [ToTensorV2(p=1.0)]
    # boxes are declared as 'coco' and travel together with the 'labels' field
    box_settings = A.BboxParams(
        format='coco',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(steps, p=1.0, bbox_params=box_settings)

In [11]:
# Folder of the competition's training data.
DIR_TRAIN = "../input/vinbigdata-chest-xray-abnormalities-detection/train"
# Folder with the PNG images that the dataset class reads.
DIR_TRAIN_PNG = "../input/vinbigdata-512-image-dataset/vinbigdata/train"

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

class VinDataset(Dataset):
    """Dataset that yields an image together with its boxes and labels.

    Args:
        image_ids: Array of image ids; one sample is produced per entry, so an
            id that occurs several times is served several times.
        dataframe: Annotation table indexed by image id with the columns
            ``class_id``, ``coco_x``, ``coco_y``, ``coco_w`` and ``coco_h``.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            dict with the same keys. When omitted the image is only converted
            to a channel-first tensor.
    """

    def __init__(self,image_ids,dataframe,transforms=None):
        self.image_ids = image_ids
        self.df = dataframe
        self.transforms = transforms

    def __len__(self) -> int:
        return self.image_ids.shape[0]

    def __getitem__(self,index):
        """Return ``(image, target, image_id)`` for the sample at ``index``.

        ``image`` is the image tensor divided by 255. ``target`` is a dict with
        ``boxes`` (float32, divided by the image width/height), ``labels``
        (long) and ``image_id`` (the sample index as a tensor).

        Raises:
            FileNotFoundError: If the PNG of the image cannot be read.
        """
        image_id = self.image_ids[index]
        records = self.df.loc[image_id]

        image_path = f'{DIR_TRAIN_PNG}/{image_id}.png'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)

        boxes = records[['coco_x', 'coco_y', 'coco_w', 'coco_h']].values
        labels = np.array(records['class_id'])

        # an image with a single annotation comes back as one row:
        # restore the (n_boxes, 4) / (n_boxes,) layout
        if boxes.ndim == 1:
            boxes = np.expand_dims(boxes, axis=0)
            labels = np.expand_dims(labels, axis=0)

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']
            boxes = sample['bboxes']
            labels = sample['labels']
        else:
            # No pipeline supplied: only move the channels first so that the
            # shape unpacking below sees (channels, height, width).
            image = torch.from_numpy(image).permute(2, 0, 1)

        # Normalizing BBOXES
        _,h,w = image.shape
        boxes = A.augmentations.bbox_utils.normalize_bboxes(boxes,rows=h,cols=w)

        target = {}
        target['boxes'] = torch.as_tensor(boxes,dtype=torch.float32)
        target['labels'] = torch.as_tensor(labels,dtype=torch.long)
        target['image_id'] = torch.tensor([index])

        return image/255, target, image_id

## 5. DETR model initialization

In [12]:
import torch.nn.functional as F
class DETRModel(nn.Module):
    """Pretrained DETR (ResNet-50) with a replaced classification head.

    Args:
        num_classes: Number of object classes; the new head has
            ``num_classes + 1`` outputs.
        num_queries: Value stored as ``num_queries`` on the wrapped model.
            NOTE(review): only the attribute is set here - confirm that the
            upstream model actually reads it at forward time.
    """

    def __init__(self,num_classes,num_queries):
        super().__init__()
        self.num_classes = num_classes
        self.num_queries = num_queries

        detr = torch.hub.load('facebookresearch/detr', 'detr_resnet50', pretrained=True)
        self.model = detr

        # every weight of the pretrained network stays trainable
        for weight in detr.parameters():
            weight.requires_grad = True

        # new classification layer with the same input width as the old one
        self.in_features = detr.class_embed.in_features
        detr.class_embed = nn.Linear(
            in_features=self.in_features,
            out_features=self.num_classes+1,
        )
        detr.num_queries = self.num_queries

    def forward(self,images):
        """Delegate to the wrapped DETR model."""
        return self.model(images)

## 6. Modeling functions

### 6.1 Average meter

In [13]:
# AverageMeter - class for averaging loss,metric,etc over epochs
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the current value with weight ``n``.

        Args:
            val: Latest value (e.g. the mean loss of a batch).
            n: Number of samples that ``val`` represents.
        """
        # keep the latest value; it was never stored before, so ``val``
        # always stayed at 0
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

### 6.2 Training Function

Training DETR is different from Faster R-CNN and EfficientDet, as we train the criterion as well. The reference training function can be viewed here: https://github.com/facebookresearch/detr/blob/master/engine.py

In [14]:
def train_fn(data_loader,model,criterion,optimizer,device,scheduler,epoch):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        data_loader: Yields ``(images, targets, image_ids)`` batches.
        model: Network to optimise; called with the list of image tensors.
        criterion: Loss module returning a dict of losses and exposing
            ``weight_dict``.
        optimizer: Optimizer stepped once per batch.
        device: Device the images and targets are moved to.
        scheduler: Optional LR scheduler, stepped after every optimizer step.
        epoch: Current epoch number (not used inside the function).

    Returns:
        AverageMeter holding the sample-weighted mean of the total loss.
    """
    model.train()
    criterion.train()

    summary_loss = AverageMeter()

    tk0 = tqdm(data_loader, total=len(data_loader))

    # id tuples of the batches already processed; a batch made of exactly the
    # same ids as an earlier one is skipped
    seen_batches = set()
    for step, (images, targets, image_ids) in enumerate(tk0):
        batch_key = tuple(image_ids)
        if batch_key in seen_batches:
            continue
        seen_batches.add(batch_key)

        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        output = model(images)

        loss_dict = criterion(output, targets)
        weight_dict = criterion.weight_dict

        # only the losses that carry a weight contribute to the total
        losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict)

        optimizer.zero_grad()

        losses.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        # weight by the real batch length (the last batch can be smaller)
        summary_loss.update(losses.item(), len(images))
        tk0.set_postfix(loss=summary_loss.avg)

    return summary_loss

### 6.3 Evaluation Function

In [15]:
# for output bounding box post-processing
def box_cxcywh_to_xyxy(x):
    """Convert (N, 4) boxes from (center_x, center_y, width, height) to
    (x_min, y_min, x_max, y_max)."""
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)

def rescale_bboxes(out_bbox, size):
    """Convert center-format boxes to corner format and scale them.

    Args:
        out_bbox: Tensor of shape (N, 4) with (center_x, center_y, width,
            height) per row.
        size: ``(img_w, img_h)``; x values are multiplied by ``img_w`` and
            y values by ``img_h``.

    Returns:
        Tensor of shape (N, 4) with scaled (x_min, y_min, x_max, y_max) on the
        same device as ``out_bbox``.
    """
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    # build the scale on the device of the boxes instead of a fixed 'cuda',
    # so the function also works for CPU tensors
    scale = torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32, device=b.device)
    return b * scale

In [16]:
def eval_fn(data_loader, model,criterion, device):
    """Evaluate ``model`` on ``data_loader`` and compute loss and mAP.

    Only the first sample of every batch is used for the mAP tables
    (``targets[0]`` and the first row of the model output), so the loader is
    expected to yield batches of size 1.

    Args:
        data_loader: Yields ``(images, targets, image_ids)`` batches.
        model: Network under evaluation.
        criterion: Loss module returning a dict of losses and exposing
            ``weight_dict``.
        device: Device the images and targets are moved to.

    Returns:
        Tuple ``(summary_loss, mean_ap)``: an AverageMeter with the mean total
        loss and the mean average precision at IoU threshold 0.4.
    """
    model.eval()
    criterion.eval()
    summary_loss = AverageMeter()

    target_columns = ['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax']
    detection_columns = ['ImageID', 'LabelName', 'Conf', 'XMin', 'XMax', 'YMin', 'YMax']
    # rows are collected in plain lists and turned into frames once at the end
    target_rows = []
    detection_rows = []

    with torch.no_grad():
        # id tuples of the batches already processed; repeats are skipped
        seen_batches = set()
        tk0 = tqdm(data_loader, total=len(data_loader))
        for step, (images, targets, image_ids) in enumerate(tk0):
            batch_key = tuple(image_ids)
            if batch_key in seen_batches:
                continue
            seen_batches.add(batch_key)

            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            outputs = model(images)

            # MAP targets: center format -> corner format
            for label, box in zip(targets[0]['labels'], targets[0]['boxes']):
                half_w = box[2] / 2
                half_h = box[3] / 2
                target_rows.append({
                    "ImageID": image_ids[0],
                    "LabelName": f'{CLASSES[int(label)]}',
                    "XMin": (box[0] - half_w).item(),
                    "XMax": (box[0] + half_w).item(),
                    "YMin": (box[1] - half_h).item(),
                    "YMax": (box[1] + half_h).item(),
                })

            # class probabilities without the last logit
            probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
            # keep the queries whose best class score exceeds the threshold
            keep = probas.max(-1).values > 0.08
            boxes = rescale_bboxes(outputs['pred_boxes'][0, keep], (512,512))
            prob = probas[keep]

            for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
                cl = p.argmax()
                # Row for MAP: boxes go back to the 0-1 range
                detection_rows.append({
                    "ImageID": image_ids[0],
                    "LabelName": f'{CLASSES[int(cl)]}',
                    "Conf": p[cl].item(),
                    "XMin": xmin/512,
                    "XMax": xmax/512,
                    "YMin": ymin/512,
                    "YMax": ymax/512,
                })

            loss_dict = criterion(outputs, targets)
            weight_dict = criterion.weight_dict

            losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict)

            # weight by the real batch length instead of the training batch size
            summary_loss.update(losses.item(), len(images))
            tk0.set_postfix(loss=summary_loss.avg)

        # explicit columns keep the lookups valid even when a list is empty
        map_df_target = pd.DataFrame(target_rows, columns=target_columns)
        map_df = pd.DataFrame(detection_rows, columns=detection_columns)
        ann = map_df_target[target_columns].values
        det = map_df[detection_columns].values
        mean_ap, average_precisions = mean_average_precision_for_boxes(ann, det, iou_threshold=0.4)

        print("mean_ap : {}".format(mean_ap))
        print("average_precisions : {}".format(average_precisions))

    return summary_loss, mean_ap

### 6.4 Run DETR

In [17]:
def collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field."""
    per_field = zip(*batch)
    return tuple(per_field)

In [18]:
def run(train_df, fold):
    """Train DETR on all folds except ``fold`` and validate on ``fold``.

    Reads the module-level settings ``BATCH_SIZE``, ``EPOCHS``,
    ``num_classes``, ``num_queries`` and ``null_class_coef``. One line per
    epoch is appended to ``all_losses.csv`` and the weights with the best
    validation mAP are written to ``detr_model.pth``.

    Args:
        train_df: Annotation table indexed by image id with a ``kfold`` column.
        fold: Number of the fold that is held out for validation.

    Returns:
        The trained model (state after the last epoch).
    """
    df_train = train_df[train_df['kfold'] != fold]
    df_valid = train_df[train_df['kfold'] == fold]

    # NOTE(review): the index holds one entry per box, so an image with several
    # boxes appears several times in ``image_ids`` - confirm this is intended.
    train_dataset = VinDataset(
        image_ids=df_train.index.values,
        dataframe=df_train,
        transforms=get_train_transforms()
    )

    valid_dataset = VinDataset(
        image_ids=df_valid.index.values,
        dataframe=df_valid,
        transforms=get_valid_transforms()
    )

    train_data_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=4,
        collate_fn=collate_fn
    )

    # eval_fn only scores the first sample of a batch, hence batch_size=1
    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn
    )

    # Bipartite Matching Loss
    matcher = HungarianMatcher()
    weight_dict = {'loss_ce': 1, 'loss_bbox': 1 , 'loss_giou': 1}
    losses = ['labels', 'boxes', 'cardinality']

    device = torch.device('cuda')
    model = DETRModel(num_classes=num_classes,num_queries=num_queries)
    model = model.to(device)
    criterion = SetCriterion(num_classes, matcher, weight_dict, eos_coef = null_class_coef, losses=losses)
    criterion = criterion.to(device)

    LR = 3e-5
    best_map = 0

    # header line of the loss log (appended, like the rows below)
    columns = ['train_losses', 'valid_losses', 'mean_ap']
    pd.DataFrame(columns=columns).to_csv("all_losses.csv", mode='a', index=False)

    for epoch in range(EPOCHS):
        # NOTE(review): a fresh optimizer per epoch applies the decayed LR but
        # also discards the AdamW state of the previous epoch - confirm intended.
        optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
        train_loss = train_fn(train_data_loader, model,criterion, optimizer,device,scheduler=None,epoch=epoch)

        # validate (and decay the LR for the following epochs) every second
        # epoch; in between the previous validation results are reported again
        if epoch % 2 == 0:
            LR = LR/1.12
            valid_loss, map_validation = eval_fn(valid_data_loader, model,criterion, device)

        epoch_row = pd.DataFrame(
            [{'train_losses': train_loss.avg, 'valid_losses': valid_loss.avg, 'mean_ap': map_validation}],
            columns=columns,
        )
        epoch_row.to_csv("all_losses.csv", index=False, header=False, mode='a')

        print('|EPOCH {}| TRAIN_LOSS {}| VALID_LOSS {}|'.format(epoch+1,train_loss.avg,valid_loss.avg))

        if map_validation > best_map:
            best_map = map_validation
            print('Best model found for Fold {} in Epoch {}........Saving Model'.format(fold,epoch+1))
            torch.save(model.state_dict(), 'detr_model.pth')
    return model

## 7. Main training function 

In [19]:
# cross-validation / reproducibility settings
n_folds = 5
seed = 42

# model settings
num_classes = 15
num_queries = 2
null_class_coef = 0.2  # passed to the criterion as eos_coef

# optimisation settings
BATCH_SIZE = 32
EPOCHS = 16

In [20]:
def model_training():
    """Preprocess the annotations and train the model on fold 0."""
    import gc
    import torch

    annotations = preprocessing()

    # release unused Python objects and cached GPU memory before training
    gc.collect()
    torch.cuda.empty_cache()

    # run this function for training the model
    run(annotations, fold=0)

In [21]:
# Training only starts when the notebook is switched to 'train' mode.
if mode == "train":
    model_training()

100%|██████████| 15000/15000 [03:19<00:00, 75.06it/s]
Downloading: "https://github.com/facebookresearch/detr/archive/master.zip" to /root/.cache/torch/hub/master.zip
Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

Downloading: "https://dl.fbaipublicfiles.com/detr/detr-r50-e632da11.pth" to /root/.cache/torch/hub/checkpoints/detr-r50-e632da11.pth


  0%|          | 0.00/159M [00:00<?, ?B/s]

100%|██████████| 1031/1031 [26:57<00:00,  1.57s/it, loss=1.83]
100%|██████████| 8402/8402 [08:13<00:00, 17.01it/s, loss=1.06]


Number of files in annotations: 3000
Number of files in predictions: 3000
Unique classes: 15
Detections length: 3000
Annotations length: 3000
Aortic enlargement             | 0.166406 |    1100
Atelectasis                    | 0.000000 |      52
Calcification                  | 0.000000 |     164
Cardiomegaly                   | 0.219877 |     763
Consolidation                  | 0.000000 |     102
ILD                            | 0.032242 |     185
Infiltration                   | 0.041741 |     248
Lung Opacity                   | 0.038526 |     503
No Finding                     | 0.000228 |    2099
Nodule/Mass                    | 0.000316 |     502
Other lesion                   | 0.000027 |     393
Pleural effusion               | 0.078557 |     461
Pleural thickening             | 0.029007 |     918
Pneumothorax                   | 0.000000 |      26
Pulmonary fibrosis             | 0.016541 |     886
mAP: 0.041565
mean_ap : 0.04156452498219673
average_precisions : {'Aortic enla

100%|██████████| 1031/1031 [27:18<00:00,  1.59s/it, loss=1.41]
  0%|          | 0/1031 [00:00<?, ?it/s]

|EPOCH 2| TRAIN_LOSS 1.4051933574167763| VALID_LOSS 1.0564408876945575|


100%|██████████| 1031/1031 [27:28<00:00,  1.60s/it, loss=1.25]
100%|██████████| 8402/8402 [07:52<00:00, 17.78it/s, loss=0.674]


Number of files in annotations: 3000
Number of files in predictions: 3000
Unique classes: 15
Detections length: 3000
Annotations length: 3000
Aortic enlargement             | 0.257461 |    1100
Atelectasis                    | 0.038462 |      52
Calcification                  | 0.001505 |     164
Cardiomegaly                   | 0.239384 |     763
Consolidation                  | 0.022701 |     102
ILD                            | 0.048808 |     185
Infiltration                   | 0.087524 |     248
Lung Opacity                   | 0.104214 |     503
No Finding                     | 0.982415 |    2099
Nodule/Mass                    | 0.012309 |     502
Other lesion                   | 0.007792 |     393
Pleural effusion               | 0.160226 |     461
Pleural thickening             | 0.043079 |     918
Pneumothorax                   | 0.000000 |      26
Pulmonary fibrosis             | 0.040762 |     886
mAP: 0.136443
mean_ap : 0.1364428316403826
average_precisions : {'Aortic enlar

100%|██████████| 1031/1031 [27:20<00:00,  1.59s/it, loss=1.15]
  0%|          | 0/1031 [00:00<?, ?it/s]

|EPOCH 4| TRAIN_LOSS 1.1526833414800184| VALID_LOSS 0.6735665024270614|


100%|██████████| 1031/1031 [27:22<00:00,  1.59s/it, loss=1.08]
100%|██████████| 8402/8402 [07:35<00:00, 18.46it/s, loss=0.597]


Number of files in annotations: 3000
Number of files in predictions: 3000
Unique classes: 15
Detections length: 3000
Annotations length: 3000
Aortic enlargement             | 0.249572 |    1100
Atelectasis                    | 0.051676 |      52
Calcification                  | 0.028067 |     164
Cardiomegaly                   | 0.325199 |     763
Consolidation                  | 0.051275 |     102
ILD                            | 0.036273 |     185
Infiltration                   | 0.083420 |     248
Lung Opacity                   | 0.155373 |     503
No Finding                     | 0.991392 |    2099
Nodule/Mass                    | 0.028511 |     502
Other lesion                   | 0.017012 |     393
Pleural effusion               | 0.191579 |     461
Pleural thickening             | 0.053559 |     918
Pneumothorax                   | 0.000000 |      26
Pulmonary fibrosis             | 0.080542 |     886
mAP: 0.156230
mean_ap : 0.15622996355203783
average_precisions : {'Aortic enla

100%|██████████| 1031/1031 [27:30<00:00,  1.60s/it, loss=1.05]
  0%|          | 0/1031 [00:00<?, ?it/s]

|EPOCH 6| TRAIN_LOSS 1.047261801827428| VALID_LOSS 0.5971556752473116|


100%|██████████| 1031/1031 [27:34<00:00,  1.60s/it, loss=0.966]
100%|██████████| 8402/8402 [07:35<00:00, 18.46it/s, loss=0.607]


Number of files in annotations: 3000
Number of files in predictions: 3000
Unique classes: 15
Detections length: 3000
Annotations length: 3000
Aortic enlargement             | 0.315543 |    1100
Atelectasis                    | 0.034349 |      52
Calcification                  | 0.026494 |     164
Cardiomegaly                   | 0.398345 |     763
Consolidation                  | 0.056341 |     102
ILD                            | 0.075108 |     185
Infiltration                   | 0.076515 |     248
Lung Opacity                   | 0.175447 |     503
No Finding                     | 0.991072 |    2099
Nodule/Mass                    | 0.033573 |     502
Other lesion                   | 0.022625 |     393
Pleural effusion               | 0.195345 |     461
Pleural thickening             | 0.066245 |     918
Pneumothorax                   | 0.046154 |      26
Pulmonary fibrosis             | 0.079174 |     886
mAP: 0.172822
mean_ap : 0.17282192677464928
average_precisions : {'Aortic enla

100%|██████████| 1031/1031 [27:21<00:00,  1.59s/it, loss=0.893]
  0%|          | 0/1031 [00:00<?, ?it/s]

|EPOCH 8| TRAIN_LOSS 0.8932075742144358| VALID_LOSS 0.6072202308674653|


100%|██████████| 1031/1031 [27:24<00:00,  1.59s/it, loss=0.857]
100%|██████████| 8402/8402 [07:23<00:00, 18.96it/s, loss=0.597]


Number of files in annotations: 3000
Number of files in predictions: 3000
Unique classes: 15
Detections length: 3000
Annotations length: 3000
Aortic enlargement             | 0.340808 |    1100
Atelectasis                    | 0.033961 |      52
Calcification                  | 0.015439 |     164
Cardiomegaly                   | 0.410594 |     763
Consolidation                  | 0.058950 |     102
ILD                            | 0.062897 |     185
Infiltration                   | 0.089212 |     248
Lung Opacity                   | 0.169096 |     503
No Finding                     | 0.991192 |    2099
Nodule/Mass                    | 0.037584 |     502
Other lesion                   | 0.018841 |     393
Pleural effusion               | 0.162315 |     461
Pleural thickening             | 0.074113 |     918
Pneumothorax                   | 0.144430 |      26
Pulmonary fibrosis             | 0.084799 |     886
mAP: 0.179615
mean_ap : 0.17961539308478794
average_precisions : {'Aortic enla

100%|██████████| 1031/1031 [27:20<00:00,  1.59s/it, loss=0.804]
  0%|          | 0/1031 [00:00<?, ?it/s]

|EPOCH 10| TRAIN_LOSS 0.8041205805094929| VALID_LOSS 0.5970213085313638|


100%|██████████| 1031/1031 [27:23<00:00,  1.59s/it, loss=0.761]
100%|██████████| 8402/8402 [07:09<00:00, 19.56it/s, loss=0.541]


Number of files in annotations: 3000
Number of files in predictions: 3000
Unique classes: 15
Detections length: 3000
Annotations length: 3000
Aortic enlargement             | 0.355310 |    1100
Atelectasis                    | 0.077989 |      52
Calcification                  | 0.027728 |     164
Cardiomegaly                   | 0.404048 |     763
Consolidation                  | 0.066773 |     102
ILD                            | 0.087671 |     185
Infiltration                   | 0.095920 |     248
Lung Opacity                   | 0.159959 |     503
No Finding                     | 0.990638 |    2099
Nodule/Mass                    | 0.056468 |     502
Other lesion                   | 0.027257 |     393
Pleural effusion               | 0.173508 |     461
Pleural thickening             | 0.078069 |     918
Pneumothorax                   | 0.091608 |      26
Pulmonary fibrosis             | 0.128547 |     886
mAP: 0.188100
mean_ap : 0.18809952852511264
average_precisions : {'Aortic enla

100%|██████████| 1031/1031 [27:24<00:00,  1.59s/it, loss=0.721]
  0%|          | 0/1031 [00:00<?, ?it/s]

|EPOCH 12| TRAIN_LOSS 0.7210818411191613| VALID_LOSS 0.5406837738851706|


100%|██████████| 1031/1031 [27:29<00:00,  1.60s/it, loss=0.683]
100%|██████████| 8402/8402 [07:06<00:00, 19.69it/s, loss=0.564]


Number of files in annotations: 3000
Number of files in predictions: 3000
Unique classes: 15
Detections length: 3000
Annotations length: 3000
Aortic enlargement             | 0.302367 |    1100
Atelectasis                    | 0.035000 |      52
Calcification                  | 0.030430 |     164
Cardiomegaly                   | 0.461767 |     763
Consolidation                  | 0.069330 |     102
ILD                            | 0.102472 |     185
Infiltration                   | 0.130909 |     248
Lung Opacity                   | 0.167748 |     503
No Finding                     | 0.990695 |    2099
Nodule/Mass                    | 0.053356 |     502
Other lesion                   | 0.022997 |     393
Pleural effusion               | 0.173616 |     461
Pleural thickening             | 0.081968 |     918
Pneumothorax                   | 0.058861 |      26


  0%|          | 0/1031 [00:00<?, ?it/s]

Pulmonary fibrosis             | 0.113781 |     886
mAP: 0.186353
mean_ap : 0.1863531617615499
average_precisions : {'Aortic enlargement': (0.30236698236948156, 1100.0), 'Atelectasis': (0.035000416896779636, 52.0), 'Calcification': (0.03042971067107458, 164.0), 'Cardiomegaly': (0.461767480195517, 763.0), 'Consolidation': (0.06932994389998458, 102.0), 'ILD': (0.1024715786252545, 185.0), 'Infiltration': (0.13090876288054792, 248.0), 'Lung Opacity': (0.1677483658007046, 503.0), 'No Finding': (0.9906951127772421, 2099.0), 'Nodule/Mass': (0.0533558961311547, 502.0), 'Other lesion': (0.022997466461023627, 393.0), 'Pleural effusion': (0.1736156457146926, 461.0), 'Pleural thickening': (0.0819682516084109, 918.0), 'Pneumothorax': (0.05886098028955172, 26.0), 'Pulmonary fibrosis': (0.11378083210182757, 886.0)}
|EPOCH 13| TRAIN_LOSS 0.6828983644625148| VALID_LOSS 0.5639672694752613|


100%|██████████| 1031/1031 [27:33<00:00,  1.60s/it, loss=0.644]
  0%|          | 0/1031 [00:00<?, ?it/s]

|EPOCH 14| TRAIN_LOSS 0.644037133160433| VALID_LOSS 0.5639672694752613|


100%|██████████| 1031/1031 [27:31<00:00,  1.60s/it, loss=0.61]
100%|██████████| 8402/8402 [07:05<00:00, 19.77it/s, loss=0.577]


Number of files in annotations: 3000
Number of files in predictions: 3000
Unique classes: 15
Detections length: 3000
Annotations length: 3000
Aortic enlargement             | 0.345812 |    1100
Atelectasis                    | 0.067731 |      52
Calcification                  | 0.045830 |     164
Cardiomegaly                   | 0.422661 |     763
Consolidation                  | 0.105194 |     102
ILD                            | 0.098248 |     185
Infiltration                   | 0.145842 |     248
Lung Opacity                   | 0.188113 |     503
No Finding                     | 0.984940 |    2099
Nodule/Mass                    | 0.047277 |     502
Other lesion                   | 0.018440 |     393
Pleural effusion               | 0.202658 |     461
Pleural thickening             | 0.083993 |     918
Pneumothorax                   | 0.099135 |      26
Pulmonary fibrosis             | 0.109607 |     886
mAP: 0.197699
mean_ap : 0.1976985478719129
average_precisions : {'Aortic enlar

100%|██████████| 1031/1031 [27:23<00:00,  1.59s/it, loss=0.576]

|EPOCH 16| TRAIN_LOSS 0.575671928265393| VALID_LOSS 0.5772378597930073|





## 8. Prediction

### 8.1 Load model

After training the model, comment out the line `model = run(train_df, fold=0)` (or simply set `mode = 'predict'`), and load the saved model to run the predictions on the test set.

In [22]:
def load_model():
    """Build a ``DETRModel`` and restore the trained weights onto the CPU.

    Returns:
        The ``DETRModel`` instance (15 classes, 2 queries) with its state
        loaded from ``../input/detr-model/detr_model.pth``.
    """
    # NOTE(review): these two values presumably have to match the ones used
    # when the checkpoint was trained - confirm against the training cell.
    detr = DETRModel(num_classes=15, num_queries=2)

    # map_location forces every tensor of the checkpoint onto the CPU.
    weights = torch.load(
        "../input/detr-model/detr_model.pth",
        map_location=torch.device('cpu'),
    )
    detr.load_state_dict(weights)
    return detr

### 8.2 Plotting results
Plotting expected and predicted boxes with labels


In [23]:
# Class names used for plotting; the position in the list is the class index
# (index 14 is the 'No Finding' class).
CLASSES = [
    'Aortic enlargement',
    'Atelectasis',
    'Calcification',
    'Cardiomegaly',
    'Consolidation',
    'ILD',
    'Infiltration',
    'Lung Opacity',
    'Nodule/Mass',
    'Other lesion',
    'Pleural effusion',
    'Pleural thickening',
    'Pneumothorax',
    'Pulmonary fibrosis',
    'No Finding',
]

# RGB triplets (components in [0, 1]) cycled through when drawing boxes.
COLORS = [
    [0.000, 0.447, 0.741],
    [0.850, 0.325, 0.098],
    [0.929, 0.694, 0.125],
    [0.494, 0.184, 0.556],
    [0.466, 0.674, 0.188],
    [0.301, 0.745, 0.933],
]

In [24]:
# for output bounding box post-processing
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (center_x, center_y, width, height) to corner form.

    Args:
        x: Tensor whose dimension 1 holds the four box components
            (e.g. shape ``(N, 4)``).

    Returns:
        Tensor of the same shape holding (x_min, y_min, x_max, y_max).
    """
    center_x, center_y, width, height = x.unbind(1)
    half_width = 0.5 * width
    half_height = 0.5 * height
    corners = (
        center_x - half_width,
        center_y - half_height,
        center_x + half_width,
        center_y + half_height,
    )
    return torch.stack(corners, dim=1)

def rescale_bboxes(out_bbox, size):
    """Convert relative (cx, cy, w, h) boxes to corner boxes in pixel units.

    Args:
        out_bbox: Tensor of boxes in (center_x, center_y, width, height)
            form, expressed relative to the image size.
        size: ``(image_width, image_height)`` the boxes are scaled to.

    Returns:
        Tensor of (x_min, y_min, x_max, y_max) boxes scaled by ``size``,
        living on the same device as ``out_bbox``.
    """
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    # Build the scale on the boxes' device: multiplying a GPU tensor by a
    # 4-element CPU tensor raises a device-mismatch error.
    scale = torch.tensor([img_w, img_h, img_w, img_h],
                         dtype=torch.float32, device=b.device)
    return b * scale

In [25]:
def view_sample_check_MAP(df_valid,model,device, index_to_show):
    """Plot predictions and annotations for one validation image and print its mAP.

    The first batch of the validation loader is run through ``model``. The
    sample at position ``index_to_show`` of that batch is drawn twice (kept
    predictions first, annotated boxes second) and then scored with
    ``mean_average_precision_for_boxes``.

    Args:
        df_valid: Validation dataframe handed to ``VinDataset``; its index
            supplies the image ids.
        model: Model returning a dict with ``pred_logits`` and ``pred_boxes``.
        device: Torch device the images, targets and model are moved to.
        index_to_show: Position of the sample inside the first batch, so it
            has to be smaller than the batch size (164).

    Returns:
        None. Everything is reported through matplotlib figures and prints;
        the shown image is also saved as ``colors.jpg``.
    """
    # Size used to scale predictions for plotting and to normalise them again
    # for the mAP rows (presumably the side length of the PNGs on disk).
    image_size = 512
    # Minimum class probability for a query to be kept as a detection.
    score_threshold = 0.08

    valid_dataset = VinDataset(
        image_ids=df_valid.index.values,
        dataframe=df_valid,
        transforms=get_valid_transforms()
    )

    valid_data_loader = DataLoader(valid_dataset,
                                   batch_size=164,
                                   shuffle=False,
                                   num_workers=4,
                                   collate_fn=collate_fn)

    # Only the first batch is ever looked at.
    images, targets, image_ids = next(iter(valid_data_loader))
    shown_image_id = image_ids[index_to_show]
    _, h, w = images[index_to_show].shape  # used to de-normalise the annotated boxes
    print("h,w  : {}".format((h, w)))
    print("targets[index_to_show]['labels']  : {}".format(targets[index_to_show]['labels']))
    images = list(img.to(device) for img in images)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    shown_target = targets[index_to_show]
    annotated_boxes = shown_target['boxes'].cpu().numpy()
    print("Anottated boxes.shape AFTER picking ""index_to_show .shape""  : {}".format(annotated_boxes.shape))
    print("Anottated boxes[0] AFTER picking ""index_to_show""  : {}".format(annotated_boxes[0]))
    annotated_boxes = [np.array(box).astype(np.int32) for box in A.augmentations.bbox_utils.denormalize_bboxes(annotated_boxes,h,w)]
    print("denormalize_bboxes Anottated boxes[0] AFTER picking ""index_to_show"" (in coco) : {}".format(annotated_boxes[0]))

    # Ground-truth rows for the mAP computation. The target boxes are read as
    # (center_x, center_y, width, height) and turned into min/max corners.
    target_rows = []
    for label, box in zip(shown_target['labels'], shown_target['boxes']):
        print("label : {}".format(label))
        text = f'{CLASSES[label]}'
        print("text : {}".format(text))
        xmin = box[0] - box[2] / 2
        xmax = box[0] + box[2] / 2
        ymin = box[1] - box[3] / 2
        ymax = box[1] + box[3] / 2
        target_rows.append({"ImageID": shown_image_id, "LabelName": text,
                            "XMin": xmin.item(), "XMax": xmax.item(),
                            "YMin": ymin.item(), "YMax": ymax.item()})
    # Explicit columns keep the frame selectable even when it has no rows.
    map_df_target = pd.DataFrame(
        target_rows,
        columns=["ImageID", "LabelName", "XMin", "XMax", "YMin", "YMax"])

    model.eval()
    model.to(device)

    with torch.no_grad():
        outputs = model(images)

    # Keep only the queries whose best class probability exceeds the threshold
    # (the last logit is dropped before taking the maximum).
    print("outputs['pred_logits'].shape : {}".format(outputs['pred_logits'].shape))
    print("outputs['pred_logits'].softmax(-1).shape : {}".format(outputs['pred_logits'].softmax(-1).shape))
    print("outputs['pred_logits'].softmax(-1)[0, :, :-1].shape : {}".format(outputs['pred_logits'].softmax(-1)[0, :, :-1].shape))
    probas = outputs['pred_logits'].softmax(-1)[index_to_show, :, :-1]
    print("probas.shape : {}".format(probas.shape))
    keep = probas.max(-1).values > score_threshold
    print("keep : {}".format(keep))

    # Convert the kept boxes from [0; 1] to image scale.
    print("outputs['pred_boxes'].shape : {}".format(outputs['pred_boxes'].shape))
    boxes = rescale_bboxes(outputs['pred_boxes'][index_to_show, keep], (image_size, image_size))
    print("Predicted boxes.shape AFTER picking ""index_to_show""  : {}".format(boxes.shape))
    prob = probas[keep]

    string_from_image = f"../input/vinbigdata-512-image-dataset/vinbigdata/train/{image_ids[index_to_show]}.png"

    im = Image.open(string_from_image)
    pil_img = im.convert('RGB')
    pil_img.save('colors.jpg')

    # First figure: the predicted boxes.
    plt.figure(figsize=(16,10))
    plt.imshow(pil_img)
    ax = plt.gca()
    colors = COLORS * 100
    detection_rows = []
    for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes.tolist(), colors):
        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                   fill=False, color=c, linewidth=3))
        print("xmin : {}".format(xmin))
        print("ymin : {}".format(ymin))
        print("xmax : {}".format(xmax))
        print("ymax : {}".format(ymax))

        cl = p.argmax()
        print("cl : {}".format(cl))
        text = f'{CLASSES[cl]}: {p[cl]:0.2f}'
        text_df = f'{CLASSES[cl]}'

        # Detection row for the mAP computation, back in relative coordinates.
        detection_rows.append({"ImageID": shown_image_id, "LabelName": text_df,
                               "Conf": p[cl].item(),
                               "XMin": xmin/image_size, "XMax": xmax/image_size,
                               "YMin": ymin/image_size, "YMax": ymax/image_size})
        ax.text(xmin, ymin, text, fontsize=15,
                bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    plt.show()
    map_df = pd.DataFrame(
        detection_rows,
        columns=["ImageID", "LabelName", "Conf", "XMin", "XMax", "YMin", "YMax"])

    # Second figure: the boxes annotated by the doctors, drawn from their
    # (center_x, center_y, width, height) values.
    plt.figure(figsize=(16,10))
    plt.imshow(pil_img)
    ax = plt.gca()
    colors = COLORS * 100
    for p, (x0, x1, x2, x3), c in zip(shown_target['labels'], annotated_boxes, colors):
        ax.add_patch(plt.Rectangle((x0-x2/2, x1-x3/2), x2, x3,
                                   fill=False, color=c, linewidth=3))
        cl = p
        print("cl : {}".format(cl))
        text = f'{CLASSES[cl]}'
        ax.text(x0, x1, text, fontsize=15,
                bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    plt.show()

    print("map_df_target : {}".format(map_df_target))
    print("map_df : {}".format(map_df))

    ann = map_df_target[['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax']].values
    det = map_df[['ImageID', 'LabelName', 'Conf', 'XMin', 'XMax', 'YMin', 'YMax']].values
    mean_ap, average_precisions = mean_average_precision_for_boxes(ann, det)

    print("mean_ap : {}".format(mean_ap))
    print("average_precisions : {}".format(average_precisions))

In [26]:
def visualize_results():
    """Show predictions against annotations for sample 10 of fold 0 on the CPU.

    Runs ``preprocessing()``, loads the saved model and passes the rows whose
    ``kfold`` equals 0 to ``view_sample_check_MAP``.
    """
    folds_df = preprocessing()
    detr = load_model()
    fold_zero = folds_df[folds_df['kfold'] == 0]
    view_sample_check_MAP(
        fold_zero,
        model=detr,
        device=torch.device('cpu'),
        index_to_show=10,
    )
# uncomment if you want to visualize the training results 
#visualize_results()

### 8.3 Submission


In [27]:
# Directory the test PNGs are read from.
DIR_TEST = '../input/vinbigdata-512-image-dataset/vinbigdata/test'
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

class VinDataset_for_test(Dataset):
    """Dataset over the test PNGs stored in ``DIR_TEST``.

    Each item is ``(image, target, image_id)`` where ``target`` is a dict with
    ``boxes``, ``labels`` and ``image_id`` tensors built from the dataframe
    row of that image.
    """

    def __init__(self,image_ids,dataframe,transforms=None):
        """Store the ids, the dataframe indexed by image id and the transforms.

        Args:
            image_ids: Sequence of image ids (a numpy array or any sequence
                supporting ``len`` and integer indexing).
            dataframe: Dataframe indexed by image id with the columns
                ``class_id``, ``coco_x``, ``coco_y``, ``coco_w``, ``coco_h``.
            transforms: Optional callable invoked as
                ``transforms(image=..., bboxes=..., labels=...)`` and returning
                a dict with the same keys.
        """
        self.image_ids = image_ids
        self.df = dataframe
        self.transforms = transforms

    def __len__(self) -> int:
        """Return the number of image ids."""
        return len(self.image_ids)

    def __getitem__(self,index):
        """Load one image and build its target.

        Returns:
            ``(image / 255, target, image_id)``.

        Raises:
            FileNotFoundError: If the PNG of the image cannot be read.
        """
        image_id = self.image_ids[index]
        records = self.df.loc[image_id]

        image_path = f'{DIR_TEST}/{image_id}.png'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)

        # DETR takes in data in coco format
        boxes = records[['coco_x', 'coco_y', 'coco_w', 'coco_h']].values
        labels = np.array(records['class_id'])

        # A single dataframe row yields 1-D values; add the leading "box" axis.
        if boxes.ndim == 1:
            boxes = np.expand_dims(boxes, axis=0)
            labels = np.expand_dims(labels, axis=0)

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']
            boxes = sample['bboxes']
            labels = sample['labels']
            # assumes the transforms return a channel-first (C, H, W) image - TODO confirm
            _, h, w = image.shape
        else:
            # Untransformed cv2 images are (H, W, C).
            h, w = image.shape[:2]

        # Normalizing BBOXES
        boxes = A.augmentations.bbox_utils.normalize_bboxes(boxes, rows=h, cols=w)

        target = {}
        target['boxes'] = torch.as_tensor(boxes,dtype=torch.float32)
        target['labels'] = torch.as_tensor(labels,dtype=torch.long)
        target['image_id'] = torch.tensor([index])

        return image/255, target, image_id

In [28]:
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixels as an 8-bit, 3-channel array.

    Args:
        path: Location of the DICOM file.
        voi_lut: When true, pass the pixel data through ``apply_voi_lut``.
        fix_monochrome: When true, invert images whose
            ``PhotometricInterpretation`` is ``MONOCHROME1``.

    Returns:
        ``np.uint8`` array with values scaled to 0..255; for a 2-D pixel
        array the result has shape ``(rows, cols, 3)`` with the same data
        repeated in every channel.
    """
    # dcmread is the current name of the reader; read_file was a deprecated alias.
    dicom = pydicom.dcmread(path)

    # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Stretch the values to 0..255.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        # A constant image has peak 0; skipping the division keeps it all-zero
        # instead of producing NaNs.
        data = data / peak
    data = (data * 255).astype(np.uint8)
    data = np.stack([data]*3).transpose(1,2,0)
    return data

In [29]:
def predictions(model):
    """Run ``model`` over the test images and write ``submission.csv``.

    For every image the queries whose best class probability exceeds 0.08 are
    kept. Their boxes are scaled to the size of the original DICOM and written
    to ``PredictionString`` as ``class confidence x_min y_min x_max y_max``
    groups; class 14 is written with the fixed box ``0 0 1 1``. Images
    without any kept query keep the value from ``sample_submission.csv``.

    Args:
        model: Model returning a dict with ``pred_logits`` and ``pred_boxes``;
            it is moved to the CPU and put in eval mode.

    Returns:
        None. The result is written to ``submission.csv``.
    """
    DIR_INPUT = '/kaggle/input/vinbigdata-chest-xray-abnormalities-detection'
    no_finding_class = 14   # written with the fixed '0 0 1 1' box
    score_threshold = 0.08  # minimum class probability to keep a query
    png_size = 512          # size the predicted boxes are first scaled to

    test_df = pd.read_csv(f'{DIR_INPUT}/sample_submission.csv')
    # VinDataset_for_test reads a box and a class from every row, so each test
    # image gets a dummy annotation (scalar assignment fits any row count).
    dummy_columns = ['coco_x', 'coco_y', 'coco_w', 'coco_h', 'class_id']
    for column in dummy_columns:
        test_df[column] = 1
    test_df.set_index('image_id', inplace=True)

    valid_dataset = VinDataset_for_test(image_ids=test_df.index.values,
                                        dataframe=test_df,
                                        transforms=get_valid_transforms()
                                        )

    valid_data_loader = DataLoader(valid_dataset,
                                   batch_size=1,
                                   shuffle=False,
                                   num_workers=4,
                                   collate_fn=collate_fn)

    # The model setup does not change between images, so do it once.
    device = torch.device('cpu')
    model.to(device)
    model.eval()

    for images, _targets, image_ids in valid_data_loader:
        image_id = image_ids[0]  # batch_size is 1
        images = [img.to(device) for img in images]

        with torch.no_grad():
            outputs = model(images)

        # The last logit is dropped before taking the per-query maximum.
        probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
        keep = probas.max(-1).values > score_threshold
        prob = probas[keep]
        if prob.shape[0] == 0:
            # Nothing detected: leave the sample-submission value untouched
            # and skip decoding the DICOM.
            continue

        # convert boxes from [0; 1] to image scales
        boxes = rescale_bboxes(outputs['pred_boxes'][0, keep], (png_size, png_size))

        # Read the dicom to get the actual size of the image.
        image_dicom = read_xray(f'../input/vinbigdata-chest-xray-abnormalities-detection/test/{image_id}.dicom')
        dicom_y = image_dicom.shape[0]
        dicom_x = image_dicom.shape[1]

        parts = []
        for scores, box in zip(prob, boxes):
            class_pred = scores.argmax()
            if class_pred != no_finding_class:
                x_min, y_min, x_max, y_max = box.tolist()
                # Rescale the box based on the actual size
                box_list = [(dicom_x/png_size) * x_min, (dicom_y/png_size) * y_min,
                            (dicom_x/png_size) * x_max, (dicom_y/png_size) * y_max]
                boxz_string = ' '.join(str(e) for e in box_list)
            else:
                boxz_string = '0 0 1 1'
            parts.append(' '.join([str(class_pred.numpy()),
                                   str(scores.max().numpy()),
                                   boxz_string]))

        # Assign the final string to "PredictionString"
        test_df.at[image_id, 'PredictionString'] = ' '.join(parts)

    submission = test_df.reset_index()[['image_id', 'PredictionString']]
    submission.to_csv('submission.csv', index=False)

In [30]:
def prediction_model():
    """Load the saved model and run ``predictions`` with it, printing progress."""
    print("to load model")
    detr = load_model()
    print("model loaded")
    predictions(detr)

In [31]:
# Run the prediction pipeline only when the notebook was switched to
# 'predict' mode in the first cell.
if mode == 'predict':
    prediction_model()