# [IAPR 2019:][iapr2019] Special project

**Group members:**
    1- first name and last name,
    2- first name and last name,
    3- first name and last name

**Due date:** 30.05.2019

[iapr2019]: https://github.com/LTS5/iapr-2019


## Description
Please find the description of this special project via [this link].

[this link]: https://github.com/LTS5/iapr-2019/blob/master/project/special_project_description.pdf

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
import os
from PIL import Image, ImageDraw
import math
import cv2 
import xml.etree.ElementTree as ET

In [2]:
src_path_train = './project-data/images/train/'
xml_path_train="./project-data/annotations/train/"
prepared_data_path="./project-data/images/train/prepared_data/"
img_list_dir = os.listdir(src_path_train)
img_list = [names for names in img_list_dir if names.endswith(".jpg")]
img_list_size = len(img_list)

src_path_test = './project-data/images/test/'
xml_path_test="./project-data/annotations/test/"
prepared_data_path_test="./project-data/images/test/prepared_data/"
img_list_dir_test = os.listdir(src_path_test)
img_list_test = [names for names in img_list_dir_test  if names.endswith(".jpg")]
img_list_size_test = len(img_list_test)

src_path_validation = './project-data/images/validation/'
xml_path_validation="./project-data/annotations/validation/"
prepared_data_path_validation="./project-data/images/validation/prepared_data/"
img_list_dir_validation = os.listdir(src_path_validation)
img_list_validation = [names for names in img_list_dir_validation  if names.endswith(".jpg")]
img_list_size_validation = len(img_list_validation)
print("train image size {} , test image size {}, validation image size {}".format(img_list_size,img_list_size_test,img_list_size_validation))

train image size 800 , test image size 50, validation image size 150


In [3]:
def parse_file(filename):
    """ Parse a PASCAL VOC xml file """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(float(bbox.find('xmin').text)),
                              int(float(bbox.find('ymin').text)),
                              int(float(bbox.find('xmax').text))-int(float(bbox.find('xmin').text)),
                              int(float(bbox.find('ymax').text))-int(float(bbox.find('ymin').text))]
        objects.append(obj_struct)
    return objects

In [7]:
annotations_xmls = [parse_file(os.path.join(xml_path_train, name[:-4]) + '.xml') for name in img_list]
annotations_xmls_test=[parse_file(os.path.join(xml_path_test, name[:-4]) + '.xml') for name in img_list_test]
annotations_xmls_validation=[parse_file(os.path.join(xml_path_validation, name[:-4]) + '.xml') for name in img_list_validation]

In [8]:
def calculate_IOU(bbox1, bbox2):
    #Calculate overlap between two bounding boxes [x, y, w, h] as the area of intersection over the area of unity
    
    #determine the (x, y)-coordinates of the intersection rectangle
    x1, y1, w1, h1 = bbox1[0], bbox1[1], bbox1[2], bbox1[3]
    x2, y2, w2, h2 = bbox2[0], bbox2[1], bbox2[2], bbox2[3]
    
    
    xA = max(x1, x2)
    yA = max(y1, y2)
    xB = min(x1+w1, x2+w2)
    yB = min(y1+h1, y2+h2)
    # compute the area of intersection rectangle
    interArea = max(0, xB - xA ) * max(0, yB - yA )
    
    # compute the area of both the prediction and ground-truth
    boxAArea = (w1 ) * (h1 )
    boxBArea = (w2 ) * (h2 )

    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = interArea / float(boxAArea + boxBArea - interArea)

    return iou

In [9]:
def split_and_label(prepared_data_path,img,img_name,annotations): 
    #Mirror padding
    w,h,d = img.shape 
    w_i=int(w/32)+1
    h_i=int(h/32)+1
    new_w=w_i*32
    new_h=h_i*32
    count=0
    new_img=img
    l=0
    for i in range(w_i): 
        for j in range(h_i): 
            count+=1 
            name=img_name+"_"+str(count)
            sub_img=new_img[0:64,0:64,:]
            label=0
            for box in annotations:
                if (box[0]>=i*32  and box[0]< i*32+64 and box[1]>=j*32 and  box[1]< j*32+64) :
                    if ( box[0]+box[2]<i*32+64 and box[1]+box[3]<j*32+64):
                        l=l+1
                        label=1
                """"
                iou=calculate_IOU(box,[i*32,j*32,64,64])
                if (iou>0.5): 
                    label=1 
                    j=j+1
                    break
                """
            name+="_"+str(label)
            im=Image.fromarray(sub_img)
            b, g, r = im.split()
            im= Image.merge("RGB", (r, g, b))
            im.save(prepared_data_path+name+".jpg")         
            new_img=np.roll(new_img,-32,axis=1)
        new_img=np.roll(new_img,-32,axis=0)
    return l 

In [10]:
ground_truth_1=[]
for j in range(len(annotations_xmls)):
    for i in range(len(annotations_xmls[j])): 
        ground_truth_1.append(annotations_xmls[j][i]['bbox'][2:3])
max_box_size=max(ground_truth_1)
ground_truth_1=[]

for j in range(len(annotations_xmls_test)):
    for i in range(len(annotations_xmls_test[j])): 
        ground_truth_1.append(annotations_xmls_test[j][i]['bbox'][2:3])
max_box_size_test=max(ground_truth_1)

for j in range(len(annotations_xmls_validation)):
    for i in range(len(annotations_xmls_validation[j])): 
        ground_truth_1.append(annotations_xmls_validation[j][i]['bbox'][2:3])
max_box_size_validation=max(ground_truth_1)
print("train max box size {} , test max box size {}, validation max box size {}".format(max_box_size,max_box_size_test,max_box_size_validation))

train max box size [56] , test max box size [52], validation max box size [52]


In [11]:
import random
def split_and_label_v2(prepared_data_path,max_box_size,img,img_name,annotations): 
    count=0
    width,height=img.shape[0],img.shape[1]
    bbox=[]
    for box in annotations: 
        name=img_name+"_"+str(count)+"_1"

        xmin,ymin,w,h=box[0],box[1],box[2],box[3] 
        max_size=int(max_box_size[0]/2)
        new_img_=img[max(0,ymin-10):min(ymin+max_size+10,width),max(0,xmin-10):min(xmin+max_size+10,height),:]
        im=Image.fromarray(new_img_)
        bbox.append([10,10,box[2],box[3],1])
        b, g, r = im.split()
        im= Image.merge("RGB", (r, g, b))
        im.save(prepared_data_path+name+".jpg") 
        name=img_name+"_"+str(count)+"_0"
        if (xmin-56)>0 and (ymin-56)>0:
            x1 = random.randint(0, xmin-56)
            y1 = random.randint(0, ymin-56)
        else :
            x1 = random.randint(0, min(xmin+56,height))
            y1 = random.randint(0, min(ymin+56,width))
        new_img=img[y1:min(y1+48,width),x1:min(height,x1+48),:]
        bbox.append([0,0,0,0,0])
        im=Image.fromarray(new_img)
        b, g, r = im.split()
        im= Image.merge("RGB", (r, g, b))
        im.save(prepared_data_path+name+".jpg") 
        count+=1
    return bbox

In [12]:
def make_dataset(img_list,src_path,max_box_size,prepared_data_path,annotations_xmls):
    bbox=[]
    for j in range(len(img_list)):
        name=img_list[j]
        img = cv2.imread(src_path+name)
        ground_truth_1=[]
        for i in range(len(annotations_xmls[j])): 
            ground_truth_1.append(annotations_xmls[j][i]['bbox'])
        ground_truth_1
        bbox.append(split_and_label_v2(prepared_data_path,max_box_size,img,name[:-4],ground_truth_1))
    return bbox

In [13]:
print(len(annotations_xmls_test))

50


In [14]:
bbox=make_dataset(img_list,src_path_train,max_box_size,prepared_data_path,annotations_xmls)
bbox_test=make_dataset(img_list_test,src_path_test,max_box_size_test,prepared_data_path_test,annotations_xmls_test)
bbox_validation=make_dataset(img_list_validation,src_path_validation,max_box_size_validation,prepared_data_path_validation,annotations_xmls_validation)

In [15]:
img_list_dir_prep = os.listdir(prepared_data_path)
number_label_non_varroa_train=0
number_label_varroa_train=0
for prep_name in img_list_dir_prep: 
    if prep_name.endswith('_1.jpg'):
        number_label_varroa_train+=1
    if prep_name.endswith('_0.jpg'):
        number_label_non_varroa_train+=1
print("FOR TRAINING")
print("number of images with label =1 (varroas) is ",number_label_non_varroa_train)
print("number of images with label =0 (non varroas) is ",number_label_varroa_train)
print("Total number of files = ",number_label_varroa_train+number_label_non_varroa_train)    

img_list_dir_prep_test = os.listdir(prepared_data_path_test)
number_label_non_varroa_test=0
number_label_varroa_test=0
for prep_name in img_list_dir_prep_test: 
    if prep_name.endswith('_1.jpg'):
        number_label_varroa_test+=1
    if prep_name.endswith('_0.jpg'):
        number_label_non_varroa_test+=1
print("FOR TESTING")
print("number of images with label =1 (varroas) is ",number_label_non_varroa_test)
print("number of images with label =0 (non varroas) is ",number_label_varroa_test)
print("Total number of files = ",number_label_varroa_test+number_label_non_varroa_test)

img_list_dir_prep_validation = os.listdir(prepared_data_path_validation)
number_label_non_varroa_val=0
number_label_varroa_val=0
for prep_name in img_list_dir_prep_validation: 
    if prep_name.endswith('_1.jpg'):
        number_label_varroa_val+=1
    if prep_name.endswith('_0.jpg'):
        number_label_non_varroa_val+=1
print("FOR VALIDATION")
print("number of images with label =1 (varroas) is ",number_label_non_varroa_val)
print("number of images with label =0 (non varroas) is ",number_label_varroa_val)
print("Total number of files = ",number_label_varroa_val+number_label_non_varroa_val)

FOR TRAINING
number of images with label =1 (varroas) is  7483
number of images with label =0 (non varroas) is  7483
Total number of files =  14966
FOR TESTING
number of images with label =1 (varroas) is  582
number of images with label =0 (non varroas) is  582
Total number of files =  1164
FOR VALIDATION
number of images with label =1 (varroas) is  1539
number of images with label =0 (non varroas) is  1539
Total number of files =  3078


In [19]:
def resize(img_list_dir_prep,prepared_data_path):
    img_list_prep = [names for names in img_list_dir_prep if names.endswith(".jpg")]
    sizes = [Image.open(prepared_data_path+f, 'r').size for f in img_list_prep]
    max_width,max_height=max(sizes)
    for item in img_list_dir_prep:
        if os.path.isfile(prepared_data_path+item):
            im = Image.open(prepared_data_path+item)
            f, e = os.path.splitext(prepared_data_path+item)
            imResize = im.resize((48,48), Image.ANTIALIAS)
            imResize.save(f + '.jpg', 'JPEG', quality=90)

In [20]:
resize(img_list_dir_prep,prepared_data_path)
resize(img_list_dir_prep_test,prepared_data_path_test)
resize(img_list_dir_prep_validation,prepared_data_path_validation)

In [21]:
img_list_prep_train = [names for names in img_list_dir_prep if names.endswith(".jpg")]
img_list_prep_test = [names for names in img_list_dir_prep_test if names.endswith(".jpg")]
img_list_prep_validation = [names for names in img_list_dir_prep_validation if names.endswith(".jpg")]

In [23]:
num_imgs_train=number_label_varroa_train+number_label_non_varroa_train
num_imgs_test=number_label_varroa_test+number_label_non_varroa_test
num_imgs_validation=number_label_varroa_val+number_label_non_varroa_val

X_train=np.zeros((num_imgs_train,48,48,3))
for i in range(len(img_list_prep_train)):
    name=img_list_prep_train[i]
    img = cv2.imread(prepared_data_path+name)
    X_train[i,:,:,:]=img

X_test=np.zeros((num_imgs_test,48,48,3))
for i in range(len(img_list_prep_test)):
    name=img_list_prep_test[i]
    img = cv2.imread(prepared_data_path_test+name)
    X_test[i,:,:,:]=img

X_val=np.zeros((num_imgs_validation,48,48,3))
for i in range(len(img_list_prep_validation)):
    name=img_list_prep_validation[i]
    img = cv2.imread(prepared_data_path_validation+name)
    X_val[i,:,:,:]=img


In [24]:
print(X_train.shape)
print(X_test.shape)
print(X_val.shape)

(14966, 48, 48, 3)
(1164, 48, 48, 3)
(3078, 48, 48, 3)


In [25]:
count=0
bbox_train_=np.zeros((X_train.shape[0],5))
for image_box in bbox: 
    for box in image_box:
        bbox_train_[count,0]=box[0]
        bbox_train_[count,1]=box[1]
        bbox_train_[count,2]=box[2]
        bbox_train_[count,3]=box[3]
        bbox_train_[count,4]=box[4]
        count+=1  

count=0
bbox_test_=np.zeros((X_test.shape[0],5))
for image_box in bbox_test: 
    for box in image_box:
        bbox_test_[count,0]=box[0]
        bbox_test_[count,1]=box[1]
        bbox_test_[count,2]=box[2]
        bbox_test_[count,3]=box[3]
        bbox_test_[count,4]=box[4]
        count+=1  

count=0
bbox_validation_=np.zeros((X_val.shape[0],5))
for image_box in bbox_validation: 
    for box in image_box:
        bbox_validation_[count,0]=box[0]
        bbox_validation_[count,1]=box[1]
        bbox_validation_[count,2]=box[2]
        bbox_validation_[count,3]=box[3]
        bbox_validation_[count,4]=box[4]
        count+=1  

In [26]:
Y_train=bbox_train_
Y_test=bbox_test_
Y_val=bbox_validation_

In [27]:
batch_size = 50
num_epoch = 10
learning_rate = 0.0001

In [28]:
# define network architecture
import torch
import torch.nn.functional as F

class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output, n_c):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # hidden layer
        self.box = torch.nn.Linear(n_hidden, n_output-1)   # output layer
        self.logit = torch.nn.Linear(n_hidden, 1)
        
        self.conv1 = torch.nn.Sequential(         # 
            torch.nn.Conv2d(
                in_channels = n_c,            # input height
                out_channels = 8,             # n_filters
                kernel_size = 2,              # filter size
                stride = 2,                   # filter movement/step
                padding = 0,                  
            ),                              
            torch.nn.ReLU(),                      # activation
            #torch.nn.MaxPool2d(kernel_size = 2),    
        )
        self.conv2 = torch.nn.Sequential(       
            torch.nn.Conv2d(in_channels = 8, 
                            out_channels = 16, 
                            kernel_size = 2, 
                            stride = 2, 
                            padding = 0),      
            torch.nn.ReLU(),                      # activation
            #torch.nn.MaxPool2d(2),                
        )
        
        self.conv3 = torch.nn.Sequential(       
            torch.nn.Conv2d(in_channels = 16, 
                            out_channels = 8, 
                            kernel_size = 1, 
                            stride = 1, 
                            padding = 0),      
            torch.nn.ReLU(),                      # activation
            #torch.nn.MaxPool2d(2),                
        )
    def forward(self, x):
        feat = self.conv1(x)
        feat = self.conv2(feat)
       
        feat = self.conv3(feat)
        feat = feat.view(feat.size(0), -1)
        
        x2 = F.relu(self.hidden(feat))      # activation function for hidden layer
       
        out_box = F.relu(self.box(x2))            # linear output
        out_logit = torch.sigmoid(self.logit(x2))
        
        return out_box, out_logit
      
net = Net(n_feature = 1152, n_hidden = 256, n_output = 5, n_c = 3)     # define the network
print(net)  # net architecture


Net(
  (hidden): Linear(in_features=1152, out_features=256, bias=True)
  (box): Linear(in_features=256, out_features=4, bias=True)
  (logit): Linear(in_features=256, out_features=1, bias=True)
  (conv1): Sequential(
    (0): Conv2d(3, 8, kernel_size=(2, 2), stride=(2, 2))
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(2, 2), stride=(2, 2))
    (1): ReLU()
  )
  (conv3): Sequential(
    (0): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU()
  )
)


In [29]:
regression_criterion =  torch.nn.MSELoss()
classification_criterion =  torch.nn.BCELoss()# Hint: Consider that we only one class to predict
gamma =0.9

In [30]:
# Instanciate the network and define the optimizer
num_channels=3
net = Net(n_feature = 1152, n_hidden = 256, n_output = 5, n_c = 3)
optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate)
if(X_train.shape[1]!=num_channels): #dim1==channel
    X_train = X_train.transpose((0,3,1,2))
n_batch = X_train.shape[0]//batch_size

for epoch in range(num_epoch):
    for batch in range(n_batch):
        batch_X = X_train[batch*batch_size:min((batch+1)*batch_size, X_train.shape[0])]
        batch_y = Y_train[batch*batch_size:min((batch+1)*batch_size, X_train.shape[0])]
        out_box, out_logit = net(torch.tensor(batch_X, dtype=torch.float32))
        
        mask_arr = np.argwhere(batch_y[:,-1]==1).reshape((-1,))
        regression_loss = regression_criterion(out_box[mask_arr], torch.tensor(batch_y[mask_arr,:-1], dtype=torch.float32))
        classification_loss = classification_criterion(out_logit, torch.tensor(batch_y[:,-1:], dtype=torch.float32))
        
        # Compose the 2 loss functions using the weight gamma (1 line)
        loss = gamma*(regression_loss)+(1-gamma)*classification_loss 
        
        optimizer.zero_grad()   # clear gradients for next train
        loss.backward()         # backpropagation, compute gradients
        optimizer.step()        # apply gradients
    
    print('epoch: {}  Total loss -> {:.5f}   classif_loss -> {:.5f}  regress_loss -> {:.5f}'
          .format(epoch, loss.item(), classification_loss.item(), regression_loss.item()))

epoch: 0  Total loss -> 13.25483   classif_loss -> 0.45155  regress_loss -> 14.67742
epoch: 1  Total loss -> 9.41509   classif_loss -> 0.37795  regress_loss -> 10.41922
epoch: 2  Total loss -> 10.13451   classif_loss -> 0.32755  regress_loss -> 11.22417
epoch: 3  Total loss -> 10.67117   classif_loss -> 0.28011  regress_loss -> 11.82574
epoch: 4  Total loss -> 10.99352   classif_loss -> 0.24921  regress_loss -> 12.18733
epoch: 5  Total loss -> 11.17108   classif_loss -> 0.23658  regress_loss -> 12.38602
epoch: 6  Total loss -> 11.76861   classif_loss -> 0.24594  regress_loss -> 13.04890
epoch: 7  Total loss -> 11.65050   classif_loss -> 0.21178  regress_loss -> 12.92147
epoch: 8  Total loss -> 11.39344   classif_loss -> 0.22867  regress_loss -> 12.63397
epoch: 9  Total loss -> 11.22220   classif_loss -> 0.18945  regress_loss -> 12.44806


In [31]:
def calculate_accuracy(pred_class, pred_bboxes, test_bboxes):
    # Calculate the mean IOU (overlap) between the predicted and expected bounding boxes on the test dataset. 
    summed_IOU = 0.
    l =0
    for pred_bbox, test_bbox in zip(pred_bboxes.reshape(-1, 4), test_bboxes.reshape(-1, 5)):
        if(test_bbox[4]==1): # the ones that have black boxes
            summed_IOU += calculate_IOU(pred_bbox, test_bbox)
            l+=1
    mean_IOU = summed_IOU / l
    print("mean IOU: ", mean_IOU)

    # classification accuracy
    print("classification acc: ", np.mean(test_bboxes[:,4:]==pred_class))

In [32]:
if(X_train.shape[1]!=num_channels):
    X_train = train_X.transpose((0,3,1,2))
# Predict bounding boxes on the train images.
with torch.no_grad():
    pred_y_train_box, pred_y_train_logit = net.forward(torch.tensor(X_train, dtype=torch.float32))
    pred_y_train_box, pred_y_train_logit = pred_y_train_box.numpy(), pred_y_train_logit.numpy()
    pred_y_train_label = pred_y_train_logit>0.5
    pred_bboxes_train = pred_y_train_box
    pred_bboxes_train = pred_bboxes_train.reshape(len(pred_bboxes_train), 1, -1)
    pred_bboxes_train.shape

calculate_accuracy(pred_y_train_label, pred_bboxes_train, Y_train)

mean IOU:  0.6023324235574279
classification acc:  0.9061205398904183


In [33]:
if(X_test.shape[1]!=num_channels):
    X_test = X_test.transpose((0,3,1,2))
# Predict bounding boxes on the train images.
with torch.no_grad():
    pred_y_test_box, pred_y_test_logit = net.forward(torch.tensor(X_test, dtype=torch.float32))
    pred_y_test_box, pred_y_test_logit = pred_y_test_box.numpy(), pred_y_test_logit.numpy()
    pred_y_test_label = pred_y_test_logit>0.5
    pred_bboxes_test = pred_y_test_box
    pred_bboxes_test = pred_bboxes_test.reshape(len(pred_bboxes_test), 1, -1)
    pred_bboxes_test.shape

calculate_accuracy(pred_y_test_label, pred_bboxes_test, Y_test)

mean IOU:  0.6020962485741526
classification acc:  0.8530927835051546


In [34]:
if(X_val.shape[1]!=num_channels):
    X_val = X_val.transpose((0,3,1,2))
# Predict bounding boxes on the train images.
with torch.no_grad():
    pred_y_val_box, pred_y_val_logit = net.forward(torch.tensor(X_val, dtype=torch.float32))
    pred_y_val_box, pred_y_val_logit = pred_y_val_box.numpy(), pred_y_val_logit.numpy()
    pred_y_val_label = pred_y_val_logit>0.5
    pred_bboxes_val = pred_y_val_box
    pred_bboxes_val = pred_bboxes_val.reshape(len(pred_bboxes_val), 1, -1)
    pred_bboxes_val.shape

calculate_accuracy(pred_y_val_label, pred_bboxes_val, Y_val)

mean IOU:  0.586792329312264
classification acc:  0.9252761533463287


## Part 1: Finding varroas by segmentation
Add your implementation for ''**detect_by_segmentation**'' function. Please make sure the input and output follows the mentioned format.

In [1]:
def detect_by_segmentation(img):
    '''
    Input: One single image
    Output: A numpy array containing coordonates of all detected varroas, with the following format: 
            [[x_1, y_1, w_1, h_2], [x_2, y_2, w_1, h_2], ..., [x_n, y_n, w_n, h_n]] 
            where ''n'' is the number of detected varroas.
    '''

    #Your code

Add your implementation. Report the Precision, Recall and F1-score, by using all 50 images of the test-set, and considering 0.3 as the IoU threshold.

In [2]:
#Your code

## Part 2: Implement your first detector

Write your function(s) for the second part. Feel free to change the name of the function and add your additional functions, but please make sure their input and output follows the mentioned format.

In [3]:
def detect_by_method_1(img):
    '''
    Input: One single image
    Output: A numpy array containing coordonates of all detected varroas, with the following format: 
            [[x_1, y_1, w_1, h_2], [x_2, y_2, w_1, h_2], ..., [x_n, y_n, w_n, h_n]] 
            where ''n'' is the number of detected varroas.
    '''

    #Your code

Add your implementation. Report the Precision, Recall and F1-score, by using all 50 images of the test-set, and considering 0.3 as the IoU threshold.

In [4]:
#Your code

## Part 3: Using MLP and CNNs

Add your implementation for the thrid part. Feel free to add your desirable functions, but please make sure you have proper functions for the final detection, where their input and output follows the same format as the previous parts.

## Challenge

You can generate a json submission file by using the function ''**generate_pred_json**''. This prediction file can be uploaded online for evaluation (Please refer to section 3 of the project description for more details).

In [6]:
import numpy as np
import json

def generate_pred_json(data, tag='baseline'):
    '''
    Input
    - data: Is a dictionary d, such that:
          d = { 
              "ID_1": [], 
              "ID_2": [[x_21, y_21, w_21, h_21], [x_22, y_22, w_22, h_22]], 
              ... 
              "ID_i": [[x_i1, y_i1, w_i1, h_i1], ..., [x_iJ, y_iJ, w_iJ, h_iJ]],
              ... 
              "ID_N": [[x_N1, y_N1, w_N1, h_N1]],
          }
          where ID is the string id of the image (e.i. 5a05e86fa07d56baef59b1cb_32.00px_1) and the value the Kx4 
          array of intergers for the K predicted bounding boxes (e.g. [[170, 120, 15, 15]])
    - tag: (optional) string that will be added to the name of the json file.
    Output
      Create a json file, "prediction_[tag].json", conatining the prediction to EvalAI format.
    '''
    unvalid_key = []
    _data = data.copy()
    for key, value in _data.items():
        try:
            # Try to convert to numpy array and cast as closest int
            print(key)
            v = np.around(np.array(value)).astype(int)
            # Check is it is a 2d array with 4 columns (x,y,w,h)
            if v.ndim != 2 or v.shape[1] != 4:
                unvalid_key.append(key)
            # Id must be a string
            if not isinstance(key, str):
                unvalid_key.append(key)
            _data[key] = v.tolist()
        # Deal with not consistant array size and empty predictions
        except (ValueError, TypeError):
            unvalid_key.append(key)
    # Remove unvalid key from dictionnary
    for key in unvalid_key: del _data[key]
    
    with open('prediction_{}.json'.format(tag), 'w') as outfile:
        json.dump(_data, outfile)

In [7]:
#Your code