In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = str(0)
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import torch

#### Specify the location of the COCO data. If the location corresponds to training or validation data, there should be an annotations file<br>for creating training targets. For testing data there is no annotations file.

In [2]:
# Root folder of the COCO download. The trailing slash is kept because other
# cells may concatenate onto this string directly.
DATA_DIR = '/home/Data/COCO dataset/'
# Name of the image split; it is also the name of the image sub-folder.
DATA_TYPE = 'val2017'
# Build both derived paths with os.path.join so that no doubled (or missing)
# separator appears regardless of whether DATA_DIR ends with a slash.
ANN_FILE = os.path.join(DATA_DIR, 'annotations', f'instances_{DATA_TYPE}.json')
OUTPUT_FILE = os.path.join(DATA_DIR, 'results', f'result_{DATA_TYPE}.json')

#### Create a Dataset class for building samples from images. The images don't all have to be the same size, so we must return lists of tensors.

In [3]:
class COCO_Dataset(torch.utils.data.Dataset):
    """Dataset over the images of one COCO split, with targets when annotations exist.

    Each item is a pair ``(image, target)``:

    * ``image`` is a CUDA tensor obtained by moving the channel axis of the
      loaded image to the front and dividing by 255.
    * ``target`` is a dict. When an annotations file could be loaded it holds
      one CUDA tensor per annotation field (every field except
      ``'segmentation'``), with ``'bbox'`` converted from ``[x, y, w, h]`` to
      ``[x1, y1, x2, y2]`` and renamed ``'boxes'``, and ``'category_id'``
      renamed ``'labels'``. Otherwise it only carries the image id.

    ``(None, None)`` is returned for images that are not 3-dimensional arrays
    and for annotated splits where the image has no annotations, so callers
    need a collate function that drops those entries.

    Args:
        dataDir: root folder containing ``annotations/`` and the image folder.
        dataType: name of the split; also the name of the image folder.
    """
    def __init__(self,dataDir=DATA_DIR,dataType=DATA_TYPE):
        # os.path.join keeps the paths valid with or without a trailing slash.
        self.annFile = os.path.join(dataDir, 'annotations', f'instances_{dataType}.json')
        try:
            self.coco = COCO(self.annFile)
            self.catIds = self.coco.getCatIds()
            self.have_ann = True
        except Exception:
            # Best effort: fall back to annotation-free mode. `Exception`
            # (rather than a bare except) lets Ctrl-C / SystemExit propagate.
            print('No annotations file found')
            self.coco = None
            self.have_ann = False
        self.pref = os.path.join(dataDir,dataType)
        # Sorted so the sample order does not depend on the filesystem.
        self.img_names = sorted(os.listdir(self.pref))
    def __getitem__(self,idx):
        """Return ``(image, target)`` for sample ``idx`` or ``(None, None)`` if it is skipped."""
        listed_name = self.img_names[idx]
        # The numeric stem of the file name is used as the COCO image id.
        img_id = int(listed_name.partition('.')[0])
        if self.have_ann:
            fname = self.coco.imgs[img_id]['file_name']
        else:
            # No index to consult: use the name found on disk as-is.
            fname = listed_name
        # load in image data
        img_mat = io.imread(os.path.join(self.pref,fname))
        if img_mat.ndim != 3:
            # e.g. single-channel images have no channel axis; skip them
            return None,None
        img_torch = torch.tensor(img_mat.transpose((2,0,1))).cuda()/255
        # load in annotations if annotation file exists
        if self.have_ann:
            annIds = self.coco.getAnnIds(imgIds=img_id, catIds=self.catIds, iscrowd=None)
            anns = self.coco.loadAnns(annIds)
            if len(anns) == 0:
                return None,None
            # Column-wise view of the annotations. The field list comes from
            # the first annotation, so extra fields on later ones are ignored
            # instead of raising KeyError.
            fields = [k for k in anns[0].keys() if k != 'segmentation']
            targets = {k:[ann[k] for ann in anns] for k in fields}
            targets2 = {k:torch.tensor(v).cuda() for k,v in targets.items()}
            # [x, y, w, h] -> [x1, y1, x2, y2]
            targets2['bbox'][:,2:] += targets2['bbox'][:,:2]
            targets2['boxes'] = targets2.pop('bbox')
            targets2['labels'] = targets2.pop('category_id')
        else:
            ## if annotations are not available make dummy target that keeps image identification information
            # 'image_id' mirrors the key produced for annotated samples;
            # 'img_id' is kept for backward compatibility.
            id_tensor = torch.tensor([img_id])
            targets2 = {'img_id':id_tensor,'image_id':id_tensor}
        return img_torch,targets2
    def __len__(self):
        """Number of files found in the image folder."""
        return len(self.img_names)

#### Create a dataloader for the COCO data. Since the data comes as lists, we must write a custom collate_fn to prevent the default collate_fn <br> from trying to stack the lists into a single tensor.

In [4]:
def collate_fn(batch):
    """Split a batch of ``(data, target)`` pairs into two parallel lists.

    Pairs whose first element is ``None`` are dropped; nothing is stacked.
    """
    images, labels = [], []
    for sample in batch:
        if sample[0] is None:
            continue
        images.append(sample[0])
        labels.append(sample[1])
    return images, labels

In [5]:
# Build the dataset for the configured split (loads the annotation index if present).
coco_data = COCO_Dataset(dataDir=DATA_DIR, dataType=DATA_TYPE)

loading annotations into memory...
Done (t=0.42s)
creating index...
index created!


In [6]:
# Batches of 8 samples, assembled by the list-based collate_fn instead of the default one.
coco_loader = torch.utils.data.DataLoader(
    dataset=coco_data,
    batch_size=8,
    collate_fn=collate_fn,
)

#### Function that takes model output and converts it to format specified by COCO to evaluate model performance

In [7]:
import pycocotools
def _encode_masks(masks, threshold=0.5):
    """Binarise predicted masks and return one JSON-serialisable COCO RLE dict per instance."""
    from pycocotools import mask as mask_utils  # local: only needed when masks are present
    soft = masks.detach().cpu().numpy()
    if soft.ndim == 4:
        # torchvision Mask R-CNN returns [N, 1, H, W]; drop the channel axis
        soft = soft[:, 0]
    # The encoder expects a Fortran-ordered uint8 array laid out as (H, W, N).
    binary = np.asfortranarray((soft >= threshold).astype(np.uint8).transpose((1, 2, 0)))
    rles = mask_utils.encode(binary)
    for rle in rles:
        # 'counts' comes back as bytes, which json cannot serialise
        rle['counts'] = rle['counts'].decode('ascii')
    return rles

def process_output(outs,targets,results):
    """Append the detections in ``outs`` to ``results`` in COCO results format.

    Args:
        outs: per-image dicts with ``'boxes'`` (``[x1, y1, x2, y2]`` rows),
            ``'scores'``, ``'labels'`` and optionally ``'masks'``.
        targets: per-image dicts; the image id is read from the first element
            of ``'image_id'`` (or ``'img_id'`` when that key is absent).
        results: list extended in place with one dict per detection holding
            ``'image_id'``, ``'score'``, ``'category_id'``, ``'bbox'``
            (``[x, y, w, h]``) and ``'segmentation'`` (an RLE dict, or
            ``None`` when the model produced no masks).
    """
    for out,target in zip(outs,targets):
        id_key = 'image_id' if 'image_id' in target else 'img_id'
        image_id = target[id_key][0].item()
        # .numpy() on a CPU tensor shares its memory, so copy before the
        # in-place conversion below to avoid corrupting the model output.
        boxes = out['boxes'].detach().cpu().numpy().copy()
        boxes[:,2:] -= boxes[:,:2]  # [x1, y1, x2, y2] -> [x, y, w, h]
        # One transfer per image instead of one .item() call per detection.
        scores = out['scores'].detach().cpu().tolist()
        labels = out['labels'].detach().cpu().tolist()
        if 'masks' in out:
            segmentations = _encode_masks(out['masks'])
        else:
            segmentations = [None]*len(boxes)
        for ind in range(len(boxes)):
            results.append({'image_id':image_id,'score':scores[ind],
                            'category_id':labels[ind],'bbox':boxes[ind].tolist(),
                            'segmentation':segmentations[ind]})

#### Load in the appropriate model

In [11]:
import torchvision
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_Weights, FasterRCNN_ResNet50_FPN_Weights
# Faster R-CNN (ResNet-50 FPN backbone) with the COCO_V1 weights, moved to the GPU.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1,
).cuda()

#### Evaluation step. Apply model to every data sample, convert to COCO output format and save in json file

In [12]:
import time
t1 = time.time()
results = []
model.eval();
# Inference only: disabling autograd avoids keeping activations alive for a
# backward pass that never happens.
with torch.no_grad():
    for imgs,targets in coco_loader:
        if len(imgs) == 0:
            # every sample of this batch was filtered out by collate_fn
            continue
        # In eval mode the detector needs only the images; targets are used
        # afterwards solely to recover the image ids.
        model_outputs = model(imgs)
        torch.cuda.empty_cache()
        process_output(model_outputs,targets,results)
print(time.time()-t1)

225.59921836853027


In [13]:
import json
# Create the destination folder on demand; open() does not create missing
# parent directories.
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
with open(OUTPUT_FILE,'w') as ff:
    json.dump(results,ff)

#### Use pycocotools evaluation functionality to evaluate model performance

In [15]:
from pycocotools.cocoeval import COCOeval

# annType should be 'bbox' for bounding box and 'segm' for segmentation
annType = 'bbox'

# Ground truth comes from the annotation file; detections from the saved results file.
cocoGt = COCO(ANN_FILE)
cocoDt = cocoGt.loadRes(OUTPUT_FILE)

# Score every image id known to the ground truth.
cocoEval = COCOeval(cocoGt, cocoDt, iouType=annType)
cocoEval.params.imgIds = cocoGt.getImgIds()

# The three stages must run in this order: per-image matching, aggregation, report.
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

loading annotations into memory...
Done (t=0.53s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.74s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=17.28s).
Accumulating evaluation results...
DONE (t=2.87s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.369
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.585
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.397
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.212
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.403
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.478
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.307
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.484
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet