# Data Collection (Knives)

---

### Assumptions:
1. [COCO API](https://github.com/cocodataset/cocoapi) is installed.
2. The [2014 Train images](http://cocodataset.org/#download) have been downloaded and extracted to the desktop (`~/Desktop/train2014`).
3. The [2014 Train/Val annotations](http://cocodataset.org/#download) have been downloaded and extracted to the desktop (`~/Desktop/annotations`).

In [1]:
# Module check/download

try:
    from pip import main as pip_main
except:
    from pip._internal import main as pip_main

def import_or_install(package):
    """Import a module, installing it with pip first if it is missing.

    Args:
        package: Pair ``(module_name, pip_name)`` -- the name to import and
            the distribution name passed to ``pip install``.

    Returns:
        None.  Progress is reported on stdout: ``<module_name>: ok`` once the
        module is importable, or a failure message if the install did not
        succeed or the module is still not importable afterwards.
    """
    # Function-scope imports keep this cell runnable on its own.
    import importlib
    import subprocess
    import sys

    module_name = package[0]
    pip_name = package[1]

    try:
        __import__(module_name)
    except ImportError:
        print("\n"+module_name+": "+"installing...")
    else:
        print(module_name+": "+"ok")
        return

    # Run pip as a subprocess of the current interpreter: pip has no supported
    # in-process API, and this targets the environment the kernel really uses.
    exit_code = subprocess.call(
        [sys.executable, '-m', 'pip', 'install', pip_name])
    if exit_code != 0:
        # Do not claim success when pip reported an error.
        print(module_name+": "+"install failed (pip exit code "
              + str(exit_code)+")")
        return

    # Let the import system notice the freshly installed files
    # (invalidate_caches is absent on very old interpreters).
    if hasattr(importlib, 'invalidate_caches'):
        importlib.invalidate_caches()

    try:
        __import__(module_name)
    except ImportError as err:
        print(module_name+": "+"installed but still not importable ("
              + str(err)+")")
        return

    print(module_name+": "+"ok")

# Each entry pairs the importable module name with the pip distribution
# that provides it (the two differ for OpenCV).
modules = [
    ('cv2', 'opencv-python'),
    ('matplotlib', 'matplotlib'),
    ('numpy', 'numpy'),
    ('pycocotools', 'pycocotools'),
    ('scipy', 'scipy'),
]

# Check every dependency in turn, installing whatever is missing.
for module_spec in modules:
    import_or_install(module_spec)

cv2: ok
matplotlib: ok
numpy: ok
pycocotools: ok
scipy: ok


In [2]:
# Relevant libraries

%matplotlib inline
from pycocotools.coco import COCO
import numpy as np
import matplotlib.pyplot as plt
import pylab
import json
import cv2
import os
# Default figure size for plots, given as (width, height).
pylab.rcParams['figure.figsize'] = (8.0, 10.0)

In [3]:
# Initialize COCO API for instance annotations

# Which COCO split to use, and the directory the downloads were extracted to.
dataType = 'train2014'
dataDir = os.path.expanduser("~/Desktop")

# Instance annotation file for the chosen split.
annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataType)

# Load the annotations and build the COCO lookup index.
coco = COCO(annFile)

loading annotations into memory...
Done (t=11.36s)
creating index...
index created!


In [4]:
# Display COCO categories and supercategories

# Fetch the record of every category known to the annotation index.
cats = coco.loadCats(coco.getCatIds())

# Category names, printed space-separated.
nms = [entry['name'] for entry in cats]
print('COCO categories: \n{}\n'.format(' '.join(nms)))

# Distinct supercategory names (a set, so printed order is arbitrary).
nms = {entry['supercategory'] for entry in cats}
print('COCO supercategories: \n{}'.format(' '.join(nms)))

COCO categories: 
person bicycle car motorcycle airplane bus train truck boat traffic light fire hydrant stop sign parking meter bench bird cat dog horse sheep cow elephant bear zebra giraffe backpack umbrella handbag tie suitcase frisbee skis snowboard sports ball kite baseball bat baseball glove skateboard surfboard tennis racket bottle wine glass cup fork knife spoon bowl banana apple sandwich orange broccoli carrot hot dog pizza donut cake chair couch potted plant bed dining table toilet tv laptop mouse remote keyboard cell phone microwave oven toaster sink refrigerator book clock vase scissors teddy bear hair drier toothbrush

COCO supercategories: 
accessory food indoor person animal sports outdoor appliance furniture vehicle electronic kitchen


In [5]:
# Pick category

# Resolve the id(s) of the 'knife' category, then collect the ids of all
# images that contain at least one annotation of that category.
catIds = coco.getCatIds(catNms=['knife'])
imgIds = coco.getImgIds(catIds=catIds)

In [10]:
# Save image ids

# Write one image id per line.  The `with` block guarantees the file is
# flushed and closed even if a write fails part-way through.
with open(os.path.expanduser("~/Desktop/knifeIDs.txt"), 'w') as id_file:
    id_file.writelines(str(num) + '\n' for num in imgIds)

In [6]:
# Data prep for RetinaNet

# Category id matched against each annotation's 'category_id'; every emitted
# row is labelled 'knife'.
KNIFE_CATEGORY_ID = 49
# Image file names embed the image id zero-padded to this many digits.
ID_WIDTH = 12

# --- 1. Read the saved image ids -------------------------------------------
# Every id is zero-padded and kept (an id that is already ID_WIDTH digits long
# is no longer dropped), and blank lines are skipped instead of raising
# IndexError.
store = []
with open(os.path.expanduser("~/Desktop/knifeIDs.txt"), 'r') as f:
    for rec in f:
        fields = rec.split()
        if not fields:
            continue
        store.append(fields[0].zfill(ID_WIDTH))

store_int = list(map(int, store))
# Set for O(1) membership tests while scanning every annotation below.
wanted_ids = set(store_int)

# --- 2. Group knife bounding boxes by image id ------------------------------
with open(annFile) as f:
    annotations = json.load(f)['annotations']

# d maps image id -> list of bboxes for that image.
d = {}
for ann in annotations:
    img_id = ann['image_id']
    if ann['category_id'] == KNIFE_CATEGORY_ID and img_id in wanted_ids:
        d.setdefault(img_id, []).append(ann['bbox'])

# --- 3. Build RetinaNet rows: [file, xmin, ymin, xmax, ymax, label] ---------
path = os.path.expanduser(
    "~/Desktop/{}/COCO_{}_".format(dataType, dataType))
output = []

for padded_id, img_id in zip(store, store_int):
    image_path = path + padded_id + '.jpg'

    # One row per box; an id without any knife annotation simply contributes
    # no rows rather than raising KeyError.
    for bbox in d.get(img_id, []):
        # bbox is [x, y, width, height]; convert to integer corner
        # coordinates (int() truncates the fractional part).
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[0] + bbox[2])
        ymax = int(bbox[1] + bbox[3])

        output.append([image_path, xmin, ymin, xmax, ymax, 'knife'])

# Uncomment to preview the generated rows:
#for rec in output:
#    print(str(rec)[1:-1])

In [None]:
# Sanity check

# Draw the box of the second generated row on its image so the coordinate
# conversion can be checked by eye.
sample_path, xmin, ymin, xmax, ymax = output[1][:5]

img = cv2.imread(sample_path, 1)
# cv2.imread reports an unreadable or missing file by returning None instead
# of raising, so fail here with a clear message rather than inside rectangle().
if img is None:
    raise IOError("Could not read image: {}".format(sample_path))

cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
cv2.imshow('image', img)
cv2.waitKey(0)
# Close the preview window once a key has been pressed.
cv2.destroyAllWindows()

In [11]:
# Save in RetinaNet format

# Write one comma-separated row per bounding box.  The `with` block guarantees
# the file is flushed and closed even if a write fails part-way through.
with open(os.path.expanduser("~/Desktop/knifeData.txt"), 'w') as out_file:
    out_file.writelines(
        ", ".join(str(x) for x in rec) + '\n' for rec in output)