In [93]:
import pdb
import os
from pathlib import Path
import cv2
import json
import numpy as np
import matplotlib.pyplot as plt
import datetime
import random
import math

In [111]:
"""
Prepare Data:
Create directories, annotation.json in COCO format for non-random images and
crop raw images for pollen and foragers plus random image-sections as counterclasses

Directory Structure:

basedirectory/
 ├───raw_data_directory/
 │
 ├───Data/
 │     ├───ForagerData
 │     │     ├───Fake/
 │     │     └───Real/
 │     └───PollenData
 │           ├───Fake/
 │           └───Real/
 │
 └──annotation.json

"""

DATA_CREATED = True
IMG_POLLEN_SIZE = 32
IMG_FORAGER_SIZE = 200
BASE_DIR = os.getcwd() + '/'
POLLEN_DIR = 'Data/PollenData/Real/'
FORAGER_DIR = 'Data/ForagerData/Real/'
RND_POLLEN_DIR = 'Data/PollenData/Fake/'
RND_FORAGER_DIR = 'Data/ForagerData/Fake/'
RAW_DATA_DIR = 'raw_data/BeesBook/'
IMG_DIR = 'img/'
ANN_DIR = 'ann/'

if not os.path.isdir('Data'):
    os.makedirs(POLLEN_DIR)
    os.makedirs(FORAGER_DIR)
    os.mkdir(RND_POLLEN_DIR)
    os.mkdir(RND_FORAGER_DIR)


In [95]:
def eukl_dist(p1, p2):
    d = math.sqrt(((p1[0]-p2[0])**2)+((p1[1]-p2[1])**2))
    return d < max(IMG_FORAGER_SIZE, IMG_POLLEN_SIZE)

In [102]:
def create_rnd_coord(coords):
    too_close = True
    while too_close:
        rnd_x = random.randint(180, 2635)
        if rnd_x > 2360:
            coin = random.randint(0,1)
            if coin:
                rnd_y = random.randint(90, 600)
            else:
                rnd_y = random.randint(3500, 3950)
        else:
            rnd_y = random.randint(230, 2675)
        rnd_coord = [rnd_y, rnd_x]
        for coord in coords:
            too_close = eukl_dist(rnd_coord,coord)
            if too_close:
                break
    return rnd_coord

In [103]:
def crop_img(img, coord, size, img_id, anno, anno_id=None, category=None, iscrowd=None):
    filename = (str(img_id)).zfill(10) + '.png'
    y = coord[0]-(size//2)
    x = coord[1]-(size//2)
    h = size
    w = size
    crop_img = img[x:x+h, y:y+w]
    
    if anno:
        time1 = str(datetime.datetime.utcnow())
        anno_img = {'id':img_id,'width':size,'height':size,'file_name':filename,'license':None,'flickr_url':None,'coco_url':None,'date_captured':time1}
        anno_anno = {'id':anno_id,'image_id':img_id,'category_id':category,'segmentation':[],'area':size**2,'bbox':[],'iscrowd':iscrowd}
        img_id += 1
        anno_id += 1
        return crop_img, filename, anno_img, anno_anno, img_id, anno_id
    
    img_id += 1
    return crop_img, filename, img_id

In [104]:
def create_cropped_img(filename, coords, img_counter, anno_counter, orig_dir):
    try:
        img = cv2.imread(filename, 0)
    except Exception as e:
        print(filename, str(e))
    
    anno_images = []
    anno_annos = []
    
    for coord in coords:
        # random point
        rnd_coord = create_rnd_coord(coords)
        
        # Pollen:
        pollen_crop_img, pollen_filename, anno_image1, anno_anno1, img_counter, anno_counter = crop_img(img, coord,IMG_POLLEN_SIZE, img_counter, True, anno_counter, 1, 0)
        anno_images.append(anno_image1)
        anno_annos.append(anno_anno1)
        # Non-Pollen
        rnd_pollen_crop_img, rnd_pollen_filename, img_counter = crop_img(img, rnd_coord, IMG_POLLEN_SIZE, img_counter, False)
        try:
            os.chdir(BASE_DIR + POLLEN_DIR)
            cv2.imwrite(pollen_filename, pollen_crop_img)
            os.chdir(BASE_DIR + RND_POLLEN_DIR)
            cv2.imwrite(rnd_pollen_filename, rnd_pollen_crop_img)
        except Exception as e:
            print(pollen_filename, rnd_pollen_filename, str(e))
        
        # Forager:
        forager_crop_img, forager_filename, anno_image2, anno_anno2, img_counter, anno_counter = crop_img(img, coord,IMG_FORAGER_SIZE, img_counter, True, anno_counter, 2, 0)
        anno_images.append(anno_image2)
        anno_annos.append(anno_anno2)
        # Non-Forager
        rnd_forager_crop_img, rnd_forager_filename, img_counter = crop_img(img, rnd_coord, IMG_FORAGER_SIZE, img_counter, False)
        try:
            os.chdir(BASE_DIR + FORAGER_DIR)
            cv2.imwrite(forager_filename, forager_crop_img)
            os.chdir(BASE_DIR + RND_FORAGER_DIR)
            cv2.imwrite(rnd_forager_filename, rnd_forager_crop_img)
        except Exception as e:
            print(forager_filename, rnd_forager_filename, str(e))
        
        os.chdir(orig_dir)
        
    return (anno_images, anno_annos, img_counter, anno_counter)

In [110]:
if not DATA_CREATED:
    today = str(datetime.datetime.utcnow())
    annotation = {
        'info': {'year': 2020,'version': None,'description': 'Pollenforager Detection','contributor': 'Mara Kortenkamp, Tim Feige','url': 'https://github.com/marakortenkamp/pollen-detection','date_created': today},
        'images': [],
        'annotations': [],
        'licenses': {'id': None,'name': None,'url': None,},
        'category': [{'supercategory': 'oject','id': 1,'name': 'Pollen'},{'supercategory': 'animal','id': 2,'name': 'Bee'}]
        }
    total_img_counter, total_anno_counter = 0, 9000000000
    os.chdir(BASE_DIR + RAW_DATA_DIR)
        folders = next(os.walk('.'))[1]
    for folder in folders:
        os.chdir(folder + '/' + ANN_DIR)
        files = os.listdir()
        for file in files:
            if 'json' in file:
                try:
                    with open(file) as json_file:
                        data = json.load(json_file)
                    if len(data['objects']):
                        try:
                            file_name = Path(file).stem
                            pollen_coords = []
                            for obj in data['objects']:
                                pollen_coords.append(obj['points']['exterior'][0])
                            os.chdir('../' + IMG_DIR)
                        except Exception as e:
                            print(folder, file, obj, str(e))
                        current_dir = os.getcwd()
                        anno_imgs, anno_annos, total_img_counter, total_anno_counter = create_cropped_img(file_name, pollen_coords, total_img_counter, total_anno_counter, current_dir)
                        os.chdir('../' + ANN_DIR)
                        for ann_img in anno_imgs:
                            annotation['images'].append(ann_img)
                        for ann_ann in anno_annos:
                            annotation['annotations'].append(ann_ann)
                except Exception as e:
                    print(folder, file, str(e))
        os.chdir(BASE_DIR + RAW_DATA_DIR)
    os.chdir(BASE_DIR)    
    with open('annotation.json', 'w') as fp:
        json.dump(annotation, fp)

0000006350.png 0000006351.png OpenCV(4.2.0) /io/opencv/modules/imgcodecs/src/loadsave.cpp:715: error: (-215:Assertion failed) !_img.empty() in function 'imwrite'

