## Dataset Parameters

In [67]:
# Tunable dataset parameters; the dataset creation loop reads both values.

# Number of images to create for the dataset.
num_images = 1
# Maximum number of bees to include per image (the loop draws a count in 1..num_bees).
num_bees = 4

print("Done")

Done


## Library Imports

In [33]:
from datetime import datetime
import tensorflow_datasets as tfds
import json
import os
import glob
import random
from pathlib import Path

print("Done")

Done


## YOLO Dataset Definition (COCO Annotation Format)

In [57]:
'''
{
"info": info, "images": [image], "annotations": [annotation], "licenses": [license],
}
 
info{
"year": int, "version": str, "description": str, "contributor": str, "url": str, "date_created": datetime,
}
 
image{
"id": int, "width": int, "height": int, "file_name": str, "license": int, "flickr_url": str, "coco_url": str, "date_captured": datetime,
}
 
license{
"id": int, "name": str, "url": str,
}
 
annotation{
"id": int, "image_id": int, "category_id": int, "segmentation": RLE or [polygon], "area": float, "bbox": [x,y,width,height], "iscrowd": 0 or 1,
}
 
categories[{
"id": int, "name": str, "supercategory": str,
}]
'''

class image:
    """Plain record for one entry of the COCO-style ``images`` list.

    Every constructor argument is stored unchanged on the instance. Note that
    the identifier is kept as ``img_id`` (the format reference above calls the
    key ``id``).
    """

    def __init__(self, img_id: int, width: int, height: int, file_name: str, license: int, flickr_url: str, coco_url: str, date_captured: datetime):
        # Grouped assignments; attributes are still created in the original order.
        self.img_id, self.width, self.height = img_id, width, height
        self.file_name, self.license = file_name, license
        self.flickr_url, self.coco_url = flickr_url, coco_url
        self.date_captured = date_captured
 
class license:
    """Plain record for one entry of the COCO-style ``licenses`` list.

    Stores the license identifier (as ``lic_id``), its name and its URL.
    """

    def __init__(self, lic_id: int, name: str, url: str):
        self.lic_id, self.name, self.url = lic_id, name, url
 
class bbox:
    """Bounding box described by its ``x``/``y`` position and ``width``/``height``.

    The attributes are created in the order x, y, width, height, so flattening
    an instance with ``list(vars(box).values())`` yields ``[x, y, width, height]``.
    """

    def __init__(self, x: int, y: int, width: int, height: int):
        # Position first, then size -- keep this order (see class docstring).
        self.x, self.y = x, y
        self.width, self.height = width, height
 
class annotation:
    """Plain record for one entry of the COCO-style ``annotations`` list.

    Args:
        ann_id: Identifier of this annotation.
        image_id: Identifier of the image the annotation belongs to.
        category_id: Identifier of the annotated category.
        segmentation: Segmentation data, stored unchanged (may be ``None``).
        area: Area of the annotated region.
        bbox: Object exposing ``x``, ``y``, ``width`` and ``height``; it is
            stored flattened as the list ``[x, y, width, height]``.
        iscrowd: Crowd flag, stored unchanged.
    """

    # The ``bbox`` annotation is quoted so that defining this class does not
    # require the ``bbox`` class to be bound yet (the parameter shadows it anyway).
    def __init__(self, ann_id: int, image_id: int, category_id: int, segmentation, area: float, bbox: "bbox", iscrowd: bool):
        self.ann_id = ann_id
        self.image_id = image_id
        # Fixed: this used to be ``self.category_id: category_id`` -- a bare
        # annotation that never assigned the attribute.
        self.category_id = category_id
        self.segmentation = segmentation
        self.area = area
        # Read the four fields explicitly instead of relying on the attribute
        # insertion order of the bbox instance.
        self.bbox = [bbox.x, bbox.y, bbox.width, bbox.height]
        self.iscrowd = iscrowd
 
class category:
    """Plain record for one entry of the COCO-style ``categories`` list.

    Stores the category identifier (as ``cat_id``), its name and its
    supercategory, all unchanged.
    """

    def __init__(self, cat_id: int, name: str, supercategory: str):
        self.cat_id, self.name, self.supercategory = cat_id, name, supercategory
 
# Template for the dataset-level "info" section: each key maps to the *type*
# expected for that field, not to an actual value.
info = dict(
    year=int,
    version=str,
    description=str,
    contributor=str,
    url=str,
    date_created=datetime,
)

# ds = tfds.load('bee_dataset', split='train')
# ds = ds.take(1)  # Only take a single example

# for example in ds:  # example is `{'image': tf.Tensor, 'label': tf.Tensor}`
#     print(list(example.keys()))
#     image = example["input"]
#     label = example["output"]
#     print(image.shape, label)

# Id =
# Width =
# Height =
# File_name =
# License =
# Flickr_url = None
# Coco_url = None
# Date_captured = datetime.now()
 
# images.append(image(Id, Width, Height, File_name, License, Flickr_url, Coco_url, Date_captured))
 
# Annotation_Id =
# Image_id =
# Category_id =
# Segmentation = None
# X =
# Y =
# Width =
# Height =
# Area = Width * Height
# Bbox = bbox(X, Y, Width, Height)
# Iscrowd =
 
# annotations.append(annotation(Annotation_Id, Image_id, Category_id, Segmentation, Area, Bbox, Iscrowd))
 
# License_Id =
# Name =
# Url =
 
# licenses.append(license(License_Id, Name, Url))
 
# Bbox = bbox(3, 2, 6, 5)
# print(vars(Bbox))
# print(list(vars(Bbox).values()))
 
# Annotation = annotation(0, 0, 0, None, 100, Bbox, True)
# print(vars(Annotation))

print("Done")

Done


## Feeder Image Array Creation

In [19]:
train_augmented_fp = "Dataset/TrainAugmented"
val_augmented_fp = "Dataset/ValAugmented"


def _clear_directory(directory):
    """Delete every regular file directly inside ``directory``.

    Sub-directories (and their contents) are left untouched.

    Raises:
        OSError: if the directory cannot be listed or a file cannot be removed.
    """
    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)
        if os.path.isfile(entry_path):
            os.remove(entry_path)


# Empty both augmented-output folders. Each folder is handled on its own so
# that a failure on the training folder no longer skips the validation folder,
# and the message says which folder failed and why.
for augmented_fp in (train_augmented_fp, val_augmented_fp):
    try:
        _clear_directory(augmented_fp)
    except OSError as exc:
        print(f"Error clearing {augmented_fp}: {exc}")

# Feeder image sources: source dataset -> class name -> folder path and label.
# NOTE(review): the label looks like a [pollen, varroa, wasps] indicator vector -- confirm.
dataset_paths = {
                 'PD': {'none': {'path': 'Dataset/PollenDataset/None', 'label': [0, 0, 0]},
                        'pollen': {'path': 'Dataset/PollenDataset/Pollen', 'label': [1, 0, 0]}},
                 'BA': {'none': {'path': 'Dataset/BeeAlarmed/None', 'label': [0, 0, 0]},
                        'pollen': {'path': 'Dataset/BeeAlarmed/Pollen', 'label': [1, 0, 0]},
                        'varroa': {'path': 'Dataset/BeeAlarmed/Varroa', 'label': [0, 1, 0]},
                        'wasps': {'path': 'Dataset/BeeAlarmed/Wasps', 'label': [0, 0, 1]}},
                 'YM': {'none': {'path': 'Dataset/YangModel/None', 'label': [0, 0, 0]},
                        'pollen': {'path': 'Dataset/YangModel/Pollen', 'label': [1, 0, 0]},
                        'varroa': {'path': 'Dataset/YangModel/Varroa', 'label': [0, 1, 0]}},
                }
                 # 'USU': {'none': {'path': 'Dataset/USU/None', 'label': [0, 0, 0]},
                 #         'pollen': {'path': 'Dataset/USU/Pollen', 'label': [1, 0, 0]},
                 #         'varroa': {'path': 'Dataset/USU/Varroa', 'label': [0, 1, 0]}}}

# Collect every path found directly inside each class folder.
file_list = []
for source_classes in dataset_paths.values():
    for class_entry in source_classes.values():
        # extend() replaces the former list comprehension used only for its side effect.
        file_list.extend(glob.glob(class_entry['path'] + '/*'))

print("Done")

Done


## Dataset Creation Loop

In [106]:
# NOTE: COCO Dataset IDs start with 1

# Accumulators for the sections of the output dataset.
images = []
annotations = []
licenses = []

# Category names must match the feeder image directory names, because the
# category of each bee is looked up from its containing directory below.
categories = [category(1, "None", None), category(2, "Pollen", None), category(3, "Varroa", None), category(4, "Wasps", None)]

# License creation "loop"
License_Id = 1
Name = 'Attribution-NonCommercial-ShareAlike 4.0 International'
Url = 'https://creativecommons.org/licenses/by-nc-sa/4.0/'
licenses.append(license(License_Id, Name, Url))

# Fields shared by every generated image record.
Width = 640
Height = 640
License = 1
Flickr_url = None
Coco_url = None

# Running counter so annotation IDs start at 1 and stay unique across all
# images (previously the per-image loop index was used, which started at 0
# and restarted for every image).
next_annotation_id = 1

for image_id in range(num_images):
    Id = image_id + 1
    # File_name =
    Date_captured = datetime.now()

    # Feeder image usage sub-loop (annotation sub-loop)
    for bee in range(random.randint(1, num_bees)): # number of bees to include in image
        Annotation_Id = next_annotation_id
        next_annotation_id += 1
        Image_id = Id

        current_bee = os.path.normpath(random.choice(file_list)) # Grab a random bee from feeder images
        # Get category of feeder image from containing directory. basename/dirname
        # works with whatever separator normpath produced; the previous
        # split('\\') only worked on Windows.
        category_name = os.path.basename(os.path.dirname(current_bee))

        Category_id = [i.cat_id for i in categories if i.name == category_name][0]
        # Segmentation = None
        # X =
        # Y =
        # Width =
        # Height =
        # Area = Width * Height
        # Bbox = bbox(X, Y, Width, Height)
        # Iscrowd =

        #annotations.append(annotation(Annotation_Id, Image_id, Category_id, Segmentation, Area, Bbox, Iscrowd))
    #images.append(image(Id, Width, Height, File_name, License, Flickr_url, Coco_url, Date_captured))

# with open("custom_bee_dataset.json", "w") as outfile:
#     json.dump({"info": info, "images": images, "annotations": annotations, "licenses": licenses}, outfile, indent = 4)

4
None 1
Varroa 3
None 1
None 1
