In [1]:
from dataclasses import dataclass
import pandas as pd
import typing as T
import numpy as np
import shutil
import json
import math
import cv2
import os

df = pd.read_csv("acfr-fruit-dataset/almonds/annotations/fromEast_56_04_IMG_4328_i900j2700.csv")

In [2]:
df

Unnamed: 0,#item,x,y,dx,dy,label
0,0,107.142857,258.5714,16.0,14.0,1
1,1,133.333333,212.381,16.0,14.0,1
2,2,98.571429,211.4286,14.0,18.0,1
3,3,89.047619,210.9524,14.0,12.0,1
4,4,75.714286,219.5238,18.0,14.0,1
5,5,40.952381,215.7143,12.0,14.0,1
6,6,42.857143,227.1429,14.0,16.0,1
7,7,31.428571,171.4286,14.0,14.0,1
8,8,36.190476,166.6667,12.0,14.0,1
9,9,38.095238,90.0,14.0,18.0,1


In [12]:


@dataclass
class Segment:
    xmin: int;
    ymin: int;
    xmax: int;
    ymax: int;
    name: str;
    pose: str;
    # Not always present in dataset, so I just set to "None"
    segmented: int;
    truncated: int;
    difficult: int;

@dataclass
class Image:
    img_path: str;
    # xml_path: str;
    # xml_data: any;
    b_boxes:  T.List[Segment];

def get_images(i_dir: str, annotation, img_ext = ".png", radius = False) -> T.List[Image]:
    """
    Returns all images within a directory (not recursive?)
    """
    images: T.List[Image] = [];

    entries = os.listdir(i_dir); # Throws if doesn't exist
    for file in entries:
        if (not file.endswith(".csv")): continue;

        csv_pth = os.path.join(i_dir, file);
        img_pth = os.path.abspath(os.path.join(i_dir, "../images", os.path.splitext(file)[0]) + img_ext);
        if (not os.path.exists(img_pth)):
            raise RuntimeError(f"Invalid image found\n  csv: {csv_pth}\n  img: {img_pth}");
        
        # print(csv_pth);
        df = pd.read_csv(csv_pth);

        # Read boxes from the csv
        b_boxes: T.List[Segment] = [];

        for row, data in df.iterrows():
            if (data["label"] != 1):
                print(f"Other label found at {csv_pth}:{row}, ignoring");
                continue;
            
            x=0, y=0, dx=0, dy=0;
            
            if (radius):
                rad = data["radius"];
                cx = data["c-x"];
                cy = data["c-y"];
            
                
            else:

            b_boxes.append(Segment(
                xmin=math.floor(data["x"]),
                ymin=math.floor(data["y"]),
                xmax=math.floor(data["x"] + data["dx"]),
                ymax=math.floor(data["y"] + data["dy"]),
                name=annotation,
                pose=None,
                segmented=None,
                truncated=None,
                difficult=None
            ));
        
        # We now have boxes + image pair so add it to our collection
        images.append(Image(
            img_path=img_pth,
            b_boxes=b_boxes
        ));
    
    return images;



images = get_images("./acfr-fruit-dataset/almonds/annotations", "almond") + get_images("./acfr-fruit-dataset/mangoes/annotations", "mangoes") + get_images("./acfr-fruit-dataset/mangoes/annotations", "apples", radius=True);

images

[Image(img_path='/mnt/4b92d4ee-d89f-4ff6-9584-a00287776fe8/doi_10_5061_dryad_63xsj3v34__v20210831/acfr-fruit-dataset/almonds/images/fromEast_59_27_IMG_4463_i2400j3900.png', b_boxes=[Segment(xmin=51, ymin=232, xmax=75, ymax=248, name='almond', pose=None, segmented=None, truncated=None, difficult=None), Segment(xmin=35, ymin=240, xmax=51, ymax=262, name='almond', pose=None, segmented=None, truncated=None, difficult=None)]),
 Image(img_path='/mnt/4b92d4ee-d89f-4ff6-9584-a00287776fe8/doi_10_5061_dryad_63xsj3v34__v20210831/acfr-fruit-dataset/almonds/images/fromWest_65_21_IMG_4774_i2700j3900.png', b_boxes=[Segment(xmin=16, ymin=175, xmax=50, ymax=213, name='almond', pose=None, segmented=None, truncated=None, difficult=None), Segment(xmin=159, ymin=211, xmax=193, ymax=257, name='almond', pose=None, segmented=None, truncated=None, difficult=None), Segment(xmin=230, ymin=254, xmax=272, ymax=298, name='almond', pose=None, segmented=None, truncated=None, difficult=None), Segment(xmin=196, ymin=94

In [9]:
# Define a struct fot the dataset json used in Adrian's code

@dataclass
class A_Annotation:
    # ID of this annotation?
    id: int;
    # ID of the relevant image
    image_id: int;
    # List of 4 ints
    bbox: T.List[int];
    # Id of the category
    category_id: int;

    # This is seemingly always 1.0
    # Need to look further into code
    area: float;
    # Unsure
    iscrowd: int;

@dataclass
class A_Category:
    # ID of category
    id: int;
    name: str;
    supercategory: str;

@dataclass
class A_Image:
    id: int;
    file_name: str;
    width: int;
    height: int;
    
@dataclass
class A_Dataset:
    annotations: T.List[A_Annotation];
    categories: T.List[A_Category];
    images: T.List[A_Image];

species_map = {"almond": 0};
species_list = ["almond"];

In [22]:
# Create the dataset and add the categories
categories = [];
for i, sp in enumerate(species_list):
    categories.append( A_Category(i, sp, sp) );

# Add all of the images and their bounding boxes

new_dataset_path = "./almond_ds";
if (os.path.exists(new_dataset_path)):
    # rm -r in python taken from https://stackoverflow.com/a/43757091
    shutil.rmtree(new_dataset_path);

os.mkdir(new_dataset_path);
image_dir = os.path.join(new_dataset_path, "all")
os.mkdir(image_dir);


def make_json_list(name: str, images):
    imlist = [];

    for img in images:
        imlist.append(os.path.basename(img.img_path));

    json_ds_handle = open(os.path.join( new_dataset_path, name ), "w");

    # JSON serialise class solution from this S/O comment: https://stackoverflow.com/questions/10252010/serializing-class-instance-to-json#comment82536156_10252010
    json_ds_handle.write(json.dumps(imlist, default=lambda x: x.__dict__, indent=4));
    json_ds_handle.flush();
    json_ds_handle.close();

def make_adrian_ds(name: str, images):
    ads = A_Dataset([], [], []);
    ads.categories = categories;
    
    bbox_id = 0;

    for img_id, img in enumerate(images):
        # Read image for size
        cv_image = cv2.imread(img.img_path);

        # Create entries in our new dataset format
        ads.images.append(A_Image(
            img_id,
            os.path.basename(img.img_path),
            cv_image.shape[1],
            cv_image.shape[0]
        ));

        if (not os.path.exists(os.path.join(os.path.abspath(new_dataset_path), "all", os.path.basename(img.img_path)))):
            os.symlink(
                os.path.abspath(img.img_path),
                os.path.join(os.path.abspath(new_dataset_path), "all", os.path.basename(img.img_path))
            );

        # Add this image's bounding boxes
        density_bboxes = [];
        for bbox in img.b_boxes:
            density_bboxes.append([bbox.xmin, bbox.ymin]);
            density_bboxes.append([bbox.xmax, bbox.ymax]);
            ads.annotations.append(A_Annotation(
                id=bbox_id,
                image_id=img_id,
                bbox=[ bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax ],
                category_id=species_map[bbox.name],
                area=1.0,
                iscrowd=0
            ));
            
            bbox_id += 1;
        
        np.savetxt(os.path.join(new_dataset_path, "all", os.path.basename(img.img_path)) + ".txt", density_bboxes);


    json_ds_handle = open(os.path.join( new_dataset_path, name ), "w");

    # JSON serialise class solution from this S/O comment: https://stackoverflow.com/questions/10252010/serializing-class-instance-to-json#comment82536156_10252010
    json_ds_handle.write(json.dumps(ads, default=lambda x: x.__dict__, indent=4));
    json_ds_handle.flush();
    json_ds_handle.close();

In [23]:
# Shuffle the images
imagenp = np.array(images);
np.random.shuffle(imagenp);

make_adrian_ds("dataset.json", imagenp);
# Split into train test validation

img_count = len(imagenp);
train_max = math.ceil(img_count * 0.7);
test_max  = math.ceil(img_count * 0.2) + train_max;
val_max   = img_count;

print(f"train: [0:{train_max}], test: [{train_max}:{test_max}], val: [{test_max}:{val_max}]");

make_json_list("json_train_set.json", imagenp[0:train_max]);
make_json_list("json_test_set.json", imagenp[train_max:test_max]);
make_json_list("json_val_set.json", imagenp[test_max:val_max]);

train: [0:434], test: [434:558], val: [558:620]
