In [1]:
import os
import shutil
import json

In [2]:
# Dataset locations, relative to the notebook's working directory.
TRAIN_DIR = "data/rflow_data/train/"
# The annotation file sits inside the training folder; derive its path from
# TRAIN_DIR so the two settings cannot drift apart.
TRAIN_ANNOTS = os.path.join(TRAIN_DIR, "_annotations.coco.json")
# Root folder that receives one sub-folder per category.
SORTED_DIR = "data/RF_sorted/"

In [3]:
# Number of entries in the training folder. This counts every entry found
# there, not only images (the annotation JSON lives in the same folder).
len(os.listdir(TRAIN_DIR))

138

In [4]:
def read_json(file) -> dict:
    """
    Reads json file and returns python dictionary.
        Args:
            file(str): path to json file.
        Returns:
            The decoded content of the file, exactly as produced by json.load.
        Raises:
            OSError: if the file cannot be opened.
            json.JSONDecodeError: if the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, which is not UTF-8 everywhere and would break non-ASCII content.
    with open(file, 'r', encoding='utf-8') as f:
        return json.load(f)

In [5]:
# Load the training annotations and show the top-level sections of the file.
rflow_annots = read_json(TRAIN_ANNOTS)
rflow_annots.keys()

dict_keys(['info', 'licenses', 'categories', 'images', 'annotations'])

In [6]:
# Size of each section: number of categories, images and annotation records.
num_cats = len(rflow_annots['categories'])
num_imgs = len(rflow_annots['images'])
num_annots = len(rflow_annots['annotations'])
num_cats, num_imgs, num_annots

(48, 137, 1057)

In [7]:
def sep_annots_by_category(annots) -> dict:
    """
    Groups the annotation records of a dataset by their category id.
        Args:
            annots(dict): Annotation data; its 'annotations' entry is iterated and
                each record is filed under its 'category_id'.
        Returns:
            dict: category_id -> list of the annotation records carrying that id,
                in the order they appear in the input. Ids that never occur in an
                annotation do not become keys.
    """
    grouped = {}
    for record in annots['annotations']:
        # setdefault creates the bucket on first sight of an id, then reuses it.
        grouped.setdefault(record['category_id'], []).append(record)
    return grouped

In [8]:
# Group annotation records by category id. Only ids that occur in at least one
# annotation become keys, so this can be fewer than the declared categories.
data_by_cat = sep_annots_by_category(rflow_annots)
data_by_cat.keys()

dict_keys([34, 18, 25, 30, 13, 45, 22, 36, 39, 44, 37, 19, 32, 5, 31, 46, 40, 47, 43, 33, 3, 23, 21, 20, 28, 16, 29, 6, 42, 38, 9, 27, 14, 12, 4, 7, 24, 8, 2, 10, 17, 15, 11, 26, 35, 41, 1])

In [9]:
# Peek at the first three annotation records filed under category id 33.
data_by_cat[33][0:3]

[{'id': 33,
  'image_id': 4,
  'category_id': 33,
  'bbox': [144, 151, 282, 269],
  'area': 75858,
  'segmentation': [],
  'iscrowd': 0},
 {'id': 48,
  'image_id': 7,
  'category_id': 33,
  'bbox': [106, 186, 387.5, 353],
  'area': 136787.5,
  'segmentation': [],
  'iscrowd': 0},
 {'id': 139,
  'image_id': 17,
  'category_id': 33,
  'bbox': [133, 110, 421, 405.5],
  'area': 170715.5,
  'segmentation': [],
  'iscrowd': 0}]

In [10]:
def create_id2cat(annots) -> dict:
    """
    Builds a lookup table from category id to category name.
    Args:
        annots(dict): Annotation data; each item of its 'categories' entry
            supplies an 'id' and a 'name'.
    Returns:
        dict: category id -> category name. If an id appears more than once,
            the last name wins.
    """
    return {entry['id']: entry['name'] for entry in annots['categories']}

In [11]:
# Category id -> category name lookup for the training annotations.
rflow_id2cat = create_id2cat(rflow_annots)
rflow_id2cat

{0: 'Cattle',
 1: 'BCS-1',
 2: 'BCS-1.25',
 3: 'BCS-1.5',
 4: 'BCS-1.75',
 5: 'BCS-2',
 6: 'BCS-2.25',
 7: 'BCS-2.5',
 8: 'BCS-2.75',
 9: 'BCS-3',
 10: 'BCS-3.25',
 11: 'BCS-3.5',
 12: 'BCS-3.75',
 13: 'BCS-4',
 14: 'BCS-4.25',
 15: 'BCS-4.5',
 16: 'BCS-4.75',
 17: 'Breed-Grade-A2',
 18: 'Breed-Grade-A3',
 19: 'Breed-Grade-B',
 20: 'Breed-Grade-C',
 21: 'Clefted-Deep',
 22: 'Clefted-Moderate',
 23: 'Clefted-Slightly',
 24: 'Cow-Gir',
 25: 'Cow-HF-Crossbreed',
 26: 'Cow-Hallikar',
 27: 'Cow-Jersey-Crossbreed',
 28: 'Cow-Non-Descript-Breed',
 29: 'Horn-Polled',
 30: 'Horn-Present',
 31: 'Pot-Belly-Present',
 32: 'Skin-Coat-Moderately-Rough',
 33: 'Skin-Coat-Rough',
 34: 'Skin-Coat-Shiny',
 35: 'Teat-Score-3',
 36: 'Teat-Score-5',
 37: 'Teat-Score-7',
 38: 'Teat-Score-9',
 39: 'Udder-Compact',
 40: 'Udder-Moderately-Pendulous',
 41: 'Udder-Pendulous',
 42: 'Udder-Small-Tight',
 43: 'Worm-Load-Moderate',
 44: 'Worm-Load-No',
 45: 'Wound-Dry',
 46: 'Wound-No',
 47: 'Wound-Ulcerated'}

In [12]:
def create_id_2_imgname(annots) -> dict:
    """
    Builds a lookup table from image id to image file name.
    Args:
        annots(dict): Annotation data; each item of its 'images' entry
            supplies an 'id' and a 'file_name'.
    Returns:
        dict: image id -> file name. If an id appears more than once,
            the last file name wins.
    """
    return {entry['id']: entry['file_name'] for entry in annots['images']}

In [13]:
# Image id -> file name lookup; its size is the number of distinct image ids.
rflow_id2name = create_id_2_imgname(rflow_annots)
len(rflow_id2name)

137

In [14]:
def create_categorized_dset(og_img_dir, new_img_dir, data_by_cat, id2cat, id2name) -> None:
    """
    Segregates categorized annotation data into folders, each with its corresponding images and json annotation file.

    One folder named after the category is created under ``new_img_dir`` for every
    entry of ``id2cat``, including categories without annotations (their folder
    only receives an empty annotation file). Re-running over an existing output
    folder overwrites the copied images and annotation files.

    Args:
        og_img_dir(str): Folder containing the original image data to be split into categories.
        new_img_dir(str): Root folder under which the per-category folders are created.
        data_by_cat(dict): Python dictionary containing category-wise annotations for the dataset.
        id2cat(dict): Python dictionary mapping category id to corresponding category name.
        id2name(dict): Python dictionary mapping image_id to corresponding image name.

    Returns:
        None

    Raises:
        KeyError: if an annotation refers to an image_id missing from ``id2name``.
        OSError: if an image cannot be copied or a folder/file cannot be written.
    """
    for cat_id, cat in id2cat.items():
        cat_annots = {}
        # os.path.join avoids a doubled separator when new_img_dir ends in "/".
        cat_dir = os.path.join(new_img_dir, cat)
        # exist_ok makes the call safe to repeat without a separate exists() check.
        os.makedirs(cat_dir, exist_ok=True)

        for img_annot in data_by_cat.get(cat_id, []):
            img_name = id2name[img_annot['image_id']]
            # Copy each image only once per category, even if it has several annotations.
            if img_name not in cat_annots:
                shutil.copy(os.path.join(og_img_dir, img_name),
                            os.path.join(cat_dir, img_name))
            # NOTE(review): entries are keyed by image name, so when one image has
            # several annotations of this category only the last one is kept.
            # The output schema is left unchanged here; confirm whether consumers
            # need every annotation.
            cat_annots[img_name] = img_annot

        with open(os.path.join(cat_dir, f"annotations.{cat}.json"), 'w') as f:
            json.dump(cat_annots, f)

In [15]:
# Copy the training images into one folder per category under SORTED_DIR and
# write each folder's annotation JSON.
create_categorized_dset(TRAIN_DIR, SORTED_DIR, data_by_cat, rflow_id2cat, rflow_id2name)

In [17]:
def get_dir_filecount(directory):
    """
    Prints the number of entries present in each subdirectory of a given main directory.

    Every entry of a subdirectory is counted (files and nested folders alike).
    Entries of ``directory`` that are not themselves directories are skipped.

    Args:
        directory(str): Path to directory whose subdirectories are to be counted.

    Returns:
        None
    """
    for subdir in os.listdir(directory):
        subdir_path = os.path.join(directory, subdir)
        # A stray regular file at the top level would make os.listdir raise
        # NotADirectoryError, so only descend into real directories.
        if not os.path.isdir(subdir_path):
            continue
        print(f"Sub-Dir: {subdir} | No. of files: {len(os.listdir(subdir_path))}")
# Use the configured output folder rather than repeating its path literally.
get_dir_filecount(SORTED_DIR)

Sub-Dir: Breed-Grade-A3 | No. of files: 39
Sub-Dir: Teat-Score-5 | No. of files: 55
Sub-Dir: BCS-1.25 | No. of files: 3
Sub-Dir: Breed-Grade-B | No. of files: 38
Sub-Dir: Skin-Coat-Shiny | No. of files: 36
Sub-Dir: Cattle | No. of files: 1
Sub-Dir: Skin-Coat-Moderately-Rough | No. of files: 54
Sub-Dir: Breed-Grade-C | No. of files: 18
Sub-Dir: Breed-Grade-A2 | No. of files: 11
Sub-Dir: Horn-Present | No. of files: 91
Sub-Dir: Teat-Score-7 | No. of files: 55
Sub-Dir: Cow-Hallikar | No. of files: 7
Sub-Dir: BCS-1 | No. of files: 2
Sub-Dir: Worm-Load-No | No. of files: 24
Sub-Dir: BCS-4.75 | No. of files: 3
Sub-Dir: BCS-4 | No. of files: 10
Sub-Dir: BCS-1.5 | No. of files: 9
Sub-Dir: Udder-Pendulous | No. of files: 4
Sub-Dir: Udder-Small-Tight | No. of files: 8
Sub-Dir: BCS-2.75 | No. of files: 4
Sub-Dir: Cow-HF-Crossbreed | No. of files: 70
Sub-Dir: Udder-Moderately-Pendulous | No. of files: 25
Sub-Dir: BCS-4.25 | No. of files: 7
Sub-Dir: Skin-Coat-Rough | No. of files: 15
Sub-Dir: Wound