In [1]:
import os
import json
import shutil

In [30]:
# Folder that holds one sub-folder per labelled class.
SOURCE_DATA_DIR = "data/RF_sorted"

# Shared parents: aggregated copies go under the first, YOLOv8 output under the second.
_AGG_ROOT = f"{SOURCE_DATA_DIR}/agg_data"
_YOLO_ROOT = "data/YOLOv8"

# Each trait gets an aggregation folder (*_OUT_DIR) and a YOLOv8 folder (*_YOLO_DIR).
BCS_OUT_DIR = f"{_AGG_ROOT}/BCS"
BCS_YOLO_DIR = f"{_YOLO_ROOT}/BCS"

UTYPE_OUT_DIR = f"{_AGG_ROOT}/UdderType"
UTYPE_YOLO_DIR = f"{_YOLO_ROOT}/UdderType"

CLEFT_OUT_DIR = f"{_AGG_ROOT}/Cleft"
CLEFT_YOLO_DIR = f"{_YOLO_ROOT}/Cleft"

TSCORE_OUT_DIR = f"{_AGG_ROOT}/TScore"
TSCORE_YOLO_DIR = f"{_YOLO_ROOT}/TScore"

BREED_OUT_DIR = f"{_AGG_ROOT}/CowBreed"
BREED_YOLO_DIR = f"{_YOLO_ROOT}/CowBreed"

GRADE_OUT_DIR = f"{_AGG_ROOT}/Grade"
GRADE_YOLO_DIR = f"{_YOLO_ROOT}/Grade"

In [31]:
def convert_coco_to_yolov8(bbox, img_width, img_height):
    """Turn a COCO-style box into normalised centre/size coordinates.

    Args:
        bbox: Four values unpacked as ``(x_min, y_min, width, height)``.
        img_width: Divisor applied to the horizontal quantities.
        img_height: Divisor applied to the vertical quantities.

    Returns:
        Tuple ``(x_center, y_center, width, height)`` where each entry has been
        divided by the matching image dimension.
    """
    left, top, box_w, box_h = bbox

    # The box centre is its top-left corner shifted by half its extent.
    centre_x = left + box_w / 2
    centre_y = top + box_h / 2

    return (
        centre_x / img_width,
        centre_y / img_height,
        box_w / img_width,
        box_h / img_height,
    )

In [32]:
def aggregate_class_folders(source_dir, target_dir, main_class):
    """Copy every class folder that belongs to ``main_class`` into ``target_dir``.

    A sub-folder of ``source_dir`` is selected when its name starts or ends
    with ``main_class``. Selected folders are copied (not moved) under
    ``target_dir`` with their names unchanged, in sorted name order.

    Args:
        source_dir: Directory whose immediate sub-folders are scanned.
        target_dir: Directory that receives the copies; created when missing.
        main_class: Prefix/suffix used to select sub-folders, e.g. ``'BCS-'``.

    Returns:
        None. One ``Copied ...`` line is printed per copied folder.
    """
    os.makedirs(target_dir, exist_ok=True)

    for subdir in sorted(os.listdir(source_dir)):
        if not (subdir.startswith(main_class) or subdir.endswith(main_class)):
            continue

        src_path = os.path.join(source_dir, subdir)
        if not os.path.isdir(src_path):
            # copytree only accepts directories; a stray file whose name
            # happens to match is not a class folder.
            continue

        # dirs_exist_ok lets the cell be re-run after a kernel restart: the
        # copies from the previous run are still on disk and would otherwise
        # make copytree raise FileExistsError.
        shutil.copytree(src_path, os.path.join(target_dir, subdir), dirs_exist_ok=True)
        print(f'Copied {subdir} to {target_dir}')

In [33]:
def reformat_coco_annots(source_dir, output_dir, img_width=640, img_height=640):
    """Convert per-class annotation folders into a YOLOv8 ``train`` split.

    Every sub-directory of ``source_dir`` is treated as one class. Class ids
    are assigned by position in the sorted list of sub-directories. Each class
    folder must contain a ``.json`` file that maps an image file name to a
    dict with a ``'bbox'`` entry in ``(x_min, y_min, width, height)`` form.
    For every listed image the file is copied to ``<output_dir>/train/images``
    and a one-line label file is written to ``<output_dir>/train/labels``.

    Args:
        source_dir: Directory containing one sub-directory per class.
        output_dir: Root of the YOLOv8 dataset; created when missing.
        img_width: Image width used to normalise boxes (default 640).
        img_height: Image height used to normalise boxes (default 640).

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        FileNotFoundError: If a class folder holds no ``.json`` annotation file.
    """
    img_dir = os.path.join(output_dir, 'train/images')
    labels_dir = os.path.join(output_dir, 'train/labels')
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    # Only directories are classes. Filtering before enumerate keeps the ids
    # contiguous even when stray files (e.g. .DS_Store) sit next to them.
    class_dirs = [
        entry for entry in sorted(os.listdir(source_dir))
        if os.path.isdir(os.path.join(source_dir, entry))
    ]

    for cat_id, cat_dir in enumerate(class_dirs):
        cat_path = os.path.join(source_dir, cat_dir)

        # Sorted so the choice is deterministic if several JSON files exist.
        json_files = sorted(f for f in os.listdir(cat_path) if f.endswith('.json'))
        if not json_files:
            raise FileNotFoundError(f"No .json annotation file found in {cat_path}")
        annotation_path = os.path.join(cat_path, json_files[0])

        with open(annotation_path, 'r') as f:
            annotations = json.load(f)

        for img_file, annots in annotations.items():
            shutil.copy(os.path.join(cat_path, img_file), os.path.join(img_dir, img_file))

            x_center, y_center, width, height = convert_coco_to_yolov8(
                annots['bbox'], img_width, img_height
            )
            yolo_annotation = f"{cat_id} {x_center} {y_center} {width} {height}"

            # Swap only the final extension: str.replace would also rewrite a
            # '.jpg' in the middle of the name and would leave other image
            # extensions untouched.
            label_file_name = os.path.splitext(img_file)[0] + '.txt'
            label_file_path = os.path.join(labels_dir, label_file_name)
            with open(label_file_path, 'w') as label_file:
                label_file.write(yolo_annotation + "\n")

        print(f"Successfully converted {cat_dir} to yolov8 annotation format.")
    print(f"Finished conversion of {source_dir} to yolov8 annotation format.")

In [34]:
def compare_file_counts(src_dir, agg_dir, yolo_dir, main_class) -> None:
    """Print how many ``.jpg`` files exist at each stage of the pipeline.

    Three totals are reported on one line: files in the matching class
    folders of ``src_dir``, files in the matching class folders of
    ``agg_dir``, and files in ``<yolo_dir>/train/images/``. A class folder
    matches when its name starts or ends with ``main_class``.

    Args:
        src_dir: Directory holding the original class folders.
        agg_dir: Directory holding the aggregated copies.
        yolo_dir: Root of the converted dataset.
        main_class: Prefix/suffix that selects the class folders to count.
    """

    def jpgs_in_class_dirs(root):
        # Sum the .jpg files found directly inside each matching sub-folder.
        total = 0
        for entry in os.listdir(root):
            if not (entry.startswith(main_class) or entry.endswith(main_class)):
                continue
            entry_path = os.path.join(root, entry)
            total += sum(1 for name in os.listdir(entry_path) if name.endswith(".jpg"))
        return total

    init_cnt = jpgs_in_class_dirs(src_dir)
    post_cnt = jpgs_in_class_dirs(agg_dir)

    # The converted dataset is flat: every image sits directly in train/images.
    converted = [name for name in os.listdir(yolo_dir + "/train/images/") if name.endswith(".jpg")]
    yolo_cnt = len(converted)

    print(f"Pre-aggregation: {init_cnt}, Post-aggregation: {post_cnt}, Post-conversion: {yolo_cnt}")

# Conversion of BCS data

In [35]:
# Copy the source sub-folders whose names start or end with 'BCS-' into BCS_OUT_DIR.
aggregate_class_folders(SOURCE_DATA_DIR, BCS_OUT_DIR, main_class='BCS-')

Copied BCS-1 to data/RF_sorted/agg_data/BCS
Copied BCS-1.25 to data/RF_sorted/agg_data/BCS
Copied BCS-1.5 to data/RF_sorted/agg_data/BCS
Copied BCS-1.75 to data/RF_sorted/agg_data/BCS
Copied BCS-2 to data/RF_sorted/agg_data/BCS
Copied BCS-2.25 to data/RF_sorted/agg_data/BCS
Copied BCS-2.5 to data/RF_sorted/agg_data/BCS
Copied BCS-2.75 to data/RF_sorted/agg_data/BCS
Copied BCS-3 to data/RF_sorted/agg_data/BCS
Copied BCS-3.25 to data/RF_sorted/agg_data/BCS
Copied BCS-3.5 to data/RF_sorted/agg_data/BCS
Copied BCS-3.75 to data/RF_sorted/agg_data/BCS
Copied BCS-4 to data/RF_sorted/agg_data/BCS
Copied BCS-4.25 to data/RF_sorted/agg_data/BCS
Copied BCS-4.5 to data/RF_sorted/agg_data/BCS
Copied BCS-4.75 to data/RF_sorted/agg_data/BCS


In [36]:
# Write train/images and train/labels for the aggregated BCS folders under BCS_YOLO_DIR.
reformat_coco_annots(BCS_OUT_DIR, BCS_YOLO_DIR)

Successfully converted BCS-1 to yolov8 annotation format.
Successfully converted BCS-1.25 to yolov8 annotation format.
Successfully converted BCS-1.5 to yolov8 annotation format.
Successfully converted BCS-1.75 to yolov8 annotation format.
Successfully converted BCS-2 to yolov8 annotation format.
Successfully converted BCS-2.25 to yolov8 annotation format.
Successfully converted BCS-2.5 to yolov8 annotation format.
Successfully converted BCS-2.75 to yolov8 annotation format.
Successfully converted BCS-3 to yolov8 annotation format.
Successfully converted BCS-3.25 to yolov8 annotation format.
Successfully converted BCS-3.5 to yolov8 annotation format.
Successfully converted BCS-3.75 to yolov8 annotation format.
Successfully converted BCS-4 to yolov8 annotation format.
Successfully converted BCS-4.25 to yolov8 annotation format.
Successfully converted BCS-4.5 to yolov8 annotation format.
Successfully converted BCS-4.75 to yolov8 annotation format.
Finished conversion of data/RF_sorted/agg_data/BCS to yolov8 annotation format.

In [37]:
# Sanity check: print the BCS .jpg counts before aggregation, after aggregation and after conversion.
compare_file_counts(SOURCE_DATA_DIR, BCS_OUT_DIR, BCS_YOLO_DIR, main_class='BCS-')

Pre-aggregation: 101, Post-aggregation: 101, Post-conversion: 101


# Conversion of Udder Type data

In [38]:
# Copy the source sub-folders whose names start or end with 'Udder-' into UTYPE_OUT_DIR.
aggregate_class_folders(SOURCE_DATA_DIR, UTYPE_OUT_DIR, main_class='Udder-')

Copied Udder-Compact to data/RF_sorted/agg_data/UdderType
Copied Udder-Moderately-Pendulous to data/RF_sorted/agg_data/UdderType
Copied Udder-Pendulous to data/RF_sorted/agg_data/UdderType
Copied Udder-Small-Tight to data/RF_sorted/agg_data/UdderType


In [39]:
# Write train/images and train/labels for the aggregated udder-type folders under UTYPE_YOLO_DIR.
reformat_coco_annots(UTYPE_OUT_DIR, UTYPE_YOLO_DIR)

Successfully converted Udder-Compact to yolov8 annotation format.
Successfully converted Udder-Moderately-Pendulous to yolov8 annotation format.
Successfully converted Udder-Pendulous to yolov8 annotation format.
Successfully converted Udder-Small-Tight to yolov8 annotation format.
Finished conversion of data/RF_sorted/agg_data/UdderType to yolov8 annotation format.


In [40]:
# Sanity check: print the udder-type .jpg counts before aggregation, after aggregation and after conversion.
compare_file_counts(SOURCE_DATA_DIR, UTYPE_OUT_DIR, UTYPE_YOLO_DIR, main_class='Udder-')

Pre-aggregation: 105, Post-aggregation: 105, Post-conversion: 105


# Conversion of Cleft data

In [41]:
# Copy the source sub-folders whose names start or end with 'Clefted-' into CLEFT_OUT_DIR.
aggregate_class_folders(SOURCE_DATA_DIR, CLEFT_OUT_DIR, main_class='Clefted-')

Copied Clefted-Deep to data/RF_sorted/agg_data/Cleft
Copied Clefted-Moderate to data/RF_sorted/agg_data/Cleft
Copied Clefted-Slightly to data/RF_sorted/agg_data/Cleft


In [42]:
# Write train/images and train/labels for the aggregated cleft folders under CLEFT_YOLO_DIR.
reformat_coco_annots(CLEFT_OUT_DIR, CLEFT_YOLO_DIR)

Successfully converted Clefted-Deep to yolov8 annotation format.
Successfully converted Clefted-Moderate to yolov8 annotation format.
Successfully converted Clefted-Slightly to yolov8 annotation format.
Finished conversion of data/RF_sorted/agg_data/Cleft to yolov8 annotation format.


In [43]:
# Sanity check: print the cleft .jpg counts before aggregation, after aggregation and after conversion.
compare_file_counts(SOURCE_DATA_DIR, CLEFT_OUT_DIR, CLEFT_YOLO_DIR, main_class="Clefted-")

Pre-aggregation: 116, Post-aggregation: 116, Post-conversion: 116


# Conversion of Teat Score data

In [44]:
# Copy the source sub-folders whose names start or end with 'Teat-Score-' into TSCORE_OUT_DIR.
aggregate_class_folders(SOURCE_DATA_DIR, TSCORE_OUT_DIR, main_class="Teat-Score-")

Copied Teat-Score-3 to data/RF_sorted/agg_data/TScore
Copied Teat-Score-5 to data/RF_sorted/agg_data/TScore
Copied Teat-Score-7 to data/RF_sorted/agg_data/TScore
Copied Teat-Score-9 to data/RF_sorted/agg_data/TScore


In [45]:
# Write train/images and train/labels for the aggregated teat-score folders under TSCORE_YOLO_DIR.
reformat_coco_annots(TSCORE_OUT_DIR, TSCORE_YOLO_DIR)

Successfully converted Teat-Score-3 to yolov8 annotation format.
Successfully converted Teat-Score-5 to yolov8 annotation format.
Successfully converted Teat-Score-7 to yolov8 annotation format.
Successfully converted Teat-Score-9 to yolov8 annotation format.
Finished conversion of data/RF_sorted/agg_data/TScore to yolov8 annotation format.


In [46]:
# Sanity check: print the teat-score .jpg counts before aggregation, after aggregation and after conversion.
compare_file_counts(SOURCE_DATA_DIR, TSCORE_OUT_DIR, TSCORE_YOLO_DIR, main_class="Teat-Score-")

Pre-aggregation: 118, Post-aggregation: 118, Post-conversion: 118


# Conversion of Cow Breed data

In [47]:
# Copy the source sub-folders whose names start or end with 'Cow-' into BREED_OUT_DIR.
aggregate_class_folders(SOURCE_DATA_DIR, BREED_OUT_DIR, main_class='Cow-')

Copied Cow-Gir to data/RF_sorted/agg_data/CowBreed
Copied Cow-HF-Crossbreed to data/RF_sorted/agg_data/CowBreed
Copied Cow-Hallikar to data/RF_sorted/agg_data/CowBreed
Copied Cow-Jersey-Crossbreed to data/RF_sorted/agg_data/CowBreed
Copied Cow-Non-Descript-Breed to data/RF_sorted/agg_data/CowBreed


In [48]:
# Write train/images and train/labels for the aggregated breed folders under BREED_YOLO_DIR.
reformat_coco_annots(BREED_OUT_DIR, BREED_YOLO_DIR)

Successfully converted Cow-Gir to yolov8 annotation format.
Successfully converted Cow-HF-Crossbreed to yolov8 annotation format.
Successfully converted Cow-Hallikar to yolov8 annotation format.
Successfully converted Cow-Jersey-Crossbreed to yolov8 annotation format.
Successfully converted Cow-Non-Descript-Breed to yolov8 annotation format.
Finished conversion of data/RF_sorted/agg_data/CowBreed to yolov8 annotation format.


In [49]:
# Sanity check: print the breed .jpg counts before aggregation, after aggregation and after conversion.
compare_file_counts(SOURCE_DATA_DIR, BREED_OUT_DIR, BREED_YOLO_DIR, main_class='Cow-')

Pre-aggregation: 106, Post-aggregation: 106, Post-conversion: 106


# Conversion of Breed Grade data

In [50]:
# Copy the source sub-folders whose names start or end with 'Breed-Grade-' into GRADE_OUT_DIR.
aggregate_class_folders(SOURCE_DATA_DIR, GRADE_OUT_DIR, main_class='Breed-Grade-')

Copied Breed-Grade-A2 to data/RF_sorted/agg_data/Grade
Copied Breed-Grade-A3 to data/RF_sorted/agg_data/Grade
Copied Breed-Grade-B to data/RF_sorted/agg_data/Grade
Copied Breed-Grade-C to data/RF_sorted/agg_data/Grade


In [51]:
# Write train/images and train/labels for the aggregated grade folders under GRADE_YOLO_DIR.
reformat_coco_annots(GRADE_OUT_DIR, GRADE_YOLO_DIR)

Successfully converted Breed-Grade-A2 to yolov8 annotation format.
Successfully converted Breed-Grade-A3 to yolov8 annotation format.
Successfully converted Breed-Grade-B to yolov8 annotation format.
Successfully converted Breed-Grade-C to yolov8 annotation format.
Finished conversion of data/RF_sorted/agg_data/Grade to yolov8 annotation format.


In [52]:
# Sanity check: print the grade .jpg counts before aggregation, after aggregation and after conversion.
compare_file_counts(SOURCE_DATA_DIR, GRADE_OUT_DIR, GRADE_YOLO_DIR, main_class='Breed-Grade-')

Pre-aggregation: 102, Post-aggregation: 102, Post-conversion: 102
