# **PREPARATION OF A CUSTOM OBJECT DETECTION DATASET FOR YOLO TRAINING**


****

# **AIM**:
- Prepare an object-detection dataset from COCO-style annotations: images, labels, and visible & occluded masks
- Then split into train/val/test and format for YOLO training

**Download Dataset from:**  [LINK](https://drive.google.com/drive/folders/1D5hHFDtgd5RnX__55MmpfOAM83qdGYf0?usp=sharing)

**Load required libraries**

In [9]:
import os
import json
import shutil
from PIL import Image
from sklearn.model_selection import train_test_split

**Define Annotation file path**

In [10]:
# Location of the annotation JSON inside the Kaggle input dataset.
annotation_file_path = "/kaggle/input/new-dataset-object-detection/val_split/coco_anns_uoais_sim_val.json"

**Load JSON annotations**

In [11]:
# Parse the annotation JSON and keep the list stored under its 'annotations' key.
with open(annotation_file_path, "r") as json_handle:
    data = json.load(json_handle)
annotations = data['annotations']

**Create necessary folders for image-label-mask storage**

In [4]:
# Root of the intermediate (not yet split) dataset and its four content folders.
dataset_path = "image_label"
images_path = os.path.join(dataset_path, "images")
labels_path = os.path.join(dataset_path, "labels")
visiblemask_path = os.path.join(dataset_path, "visiblemask")
occludedmask_path = os.path.join(dataset_path, "occludedmask")

# Create each content folder; exist_ok=True makes re-running this cell harmless.
for content_folder in (images_path, labels_path, visiblemask_path, occludedmask_path):
    os.makedirs(content_folder, exist_ok=True)

In [5]:
#Helper function to write content to files
def add_content(path, content):
    """Append ``content`` to the text file at ``path`` as a new line.

    The file is created when it does not exist. When it already holds data,
    a newline is written first so the new record starts on its own line.
    No trailing newline is written.

    Args:
        path: Destination text file.
        content: A string (or an iterable of strings) handed to
            ``file.writelines``.
    """
    # Separate from previous content only when there is some: an existing but
    # empty file must not receive a leading blank line.
    needs_separator = os.path.exists(path) and os.path.getsize(path) > 0
    # Mode 'a' creates a missing file, so no separate 'w' branch is required.
    with open(path, 'a') as f:
        if needs_separator:
            f.write('\n')
        f.writelines(content)

# Extract and save labels + masks in YOLO format
# The output folders do not depend on the annotation, so build them once.
# NOTE(review): these absolute paths only coincide with the relative
# image_label/ folders created earlier when the working directory is
# /kaggle/working - confirm.
labels_dir = "/kaggle/working/image_label/labels"
visiblemask_dir = "/kaggle/working/image_label/visiblemask"
occludedmask_dir = "/kaggle/working/image_label/occludedmask"

# NOTE(review): add_content appends, so running this cell twice duplicates
# every line in the output files; clear the folders before a re-run.
for ann in annotations:
    # NOTE(review): the +1 presumably maps annotation image ids onto the
    # numbering of the image files - verify against the image folder.
    image_id = ann['image_id'] + 1
    # NOTE(review): category_id is written unchanged as the class index.
    # YOLO class indices are 0-based - confirm the ids in this file start at 0.
    class_id = ann['category_id']
    x, y, w, h = ann['bbox']
    height, width = ann['height'], ann['width']
    visible_mask = ann['visible_mask']['counts']
    occluded_mask = ann['occluded_mask']['counts']

    # One text file per image; all objects of that image share it.
    txt_file_name = f"{image_id}.txt"
    label_file_path = os.path.join(labels_dir, txt_file_name)
    visible_file_path = os.path.join(visiblemask_dir, txt_file_name)
    occluded_file_path = os.path.join(occludedmask_dir, txt_file_name)

    # (x, y) is treated as the box's minimum corner: shift by half the size
    # to get the centre, then normalise by the image dimensions.
    x_center, y_center = x + w / 2, y + h / 2
    # '.6f' gives fixed-point output with six decimals. A bare '.6' means six
    # significant digits and switches to scientific notation for tiny values.
    label_content = (
        f"{class_id} {x_center / width:.6f} {y_center / height:.6f} "
        f"{w / width:.6f} {h / height:.6f}"
    )

    add_content(label_file_path, label_content)
    add_content(visible_file_path, visible_mask)
    add_content(occluded_file_path, occluded_mask)

**Copy and convert images from dataset folders**

In [75]:
def copy_images(mainfolder_path, destination_folder_path, image_type):
    """Convert the images of each image type to JPEG in one flat folder.

    For every entry ``t`` of ``image_type`` the files in
    ``<mainfolder_path>/<t>/color`` are read and saved as
    ``<destination_folder_path>/<id>.jpg``, where ``<id>`` is the integer
    file name without extension. Ids coming from the ``'tabletop'`` folder
    are shifted by 2500.
    NOTE(review): the shift presumably keeps the ids distinct from the other
    folder's and aligned with the annotation image ids - confirm.

    Images with an alpha channel (modes ``RGBA``/``LA``) are flattened onto a
    white background; everything else is converted to RGB.

    Args:
        mainfolder_path: Folder containing one sub-folder per image type.
        destination_folder_path: Existing folder that receives the JPEGs.
        image_type: Iterable of image-type sub-folder names.

    Raises:
        ValueError: If a file name (without extension) is not an integer.
    """
    tabletop_id_offset = 2500
    for img_type in image_type:
        images_folder_path = os.path.join(mainfolder_path, img_type, "color")
        for imagepath in os.listdir(images_folder_path):
            source_path = os.path.join(images_folder_path, imagepath)
            image_id = int(os.path.splitext(imagepath)[0])
            if img_type == 'tabletop':
                image_id += tabletop_id_offset
            destination_path = os.path.join(destination_folder_path, f"{image_id}.jpg")
            # The context manager closes the source file handle after each
            # image instead of leaving it to garbage collection.
            with Image.open(source_path) as image:
                if image.mode in ('RGBA', 'LA'):
                    # JPEG has no alpha channel: composite onto white, using
                    # the last band (alpha) as the paste mask.
                    background = Image.new('RGB', image.size, (255, 255, 255))
                    background.paste(image, mask=image.split()[-1])
                    rgb_image = background
                else:
                    rgb_image = image.convert("RGB")
                rgb_image.save(destination_path, "JPEG", quality=95)

**Apply the image copy to all image types**

In [65]:
# Input split folder, output image folder, and the image-type sub-folders to convert.
mainfolder_path = "/kaggle/input/new-dataset-object-detection/val_split/val"
destination_images_folder = "/kaggle/working/image_label/images"
type_image = ['bin', 'tabletop']

copy_images(
    mainfolder_path,
    destination_images_folder,
    type_image,
)

**ZIP the image_label directory for backup**

In [76]:
# Backup archive of the unsplit image_label folder. It uses its own base name
# because "dataset" is also the base name of the final YOLO archive, which
# would otherwise overwrite this backup.
shutil.make_archive("image_label","zip","/kaggle/working/image_label")

'/kaggle/working/image_label.zip'

**Create full dataset folder structure for YOLO**

In [77]:
# Root of the final YOLO dataset and its four content folders.
dataset_path = "dataset"

images_path = os.path.join(dataset_path, "images")
labels_path = os.path.join(dataset_path, "labels")
visiblemask_path = os.path.join(dataset_path, "visiblemask")
occludedmask_path = os.path.join(dataset_path, "occludedmask")

# Each content folder gets a train/val/test sub-folder.
for content_folder in (images_path, labels_path, visiblemask_path, occludedmask_path):
    for split_name in ("train", "val", "test"):
        os.makedirs(os.path.join(content_folder, split_name), exist_ok=True)

**Train/Val/Test Split for image files**

In [86]:
# os.listdir returns names in arbitrary order. Sorting first makes the fixed
# random_state produce the same split on every run.
images = sorted(os.listdir("/kaggle/working/image_label/images"))
# Hold out 5% for test, then 10% of the remaining files for validation.
train, test = train_test_split(images, test_size=0.05, random_state=42)
train, valid = train_test_split(train, test_size=0.1, random_state=42)

**Function to copy split data into respective folders**

In [108]:
def copy_files(folder, sampletype, source_folder, destination_folder):
    """Copy one split's images and their companion ``.txt`` files.

    Every non-hidden sub-directory of ``source_folder`` is a content type.
    For the ``images`` content type the sample file itself is copied; for any
    other content type the file with the same stem and a ``.txt`` extension
    is copied. Files are written to
    ``<destination_folder>/<content>/<sampletype>/``, which is created when
    missing.

    Args:
        folder: Iterable of image file names that belong to the split.
        sampletype: Split name used as destination sub-folder (e.g. "train").
        source_folder: Folder holding one sub-directory per content type.
        destination_folder: Root folder of the split dataset.

    Raises:
        FileNotFoundError: If an expected source file does not exist.
    """
    # Only real, non-hidden sub-directories count as content types. A stray
    # file or a hidden folder in source_folder would otherwise be joined into
    # a source path that does not exist and abort the copy.
    contenttype = [
        entry
        for entry in sorted(os.listdir(source_folder))
        if not entry.startswith(".")
        and os.path.isdir(os.path.join(source_folder, entry))
    ]
    for content in contenttype:
        os.makedirs(os.path.join(destination_folder, content, sampletype), exist_ok=True)

    for sample in folder:
        # Swap only the real extension, whatever it is, for '.txt'.
        companion_file = os.path.splitext(sample)[0] + ".txt"
        for content in contenttype:
            source_file = sample if content == 'images' else companion_file
            source_path = os.path.join(source_folder, content, source_file)
            destination_path = os.path.join(destination_folder, content, sampletype, source_file)
            shutil.copy(source_path, destination_path)

**Copy files into train, test, val folders**

In [109]:
source_folder = "/kaggle/working/image_label"
destination_folder = "/kaggle/working/dataset"

# Populate the splits in the order train, test, val.
for split_files, split_name in ((train, "train"), (test, "test"), (valid, "val")):
    copy_files(split_files, split_name, source_folder, destination_folder)

**Create dataset.yaml for YOLO training**

In [110]:
# Root of the split dataset; the YAML file is written directly inside it.
dataset_path = "/kaggle/working/dataset"

# YAML content: dataset root, image folders per split (relative to the root),
# class count and class names.
dataset_yaml_content=f"""
path: {os.path.abspath(dataset_path)}
train: images/train
val: images/val
test: images/test

nc: 1
names: ['Class_0']
"""
yaml_file_path = os.path.join(dataset_path, "dataset.yaml")
with open(yaml_file_path, "w") as yaml_file:
    yaml_file.write(dataset_yaml_content)

**ZIP final YOLO dataset folder**

In [111]:
# Zip the split dataset; the expression value is the path of the created archive.
shutil.make_archive(
    base_name="dataset",
    format="zip",
    root_dir="/kaggle/working/dataset",
)

'/kaggle/working/dataset.zip'