## Convert COCO format to YOLO format
### COCO stores bboxes as (x0, y0, w, h) in pixels; YOLOv5 expects (class, x_center, y_center, w, h) normalized by the image width and height

In [1]:
# Run this conversion for each of the train, validation, and test JSON files

In [None]:
import json
import os

def coco_to_yolo(coco_file, output_dir):
    """Convert a COCO annotation file into one YOLO label file per image.

    For every entry in the COCO ``images`` list a text file named after the
    image (extension replaced by ``.txt``) is written to ``output_dir``.
    Each line has the form ``category_id x_center y_center width height``,
    where the box values are the COCO ``[x, y, w, h]`` box converted to a
    centre point and divided by the image width/height.

    The COCO ``category_id`` is written unchanged; it is not re-based here.
    Images without annotations still get an (empty) label file.

    Args:
        coco_file: Path to the COCO JSON file. It must contain ``images``
            (with ``id``, ``file_name``, ``width``, ``height``) and
            ``annotations`` (with ``image_id``, ``category_id``, ``bbox``).
        output_dir: Directory that receives the ``.txt`` files. It is
            created if it does not exist yet.
    """
    with open(coco_file) as f:
        data = json.load(f)

    # Create the destination up front so the function does not depend on
    # the caller having prepared it.
    os.makedirs(output_dir, exist_ok=True)

    # Group annotations by image in a single pass. Filtering the full
    # annotation list once per image is O(images * annotations), which gets
    # slow on large datasets. Insertion order within each group matches the
    # order in the JSON file, so the emitted lines keep their order.
    annotations_by_image = {}
    for ann in data['annotations']:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    for image in data['images']:
        image_width = image['width']
        image_height = image['height']

        yolo_annotations = []
        for ann in annotations_by_image.get(image['id'], []):
            category_id = ann['category_id']
            # COCO boxes are [top-left x, top-left y, width, height].
            x, y, w, h = ann['bbox']
            x_center = (x + w / 2) / image_width
            y_center = (y + h / 2) / image_height
            width = w / image_width
            height = h / image_height

            yolo_annotations.append(
                f"{category_id} {x_center} {y_center} {width} {height}"
            )

        label_name = f"{os.path.splitext(image['file_name'])[0]}.txt"
        with open(os.path.join(output_dir, label_name), 'w') as f:
            f.write('\n'.join(yolo_annotations))

    print("Conversion complete!")


# Convert the test split. Point these two paths at the train or validation
# JSON (and a matching output folder) to convert the other splits.
coco_file = "/home/ec2-user/SageMaker/GROTOAP2-data/test.json"
output_dir = "/home/ec2-user/SageMaker/GROTOAP2-data/yolo_test"

# The label folder has to exist before the converter writes into it.
os.makedirs(output_dir, exist_ok=True)
coco_to_yolo(coco_file=coco_file, output_dir=output_dir)

## YOLOv5 class indices must start at 0, so shift the 1-based category ids down by one

In [None]:
import os

# Folder holding the YOLO label files whose class ids start at 1, and the
# folder that receives the copies with class ids shifted to start at 0.
input_folder = '/home/ec2-user/SageMaker/GROTOAP2_FOR_YOLO/test/labels'  
output_folder = '/home/ec2-user/SageMaker/GROTOAP2_FOR_YOLO/test/labelscorrect'


# exist_ok=True replaces the check-then-create pair, which is race-prone.
os.makedirs(output_folder, exist_ok=True)

txt_files = [f for f in os.listdir(input_folder) if f.endswith(".txt")]

for txt_file in txt_files:
    input_file = os.path.join(input_folder, txt_file)
    output_file = os.path.join(output_folder, txt_file)

    with open(input_file, "r") as file:
        lines = file.readlines()

    modified_lines = []
    for line in lines:
        parts = line.strip().split()
        if not parts:
            # Blank lines carry no annotation; drop them.
            continue

        # First field is the class id; the remaining fields are copied as-is.
        class_label = int(parts[0]) - 1
        if class_label < 0:
            # A negative class index is not a valid YOLO label. This usually
            # means the file was already zero-based (e.g. converted twice).
            raise ValueError(
                f"{input_file}: class id {parts[0]} would become negative "
                "after shifting; labels appear to be zero-based already"
            )
        modified_lines.append(" ".join([str(class_label)] + parts[1:]))

    with open(output_file, "w") as file:
        file.write("\n".join(modified_lines))

print("Conversion complete!")

In [2]:
# Start the experiment: clone YOLOv5, install its requirements, and train

In [None]:
!git clone https://github.com/ultralytics/yolov5.git

In [None]:
cd yolov5/

In [None]:
!pip install -r requirements.txt

In [None]:
!python train.py --epochs 80 --data /home/ec2-user/SageMaker/yolov5/data/groto.yaml --weights yolov5s.pt

In [1]:
'''
groto.yaml
train: /home/ec2-user/SageMaker/GROTOAP2_FOR_YOLO/train
val:  /home/ec2-user/SageMaker/GROTOAP2_FOR_YOLO/validation
test: /home/ec2-user/SageMaker/validation

nc: 22

names: ['BIB_INFO','BODY_CONTENT','REFERENCES','AFFILIATION','PAGE_NUMBER','ABSTRACT','AUTHOR', 'DATES','TITLE','COPYRIGHT','ACKNOWLEDGMENT','UNKNOWN','FIGURE','CORRESPONDENCE','CONFLICT_STATEMENT','TABLE','TYPE','KEYWORDS',
'EDITOR','AUTHOR_TITLE','GLOSSARY','EQUATION']
#names: ['abstract', 'acknowledgments', 'affiliation', 'author', #'bib_info','body_content','conflict_statement','copyright','correspondence','dates','editor','equation','figure','glossary',
#'keywords','page_number','references','table','title','title_author','type','unknown']
'''

"\ngroto.yaml\ntrain: /home/ec2-user/SageMaker/GROTOAP2_FOR_YOLO/train\nval:  /home/ec2-user/SageMaker/GROTOAP2_FOR_YOLO/validation\ntest: /home/ec2-user/SageMaker/validation\n\nnc: 22\n\nnames: ['BIB_INFO','BODY_CONTENT','REFERENCES','AFFILIATION','PAGE_NUMBER','ABSTRACT','AUTHOR', 'DATES','TITLE','COPYRIGHT','ACKNOWLEDGMENT','UNKNOWN','FIGURE','CORRESPONDENCE','CONFLICT_STATEMENT','TABLE','TYPE','KEYWORDS',\n'EDITOR','AUTHOR_TITLE','GLOSSARY','EQUATION']\n#names: ['abstract', 'acknowledgments', 'affiliation', 'author', #'bib_info','body_content','conflict_statement','copyright','correspondence','dates','editor','equation','figure','glossary',\n#'keywords','page_number','references','table','title','title_author','type','unknown']\n"