In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os 
import json


### Load data

In [19]:
path_to_annotations = '../football-dataset/annotations/instances_default.json'
image_folder = '../football-dataset/images/'


num_images = len(os.listdir(image_folder))
print(f'Number of images: {num_images}')

with open(path_to_annotations) as f:
    data = json.load(f)

first_image = data['images'][0]
first_annotation = data['annotations'][0]
print(f"Example image: {first_image}")
print(f"Example annotation: {first_annotation}")

Number of images: 512
Example image: {'id': 1, 'width': 1920, 'height': 1080, 'file_name': '0.jpg', 'license': 0, 'flickr_url': '', 'coco_url': '', 'date_captured': 0}
Example annotation: {'id': 1, 'image_id': 1, 'category_id': 1, 'segmentation': [[1630.5, 686.9, 1632.0, 683.6, 1633.4, 679.7, 1634.2, 676.1, 1634.2, 673.0, 1634.8, 669.8, 1634.8, 667.5, 1636.7, 665.0, 1637.6, 664.8, 1638.0, 665.9, 1639.1, 669.3, 1641.2, 673.4, 1643.5, 678.4, 1645.4, 682.1, 1648.1, 686.4, 1650.2, 688.8, 1650.9, 691.6, 1650.0, 693.9, 1647.4, 695.9, 1644.6, 697.6, 1643.9, 699.1, 1646.1, 700.1, 1650.6, 700.0, 1655.2, 699.9, 1658.5, 699.5, 1661.6, 698.5, 1662.1, 697.2, 1661.5, 694.9, 1660.7, 693.0, 1659.9, 691.7, 1659.1, 689.6, 1658.3, 687.1, 1657.1, 685.2, 1656.1, 683.1, 1654.8, 680.1, 1653.8, 676.5, 1652.6, 672.3, 1650.5, 669.2, 1649.1, 667.3, 1648.2, 664.9, 1647.5, 662.1, 1647.1, 658.5, 1647.0, 654.4, 1648.8, 649.0, 1651.0, 645.5, 1652.3, 640.6, 1652.5, 636.2, 1652.0, 631.8, 1653.1, 626.7, 1653.2, 621.6, 1

### Create annotations


In [20]:
# Create labels from the annotations in yolo format and put them in labels folder

width, height = 1920, 1080

for image in data['images']:
    image_name = image['file_name']
    image_id = image['id']
    image_annotations = [annotation for annotation in data['annotations'] if annotation['image_id'] == image_id]
    with open(f'../football-dataset/labels/{image_name.split(".")[0]}.txt', 'w') as f:
        for annotation in image_annotations:
            category_id = annotation['category_id']
            bbox = annotation['bbox']
            segmentation = annotation['segmentation']
            # Convert the segmentation polygon to yolo format
            segmentation = np.array(segmentation).reshape(-1, 2)   
            segmentation[:, 0] = segmentation[:, 0] / width
            segmentation[:, 1] = segmentation[:, 1] / height
            segmentation = segmentation.flatten()

            # Unbox segmentation into string
            segmentation = [str(x) for x in segmentation]
            segmentation = ' '.join(segmentation)
            category_id = int(category_id) - 1
            f.write(f"{str(category_id)} {segmentation}\n")


### Put annotations in correct folder structure

In [21]:
# Split the images and labels folder into train, valid and test folders
images = os.listdir('../football-dataset/images/')
labels = os.listdir('../football-dataset/labels/')
images.sort()
labels.sort()

train_images = images[:int(0.8 * len(images))]
train_labels = labels[:int(0.8 * len(labels))]
valid_images = images[int(0.8 * len(images)):int(0.9 * len(images))]
valid_labels = labels[int(0.8 * len(labels)):int(0.9 * len(labels))]
test_images = images[int(0.9 * len(images)):]
test_labels = labels[int(0.9 * len(labels)):]
print(f"Number of train images: {len(train_images)}")
print(f"Number of valid images: {len(valid_images)}")
print(f"Number of test images: {len(test_images)}")

# Create the train, valid and test folders
os.makedirs('../football-dataset-yolo/train/images', exist_ok=True)
os.makedirs('../football-dataset-yolo/train/labels', exist_ok=True)
os.makedirs('../football-dataset-yolo/valid/images', exist_ok=True)
os.makedirs('../football-dataset-yolo/valid/labels', exist_ok=True)
os.makedirs('../football-dataset-yolo/test/images', exist_ok=True)
os.makedirs('../football-dataset-yolo/test/labels', exist_ok=True)

print(train_images)

# Move the images and labels to the respective folders
for image, label in zip(train_images, train_labels):
    os.rename(f'../football-dataset/images/{image}', f'../football-dataset-yolo/train/images/{image}')
    os.rename(f'../football-dataset/labels/{label}', f'../football-dataset-yolo/train/labels/{label}')

for image, label in zip(valid_images, valid_labels):
    os.rename(f'../football-dataset/images/{image}', f'../football-dataset-yolo/valid/images/{image}')
    os.rename(f'../football-dataset/labels/{label}', f'../football-dataset-yolo/valid/labels/{label}')

for image, label in zip(test_images, test_labels):
    os.rename(f'../football-dataset/images/{image}', f'../football-dataset-yolo/test/images/{image}')
    os.rename(f'../football-dataset/labels/{label}', f'../football-dataset-yolo/test/labels/{label}')
                      

Number of train images: 409
Number of valid images: 51
Number of test images: 52
['0.jpg', '1.jpg', '10.jpg', '100.jpg', '101.jpg', '102.jpg', '103.jpg', '104.jpg', '106.jpg', '107.jpg', '108.jpg', '109.jpg', '11.jpg', '110.jpg', '111.jpg', '112.jpg', '113.jpg', '114.jpg', '115.jpg', '116.jpg', '117.jpg', '118.jpg', '119.jpg', '12.jpg', '120.jpg', '121.jpg', '122.jpg', '123.jpg', '124.jpg', '125.jpg', '126.jpg', '127.jpg', '128.jpg', '129.jpg', '13.jpg', '130.jpg', '131.jpg', '132.jpg', '133.jpg', '134.jpg', '135.jpg', '136.jpg', '137.jpg', '138.jpg', '139.jpg', '14.jpg', '140.jpg', '141.jpg', '142.jpg', '143.jpg', '144.jpg', '146.jpg', '147.jpg', '148.jpg', '149.jpg', '15.jpg', '150.jpg', '151.jpg', '153.jpg', '154.jpg', '155.jpg', '156.jpg', '157.jpg', '158.jpg', '16.jpg', '160.jpg', '161.jpg', '162.jpg', '163.jpg', '164.jpg', '165.jpg', '166.jpg', '167.jpg', '168.jpg', '169.jpg', '17.jpg', '170.jpg', '171.jpg', '172.jpg', '173.jpg', '174.jpg', '175.jpg', '176.jpg', '177.jpg', '178.j