# Convert Coco JSON Annotations to YOLO TXT Files
Use this notebook to try out importing, analyzing, and exporting datasets of image annotations. 

In [1]:
import logging
logging.getLogger().setLevel(logging.CRITICAL)
!pip install pylabel > /dev/null

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aicrowd-cli 0.1.15 requires click<8,>=7.1.2, but you have click 8.0.4 which is incompatible.
aicrowd-cli 0.1.15 requires pyzmq==22.1.0, but you have pyzmq 25.1.1 which is incompatible.[0m[31m
[0m

## Import coco annotations 
First, we import the annotations for the dataset, which are stored in COCO JSON format.

In [7]:
import os 
import zipfile
from pylabel import importer
# Make sure a local "data" directory exists. Nothing is downloaded in this cell;
# the paths below point at files that must already be present under "datasets/".
os.makedirs("data", exist_ok=True)


# Path to the COCO-format JSON annotation file
path_to_annotations = "datasets/phase2_train_v0/coco_val_annotation.json"
# Path to the images (needed when they are in a different folder than the annotations)
path_to_images = "datasets/phase2_train_v0/images"

# Import the annotations into a PyLabel dataset; the annotations are exposed as
# a pandas DataFrame on `dataset.df`.
# NOTE(review): the name "BCCD_coco" looks carried over from another example;
# the classes in this dataset are mosquito species - confirm the intended name.
dataset = importer.ImportCoco(path_to_annotations, path_to_images=path_to_images, name="BCCD_coco")
# Preview the first five annotation rows
dataset.df.head(5)


Unnamed: 0_level_0,img_folder,img_filename,img_path,img_id,img_width,img_height,img_depth,ann_segmented,ann_bbox_xmin,ann_bbox_ymin,...,ann_iscrowd,ann_keypoints,ann_pose,ann_truncated,ann_difficult,cat_id,cat_name,cat_supercategory,split,annotated
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,datasets/phase2_train_v0/images,train_00071.jpeg,,71,3024,4032,,,931.0,2208.0,...,0,,,,,1,albopictus,,,1
1,datasets/phase2_train_v0/images,train_00106.jpeg,,106,1094,1675,,,232.0,443.0,...,0,,,,,1,albopictus,,,1
2,datasets/phase2_train_v0/images,train_00228.jpeg,,228,4128,2322,,,1914.0,1048.0,...,0,,,,,1,albopictus,,,1
3,datasets/phase2_train_v0/images,train_00381.jpeg,,381,2448,3264,,,1201.0,1593.0,...,0,,,,,1,albopictus,,,1
4,datasets/phase2_train_v0/images,train_00406.jpeg,,406,718,958,,,60.0,188.0,...,0,,,,,1,albopictus,,,1


## Analyze annotations
PyLabel can calculate basic summary statistics about the dataset, such as the number of files and the classes. 
The dataset is stored as a pandas DataFrame, so the developer can do additional exploratory analysis on the dataset. 

In [8]:
# Report the summary statistics PyLabel computes for the imported dataset,
# followed by the annotation path recorded on the dataset object.
analysis = dataset.analyze
summary_lines = (
    f"Number of images: {analysis.num_images}",
    f"Number of classes: {analysis.num_classes}",
    f"Classes:{analysis.classes}",
    f"Class counts:\n{analysis.class_counts}",
    f"Path to annotations:\n{dataset.path_to_annotations}",
)
# One entry per line, exactly as five consecutive print() calls would emit them
print(*summary_lines, sep="\n")


Number of images: 516
Number of classes: 6
Classes:['aegypti', 'albopictus', 'anopheles', 'culex', 'culiseta', 'japonicus/koreicus']
Class counts:
cat_name
albopictus            230
culex                 228
culiseta               31
japonicus/koreicus     21
anopheles               4
aegypti                 2
Name: count, dtype: int64
Path to annotations:
datasets/phase2_train_v0


## Visualize Annotations 
You can render the bounding boxes for your image to inspect them and confirm that they imported correctly.  

In [None]:
from IPython.display import Image, display
# Pick an image id and an image file name that are taken from the imported
# annotations themselves, so the lookup inside ShowBoundingBoxes cannot miss.
# The previously hard-coded file lived under "datasets/train/trimmed_images",
# which is not the folder this dataset was imported from, and the hard-coded
# id 100 does not appear among the ids shown in the dataframe preview.
sample_img_id = dataset.df["img_id"].iloc[0]
sample_img_filename = str(dataset.df["img_filename"].iloc[-1])

# Render the bounding boxes once by image id and once by image file name
display(dataset.visualize.ShowBoundingBoxes(sample_img_id))
display(dataset.visualize.ShowBoundingBoxes(sample_img_filename))

# Export to Yolo v5
The PyLabel exporter will export all of the annotations in the dataframe to the desired target format.
Yolo creates one text file for each image in the dataset. 

In [None]:
# Re-point the dataset's annotation path at the YOLO output folder before exporting.
# NOTE(review): the copy step later in this notebook reads label files from
# "datasets/val/labels", not "datasets/yolo" - confirm where ExportToYoloV5
# actually writes its .txt files with the installed PyLabel version.
dataset.path_to_annotations = "datasets/yolo"
# Export every annotation in the dataframe in YOLOv5 text format
dataset.export.ExportToYoloV5()

# Moving Folders (Try to separate them in the main folder in the future)

In [10]:
import os
import shutil

# Folder holding all original images, the folder with the YOLO label files of
# the validation split, and the folder the matching images are copied into.
train_folder = 'datasets/phase2_train_v0/images'
labels_folder = 'datasets/val/labels'
val_folder = 'datasets/val/images' # Destination folder

# Create the val folder if it doesn't exist (no-op when it is already there)
os.makedirs(val_folder, exist_ok=True)

# Get a list of txt files in the labels folder (sorted for a reproducible order)
txt_files = sorted(file for file in os.listdir(labels_folder) if file.endswith('.txt'))

# Copy the image belonging to each label file into the val folder and keep
# track of labels whose image is missing instead of skipping them silently.
missing_images = []
for txt_file in txt_files:
    # Images share the label file's base name and use the .jpeg extension
    image_filename = os.path.splitext(txt_file)[0] + '.jpeg'
    src_image_path = os.path.join(train_folder, image_filename)
    dest_image_path = os.path.join(val_folder, image_filename)

    if os.path.isfile(src_image_path):
        shutil.copy(src_image_path, dest_image_path)
    else:
        missing_images.append(image_filename)

# Surface label files without an image; otherwise the split would silently
# end up with fewer images than labels.
if missing_images:
    print(
        f"Warning: {len(missing_images)} of {len(txt_files)} images were not found "
        f"in {train_folder} (first few: {missing_images[:5]})"
    )

print("Copying completed.")


Copying completed.


# fiftyone data visualisation

In [None]:
import fiftyone as fo
# Name under which the dataset is registered in FiftyOne, and the root
# directory that holds the YOLOv5-formatted splits.
name = "my-dataset"
dataset_dir = "datasets/"

# The splits to load
splits = ["train", "val"]

# Remove a dataset of the same name left over from a previous run so the cell
# can be re-executed. An explicit existence check replaces the former bare
# `except: pass`, which also hid genuine errors (and KeyboardInterrupt).
if fo.dataset_exists(name):
    fo.delete_dataset(name)

# NOTE: this rebinds `dataset`, which earlier cells of this notebook use for
# the PyLabel dataset; re-run the import cell before re-running those cells.
dataset = fo.Dataset(name)
for split in splits:
    # Load one split and tag its samples with the split name
    dataset.add_dir(
        dataset_dir=dataset_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        split=split,
        tags=split,
    )

# Get some summary information about the dataset
print(dataset.info)
# `stats` is a method; it has to be called to obtain the statistics
print(dataset.stats())
#session = fo.Session(dataset=dataset)

In [None]:
from fiftyone import ViewField as F

# Include only the sample whose file path matches one specific training image.
# The path is resolved against the current working directory instead of being
# hard-coded to one machine's home directory.
# NOTE(review): assumes FiftyOne stored the absolute form of the relative
# "datasets/" paths used when the dataset was loaded - confirm.
target_filepath = os.path.abspath(
    os.path.join("datasets", "train", "images", "train_10160.jpeg")
)
small_images_view = dataset.match(F("filepath") == target_filepath)
session = fo.Session(dataset=small_images_view)