In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; every dataset path used further down
# in this notebook lives under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import shutil
import math
import random
import numpy as np

from glob import glob

In [None]:
def check_unique(lst):
    """Tell whether every element of ``lst`` occurs exactly once.

    Args:
        lst: A sequence accepted by ``np.unique`` (here: lists of file names).

    Returns:
        True when no value is repeated (an empty input counts as unique),
        False otherwise.
    """
    # Only the per-value occurrence counts matter; the distinct values
    # themselves are discarded.
    _, occurrences = np.unique(lst, return_counts=True)
    return bool(np.all(occurrences == 1))

def convert_files(labels_file_path, output_folder,
                  image_width=810, image_height=1080):
  """Convert a comma-separated ground-truth file into per-frame YOLO labels.

  Each non-empty input line must start with the six fields
  ``frame_id, track_id, x, y, w, h`` where ``(x, y)`` is the top-left corner
  of the bounding box in pixels. Any further fields are ignored. For every
  frame that has at least one box, ``<output_folder>/<frame_id>.txt`` is
  written with one ``0 x_center y_center width height`` line per box, all
  values normalised by the image size and rounded to two decimals.

  Args:
    labels_file_path: Path to the ground-truth text file to read.
    output_folder: Directory that receives the label files. It is created
      if it does not exist yet.
    image_width: Image width in pixels used for normalisation.
    image_height: Image height in pixels used for normalisation.

  Returns:
    The list of frame IDs (as strings, in first-seen order) for which a
    label file was written.

  Raises:
    ValueError: If a non-empty line has fewer than six fields or a
      coordinate cannot be parsed as a number.
  """
  # Collect the annotation lines per frame first. Writing only after the whole
  # file has been read means rows of one frame do not have to be contiguous:
  # reopening "<frame>.txt" in "w" mode for a frame seen earlier would
  # otherwise silently discard the boxes written before.
  annotations_by_frame = {}

  with open(labels_file_path, 'r') as ground_truth_file:
    for line_number, raw_line in enumerate(ground_truth_file, start=1):
      stripped_line = raw_line.strip()
      if not stripped_line:
        # Tolerate blank lines (e.g. a trailing newline at the end of file).
        continue

      fields = stripped_line.split(",")
      if len(fields) < 6:
        raise ValueError(
            f"{labels_file_path}:{line_number}: expected at least 6 "
            f"comma-separated fields, got {len(fields)}"
        )
      frame_id, _, x, y, w, h = fields[:6]
      frame_id = frame_id.strip()

      box_left = float(x)
      box_top = float(y)
      box_width = float(w)
      box_height = float(h)

      # Convert from the top-left corner to the centre of the bounding box,
      # then normalise to the 0..1 range expected by YOLOv8.
      x_center = box_left + box_width / 2
      y_center = box_top + box_height / 2
      y_norm = round(y_center / image_height, 2)
      h_norm = round(box_height / image_height, 2)
      x_norm = round(x_center / image_width, 2)
      w_norm = round(box_width / image_width, 2)

      # Denote each annotation as belonging the same class with the class ID 0,
      # because the dataset only contains pictures of lettuce.
      annotations_by_frame.setdefault(frame_id, []).append(
          f"0 {x_norm} {y_norm} {w_norm} {h_norm}\n"
      )

  os.makedirs(output_folder, exist_ok=True)

  written_ids = []
  for frame_id, frame_annotations in annotations_by_frame.items():
    with open(f"{output_folder}/{frame_id}.txt", "w") as label_file:
      label_file.writelines(frame_annotations)
    written_ids.append(frame_id)

  return written_ids


def organize_datasets(label_path, image_path, output_dataset_path,
                      starting_frame_id, train_portion=0.9, seed=None):
  """Copy label/image pairs into a train/val dataset layout with new IDs.

  ``*.txt`` files from ``label_path`` and ``*.png`` files from ``image_path``
  are sorted by the integer in their file name and paired by position. The
  pairs are split randomly into a validation and a training subset and copied
  to ``<output_dataset_path>/{val,train}/{labels,images}/<new_id>.{txt,png}``.
  New IDs are consecutive integers starting at ``starting_frame_id``;
  validation files are numbered first, then training files. The source files
  are copied, not moved.

  Args:
    label_path: Directory containing the per-frame ``<id>.txt`` label files.
    image_path: Directory containing the per-frame ``<id>.png`` images.
    output_dataset_path: Root directory of the dataset being assembled.
    starting_frame_id: First new integer ID to assign.
    train_portion: Fraction (0..1) of the pairs assigned to the training
      subset; the remainder goes to validation.
    seed: Optional seed for the split. With ``None`` the module-level
      ``random`` state is used.

  Returns:
    The next unused ID, i.e. ``starting_frame_id`` plus the number of pairs
    copied, so calls for several source folders can be chained.

  Raises:
    ValueError: If ``train_portion`` is outside 0..1, if a file name is not
      an integer, or if a paired label and image do not share the same ID.
  """
  def file_id(path):
    # Integer frame ID encoded in the file name, e.g. ".../12.txt" -> 12.
    return int(os.path.splitext(os.path.basename(path))[0])

  def move_files(label_image_pairs, output_label_path,
                 output_image_path, current_id):
    # Copy every pair under a fresh consecutive ID; return the next free ID.
    os.makedirs(output_label_path, exist_ok=True)
    os.makedirs(output_image_path, exist_ok=True)

    for label_file, image_file in label_image_pairs:
      output_label_file_name = os.path.join(output_label_path, f"{current_id}.txt")
      output_image_file_name = os.path.join(output_image_path, f"{current_id}.png")
      shutil.copy(label_file, output_label_file_name)
      shutil.copy(image_file, output_image_file_name)
      current_id += 1

    return current_id

  if not 0 <= train_portion <= 1:
    raise ValueError("train_portion must be between 0 and 1")

  label_files = sorted(glob(f"{label_path}/*.txt"), key=file_id)
  image_files = sorted(glob(f"{image_path}/*.png"), key=file_id)
  # NOTE: pairing is positional; if one folder holds more files than the
  # other, the surplus files at the end are left out.
  label_image_pairs = list(zip(label_files, image_files))

  # Validate all pairs before copying anything so a mismatch cannot leave a
  # partially written dataset behind.
  for label_file, image_file in label_image_pairs:
    if file_id(label_file) != file_id(image_file):
      raise ValueError("Mismatch in ground truth and image file IDs")

  dataset_size = len(label_image_pairs)

  # Split the dataset into train and validation subsets. Indices are sampled
  # and the subsets are kept as lists in frame order: iterating over sets of
  # string tuples depends on hash randomisation and would make the assignment
  # of new IDs differ from run to run.
  train_subset_size = math.floor(dataset_size * train_portion)
  val_subset_size = dataset_size - train_subset_size
  rng = random if seed is None else random.Random(seed)
  val_indices = set(rng.sample(range(dataset_size), val_subset_size))
  val_subset = [pair for index, pair in enumerate(label_image_pairs)
                if index in val_indices]
  train_subset = [pair for index, pair in enumerate(label_image_pairs)
                  if index not in val_indices]

  # Copy the files to the new dataset directory
  output_val_label_path = os.path.join(output_dataset_path, "val", "labels")
  output_val_image_path = os.path.join(output_dataset_path, "val", "images")
  current_id = move_files(val_subset, output_val_label_path,
                          output_val_image_path, starting_frame_id)

  output_train_ground_truth_path = os.path.join(output_dataset_path, "train", "labels")
  output_train_image_path = os.path.join(output_dataset_path, "train", "images")
  current_id = move_files(train_subset, output_train_ground_truth_path,
                          output_train_image_path, current_id)

  return current_id


Convert the LettuceMOT dataset to YOLO format.



In [None]:
DATASET_ROOT_DIR = "/content/drive/MyDrive/BRAE_428_Colab/LettuceMOT"
YOLO_DATASET_PATH = "/content/drive/MyDrive/BRAE_428_Colab/YoloDataset_v2"
# glob returns entries in arbitrary order; sort so the IDs handed out to each
# subset are the same on every run.
data_subsets = sorted(glob(f"{DATASET_ROOT_DIR}/*"))

# Refuse to write into an existing dataset directory.
if os.path.exists(YOLO_DATASET_PATH):
  raise ValueError("Make a new dir to avoid overwriting dataset")
else:
  os.makedirs(YOLO_DATASET_PATH)

# Running counter of new file IDs; each subset continues where the previous
# one stopped so file names stay unique across the whole dataset.
data_count = 0
all_ids = []
for subset_path in data_subsets:
  mot_label_file_path = os.path.join(subset_path, "gt", "gt.txt")
  output_label_folder = os.path.join(subset_path, "yolo_gt")

  # Convert the ground truth only once per subset; an existing yolo_gt folder
  # is reused as-is.
  if not os.path.exists(output_label_folder):
    print(f"Converting file {mot_label_file_path} to {output_label_folder}...")
    os.makedirs(output_label_folder)
    # The per-frame labels must go to the subset's yolo_gt folder, because
    # that is where organize_datasets reads them from below.
    convert_files(mot_label_file_path, output_label_folder)

  image_path = os.path.join(subset_path, "img")
  print(f"Moving files from {image_path} to {YOLO_DATASET_PATH}...")
  data_count = organize_datasets(output_label_folder, image_path,
                                 YOLO_DATASET_PATH, data_count)
 
  

Moving files from /content/drive/MyDrive/BRAE_428_Colab/LettuceMOT/straight1/img to /content/drive/MyDrive/BRAE_428_Colab/YoloDataset_v2...
Moving files from /content/drive/MyDrive/BRAE_428_Colab/LettuceMOT/B&F2/img to /content/drive/MyDrive/BRAE_428_Colab/YoloDataset_v2...
Moving files from /content/drive/MyDrive/BRAE_428_Colab/LettuceMOT/B&F1/img to /content/drive/MyDrive/BRAE_428_Colab/YoloDataset_v2...
Moving files from /content/drive/MyDrive/BRAE_428_Colab/LettuceMOT/straight2/img to /content/drive/MyDrive/BRAE_428_Colab/YoloDataset_v2...
Moving files from /content/drive/MyDrive/BRAE_428_Colab/LettuceMOT/straight4/img to /content/drive/MyDrive/BRAE_428_Colab/YoloDataset_v2...
Moving files from /content/drive/MyDrive/BRAE_428_Colab/LettuceMOT/straight3/img to /content/drive/MyDrive/BRAE_428_Colab/YoloDataset_v2...


Verify the correctness of the generated YOLO-format dataset.

In [None]:
all_label_fs = []
all_image_fs = []

# Check that the number of labels matches the number of images
# in each subset, and that every label has an image with the same ID.
# This must point at the directory the conversion cell wrote to.
YOLO_DATASET_PATH = "/content/drive/MyDrive/BRAE_428_Colab/YoloDataset_v2"
for data_subset in ["train", "val"]:
  label_path = os.path.join(YOLO_DATASET_PATH, data_subset, "labels")
  image_path = os.path.join(YOLO_DATASET_PATH, data_subset, "images")

  label_fs = glob(f"{label_path}/*.txt")
  image_fs = glob(f"{image_path}/*.png")
  # File names without extension; a label and its image share this ID.
  label_ids = {os.path.splitext(os.path.basename(f))[0] for f in label_fs}
  image_ids = {os.path.splitext(os.path.basename(f))[0] for f in image_fs}
  if len(label_fs) == len(image_fs) and label_ids == image_ids:
    print(f"{data_subset} subset: ✅")
  else:
    print(f"{data_subset} subset: ❌")
    print(f"{len(label_fs)} labels, {len(image_fs)} images")

  all_label_fs += label_fs
  all_image_fs += image_fs

# Check that no file names are repeated.
# Comparing base names across both subsets also catches an ID that appears in
# train and val at the same time.
all_label_fs = [os.path.basename(f) for f in all_label_fs]
all_image_fs = [os.path.basename(f) for f in all_image_fs]
if check_unique(all_label_fs):
  print("No label file names reapeated: ✅")
else:
  print("Label file names repeated: ❌")

if check_unique(all_image_fs):
  print("No image file names repeated: ✅")
else:
  print("Image file names repeated: ❌")

print(f"Dataset size: {len(all_label_fs)}")

train subset: ✅
val subset: ✅
No label file names reapeated: ✅
No image file names repeated: ✅
Dataset size: 3163
