In [34]:
# Download the dataset to __dataset__ folder.
# http://fsoco.cs.uni-freiburg.de/datasets/fsoco_segmentation_train.zip


def download_dataset(url: str, destination_folder: str):
    import os
    import requests
    import zipfile

    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    # If meta.json exists, then the dataset has been downloaded.
    if os.path.exists(os.path.join(destination_folder, "meta.json")):
        print("Dataset has been downloaded.")
        return

    filename = url.split("/")[-1]
    filepath = os.path.join(destination_folder, filename)

    if not os.path.exists(filepath):
        print("Downloading dataset...")
        r = requests.get(url, stream=True)
        with open(filepath, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    print("Extracting dataset...")
    with zipfile.ZipFile(filepath) as zf:
        zf.extractall(destination_folder)

    print("Done.")

    # Remove the zip file.
    os.remove(filepath)


download_dataset(
    "http://fsoco.cs.uni-freiburg.de/datasets/fsoco_bounding_boxes_train.zip",
    "__dataset__/boxes/supervisely",
)

Dataset has been downloaded.


In [35]:
from supervisely_yolo_multiple import S2Y
import os
import glob


# Convert the dataset to YOLO format.
def convert_to_yolo(source_folder: str, destination_folder: str):


    # Create data.yaml file.
    if os.path.exists(destination_folder):
        print("YOLO structure already exists.")
        return

    converter = S2Y(source_folder, destination_folder, skip_copy=True)
    class_names_array = converter.get_class_names_from_supervisely()
    converter.create_yolo_file_structure()
    converter.create_class_file(class_names_array)

    dataset_folders = [
        folder for folder in os.listdir(converter.supervisely_path)
    ]

    for folder in dataset_folders:
        labels_path = converter.supervisely_path + '/' + folder + "//ann"
        print("labels_path", labels_path)
        for file_path in glob.glob(os.path.join(labels_path, "*.json")):
            with open(file_path) as file:
                file_name = os.path.basename(file.name)[:-5]
                converter.create_text_file(folder, file_name, class_names_array)
        print("Yolo structure created at => {}".format(converter.yolo_path))


convert_to_yolo("__dataset__/boxes/supervisely", "__dataset__/boxes/yolo")

YOLO structure already exists.


In [36]:
import random
import shutil
import yaml
import os

# Create the data.yaml file and split into train, test and validate.
def set_up_dataset(yolo_folder: str):
    if os.path.exists(yolo_folder + "/data.yaml"):
        print("data.yaml already exists.")
        return

    # Create the train, test and validate folders.
    folders = ["train", "test", "validate"]
    for folder in folders:
        if not os.path.exists(yolo_folder + '/' + folder):
            os.makedirs(yolo_folder + "/" + folder)
            os.makedirs(yolo_folder + "/" + folder + "/images")
            os.makedirs(yolo_folder + "/" + folder + "/labels")

    # Copy the images and labels to the train, test and validate folders.
    if os.path.exists(yolo_folder + "/images"):
        for file in os.listdir(yolo_folder + "/images"):

            # Take a random number between 0 and 2.
            random_number = random.randint(0, 2)
            destination_folder = yolo_folder + '/' + folders[random_number]

            # Copy the image and the label to the destination folder.
            shutil.move(yolo_folder + "/images/" + file, destination_folder + "/images/" + file)
            file_name_without_extension = os.path.splitext(file)[0]
            shutil.move(yolo_folder + "/labels/" + file_name_without_extension + ".txt", destination_folder + "/labels/" + file_name_without_extension + ".txt")

    # Move classes.txt to the base folder.
    if not os.path.exists(yolo_folder + "/classes.txt"):
        shutil.move(yolo_folder + "/labels/classes.txt", yolo_folder + "/classes.txt")

    # Remove the images and labels folder.
    if os.path.exists(yolo_folder + "/images"):
        shutil.rmtree(yolo_folder + "/images")

    if os.path.exists(yolo_folder + "/labels"):
        shutil.rmtree(yolo_folder + "/labels")

    # Read the classes from the classes.txt file.
    with open(yolo_folder + "/classes.txt", "r") as file:
        classes = file.read().splitlines()

    # Create the data.yaml file.
    data = dict(
        names = classes,
        nc = len(classes),
        test = yolo_folder + "/test/images",
        train = yolo_folder + "/train/images",
        val = yolo_folder + "/validate/images"
    )

    with open(yolo_folder + "/data.yaml", 'w') as outfile:
        yaml.dump(data, outfile)


set_up_dataset("__dataset__/boxes/yolo")

data.yaml already exists.


In [12]:
import os
from ultralytics import YOLO

# Training.
# Check if best weights exist.
notebook_path = os.getcwd()
best_path = notebook_path + '/__models__/cones/weights/best.pt'
if os.path.exists(best_path):
    print("Best weights already exist.")
    model = YOLO(best_path)
else:
    model = YOLO("yolov8n.yaml")

results = model.train(data=notebook_path + '/__dataset__/boxes/yolo/data.yaml', epochs=100, imgsz=128, resume=True, project=notebook_path + '/__models__/', name='cones')


Best weights already exist.
Ultralytics YOLOv8.1.5 🚀 Python-3.12.1 torch-2.3.0.dev20240123 CPU (Apple M1)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/Users/riccardopersello/Developer/daml-notebooks/exam/__models__/cones/weights/best.pt, data=/Users/riccardopersello/Developer/daml-notebooks/exam/__dataset__/boxes/yolo/data.yaml, epochs=100, time=None, patience=50, batch=16, imgsz=128, save=True, save_period=-1, cache=False, device=None, workers=0, project=/Users/riccardopersello/Developer/daml-notebooks/exam/__models__/, name=cones, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=None, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buff

[34m[1mtrain: [0mScanning /Users/riccardopersello/Developer/daml-notebooks/exam/__dataset__/boxes/yolo/train/labels.cache... 3865 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3865/3865 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /Users/riccardopersello/Developer/daml-notebooks/exam/__dataset__/boxes/yolo/validate/labels.cache... 3944 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3944/3944 [00:00<?, ?it/s]

Plotting labels to /Users/riccardopersello/Developer/daml-notebooks/exam/__models__/cones/labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Resuming training from /Users/riccardopersello/Developer/daml-notebooks/exam/__models__/cones/weights/best.pt from epoch 2 to 100 total epochs
Image sizes 128 train, 128 val
Using 0 dataloader workers
Logging results to [1m/Users/riccardopersello/Developer/daml-notebooks/exam/__models__/cones[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/100         0G      2.628      4.938      1.074        103        128:  38%|███▊      | 93/242 [01:36<02:24,  1.03it/s]