# Benchmarking YOLOv8 on EVERYTHING using ClearML

In [1]:
from clearml import Task

# Get the base task to clone
template_task = Task.get_task(task_id='f7f8cbc0eb4e405a902ca3103a5859f6')

In [2]:
queues = [
    # AWS
    "g4dn.xlarge",
    "g4dn.2xlarge",
    "g4dn.4xlarge",
    "g4dn.8xlarge",
    "p3.2xlarge",
    "g5.xlarge",
    "g5.2xlarge",
    # Genesis
    "3060Ti",
    "3080",
    "3090",
    "3080_Optimized",
    "3090_Optimized"
]

In [3]:
ideal_batch_size = {
    # AWS
    "g4dn.xlarge": 16,
    "g4dn.2xlarge": 16,
    "g4dn.4xlarge": 16,
    "g4dn.8xlarge": 16,
    "p3.2xlarge": 32,
    "g5.xlarge": 32,
    "g5.2xlarge": 32,
    # Genesis
    "3060Ti": 4,
    "3080": 8,
    "3090": 16,
    "3080_Optimized": 8,
    "3090_Optimized": 16
}

In [4]:
# Europe
# aws_cost_mapping = {
#     "g4dn.xlarge": 0.5870,
#     "g4dn.2xlarge": 0.8380,
#     "g4dn.4xlarge": 1.3420,
#     "g4dn.8xlarge": 2.4260,
#     "p3.2xlarge": 3.3050,
#     "g5.xlarge": 1.123,
#     "g5.2xlarge": 1.353
# }

# Murica
aws_cost_mapping = {
    "g4dn.xlarge": 0.526,
    "g4dn.2xlarge": 0.752,
    "g4dn.4xlarge": 1.204,
    "g4dn.8xlarge": 2.176,
    "p3.2xlarge": 3.060,
    "g5.xlarge": 1.006,
    "g5.2xlarge": 1.212
}

aws_spots_cost_mapping = {
    "g4dn.xlarge": 0.1765,
    "g4dn.2xlarge": 0.2458,
    "g4dn.4xlarge": 0.4062,
    "g4dn.8xlarge": 0.6528,
    "p3.2xlarge": 1.651,
    "g5.xlarge": 0.3018,
    "g5.2xlarge": 0.3710
}

genesis_cost_mapping = {
    "3060Ti": 0.65,
    "3080": 0.90,
    "3090": 1.30,
    "3080_Optimized": 1,
    "3090_Optimized": 1.40
}

clearml_cost_mapping = {
    "3060Ti": 0.39,
    "3080": 0.54,
    "3090": 0.78,
    "3080_Optimized": 0.6,
    "3090_Optimized": 0.84
}

In [17]:
import numpy as np
import pickle
import time

import plotly.graph_objects as go


# def get_task_runtimes(tasks):
#     runtimes = {
#         task.name.replace("YOLOv8_", ""): (task.get_reported_scalars()['Epoch Time']['Epoch Time']['y'][2:-2], task.get_reported_single_values()["Inference speed (ms/img)"])
#         for task in tasks
#         if 'Epoch Time' in task.get_reported_scalars() and
#         "Inference speed (ms/img)" in task.get_reported_single_values()
#     }
#     return runtimes

def get_task_runtimes(tasks):
    runtimes = {
        task.name.replace("YOLOv8_", ""): task.get_reported_scalars()['Epoch Time']['Epoch Time']['y'][2:-2]
        for task in tasks
        if 'Epoch Time' in task.get_reported_scalars()
    }
    return runtimes

def get_lastest_tasks(queues, project_name="YOLOv8"):
    tasks = []
    for queue_name in queues:
        t = Task.get_task(project_name=project_name, task_name=f"YOLOv8_{queue_name}", task_filter={'status': ['completed'], 'order_by': ['-hyperparams.General.batch']})
        if t:
            tasks.append(t)
    return tasks

def clone_and_enqueue(task, queue, epochs=100, batch_size=None, tags=None, dataset=None):
    new_task = Task.clone(source_task=task)
    new_task.update_parameters({"General/epochs": epochs})
    if batch_size:
        new_task.update_parameters({"General/batch": batch_size})
    if tags:
        new_task.add_tags(tags)
    if dataset:
        new_task.update_parameters({"General/data": dataset})
    new_task.rename(f"YOLOv8_{queue}")
    Task.enqueue(task=new_task, queue_name=queue)
    return new_task

def poll_status(tasks):
    flag = False
    while flag == False:
        flag = True
        for task in tasks:
            # Check if succeeded
            print(task.status)
            if task.status == 'failure':
                print(f"[FAILED] Task {task.name} has failed!!")
            elif task.status != 'completed':
                flag = False
        print("===============")
        time.sleep(5)
    print("All done!")

def calc_cost(runtimes):
    cost = {}
    for instance_name, timings in runtimes.items():
        avg_timing = np.mean(timings)
        epochs_per_hour = 3600 / avg_timing
        price = aws_cost_mapping.get(instance_name, None)
        if price:
            cost[instance_name] = (price, price / epochs_per_hour)
            cost[f"{instance_name}_spot"] = (aws_spots_cost_mapping[instance_name], aws_spots_cost_mapping[instance_name] / epochs_per_hour)
        else:
            cost[f"genesis_{instance_name}"] = (genesis_cost_mapping[instance_name], genesis_cost_mapping[instance_name] / epochs_per_hour)
            cost[instance_name.replace("_O", ".o")] = (clearml_cost_mapping[instance_name], clearml_cost_mapping[instance_name] / epochs_per_hour)

    return cost

def plot_runtimes(runtimes):
    instances = []
    speeds = []
    for instance_name, timings in runtimes.items():
        avg_timing = np.mean(timings)
        epochs_per_hour = 3600 / avg_timing
        instances.append(instance_name.replace("_O", ".o"))
        speeds.append(epochs_per_hour)
    
    fig = go.Figure(
        data=[
            go.Bar(name='Training Speed', x=instances, y=speeds, marker={"color": "#14aa8c"})
        ]
    )
    # Change the bar mode
    fig.update_layout(
        title="Model Training Speed (Epochs/Hour - Higher is Better)",
        barmode='group',
        autosize=False,
        width=1920,
        height=1080,
        font_size=30)
    fig.show()

# def plot_runtimes(runtimes):
#     instances = []
#     training_speeds = []
#     inference_speeds = []
#     for instance_name, timings in runtimes.items():
#         # Training
#         avg_timing = np.mean(timings[0])
#         epochs_per_hour = 3600 / avg_timing
#         instances.append(instance_name)
#         training_speeds.append(epochs_per_hour)

#         # Inference
#         images_per_second = (1 / timings[1]) * 1000
#         inference_speeds.append(images_per_second)
    
#     fig = go.Figure(
#         data=[
#             go.Bar(name='Training Speed (Epochs/Hour)', x=instances, y=training_speeds, marker={"color": "#0b2471"}, yaxis="y", offsetgroup=1),
#             go.Bar(name='Inference Speed (Images/Second)', x=instances, y=inference_speeds, marker={"color": "#14aa8c"}, yaxis="y2", offsetgroup=2)
#         ],
#         layout={
#             'yaxis': {'title': 'Epochs/Hour'},
#             'yaxis2': {'title': 'Images/Second', 'overlaying': 'y', 'side': 'right'}
#         }
#     )
#     # Change the bar mode
#     fig.update_layout(
#         title="Model Training Speed (Higher is Better)",
#         barmode='group',
#         legend=dict(
#             orientation="h",
#             yanchor="bottom",
#             y=1.02,
#             xanchor="right",
#             x=1
#         )
#     )
#     fig.show()
    
def plot_performance_per_dollar(performance_per_dollar):
    x, y = zip(*sorted(performance_per_dollar.items(), key=lambda x: -x[1][1])) # unpack a list of pairs into two tuples
    rt = [r[0] for r in y]
    ef = [e[1] for e in y]

    # fig = go.Figure(data=[
    #     go.Bar(name='Average Epoch Runtime (In seconds, lower is better)', x=x, y=rt),
    #     go.Bar(name='Cost Efficiency (Epochs/Dollar - higher is better)', x=x, y=ef)
    # ])
    # # Change the bar mode
    # fig.update_layout(title="Model Training Cost Efficiency", barmode='group')
    # fig.show()

    fig = go.Figure(
        data=[
            go.Bar(name='Cost Efficiency (Epochs/Dollar - higher is better)', x=x, y=ef)
        ]
    )
    # Change the bar mode
    fig.update_layout(
        title="Model Training Cost Efficiency",
        barmode='group',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        width=1920,
        height=1080,
        font_size=30
    )
    fig.show()

def plot_cost(cost):
    instance_names = []
    prices = []
    efficiencies = []
    for instance_name, value in cost.items():
        instance_names.append(instance_name)
        prices.append(value[0])
        efficiencies.append(value[1])

    fig = go.Figure(
        data=[
            go.Bar(name='$/Hour', yaxis="y", x=instance_names, y=prices, offsetgroup=1, marker={"color": "#f6f6f6"}),
            go.Bar(name='$/Epoch', yaxis="y2", x=instance_names, y=efficiencies, offsetgroup=2, marker={"color": "#14aa8c"})
        ],
        layout={
            'yaxis': {'title': '$/Hour'},
            'yaxis2': {'title': '$/Epoch', 'overlaying': 'y', 'side': 'right'}
        }
    )
    # Change the bar mode
    fig.update_layout(
        title="Model Training Cost vs Performance (Lower is Better)",
        barmode='group',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        width=1920,
        height=1080,
        font_size=30
    )
    fig.show()

# Enqueue all the things!

In [73]:
# Fill all the queues that we need with 2 tasks: 2 different batch sizes, same code.
tasks = []
for queue in queues:
    tasks.append(clone_and_enqueue(template_task, queue, epochs=10, batch_size=ideal_batch_size[queue], tags=['VISDRONE'], dataset="VisDrone.yaml"))
    tasks.append(clone_and_enqueue(template_task, queue, epochs=10, batch_size=ideal_batch_size[queue]//2, tags=['VISDRONE'], dataset="VisDrone.yaml"))

In [None]:
poll_status(tasks)

# Crunch them results!

In [9]:
from clearml import Dataset
ds = Dataset.get(dataset_id="258d83dcb36c4ee8ad5c69a9d0041666")

In [6]:
# Get all the tasks from ClearML if you just want to analyse existing tasks
# OTHERWISE SKIP THIS CELL TO USE THE TASKS YOU JUST ENQUEUED
tasks = get_lastest_tasks(queues, project_name="YOLOv8")
[(task.name, task.id) for task in tasks]

[('YOLOv8_g4dn.xlarge', '3ac3a02ba9054175a43aff9629a9bbe5'),
 ('YOLOv8_g4dn.2xlarge', '07561120abb7453cbfc1b450439e7e38'),
 ('YOLOv8_g4dn.4xlarge', 'b627eefb805347199004348ffcfb1eba'),
 ('YOLOv8_g4dn.8xlarge', '5a74a6fe11d345d9a717a16a2bd38d21'),
 ('YOLOv8_p3.2xlarge', 'd01ff6d3b97145c4a3d967f41704ae2c'),
 ('YOLOv8_g5.xlarge', 'd832ae23b5d64a058bf0fed131352ffd'),
 ('YOLOv8_g5.2xlarge', 'a5aed688e3514d72b1fde8c9af5ca9f0'),
 ('YOLOv8_3060Ti', 'fd62020a0c5c48c6a277815e3620338b'),
 ('YOLOv8_3080', 'b4d060065f7f4e4cbf87a80309f6a61c'),
 ('YOLOv8_3090', 'ad20be20753043da82055c2906ead8ab'),
 ('YOLOv8_3080_Optimized', 'a665b2aca522403fa944ec932451e2a0'),
 ('YOLOv8_3090_Optimized', '1cb273016bbc4ec4be02184f314742ac')]

In [18]:
runtimes = get_task_runtimes(tasks)
plot_runtimes(runtimes)

In [19]:
performance_per_dollar = calc_cost(runtimes)
performance_per_dollar

{'g4dn.xlarge': (0.526, 0.01738282791985406),
 'g4dn.xlarge_spot': (0.1765, 0.005832831041547987),
 'g4dn.2xlarge': (0.752, 0.02128476609971788),
 'g4dn.2xlarge_spot': (0.2458, 0.006957174876742892),
 'g4dn.4xlarge': (1.204, 0.03301475227779812),
 'g4dn.4xlarge_spot': (0.4062, 0.01113836576016744),
 'g4dn.8xlarge': (2.176, 0.058840871853298615),
 'g4dn.8xlarge_spot': (0.6528, 0.017652261555989587),
 'p3.2xlarge': (3.06, 0.08810512015024821),
 'p3.2xlarge_spot': (1.651, 0.04753645534903915),
 'g5.xlarge': (1.006, 0.02354834675824201),
 'g5.xlarge_spot': (0.3018, 0.007064504027472602),
 'g5.2xlarge': (1.212, 0.020815802877214216),
 'g5.2xlarge_spot': (0.371, 0.006371834049048246),
 'genesis_3060Ti': (0.65, 0.033214018574467416),
 '3060Ti': (0.39, 0.019928411144680448),
 'genesis_3080': (0.9, 0.031674277623494465),
 '3080': (0.54, 0.01900456657409668),
 'genesis_3090': (1.3, 0.039732342472782844),
 '3090': (0.78, 0.023839405483669706),
 'genesis_3080_Optimized': (1, 0.02664920383029514),


In [20]:
not_show = ["genesis_", "_spot"]
plot_cost({k: v for k, v in performance_per_dollar.items() if not any([n in k for n in not_show]) })

In [21]:
closeup_cost = {}
for key in ["g5.xlarge", "g5.2xlarge", "clearml_3080_Optimized", "clearml_3090_Optimized"]:
    closeup_cost[key] = performance_per_dollar[key]
plot_cost(closeup_cost)


KeyError: 'clearml_3080_Optimized'

# Spots

In [38]:
not_show = ["genesis_"]
plot_cost({k: v for k, v in performance_per_dollar.items() if not any([n in k for n in not_show]) })

# Getting the right batch size

In [79]:
for task in tasks:
    max_gpu_util = max(task.get_reported_scalars()[":monitor:gpu"]["gpu_0_mem_usage"]["y"])
    print(task.name, task.id, max_gpu_util)

YOLOv8_g4dn.xlarge 3ac3a02ba9054175a43aff9629a9bbe5 97.75399780273438
YOLOv8_g4dn.2xlarge 07561120abb7453cbfc1b450439e7e38 97.44100189208984
YOLOv8_g4dn.4xlarge b627eefb805347199004348ffcfb1eba 97.44100189208984
YOLOv8_g4dn.8xlarge 5a74a6fe11d345d9a717a16a2bd38d21 97.44100189208984
YOLOv8_p3.2xlarge d01ff6d3b97145c4a3d967f41704ae2c 90.63099670410156
YOLOv8_g5.xlarge d832ae23b5d64a058bf0fed131352ffd 98.76200103759766
YOLOv8_g5.2xlarge a5aed688e3514d72b1fde8c9af5ca9f0 76.48500061035156
YOLOv8_3060Ti fd62020a0c5c48c6a277815e3620338b 92.44599914550781
YOLOv8_3080 b4d060065f7f4e4cbf87a80309f6a61c 94.93900299072266
YOLOv8_3090 ad20be20753043da82055c2906ead8ab 91.9280014038086
YOLOv8_3080_Optimized a665b2aca522403fa944ec932451e2a0 83.41999816894531
YOLOv8_3090_Optimized 1cb273016bbc4ec4be02184f314742ac 96.072998046875


In [None]:
# coco 128, but not much use
# ideal_batch_size = {
#     # AWS
#     "g4dn.xlarge": 32,
#     "g4dn.2xlarge": 32,
#     "g4dn.4xlarge": 32,
#     "g4dn.8xlarge": 32,
#     # "p2.xlarge": 64,
#     # "g2.2xlarge",
#     # "p3.2xlarge",
#     # Genesis
#     "3060Ti": 16,
#     "3080": 32,
#     "3090": 64,
#     "3080_Optimized": 32,
#     "3090_Optimized": 64
# }

# For VisDrone
ideal_batch_size = {
    # AWS
    "g4dn.xlarge": 16,
    "g4dn.2xlarge": 16,
    "g4dn.4xlarge": 16,
    "g4dn.8xlarge": 16,
    # "p2.xlarge": 64,
    # "g2.2xlarge",
    # "p3.2xlarge",
    # Genesis
    "3060Ti": 4,
    "3080": 8,
    "3090": 16,
    "3080_Optimized": 8,
    "3090_Optimized": 16
}

In [None]:
poll_status(tasks)

# Just Testing

In [28]:
import numpy as np

data = {
    16: [],
    32: [],
    64: []
}

order = {
    16: [],
    32: [],
    64: []
}

for i, machine_type in enumerate(queues):
    tasks = Task.get_tasks(project_name="YOLOv8", tags=["BATCH_SIZE_TEST"], task_name=f"^YOLOv8_{machine_type}$", task_filter={'status': ['completed']})
    for task in tasks:
        batch_size = int(task.get_parameter("General/batch"))
        order[batch_size].append(task.name.replace("YOLOv8_", ""))
        print(task.name, batch_size)
        runtimes = get_task_runtimes([task])
        if runtimes:
            epoch_time = np.mean(runtimes[machine_type])
            data[batch_size].append(epoch_time)
        else:
            data[batch_size].append(0)

YOLOv8_g4dn.xlarge 16
YOLOv8_g4dn.xlarge 32
YOLOv8_g4dn.2xlarge 16
YOLOv8_g4dn.2xlarge 32
YOLOv8_g4dn.4xlarge 16
YOLOv8_g4dn.4xlarge 32
YOLOv8_g4dn.8xlarge 16
YOLOv8_g4dn.8xlarge 32
YOLOv8_3060Ti 16
YOLOv8_3080 16
YOLOv8_3080 32
YOLOv8_3090 16
YOLOv8_3090 32
YOLOv8_3090 64
YOLOv8_3080_Optimized 16
YOLOv8_3080_Optimized 32
YOLOv8_3090_Optimized 16
YOLOv8_3090_Optimized 32
YOLOv8_3090_Optimized 64


In [29]:
fig = go.Figure(data=[
    go.Bar(name='Batch 16', x=order[16], y=data[16]),
    go.Bar(name='batch 32', x=order[32], y=data[32]),
    go.Bar(name='batch 64', x=order[64], y=data[64])
])
# Change the bar mode
fig.update_layout(title="Average epoch runtime", barmode='group')
fig.show()

In [39]:
col = ["Some String Parameter", "one", "two", "three", "one", "three"]

values = list(range(len(col[1:])))
ticks = col[1:]
unique_ticks = list(set(ticks))
d = dict(label=col[0], values=values, tickvals=values, ticktext=ticks)

if len(ticks) != len(unique_ticks):  # Mapping duplicate ticktext
    ticktext = {key: i for i, key in enumerate(unique_ticks)}
    d["values"] = [ticktext[tick] for tick in ticks]
    d["tickvals"] = list(range(len(ticktext)))
    d["ticktext"] = list(ticktext.keys())