In [1]:
import torch
from torchinfo import summary
from ultralytics import YOLO
from torchvision.io import read_image
from torchvision.transforms import Resize, Compose
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
from pathlib import Path
from collections import defaultdict
import pandas as pd
import time
from tqdm import tqdm, trange

# This notebook only runs model.predict(), never backward(): switch autograd
# off globally so no gradient bookkeeping is done during the benchmark.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fb24b584610>

In [2]:
# Select the benchmark device: the first CUDA GPU when one is visible,
# otherwise the CPU. `device_cap` is the GPU's compute capability tuple and
# is None on the CPU path so the name is defined on both branches.
if torch.cuda.is_available():
    print('device count:', torch.cuda.device_count())
    device = torch.device("cuda", 0)
    # Query capability and name for the same device that was just selected
    # rather than mixing the implicit "current" device with index 0.
    device_cap = torch.cuda.get_device_capability(device)
    print(f"GPU {torch.cuda.get_device_name(device)} available with compatibility {device_cap}")
else:
    device = torch.device("cpu")
    device_cap = None
    print("GPU unavailable")

device count: 1
GPU NVIDIA GeForce GTX 1050 available with compatibility (6, 1)


In [3]:
class Coco2017Dataset(Dataset):
  """Map-style dataset over the ``*.jpg`` files located directly in a directory.

  Each item is a single image tensor as decoded by ``read_image``; no labels
  or annotations are returned.
  """

  def __init__(self, img_dir: str, transform=None) -> None:
    """Collect the image paths found in ``img_dir``.

    Args:
      img_dir: Directory containing the ``*.jpg`` files (not searched
        recursively).
      transform: Optional callable applied to every image after it has been
        divided by 255.

    Raises:
      AssertionError: If ``img_dir`` does not exist or is not a directory.
    """
    root = Path(img_dir)
    assert root.exists() and root.is_dir(), f"Image directory {img_dir} does not exist"
    # glob() yields entries in filesystem order, which differs between
    # machines and runs; sorting makes index -> image reproducible.
    self.img_paths = sorted(str(p) for p in root.glob("*.jpg"))
    self.transform = transform

  def __len__(self) -> int:
    """Return the number of images that were found."""
    return len(self.img_paths)

  def __getitem__(self, idx: int) -> torch.Tensor:
    """Decode and return the image stored at position ``idx``.

    With a transform configured, the decoded image is divided by 255, passed
    through the transform and cast to float32. Without a transform the tensor
    is returned exactly as ``read_image`` produced it (no scaling, no cast).

    Raises:
      IndexError: If ``idx`` is outside the list of collected paths.
    """
    img = read_image(self.img_paths[idx])
    # Compare against None explicitly: a transform object that happens to be
    # falsy (e.g. an empty container) must still be applied.
    if self.transform is not None:
      img = self.transform(img / 255).to(torch.float32)
    return img


class Expand:
  """Transform that broadcasts an image to three channels along dim 0."""

  def __call__(self, sample: torch.Tensor) -> torch.Tensor:
    """Return ``sample`` with a leading dimension of size 3.

    A tensor whose first dimension is already 3 is handed back unchanged (the
    very same object). Any other tensor is copied and the copy is expanded to
    ``(3, -1, -1)``, i.e. the two trailing dimensions keep their size.
    """
    channels = sample.shape[0]
    if channels == 3:
      return sample
    # Expand a detached copy so the result does not alias the caller's tensor.
    replica = sample.detach().clone()
    return replica.expand(3, -1, -1)

In [4]:
# Validation images, resized to a fixed 640x640 and forced to three channels.
coco_ds = Coco2017Dataset(
  img_dir="../../datasets/coco2017_val",
  transform=Compose([
      Resize(size=(640, 640), antialias=True),
      Expand()
  ])
)
# Optional visual sanity check:
# plt.imshow(coco_ds[0].permute(1, 2, 0))

# CUDA events used to time each prediction on the GPU.
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# The nanosecond timestamp gives every run its own results file.
timestamp = time.time_ns()
print(timestamp)
results_filepath = f'../../results_ultimate_0/pytorch-yolo-{timestamp}.csv'
# Create the output directory now: otherwise a missing directory is only
# discovered when the CSV is written, after the whole benchmark has run.
Path(results_filepath).parent.mkdir(parents=True, exist_ok=True)
# Column name -> list of per-image values, filled by the benchmark loop.
telemetry = defaultdict(list)

1738789097930510070


In [5]:
model = YOLO("yolov8m.pt", verbose=False).to(device)
assert model.device.type == "cuda", "latency is timed with CUDA events, so the model must be on a GPU"

# Start from an empty table: `telemetry` lives at notebook level, so without
# this a re-run of the cell would append a second copy of every row and the
# CSV written below would contain duplicated measurements.
telemetry.clear()

# Deliberately no separate warmup pass: with ~5k timed images the start-up
# effects were judged negligible.
# NOTE(review): the first few rows may still include one-time initialisation
# cost — consider dropping them during analysis.

# latency benchmark: one image per prediction (batch size 1)
for i, img in enumerate(tqdm(coco_ds, desc="Benchmark"), start=1):
  img = img.unsqueeze(dim=0).to(device)  # add the batch dimension

  start.record()
  res = model.predict(img, device=device, verbose=False)
  end.record()
  # Both events must have completed before elapsed_time() can be read.
  torch.cuda.synchronize()

  telemetry["framework"].append("PyTorch")
  telemetry["model_name"].append("YOLOv8m")
  telemetry["phase"].append("latency")
  telemetry["epoch"].append(i)  # 1-based index of the image in this run
  telemetry["loss"].append(-1)  # constant -1: no loss is computed in this phase
  # Two timings are kept so they can be compared afterwards:
  #   performance  - time between the two CUDA events around predict()
  #   elapsed_time - the "inference" entry of the speed dict on the result
  telemetry["performance"].append(start.elapsed_time(end))
  telemetry["elapsed_time"].append(res[0].speed["inference"])
pd.DataFrame(telemetry).to_csv(results_filepath, index=False)

Benchmark: 100%|██████████| 5000/5000 [07:02<00:00, 11.83it/s]
