In [86]:
from ultralytics import YOLO
import pandas as pd
import csv
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
import os

# Run on the GPU when PyTorch reports one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the yolov8n.pt weights (fetched on first use, as the cell output shows)
# and move the model onto the chosen device.
model = YOLO("yolov8n.pt").to(device)

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to 'yolov8n.pt'...
100%|██████████| 6.23M/6.23M [00:00<00:00, 30.1MB/s]


In [None]:
# Open the sample picture and copy its pixels into a NumPy array; both names
# are read by later cells.
cat_dog_img = Image.open("cat_dog.jpg")
cat_dog_img_array = np.array(cat_dog_img)
# Last expression of the cell: rebuild a PIL image from the array so the
# notebook renders exactly the pixel data that was loaded.
Image.fromarray(cat_dog_img_array)

In [None]:
# Pass the PIL image itself rather than the array made from it: Ultralytics
# interprets raw NumPy input as BGR (OpenCV convention), so an RGB array from
# PIL would reach the network with red and blue swapped. PIL input is
# converted by the library.
result = model.predict(cat_dog_img)

# Results.plot() returns a BGR array; reverse the channel axis to get the RGB
# order PIL expects before building the image.
img = Image.fromarray(result[0].plot()[..., ::-1])
img.save("cat_dog_result.jpg")
img

In [None]:
# download dataset metadata
!bash setup.sh

In [None]:
def create_data_labels(
    source_file: str, set_name: str = "training", count: int = 10000
) -> None:
    """Write per-image label files and a download list for food annotations.

    Streams the bounding-box CSV at ``source_file`` and keeps the rows whose
    label column equals the food class code. For the first ``count`` distinct
    image IDs (in order of first appearance) it writes:

    * ``datasets/{set_name}_images/<ImageID>.txt`` with one line per box in
      the form ``0 x_center y_center width height``;
    * ``metadata/{set_name}_download.txt`` with one ``{set_name}/<ImageID>``
      line per selected image.

    Coordinates are copied through in whatever units the CSV uses
    (presumably normalised to [0, 1] -- confirm against the dataset docs).

    Args:
        source_file: Path to the annotation CSV. Column 0 is read as the image
            ID, column 2 as the label code and columns 4-7 as XMin, XMax,
            YMin, YMax.
        set_name: Split name used in the output directory, the download-list
            file name and the prefix of each download-list entry.
        count: Maximum number of distinct images to export.

    Raises:
        OSError: If ``source_file`` cannot be read or an output cannot be
            written.
        ValueError: If a matching row holds a non-numeric coordinate.
    """
    food_code = "/m/02wbm"  # found in oidv6-class-descriptions.csv
    label_dir = f"datasets/{set_name}_images"

    # makedirs(exist_ok=True) creates missing parents and makes re-running the
    # cell harmless.
    os.makedirs(label_dir, exist_ok=True)
    os.makedirs("metadata", exist_ok=True)

    # Stream the file with csv.reader instead of pd.read_csv so the full
    # annotation table never has to fit in memory; only food boxes are kept.
    # Boxes are grouped per image while reading (dicts preserve insertion
    # order), which avoids re-scanning every box once per image later.
    boxes_by_image = {}
    with open(source_file, "r", newline="") as file:
        for record in csv.reader(file):
            # Blank or truncated rows cannot carry a full box; skip them
            # instead of failing with IndexError. The header row is dropped by
            # the label comparison.
            if len(record) < 8 or record[2] != food_code:
                continue
            box = tuple(float(value) for value in record[4:8])
            boxes_by_image.setdefault(record[0], []).append(box)

    # First `count` distinct image IDs, in order of first appearance.
    image_ids = list(boxes_by_image)[:count]

    for image_id in image_ids:
        with open(f"{label_dir}/{image_id}.txt", "w") as file:
            for xmin, xmax, ymin, ymax in boxes_by_image[image_id]:
                # Convert corner coordinates to centre/size form.
                x_center = (xmin + xmax) / 2
                y_center = (ymin + ymax) / 2
                width = xmax - xmin
                height = ymax - ymin
                # Class index 0 is the single "food" class.
                file.write(f"0 {x_center} {y_center} {width} {height}\n")

    with open(f"metadata/{set_name}_download.txt", "w") as file:
        file.writelines(f"{set_name}/{image_id}\n" for image_id in image_ids)


# Build label files and download lists for each split:
# (annotation CSV, split name, maximum number of images).
for annotations_csv, split_name, image_limit in (
    ("metadata/oidv6-train-annotations-bbox.csv", "train", 5000),
    ("metadata/validation-annotations-bbox.csv", "validation", 500),
    ("metadata/test-annotations-bbox.csv", "test", 5000),
):
    create_data_labels(annotations_csv, set_name=split_name, count=image_limit)

In [None]:
!python downloader.py metadata/train_download.txt --download_folder=datasets/train_images --num_processes=10

In [None]:
!python downloader.py metadata/validation_download.txt --download_folder=datasets/validation_images --num_processes=10

In [None]:
!python downloader.py metadata/test_download.txt --download_folder=datasets/test_images --num_processes=10

In [None]:
# Training options gathered in one place so they are easy to tweak.
# NOTE(review): the meaning of iou/patience/visualize is defined by the
# Ultralytics configuration reference -- confirm there before changing them.
train_options = {
    "data": "food.yaml",
    "epochs": 5,
    "batch": 6,
    "pretrained": True,
    "iou": 0.5,
    "visualize": False,
    "patience": 0,
}

# `result` is also the name the prediction cells assign to, so it is
# overwritten here.
result = model.train(**train_options)

In [None]:
# model = YOLO("runs/detect/train/weights/best.pt").to(device)

files = os.listdir("datasets/test_images/")
img_files = [file for file in files if file.endswith(".jpg")]
img_files.sort()

size = 5
offset = 20

results = []
prediction_plots = []
for i, img in enumerate(img_files[offset : offset + size**2]):
    # print(i)
    image = Image.open("datasets/test_images/" + img)
    img_array = np.array(image)
    result = model.predict(img_array, visualize=False, verbose=False)
    prediction_plots.append(result[0].plot())
    results.append(result)

In [None]:
fig, ax = plt.subplots(size, size, figsize=(5 * size, 5 * size))
for i in range(size):
    for j in range(size):
        ax[i][j].imshow(prediction_plots[i * size + j])

fig.savefig("predictions.png")

In [None]:
from PIL import ImageFilter


def blur_result(result):
    img = Image.fromarray(result[0].orig_img)
    xyxy = result[0].boxes.xyxy.to(int).tolist()
    for x1, y1, x2, y2 in xyxy:
        # blur only the part within the bounding box
        crop_img = img.crop((x1, y1, x2, y2))
        crop_img = crop_img.filter(ImageFilter.GaussianBlur(radius=20))
        img.paste(crop_img, (x1, y1, x2, y2))
    return img

In [None]:
imgs = []
for result in results:
    imgs.append(blur_result(result))

fig, ax = plt.subplots(size, size, figsize=(5 * size, 5 * size))
for i in range(size):
    for j in range(size):
        ax[i][j].imshow(imgs[i * size + j])

fig.savefig("blurred.png")