In [None]:
import os
import sys

# Report which interpreter and which working directory this kernel is using;
# the relative "../data/..." paths in later cells resolve against the latter.
print(sys.executable)
print(os.getcwd())

In [None]:
import matplotlib.pyplot as plt
from torchvision.io import read_image

# Load one sample frame together with its colour-coded mask.
image = read_image("../data/frames/pressbox-001.jpg")
mask = read_image("../data/frames/pressbox-001-mask.jpg")

# Side-by-side preview: frame on the left, mask on the right.
plt.figure(figsize=(16, 8))

# permute(1, 2, 0) moves the first axis to the end before handing the
# tensor to imshow.
plt.subplot(1, 2, 1)
plt.title("Image")
plt.imshow(image.permute(1, 2, 0))

plt.subplot(1, 2, 2)
plt.title("Mask")
plt.imshow(mask.permute(1, 2, 0))


In [None]:
import os
from sklearn.utils import shuffle
import torch
from torchvision import transforms
from PIL import Image
from sklearn.cluster import KMeans

def quantize_image(image, n_colors):
    """Quantize a channels-first image tensor to at most ``n_colors`` colors.

    KMeans is fitted on every distinct pixel color plus a random sample of up
    to 100 pixels, and every pixel is then replaced by the center of the
    cluster it is assigned to.

    Args:
        image: tensor of shape (channels, height, width).
        n_colors: upper bound on the number of distinct colors in the result;
            must be at least 1.

    Returns:
        A new tensor with the same shape, dtype and device as ``image`` whose
        pixels take at most ``n_colors`` distinct colors.

    Raises:
        ValueError: if ``n_colors`` is smaller than 1.
    """
    if n_colors < 1:
        raise ValueError(f"n_colors must be >= 1, got {n_colors}")

    # Work on the argument itself rather than on a notebook-level variable.
    channels, height, width = image.shape

    # One row per pixel, shape (height * width, channels). Clustering runs on
    # a float32 CPU copy so any input dtype/device can be handed to sklearn.
    pixels = (
        image.detach()
        .to(device="cpu", dtype=torch.float32)
        .reshape(channels, -1)
        .T.contiguous()
    )
    n_pixels = pixels.shape[0]
    if n_pixels == 0:
        # Nothing to cluster in an empty image.
        return image.clone()

    # dim=0 de-duplicates rows (colors); dim=1 would de-duplicate channels.
    unique_colors = torch.unique(pixels, dim=0)

    # shuffle() rejects n_samples larger than the input, so clamp for tiny images.
    sampled_colors = torch.from_numpy(
        shuffle(pixels.numpy(), random_state=0, n_samples=min(100, n_pixels))
    )
    combined_colors = torch.vstack([unique_colors, sampled_colors]).numpy()

    # KMeans cannot form more clusters than there are distinct colors.
    n_clusters = min(n_colors, unique_colors.shape[0])

    # Fixed seed and explicit n_init keep the result reproducible across runs.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    kmeans.fit(combined_colors)

    # Map every pixel (not only the fitted subset) to its cluster center.
    labels = torch.from_numpy(kmeans.predict(pixels.numpy())).long()
    centers = torch.from_numpy(kmeans.cluster_centers_)
    quantized = centers[labels]

    if not image.is_floating_point():
        # Round instead of truncating when casting back to an integer dtype.
        quantized = quantized.round()

    # Back from (pixels, channels) to (channels, height, width).
    return quantized.T.reshape(channels, height, width).to(
        dtype=image.dtype, device=image.device
    )


import matplotlib.pyplot as plt
from torchvision.io import read_image


# Load the colour-coded mask and divide by 255 (presumably mapping 8-bit
# values to [0, 1] -- confirm against the dtype read_image returns).
mask = read_image("../data/frames/pressbox-001-mask.jpg")
mask = mask / 255.0
print("Mask shape:", mask.shape)
quantized_mask = quantize_image(mask, 24)
print("Quantized mask shape:", quantized_mask.shape)

# Compare the mask with its quantized version side by side.
plt.figure(figsize=(16, 8))
plt.subplot(121)
plt.title("Mask")
# Plot the mask itself: this panel previously showed `image`, a frame left
# over from an earlier cell, under the "Mask" title.
plt.imshow(mask.permute(1, 2, 0))
plt.subplot(122)
plt.title("Quantized")
plt.imshow(quantized_mask.permute(1, 2, 0))

# for filename in os.listdir(path):
#     if filename.endswith('.jpeg') or filename.endswith('.jpg'):
#         # Open image and quantize it
#         image = Image.open(os.path.join(path, filename))
#         image = quantize_image(image, 24)
        
#         # Save quantized image as PNG
#         new_filename = os.path.splitext(filename)[0] + '.png'
#         image.save(os.path.join(path, new_filename))

In [None]:
import os
import torch

from torchvision.io import read_image
from torchvision.ops.boxes import masks_to_boxes
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F


class FootballDataset(torch.utils.data.Dataset):
    """Instance-segmentation dataset built from frames and colour-coded masks.

    Every file ``<stem>.<ext>`` in ``frame_directory`` is paired with the mask
    ``<stem>-mask.jpg`` from the same directory; files without such a mask are
    ignored. Each sample is an ``(image, target)`` pair where ``target`` holds
    the boxes, binary masks, labels, image id, areas and crowd flags of the
    instances found in the mask.
    """

    # Filename suffix that marks a file as the mask of a frame.
    MASK_SUFFIX = "-mask.jpg"

    def __init__(self, frame_directory, transforms):
        """Index the frame/mask pairs found in ``frame_directory``.

        Args:
            frame_directory: directory containing frames and their masks.
            transforms: callable applied as ``transforms(img, target)`` to
                every sample, or ``None`` to return samples untouched.
        """
        self.frame_directory = frame_directory
        self.transforms = transforms

        filenames = sorted(os.listdir(frame_directory))
        available = set(filenames)

        # Pair each frame with its own mask by name. Indexing two separately
        # sorted lists mispairs as soon as a mask is missing, a stray file is
        # present, or the names sort differently with the suffix appended.
        self.imgs = []
        self.masks = []
        for filename in filenames:
            if filename.endswith(self.MASK_SUFFIX):
                continue
            mask_name = os.path.splitext(filename)[0] + self.MASK_SUFFIX
            if mask_name in available:
                self.imgs.append(filename)
                self.masks.append(mask_name)

    @staticmethod
    def _split_instances(mask):
        """Turn a colour-coded mask into one binary mask per instance.

        Args:
            mask: integer tensor of shape (channels, height, width); channel
                values are assumed to lie in [0, 255] -- TODO confirm for
                non-8-bit inputs.

        Returns:
            uint8 tensor of shape (num_instances, height, width) with 1 where
            a pixel belongs to the instance and 0 elsewhere.
        """
        # Pack all channels of a pixel into a single integer id so that a
        # colour (not an individual channel value) identifies an instance.
        # For a single-channel mask the id is just the pixel value.
        color_ids = torch.zeros(mask.shape[1:], dtype=torch.int64, device=mask.device)
        for channel in mask.to(torch.int64):
            color_ids = color_ids * 256 + channel

        # torch.unique returns sorted ids; the first (smallest) one is treated
        # as the background and removed.
        obj_ids = torch.unique(color_ids)[1:]

        # Broadcast (K, 1, 1) ids against the (H, W) id map -> (K, H, W).
        return (color_ids == obj_ids[:, None, None]).to(dtype=torch.uint8)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, target)``.

        ``target`` is a dict with the keys ``boxes``, ``masks``, ``labels``,
        ``image_id``, ``area`` and ``iscrowd``.
        """
        # load images and masks
        img_path = os.path.join(self.frame_directory, self.imgs[idx])
        mask_path = os.path.join(self.frame_directory, self.masks[idx])
        img = read_image(img_path)
        mask = read_image(mask_path)

        # instances are encoded as different colors
        # NOTE(review): the masks are JPEG files; lossy compression may add
        # stray colours, each of which would become its own instance here --
        # confirm the masks are clean (or quantized) before training.
        masks = self._split_instances(mask)
        num_objs = masks.shape[0]

        # get bounding box coordinates for each mask
        boxes = masks_to_boxes(masks)

        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)

        image_id = idx
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Wrap sample and targets into torchvision tv_tensors:
        img = tv_tensors.Image(img)

        target = {}
        target["boxes"] = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=F.get_size(img))
        target["masks"] = tv_tensors.Mask(masks)
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of frame/mask pairs."""
        return len(self.imgs)