In [None]:
import os
import sys

# Sanity check: report the interpreter running this kernel and the working
# directory that the notebook's relative paths are resolved against.
print(sys.executable)
print(os.getcwd())

In [None]:
import matplotlib.pyplot as plt
from torchvision.io import read_image


# Load one frame together with its mask.
image = read_image("../data/frames/pressbox-001.jpg")
mask = read_image("../data/frames/pressbox-001-mask.jpg")

# Draw them side by side. The tensors are permuted so that the first axis
# becomes the last one before being handed to imshow.
plt.figure(figsize=(16, 8))

plt.subplot(1, 2, 1)
plt.title("Image")
image_channels_last = image.permute(1, 2, 0)
plt.imshow(image_channels_last)

plt.subplot(1, 2, 2)
plt.title("Mask")
mask_channels_last = mask.permute(1, 2, 0)
plt.imshow(mask_channels_last)


In [None]:
import os
import numpy as np
from sklearn.utils import shuffle
import torch
import cv2
from torchvision import transforms
import torch.nn.functional as F
from torchvision.io import read_image
from PIL import Image
from sklearn.cluster import KMeans

def quantize_image(image, n_colors):
    """Reduce an image to ``n_colors`` colors with k-means clustering.

    The clustering is fitted on every distinct color in the image plus a
    small seeded random sample of pixels, so that frequent colors carry some
    extra weight. Every pixel is then replaced by the center of the cluster
    it is assigned to.

    Args:
        image: Tensor indexed as (channels, height, width).
        n_colors: Number of clusters, i.e. the size of the output palette.

    Returns:
        Tensor of shape (channels, height, width) whose pixels are k-means
        cluster centers.
    """
    channels, height, width = image.shape

    # Reshape image to be a list of pixels: one row per pixel, one column per channel.
    pixels = image.reshape(channels, -1).permute(1, 0)
    print("Pixels shape:", pixels.shape)  # e.g. [16384, 3] for a 128x128 RGB image

    # Include all the unique colors so that rare colors are seen by the fit.
    unique_colors = torch.unique(pixels, dim=0)

    # Sample up to 100 pixels to add frequency information. shuffle() samples
    # without replacement and raises if asked for more rows than exist, so the
    # request is capped at the number of pixels.
    n_samples = min(100, pixels.shape[0])
    sampled_colors = shuffle(pixels, random_state=0, n_samples=n_samples)
    combined_colors = torch.vstack([unique_colors, sampled_colors])

    # Apply KMeans. The seed is fixed (like the sampling above) so that
    # re-running the notebook reproduces the same palette.
    kmeans = KMeans(n_clusters=n_colors, random_state=0).fit(combined_colors)
    labels = kmeans.predict(pixels)
    quantized = torch.from_numpy(kmeans.cluster_centers_[labels])
    print("Quantized Shape:", quantized.shape)

    # Back from (pixels, channels) to (channels, height, width).
    quantized = quantized.permute(1, 0).reshape(channels, height, width)
    return quantized

def blur_image_manual(image: torch.Tensor, kernel_size=3):
    """Box-blur an image with a uniform ``kernel_size`` x ``kernel_size`` kernel.

    Each channel is filtered independently (depthwise convolution) with zero
    padding chosen so that the output has the same height and width as the
    input for any kernel size.

    Args:
        image: Tensor indexed as (channels, height, width). Integer tensors
            are converted to float32 first, because averaging needs a
            floating-point dtype.
        kernel_size: Side length of the square averaging window.

    Returns:
        Blurred tensor with the same (channels, height, width) shape as the
        input, in the input's floating-point dtype (float32 for integer input).
    """
    if not image.is_floating_point():
        image = image.float()
    channels = image.shape[0]

    # One averaging filter per channel, created with the image's dtype and
    # device so the convolution never sees mismatched operands.
    blur_kernel = torch.ones(
        channels, 1, kernel_size, kernel_size, dtype=image.dtype, device=image.device
    ) / (kernel_size * kernel_size)

    # conv2d expects a batch dimension. groups=channels keeps the channels
    # separate, and padding="same" preserves the spatial size. The function is
    # reached through the torch namespace because the short alias `F` is
    # rebound to a different module elsewhere in this notebook.
    blurred_image = torch.nn.functional.conv2d(
        image.unsqueeze(0), blur_kernel, padding="same", groups=channels
    )

    # Remove the batch dimension again.
    return blurred_image.squeeze(0)

def blur_image(image: torch.Tensor, kernel_size=3):
    """Run torchvision's ``GaussianBlur`` transform with ``kernel_size`` on ``image``."""
    gaussian_blur = transforms.GaussianBlur(kernel_size=kernel_size)
    return gaussian_blur(image)

def morphological_closing(image: torch.Tensor, kernel_size=2):
    """Clean a flat-colored image, one color at a time.

    For every distinct color the function builds a binary mask, applies a
    morphological closing to it, drops connected components that are smaller
    than that color's largest component, and paints what is left into the
    output image.

    Args:
        image: Tensor indexed as (channels, height, width). It is multiplied
            by 255 and clamped before the uint8 conversion, so values are
            presumably expected in [0, 1] — TODO confirm with callers.
        kernel_size: Side length of the square structuring element.

    Returns:
        Tensor indexed as (channels, height, width), divided by 255 again.
        Pixels not claimed by any kept component stay 0.
    """
    # Convert the tensor to a channels-last uint8 NumPy array for OpenCV.
    image = (image * 255).clamp(0, 255).to(torch.uint8)
    img = image.permute(1, 2, 0).numpy()

    # Define a kernel for the morphological operation
    kernel = np.ones((kernel_size, kernel_size), np.uint8)  # you may need to adjust the size

    # Get all unique colors in the image
    unique_colors = np.unique(img.reshape(-1, img.shape[2]), axis=0)

    # Perform morphological closing for each unique color
    cleaned_img = np.zeros_like(img)
    for color in unique_colors:
        # Create a binary mask for the current color
        print("Processing color:", color)
        mask = (img == color).all(axis=2).astype(np.uint8)

        # Perform morphological closing on the mask
        closed_mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(closed_mask, connectivity=8)
        areas = stats[:, cv2.CC_STAT_AREA]

        # Largest foreground component. Label 0 is the background and must be
        # left out explicitly: it is not guaranteed to be the biggest region
        # (a dominant color covers more pixels than its own background), so
        # "second largest entry overall" is not a reliable way to find it.
        max_area = areas[1:].max() if num_labels > 1 else 0

        for label in range(1, num_labels):
            label_area = areas[label]
            # NOTE(review): components under 50 pixels are skipped here, i.e.
            # they are left in the mask rather than removed — confirm that
            # keeping small specks is intended.
            if label_area < 50:
                continue
            # If the component isn't the largest one, remove it from the mask
            print("Component area of {} comapred to max area of {}".format(label_area, max_area))
            if label_area < max_area:
                closed_mask[labels == label] = 0

        # Add the closed mask to the cleaned image
        cleaned_img[closed_mask == 1] = color

    # Convert the cleaned NumPy array back to a channels-first tensor
    cleaned_tensor = torch.from_numpy(cleaned_img).permute(2, 0, 1)

    return cleaned_tensor / 255.0

def quantize_image_by_popularity(image, min_popularity: int = 150, use_blur: bool = False, use_morphology: bool = True):
    """Snap every pixel to the nearest of the image's most frequent colors.

    The palette is made of the (at most 30) most frequent 8-bit colors that
    occur at least ``min_popularity`` times, plus pure white. Before counting,
    very dark pixels (channel sum < 100) are treated as black and very bright
    ones (channel sum > 710) as white.

    Args:
        image: 3-channel tensor indexed as (channels, height, width). Values
            are multiplied by 255 to obtain 8-bit levels, so they are expected
            in [0, 1].
        min_popularity: Minimum pixel count for a color to enter the palette.
        use_blur: If True, the image is passed through ``blur_image`` with a
            kernel size of 3 before anything else.
        use_morphology: Currently unused; kept so existing callers that pass
            it keep working.

    Returns:
        float32 tensor of shape (channels, height, width) in which every
        pixel is one of the palette colors, scaled to [0, 1].
    """
    max_palette_size = 30
    channels, height, width = image.shape

    image = blur_image(image, kernel_size=3) if use_blur else image
    # One row per pixel, one column per channel.
    pixels = image.reshape(channels, -1).permute(1, 0)

    # Convert to 8-bit levels. Round rather than truncate so a value such as
    # 41/255 cannot come back as 40 because of floating-point error, and clamp
    # so out-of-range input cannot spill into a neighboring channel below.
    pixel_ints = (pixels * 255).round().clamp(0, 255).to(torch.int64)
    channel_sums = pixel_ints.sum(dim=1)
    # Treat any pixel with R, G, and B combined <100 as black.
    pixel_ints[channel_sums < 100] = 0
    # Treat any pixel with R, G, and B combined >710 as white.
    pixel_ints[channel_sums > 710] = 255

    # Pack R, G, B into a single 24-bit integer per pixel.
    pixel_merged = (pixel_ints[:, 0] * 256 + pixel_ints[:, 1]) * 256 + pixel_ints[:, 2]

    print("Pixels merged shape:", pixel_merged.shape)
    print("Pixels merged:", pixel_merged[:10])

    # Count only the colors that actually occur. A bincount over the packed
    # values would allocate and sort up to 256**3 counters on every call.
    unique_colors_merged, unique_color_merged_counts = torch.unique(pixel_merged, return_counts=True)
    print("Unique color merged counts shape:", unique_color_merged_counts.shape)
    print("Unique color merged counts:", unique_color_merged_counts[:10])

    most_popular = unique_color_merged_counts.argsort(descending=True)[:max_palette_size]
    top_color_counts = unique_color_merged_counts[most_popular]
    # Restrict top colors to those that appear at least min_popularity times.
    top_colors_merged = unique_colors_merged[most_popular][top_color_counts >= min_popularity]

    # Unpack back to (R, G, B). White is always part of the palette.
    top_colors = [
        (color // (256 * 256), (color // 256) % 256, color % 256)
        for color in top_colors_merged.tolist()
    ] + [(255, 255, 255)]
    top_colors_tensor = torch.tensor(top_colors, dtype=torch.float32) / 255  # normalize to [0, 1]
    print("Top colors:", top_colors)
    print("Top color counts:", top_color_counts)

    # Remap each pixel in the image to the closest of the top colors:
    # (pixels, 1, 3) - (palette, 3) broadcasts to (pixels, palette, 3).
    distances = torch.norm(pixels.unsqueeze(1) - top_colors_tensor, dim=2)
    closest = distances.argmin(dim=1)
    remapped_pixels = top_colors_tensor[closest]

    # Reshape the remapped pixels to the original image shape
    remapped_image = remapped_pixels.permute(1, 0).reshape(channels, height, width)

    return remapped_image






import matplotlib.pyplot as plt
from torchvision.io import read_image

# Load the mask and rescale its values by 1/255.
mask = read_image("../data/frames/madden-001-mask.jpg") / 255.0
print("Mask shape:", mask.shape)

# Compute every variant up front, in the same order the panels are drawn.
blurred_mask = blur_image(mask, kernel_size=3)
popularity_quantized_mask = quantize_image_by_popularity(mask, 500)
kmeans_quantized_mask = quantize_image(mask, 24)

# 2x2 comparison grid: original, blurred, and the two quantizers.
plt.figure(figsize=(16, 12))

plt.subplot(2, 2, 1)
plt.title("Mask")
plt.imshow(mask.permute(1, 2, 0))

plt.subplot(2, 2, 2)
plt.title("Blurred")
plt.imshow(blurred_mask.permute(1, 2, 0))

plt.subplot(2, 2, 3)
plt.title("Quantized by popularity")
plt.imshow(popularity_quantized_mask.permute(1, 2, 0))

plt.subplot(2, 2, 4)
plt.title("Quantized by k-means")
plt.imshow(kmeans_quantized_mask.permute(1, 2, 0))

In [None]:
import torch
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Define the size of the image
height = 64
width = 128

# Build an HSV image: saturation and value are 1 everywhere, and the hue
# increases linearly from 0 to 1 across the pixels in row-major order.
image_hsv = torch.ones((3, height, width))
image_hsv[0] = torch.linspace(0, 1, height * width).view(height, width)

# Convert the image to RGB (hsv_to_rgb is given a channels-last NumPy array)
image_rgb = mcolors.hsv_to_rgb(image_hsv.permute(1, 2, 0).numpy())

# Convert back to tensor
image_rgb_tensor = torch.from_numpy(image_rgb)
print("RGB Image Shape:", image_rgb_tensor.shape)

# Left: the hue ramp. Right: the same ramp reduced to 8 colors by k-means.
# The right panel gets its own title so the two panels can be told apart.
plt.figure(figsize=(8, 8))
plt.subplot(121)
plt.title("Image")
plt.imshow(image_rgb_tensor)
plt.subplot(122)
plt.title("Quantized by k-means (8 colors)")
plt.imshow(quantize_image(image_rgb_tensor.permute(2, 0, 1), 8).permute(1, 2, 0))


In [None]:
# Flatten the RGB tensor into one row of three values per pixel and show
# the shape before and after, followed by the first few rows.
print("Shape:", image_rgb_tensor.shape)

pixels = image_rgb_tensor.reshape(-1, 3)
print("Shape:", pixels.shape)

leading_pixels = pixels[:10]
print("First 10 pixels:\n", leading_pixels)

In [None]:
target_dir = "../data/frames/"

import os
import torch
from torchvision.io import read_image, write_png
from PIL import Image

# Quantize every "*-mask.jpg" in target_dir and store the result next to it
# as a PNG with the same stem. The listing is sorted so the log output is in
# a stable order.
for filename in sorted(os.listdir(target_dir)):
    if not filename.endswith('-mask.jpg'):
        continue

    print("Processing", filename, "...")
    # Open image and quantize it
    image = read_image(os.path.join(target_dir, filename))
    image = quantize_image_by_popularity(image / 255.0, 500)

    # Flat (row-major) indices of the pixels that aren't black
    indices = (image.reshape(3, -1).sum(0) != 0).nonzero(as_tuple=True)[0]
    preview = indices[:10]

    # Transform flat indices back to 2D coordinates. Floor division keeps the
    # computation in integers instead of truncating a float quotient.
    y = torch.div(preview, image.shape[2], rounding_mode="floor").to(torch.int32)
    x = torch.fmod(preview, image.shape[2]).to(torch.int32)

    # Print up to 10 non-black pixels and their coordinates. Iterating over
    # the coordinates themselves avoids an IndexError on masks that have
    # fewer than 10 non-black pixels.
    for row, col in zip(y.tolist(), x.tolist()):
        print(f"Pixel: {image[:, row, col]}, Coordinates: ({row}, {col})")

    # Save quantized image as PNG. Round before the uint8 cast so a palette
    # value such as 41/255 is written as 41 rather than truncated to 40.
    int_image = (image * 255).round().clamp(0, 255).to(torch.uint8)

    new_filename = os.path.splitext(filename)[0] + '.png'
    write_png(int_image, os.path.join(target_dir, new_filename))

In [None]:
import os
import torch

from torchvision.io import read_image
from torchvision.ops.boxes import masks_to_boxes
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F


class FootballDataset(torch.utils.data.Dataset):
    """Frames paired with color-coded instance masks from one directory.

    A frame ``<stem>.<ext>`` is used only when ``<stem>-mask.jpg`` exists in
    the same directory. Files whose stem ends in ``-mask`` (the JPEG masks
    and any other mask derivative stored alongside them) are never treated
    as frames, and frames without a mask are skipped, so ``imgs[i]`` and
    ``masks[i]`` always belong together.

    Args:
        frame_directory: Directory holding the frames and their masks.
        transforms: Callable invoked as ``transforms(img, target)`` that
            returns the transformed pair, or ``None`` to skip transforms.
    """

    def __init__(self, frame_directory, transforms):
        self.frame_directory = frame_directory
        self.transforms = transforms

        filenames = sorted(os.listdir(frame_directory))
        present = set(filenames)

        # Pair by name rather than by list position, so extra files in the
        # directory cannot shift frames and masks against each other.
        self.imgs = []
        self.masks = []
        for filename in filenames:
            stem, _ = os.path.splitext(filename)
            if stem.endswith("-mask"):
                continue
            mask_name = stem + "-mask.jpg"
            if mask_name in present:
                self.imgs.append(filename)
                self.masks.append(mask_name)

    @staticmethod
    def _merge_channels(mask):
        """Collapse a (channels, height, width) mask into a (height, width) id map.

        Channel values are packed base-256, first channel most significant,
        so every distinct color maps to a distinct integer and an all-zero
        (black) pixel maps to 0. A single-channel mask keeps its values.
        """
        ids = torch.zeros_like(mask[0], dtype=torch.int64)
        for channel in mask.to(torch.int64):
            ids = ids * 256 + channel
        return ids

    def __getitem__(self, idx):
        """Return ``(img, target)`` for sample ``idx``.

        ``target`` is a dict with the keys ``boxes``, ``masks``, ``labels``,
        ``image_id``, ``area`` and ``iscrowd``.
        """
        # load images and masks
        img_path = os.path.join(self.frame_directory, self.imgs[idx])
        mask_path = os.path.join(self.frame_directory, self.masks[idx])
        img = read_image(img_path)

        # instances are encoded as different colors, so turn each color into
        # one integer id first; comparing a multi-channel mask against scalar
        # ids would compare channel by channel instead of color by color
        mask = self._merge_channels(read_image(mask_path))
        obj_ids = torch.unique(mask)

        # first id is the background, so remove it
        # (ids are sorted, and black packs to 0, the smallest possible id)
        obj_ids = obj_ids[1:]
        num_objs = len(obj_ids)

        # split the color-encoded mask into a set
        # of binary masks, one (height, width) plane per instance
        masks = (mask == obj_ids[:, None, None]).to(dtype=torch.uint8)

        # get bounding box coordinates for each mask
        boxes = masks_to_boxes(masks)

        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)

        image_id = idx
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Wrap sample and targets into torchvision tv_tensors:
        img = tv_tensors.Image(img)

        target = {}
        target["boxes"] = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=F.get_size(img))
        target["masks"] = tv_tensors.Mask(masks)
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of frame/mask pairs."""
        return len(self.imgs)