In [None]:
import os
import sys

# Report which interpreter and working directory this kernel is running with.
# The cells below use paths relative to the working directory ("../data/...").
print(sys.executable)
print(os.getcwd())

In [None]:
import matplotlib.pyplot as plt
from torchvision.io import read_image


# Load one frame and the mask file stored next to it.
frame_path = "../data/frames/pressbox-001.jpg"
mask_path = "../data/frames/pressbox-001-mask.jpg"
image = read_image(frame_path)
mask = read_image(mask_path)

# Show both side by side. The tensors are permuted so the channel axis comes
# last, which is the layout imshow expects.
plt.figure(figsize=(16, 8))

plt.subplot(1, 2, 1)
plt.title("Image")
plt.imshow(image.permute(1, 2, 0))

plt.subplot(1, 2, 2)
plt.title("Mask")
plt.imshow(mask.permute(1, 2, 0))


In [None]:
import os
import numpy as np
from sklearn.utils import shuffle
import torch
import cv2
from torchvision import transforms
import torch.nn.functional as F
from torchvision.io import read_image
from PIL import Image
from sklearn.cluster import KMeans

def quantize_image(image, n_colors):
    """Reduce ``image`` to ``n_colors`` colours using k-means clustering.

    The clusterer is fitted on every distinct colour in the image plus a small
    random sample of pixels (so that frequent colours carry extra weight).
    Every pixel is then replaced by the centre of the cluster it falls into.

    Prints the flattened pixel shape and the quantized shape as a side effect.

    Args:
        image: Tensor indexed as (channels, height, width).
        n_colors: Number of clusters, i.e. colours in the output palette.

    Returns:
        Tensor of shape (channels, height, width) whose pixels are cluster
        centres. Its dtype is whatever ``kmeans.cluster_centers_`` has.
    """
    channels, height, width = image.shape

    # Flatten to one row per pixel: (height * width, channels).
    pixels = image.reshape(channels, -1).permute(1, 0)
    print("Pixels shape:", pixels.shape)

    # Every distinct colour is included once ...
    unique_colors = torch.unique(pixels, dim=0)
    # ... plus up to 100 randomly drawn pixels to reflect colour frequency.
    # The sample size is capped because shuffle() refuses to draw more rows
    # than exist, which used to crash on images with fewer than 100 pixels.
    n_samples = min(100, pixels.shape[0])
    sampled_colors = shuffle(pixels, random_state=0, n_samples=n_samples)
    combined_colors = torch.vstack([unique_colors, sampled_colors])

    # Seed the clusterer so that re-running the notebook gives the same palette.
    kmeans = KMeans(n_clusters=n_colors, random_state=0).fit(combined_colors)
    labels = kmeans.predict(pixels)
    quantized = torch.from_numpy(kmeans.cluster_centers_[labels])
    print("Quantized Shape:", quantized.shape)

    # Back from (pixels, channels) to (channels, height, width).
    quantized = quantized.permute(1, 0).reshape(channels, height, width)
    return quantized

def blur_image_manual(image: torch.Tensor, kernel_size=3):
    """Box-blur every channel of ``image`` with a square mean filter.

    Args:
        image: Floating-point tensor indexed as (channels, height, width).
        kernel_size: Side length of the averaging window.

    Returns:
        Tensor with the same shape as ``image``. Pixels outside the image are
        treated as zero, so values near the border are pulled towards zero.
    """
    channels = image.shape[0]

    # Build the kernel with the image's dtype/device so the convolution also
    # works for float64 or GPU tensors. Integer images keep a float32 kernel
    # (conv2d rejects the mismatch, exactly as before).
    kernel_dtype = image.dtype if image.is_floating_point() else torch.float32
    blur_kernel = torch.ones(
        channels, 1, kernel_size, kernel_size, dtype=kernel_dtype, device=image.device
    ) / (kernel_size * kernel_size)

    # Zero-pad so the output keeps the input's height and width for any kernel
    # size. For kernel_size=3 this is one pixel on each side, matching the
    # previous hard-coded padding=1; even sizes get the extra pixel at the
    # bottom/right.
    pad_before = (kernel_size - 1) // 2
    pad_after = kernel_size // 2
    batched = image.unsqueeze(0)  # conv2d expects a leading batch dimension
    padded = F.pad(batched, (pad_before, pad_after, pad_before, pad_after))

    # groups=channels filters each channel independently with its own kernel
    # (previously hard-coded to 3, which broke non-RGB inputs).
    blurred_image = F.conv2d(padded, blur_kernel, groups=channels)

    # Drop the batch dimension again.
    return blurred_image.squeeze(0)

def blur_image(image: torch.Tensor, kernel_size=3):
    """Run ``image`` through torchvision's ``GaussianBlur`` transform built with ``kernel_size``."""
    gaussian = transforms.GaussianBlur(kernel_size=kernel_size)
    return gaussian(image)

def morphological_closing(image: torch.Tensor, kernel_size=2):
    """Clean a flat-coloured image by closing and pruning each colour's mask.

    For every distinct colour in the image, the pixels of exactly that colour
    form a binary mask. The mask is morphologically closed, and then every
    connected component with an area of at least 50 pixels that is smaller than
    that colour's largest component is erased. Components under 50 pixels are
    left as they are. The surviving mask is painted back in its colour; colours
    are processed in ``np.unique`` order, so where closed masks overlap the
    later colour wins. Pixels claimed by no colour stay black.

    Prints progress for every colour and every component it compares.

    Args:
        image: Tensor indexed as (channels, height, width). It is multiplied by
            255, clamped to [0, 255] and truncated to uint8 before processing.
        kernel_size: Side length of the square structuring element.

    Returns:
        Float tensor indexed as (channels, height, width) holding the cleaned
        uint8 image divided by 255.
    """
    # Work on a channels-last uint8 NumPy array, which is what OpenCV expects.
    image = (image * 255).clamp(0, 255).to(torch.uint8)
    img = image.permute(1, 2, 0).numpy()

    kernel = np.ones((kernel_size, kernel_size), np.uint8)

    # One row per distinct colour in the image.
    unique_colors = np.unique(img.reshape(-1, img.shape[2]), axis=0)

    cleaned_img = np.zeros_like(img)
    for color in unique_colors:
        print("Processing color:", color)
        # 1 where the pixel matches this colour in every channel.
        mask = (img == color).all(axis=2).astype(np.uint8)

        closed_mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(closed_mask, connectivity=8)

        # Label 0 is the background. The reference area must come from the
        # foreground labels only: the previous "second largest of all rows"
        # shortcut picked the wrong row whenever a foreground component was
        # larger than the background.
        if num_labels > 1:
            areas = stats[:, cv2.CC_STAT_AREA]
            max_area = areas[1:].max()

            for label in range(1, num_labels):
                label_area = areas[label]
                # Tiny components are skipped here and therefore kept.
                if label_area < 50:
                    continue
                print("Component area of {} comapred to max area of {}".format(label_area, max_area))
                # Erase every sizeable component that is not the largest one.
                if label_area < max_area:
                    closed_mask[labels == label] = 0

        # Paint what is left of this colour's mask into the output.
        cleaned_img[closed_mask == 1] = color

    # Back to a channels-first tensor scaled to [0, 1].
    cleaned_tensor = torch.from_numpy(cleaned_img).permute(2, 0, 1)

    return cleaned_tensor / 255.0

def quantize_image_by_popularity(image, min_popularity: int = 150, use_blur: bool = False, use_morphology: bool = True):
    """Snap every pixel to the nearest of the image's most frequent colours.

    The palette is built from the (at most) 30 most frequent 8-bit colours that
    occur at least ``min_popularity`` times, with near-black pixels counted as
    pure black and near-white pixels as pure white. White is always appended to
    the palette. Each pixel is then replaced by the palette colour closest to
    its own value (Euclidean distance on the [0, 1] scale).

    Prints intermediate shapes, counts and the chosen palette.

    Args:
        image: Tensor indexed as (channels, height, width). The colour packing
            reads channels 0, 1 and 2, so three channels are expected, and the
            values are multiplied by 255 to obtain 8-bit colours.
        min_popularity: Minimum pixel count for a colour to enter the palette.
        use_blur: Blur the image with ``blur_image`` (kernel size 3) first.
        use_morphology: Post-process the result with
            ``morphological_closing`` (kernel size 5).

    Returns:
        The remapped image as a (channels, height, width) tensor, cleaned by
        ``morphological_closing`` when ``use_morphology`` is true.
    """
    channels, height, width = image.shape

    image = blur_image(image, kernel_size=3) if use_blur else image
    # One row per pixel: (height * width, channels).
    pixels = image.reshape(channels, -1).permute(1, 0)

    # Count the number of times each colour appears in the image.
    pixel_ints = (pixels * 255).to(torch.int32)
    # Treat any pixel with R, G, and B combined <100 as black.
    pixel_ints[pixel_ints.sum(dim=1) < 100] = 0
    # Treat any pixel with R, G, and B combined >710 as white.
    pixel_ints[pixel_ints.sum(dim=1) > 710] = 255

    # Pack the three 8-bit channels into one integer so colours can be counted
    # with a single bincount.
    pixel_merged = pixel_ints[:, 0] * 256 * 256 + pixel_ints[:, 1] * 256 + pixel_ints[:, 2]

    print("Pixels merged shape:", pixel_merged.shape)
    print("Pixels merged:", pixel_merged[:10])
    unique_color_merged_counts = torch.bincount(pixel_merged)
    print("Unique color merged counts shape:", unique_color_merged_counts.shape)
    print("Unique color merged counts:", unique_color_merged_counts[:10])
    sorted_indices = unique_color_merged_counts.argsort(descending=True)
    top_colors_merged = sorted_indices[:30]
    # Restrict top colours to those that appear at least min_popularity times.
    top_color_counts = unique_color_merged_counts[top_colors_merged]
    top_colors_merged = top_colors_merged[top_color_counts >= min_popularity]
    # Unpack the integers back into (R, G, B) and always offer white as well.
    top_colors = [(color.item() // (256 * 256), (color.item() // 256) % 256, color.item() % 256) for color in top_colors_merged] + [(255, 255, 255)]
    top_colors_tensor = torch.tensor(top_colors, dtype=torch.float32) / 255  # normalise to [0, 1]
    print("Top colors:", top_colors)
    print("Top color counts:", top_color_counts)

    # Remap each pixel in the image to the closest of the top colours.
    distances = torch.norm(pixels.unsqueeze(1) - top_colors_tensor, dim=2)
    closest = distances.argmin(dim=1)
    remapped_pixels = top_colors_tensor[closest]

    # Reshape the remapped pixels to the original image shape.
    remapped_image = remapped_pixels.permute(1, 0).reshape(channels, height, width)
    # Without morphology the remapped image itself must be returned; the old
    # code fell back to the unquantized input here.
    if use_morphology:
        remapped_image = morphological_closing(remapped_image, kernel_size=5)
    return remapped_image

def quantized_by_edges(image):
    """Flatten an image into solid-colour regions delimited by its edges.

    Steps:
      1. Canny edges are detected on the grey image and on each colour channel,
         merged, and closed with a 3x3 dilate + erode.
      2. The non-edge, non-dark area is split into 4-connected components.
      3. Every component of at least 30 pixels is filled with its most frequent
         colour, ignoring colours whose mean channel value is 30 or less.
         Pixels added by the edge closing are attributed to a component when
         they lie inside exactly one component's bounding box.
      4. The result is cleaned with ``morphological_closing`` (kernel size 5).

    Args:
        image: Tensor indexed as (channels, height, width). It is multiplied by
            255, clamped and truncated to uint8; the channel split and the
            RGB-to-grey conversion expect three channels.

    Returns:
        Float tensor indexed as (channels, height, width), as returned by
        ``morphological_closing``.
    """
    # Work on a channels-last uint8 NumPy array, which is what OpenCV expects.
    image = (image * 255).clamp(0, 255).to(torch.uint8)
    img = image.permute(1, 2, 0).numpy()

    # --- 1. Edge map -------------------------------------------------------
    r, g, b = cv2.split(img)
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    bottom_threshold = 25
    top_threshold = 100
    edges_k = cv2.Canny(img_gray, bottom_threshold, top_threshold)
    edges_r = cv2.Canny(r, bottom_threshold, top_threshold)
    edges_g = cv2.Canny(g, bottom_threshold, top_threshold)
    edges_b = cv2.Canny(b, bottom_threshold, top_threshold)

    # A pixel is an edge if any of the four detectors fired.
    _, edges = cv2.threshold(edges_k | edges_r | edges_g | edges_b, 25, 255, cv2.THRESH_BINARY)
    edges_before_closing = edges.copy()
    edges = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
    edges = cv2.erode(edges, np.ones((3, 3), np.uint8), iterations=1)
    # Pixels that only became edges through the dilate/erode pass. They are
    # handed back to a neighbouring component further down.
    dilated_regions = (edges - edges_before_closing) > 10

    # --- 2. Components of the non-edge, non-dark area ----------------------
    bg_mask = np.mean(img, axis=2) < 30

    edges = cv2.bitwise_not(edges)  # now 255 = region interior, 0 = edge
    edges[bg_mask] = 0

    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(edges, connectivity=4)

    result = np.zeros_like(img)

    # --- 3a. Attribute closed-edge pixels to components ---------------------
    # Count how many component bounding boxes cover each pixel and remember
    # the covering label. Only pixels covered by exactly one box get a label;
    # uncovered pixels and pixels where boxes overlap are marked -1.
    # (This replaces an int32 product encoding that could wrap around when
    # several boxes overlapped.)
    coverage = np.zeros(edges.shape, dtype=np.int32)
    owner = np.full(edges.shape, -1, dtype=np.int32)
    for label in range(1, num_labels):
        x, y, w, h = stats[label, cv2.CC_STAT_LEFT], stats[label, cv2.CC_STAT_TOP], \
                     stats[label, cv2.CC_STAT_WIDTH], stats[label, cv2.CC_STAT_HEIGHT]
        coverage[y:y+h, x:x+w] += 1
        owner[y:y+h, x:x+w] = label
    component_indices = np.where(coverage == 1, owner, -1)
    # Only the pixels introduced by the edge closing are up for attribution.
    component_indices[~dilated_regions] = -1

    # --- 3b. Fill each component with its dominant colour -------------------
    for label in range(1, num_labels):
        if stats[label, cv2.CC_STAT_AREA] < 30:
            continue

        # The component itself plus the closed-edge pixels attributed to it.
        # The two sets cannot overlap: attributed pixels are edge pixels and
        # therefore belong to no component.
        object_mask = (labels == label) | (component_indices == label)

        colors = img[object_mask]
        unique_colors, counts = np.unique(colors, return_counts=True, axis=0)

        # Ignore dark colours when voting for the fill colour.
        bright = np.mean(unique_colors, axis=1) > 30
        unique_colors = unique_colors[bright]
        counts = counts[bright]
        # Nothing left to vote on (argmax would raise on an empty array):
        # leave this component black.
        if counts.size == 0:
            continue

        result[object_mask] = unique_colors[np.argmax(counts)]

    # --- 4. Back to a tensor and final clean-up ----------------------------
    result_tensor = torch.from_numpy(result).permute(2, 0, 1) / 255.0
    result_tensor = morphological_closing(result_tensor, kernel_size=5)

    return result_tensor

def image_for_display(image, crop=False):
    """Return ``image`` with the channel axis moved last, optionally cropped.

    Args:
        image: Tensor indexed as (channels, rows, columns).
        crop: When true, keep only the fixed window of rows 500-699 and
            columns 500-899 (clipped to the image size by slicing).

    Returns:
        The full or cropped image permuted to (rows, columns, channels).
    """
    # Fixed crop window: 200 rows by 400 columns starting at (500, 500).
    row_window = slice(500, 500 + 200)
    col_window = slice(500, 500 + 400)
    windowed = image[:, row_window, col_window]

    chosen = windowed if crop else image
    return chosen.permute(1, 2, 0)

import matplotlib.pyplot as plt
from torchvision.io import read_image

# Load the example mask and divide by 255; the quantisation helpers multiply
# by 255 again before converting to uint8.
mask = read_image("../data/frames/pressbox-001-mask.jpg") / 255.0
print("Mask shape:", mask.shape)

# Two stacked panels: the mask as loaded, and its edge-based quantisation.
# (Two further panels -- the blurred mask and
# quantize_image_by_popularity(mask, min_popularity=300, use_morphology=True)
# -- were tried here in a 2x2 grid and are currently disabled.)
plt.figure(figsize=(16, 12))

plt.subplot(2, 1, 1)
plt.title("Mask")
plt.imshow(image_for_display(mask))

plt.subplot(2, 1, 2)
plt.title("Quantized by edges")
plt.imshow(image_for_display(quantized_by_edges(mask)))

In [None]:
import torch
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Size of the synthetic test image.
height = 64
width = 128

# Build an HSV image whose hue rises linearly from 0 to 1 over the pixels in
# row-major order, with saturation and value fixed at 1.
image_hsv = torch.zeros((3, height, width))
image_hsv[0] = torch.linspace(0, 1, height * width).view(height, width)
image_hsv[1] = 1
image_hsv[2] = 1

# hsv_to_rgb is given the image with the channel axis last, as a NumPy array.
image_rgb = mcolors.hsv_to_rgb(image_hsv.permute(1, 2, 0).numpy())

# Convert back to a tensor (still channels-last).
image_rgb_tensor = torch.from_numpy(image_rgb)
print("RGB Image Shape:", image_rgb_tensor.shape)

# Left: the gradient. Right: the same image reduced to 8 colours by k-means.
# The right panel used to carry the same "Image" title as the left one.
plt.figure(figsize=(8, 8))
plt.subplot(121)
plt.title("Image")
plt.imshow(image_rgb_tensor)
plt.subplot(122)
plt.title("Quantized (8 colors)")
plt.imshow(quantize_image(image_rgb_tensor.permute(2, 0, 1), 8).permute(1, 2, 0))


In [None]:
# Flatten the channels-last RGB tensor to one row per pixel, then print the
# shape before and after flattening and a sample of the rows.
pixels = image_rgb_tensor.reshape(-1, 3)
for tensor in (image_rgb_tensor, pixels):
    print("Shape:", tensor.shape)
print("First 10 pixels:\n", pixels[:10])

In [None]:
import os
from torchvision.io import read_image, write_png
from PIL import Image

target_dir = "../data/frames/"
force = True  # when True, PNGs are regenerated even if they already exist

# Quantise every "*-mask.jpg" in target_dir and store the result next to it
# under the same name with a ".png" extension.
for filename in os.listdir(target_dir):
    if not filename.endswith('-mask.jpg'):
        continue

    stem, _extension = os.path.splitext(filename)
    new_filename = stem + '.png'
    output_path = os.path.join(target_dir, new_filename)

    # Skip files that were already converted unless a rebuild is forced.
    if not force and os.path.isfile(output_path):
        continue

    print("Processing", filename, "...")
    source = read_image(os.path.join(target_dir, filename))
    image = quantized_by_edges(source / 255.0)

    # write_png is given a uint8 tensor, so scale back to 0-255 and truncate.
    int_image = (image * 255).clamp(0, 255).to(torch.uint8)
    write_png(int_image, output_path)

In [None]:
# Helper modules from torchvision's detection reference scripts, downloaded
# into the current working directory.
# NOTE(review): these URLs track the "main" branch, so the downloaded code can
# change over time -- consider pinning a tag or commit.
REFERENCE_BASE_URL = "https://raw.githubusercontent.com/pytorch/vision/main/references/detection/"
REFERENCE_FILES = ["engine.py", "utils.py", "coco_utils.py", "coco_eval.py", "transforms.py"]

for reference_file in REFERENCE_FILES:
    # Skip files that are already present: on a re-run wget would otherwise
    # store a second copy under a numbered name such as "engine.py.1".
    if os.path.exists(reference_file):
        continue

    # os.system returns the command's exit status; fail loudly instead of
    # letting a later import break on a missing module.
    status = os.system("wget " + REFERENCE_BASE_URL + reference_file)
    if status != 0:
        raise RuntimeError(f"wget failed for {reference_file} (exit status {status})")

In [None]:
import matplotlib.pyplot as plt
from torchvision.io import read_image
import importlib
import process_mask

import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Pick up edits to process_mask.py without restarting the kernel.
importlib.reload(process_mask)

mask = read_image("../data/frames/madden-001-mask.png")
target = process_mask.process_mask(mask, 1)

fig, ax = plt.subplots(1)
ax.imshow(mask.permute(1, 2, 0))
print(target['labels'])


boxes = target["boxes"].numpy()
masks = target["masks"].numpy()
labels = target["labels"].numpy()

# Overlay every object's bounding box and mask on the mask image.
for i in range(len(labels)):
    # Draw the bounding box. The arithmetic below treats each box as
    # (x_min, y_min, x_max, y_max) -- TODO confirm against process_mask.
    box = boxes[i]
    rect = patches.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1], linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)

    # Build an RGBA overlay that is opaque white on the object and fully
    # transparent elsewhere. A separate name is used so the `mask` tensor
    # loaded above is not overwritten by the loop.
    instance_mask = masks[i]
    foreground = instance_mask > 0
    mask_rgba = np.zeros((instance_mask.shape[0], instance_mask.shape[1], 4))
    mask_rgba[foreground] = 1  # RGB and alpha all set to 1 on the object

    # Number of pixels in the mask. The previous expression summed the row
    # indices of those pixels instead of counting them.
    size_of_mask = int(np.count_nonzero(foreground))

    print(f"Size of mask for label {labels[i]}:", size_of_mask)
    ax.imshow(mask_rgba, alpha=0.5)

In [None]:
import torch
import matplotlib.pyplot as plt

from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks

from train import get_model_instance_segmentation, get_transform

image = read_image("../data/frames/madden-001-mask.png")

# Pick the device first so the checkpoint, the model and the input all end up
# in the same place.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = get_model_instance_segmentation(3)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load('../model.pth', map_location=device))
# The input below is moved to `device`; the model has to live there too,
# otherwise the forward pass fails whenever CUDA is available.
model.to(device)
model.eval()  # Set the model to evaluation mode

eval_transform = get_transform(train=False)

with torch.no_grad():
    x = eval_transform(image)
    # convert RGBA -> RGB and move to device
    x = x[:3, ...].to(device)
    predictions = model([x, ])
    # Bring the outputs back to the CPU, where the image used for drawing
    # lives. Assumes the prediction is a dict of tensors -- TODO confirm for
    # get_model_instance_segmentation.
    pred = {key: value.cpu() for key, value in predictions[0].items()}


# Stretch the image to the full 0-255 range and drop any alpha channel.
image = (255.0 * (image - image.min()) / (image.max() - image.min())).to(torch.uint8)
image = image[:3, ...]
# NOTE(review): every detection is captioned "pedestrian" regardless of its
# predicted label -- replace with real class names once they are known.
pred_labels = [f"pedestrian: {score:.3f}" for _label, score in zip(pred["labels"], pred["scores"])]
pred_boxes = pred["boxes"].long()
output_image = draw_bounding_boxes(image, pred_boxes, pred_labels, colors="red")

# Keep mask pixels with a score above 0.7 and drop the singleton channel axis.
masks = (pred["masks"] > 0.7).squeeze(1)
output_image = draw_segmentation_masks(output_image, masks, alpha=0.5, colors="blue")


plt.figure(figsize=(12, 12))
plt.imshow(output_image.permute(1, 2, 0))
