In [None]:
import random

import torch
import numpy as np
import cv2 as cv
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from sklearn.manifold import TSNE

from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou

In [None]:
def init_colors(n_colors, *, randomize=True):
    """Pick ``n_colors`` matplotlib color names, e.g. one color per class.

    Colors are taken from ``mcolors.BASE_COLORS``. When more colors are
    requested than that table holds, the pool is extended with the names from
    ``mcolors.TABLEAU_COLORS`` and ``mcolors.CSS4_COLORS`` so the result always
    contains exactly ``n_colors`` entries. (A truncated list would make
    indexing the result by class label fail for the higher labels.) For
    requests that fit into ``BASE_COLORS`` the result is the same as before.

    Args:
        n_colors (int): number of colors to return; must be non-negative.
        randomize (bool): keyword-only. If true, shuffle the pool using the
            global ``random`` module state before picking.

    Returns:
        list[str]: exactly ``n_colors`` color names.

    Raises:
        ValueError: if ``n_colors`` is negative.
    """
    if n_colors < 0:
        # A negative slice bound would silently drop colors from the end.
        raise ValueError(f"n_colors must be non-negative, got {n_colors}")

    # dict.fromkeys keeps insertion order and removes duplicate names.
    pool = dict.fromkeys(mcolors.BASE_COLORS)
    if n_colors > len(pool):
        pool.update(dict.fromkeys(mcolors.TABLEAU_COLORS))
        pool.update(dict.fromkeys(mcolors.CSS4_COLORS))
    colors = list(pool)

    if randomize:
        random.shuffle(colors)

    if colors and len(colors) < n_colors:
        # Last resort: even the extended pool is too small, so repeat it.
        repeats = -(-n_colors // len(colors))  # ceiling division
        colors = colors * repeats

    return colors[:n_colors]

# Reduce the embeddings to 2-D with t-SNE (cosine metric) and draw a scatter
# plot in which every point is colored by its class label.
# NOTE(review): `embs`, `labels` and `n_classes` are not defined in the cells
# visible here; they must exist before this cell runs. Presumably `embs` is a
# [N, C] tensor and `labels` an integer tensor of length N. TODO confirm.
# NOTE(review): scikit-learn 1.5 renamed `n_iter` to `max_iter`; switch the
# keyword if the installed version warns about it.
# random_state makes the (otherwise stochastic) layout reproducible.
tsne = TSNE(n_components=2, n_iter=3000, metric='cosine', random_state=42)
# .cpu() is a no-op for CPU tensors and makes the conversion work when the
# embeddings live on a CUDA device (Tensor.numpy() only supports CPU tensors).
embs_reduced = tsne.fit_transform(embs.detach().cpu().numpy())

class_colors = init_colors(n_classes, randomize=False)
color_seq = [class_colors[label] for label in labels.tolist()]

fig, ax = plt.subplots(figsize=(10, 8))
xs = embs_reduced[:, 0]
ys = embs_reduced[:, 1]
ax.scatter(xs, ys, alpha=0.8, c=color_seq)
# The trailing semicolon suppresses the repr of the returned artists.
ax.set(
    title="t-SNE projection of the embeddings (colored by class)",
    xlabel="t-SNE dimension 1",
    ylabel="t-SNE dimension 2",
);

In [None]:
def features_to_emb(features: torch.Tensor) -> torch.Tensor:
    """Computes embedding vectors from tracker template (exemplar) features.

    Every channel of each feature map in the batch is reduced to a single
    value by global average pooling over the two spatial dimensions.
    Afterwards, the resulting vectors are L2-normalized to project them onto a
    unit hypersphere.

    Args:
        features (torch.Tensor): template features of shape [B, C, H, W]; the
            spatial size does not have to be square.

    Returns:
        torch.Tensor: embedding vectors of shape [B, C]. A pooled vector with
        (near-)zero norm is returned as (near-)zeros instead of NaN/inf.

    Raises:
        ValueError: if ``features`` is not a 4-dimensional tensor.
    """
    if features.dim() != 4:
        raise ValueError(
            f"expected features of shape [B, C, H, W], got {tuple(features.shape)}"
        )

    # Averaging over both spatial axes is global average pooling. Unlike
    # avg_pool2d with a kernel taken from the height only, it also covers the
    # full width of non-square feature maps.
    avg = features.mean(dim=(2, 3))  # [B, C]
    # F.normalize divides by max(norm, eps), so a zero vector does not cause a
    # division by zero.
    emb = F.normalize(avg, p=2, dim=1)  # [B, C]

    return emb

# Smoke test: one random template with 128 channels and a 15x15 spatial size;
# the displayed size of the resulting embedding batch should be [1, 128].
features = torch.rand(1, 128, 15, 15)
features_to_emb(features).size()

In [None]:
# Device sanity check: create a small random tensor and move it to the GPU
# when one is available. On a machine without CUDA the tensor stays on the CPU
# instead of raising, so the notebook still runs top to bottom; the displayed
# device shows which case applies.
t = torch.rand((10,), dtype=torch.float)
if torch.cuda.is_available():
    t = t.cuda()
t.device