In [6]:
#Import numpy
import numpy as np

#Import scikitlearn for machine learning functionalities
import sklearn
from sklearn.manifold import TSNE
from sklearn.datasets import load_digits # For the UCI ML handwritten digits dataset

# Import matplotlib for plotting graphs ans seaborn for attractive graphics.
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe
%matplotlib inline

import seaborn as sb

import importlib
import os

import argparse
import datetime
import json
import random
import time
from pathlib import Path

import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
import datasets
import datasets.DAOD as DAOD
import util.misc as utils
import datasets.samplers as samplers
from datasets import build_dataset, get_coco_api_from_dataset
from engine import evaluate, train_one_epoch
import models
from models import build_model

from config import get_cfg_defaults
from tqdm import tqdm

import util.misc as utils

In [7]:
importlib.reload(models) # reload folder

<module 'models' from '/scratch2/users/cku/adaptation/AQT_subset/models/__init__.py'>

##  config set up

In [8]:
# best way is to modify all args to manual parameter

def setup(config_file):
    # initialise cfg from both defualt and the one defined by file
    cfg = get_cfg_defaults()
    
    if config_file:
        cfg.merge_from_file(config_file)
#     if opts:
#         cfg.merge_from_list(args.opts)
    utils.init_distributed_mode(cfg)
    cfg.freeze()
    
    # copy backup scripts for ease of debugging
    if cfg.OUTPUT_DIR:
        Path(cfg.OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
        os.system(f'cp {config_file} {cfg.OUTPUT_DIR}')
        ddetr_src = 'models/deformable_detr.py'
        ddetr_des = Path(cfg.OUTPUT_DIR) / 'deformable_detr.py.backup'
        dtrans_src = 'models/deformable_transformer.py'
        dtrans_des = Path(cfg.OUTPUT_DIR) / 'deformable_transformer.py.backup'
        main_src = 'main.py'
        main_des = Path(cfg.OUTPUT_DIR) / 'main.py.backup'
        os.system(f'cp {ddetr_src} {ddetr_des}')
        os.system(f'cp {dtrans_src} {dtrans_des}')
        os.system(f'cp {main_src} {main_des}')

    return cfg

# write a name matching function, if names match named_params 
def match_name_keywords(n, name_keywords):
    out = False
    for b in name_keywords:
        if b in n:
            out = True
            break
    return out

In [9]:
# config_file = 'configs/r50_uda_c2fc.yaml'

config_file = 'configs/debug_mode.yaml'
cfg = setup(config_file)

Not using distributed mode


In [10]:
os.environ["CUDA_VISIBLE_DEVICES"]='4,5,6,7'

## load config file

In [11]:
# fix the seed for reproducibility
seed = cfg.SEED + utils.get_rank()
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [12]:
cfg.DIST.DISTRIBUTED,
cfg.DIST.DIST_BACKEND,
cfg.DIST.GPU,
cfg.DIST.WORLD_SIZE,
cfg.TRAIN.BATCH_SIZE,
cfg.DATASET.NUM_CLASSES
cfg.DEBUG # check debug mode

True

In [13]:
cfg.DIST.DISTRIBUTED, cfg.DIST.GPU, cfg.DIST.WORLD_SIZE

(False, 0, 1)

## set device manually

In [9]:
# torch.cuda.current_device()

In [10]:
# torch.cuda.set_device(7)
# note that torch.cuda.device() is used as a context manager
# device = torch.device('cuda:7')

In [11]:
# current device will always be zero even though the devices to which cuda is exposed 
# has been specified
# torch.cuda.current_device()

In [14]:
# ok
device = torch.device('cuda:7')

In [15]:
cfg.TRAIN.BATCH_SIZE

4

## build model

In [16]:
# build model and send it to cuda device
model, criterion, postprocessors, postprocessors_target = build_model(cfg)

# cannot set multiple devices at once, can only specify multiple devices when calling data parallel or distributed data parallel

# with torch.cuda.device(0):
model.to(device) 
    
model_without_ddp = model
# model=torch.nn.parallel.DistributedDataParallel(model, device_ids=[0])

n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
n_parameters



46836820

## build dataset

In [24]:
# takes a long time, write in a separate cell
# dataset_train = build_dataset(image_set='train', cfg=cfg)
dataset_val = build_dataset(image_set='val', cfg=cfg)

loading annotations into memory...
Done (t=0.74s)
creating index...
index created!


## build data loader

In [25]:
# random sampler for training data
# sampler_train = torch.utils.data.RandomSampler(dataset_train)

# # for uda only: if not //2 we only get half of the dataset
# batch_sampler_train = torch.utils.data.BatchSampler(
#             sampler_train, cfg.TRAIN.BATCH_SIZE//2, drop_last=True)

# random sampler for training
# data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train,
#                                collate_fn=DAOD.collate_fn, num_workers=cfg.NUM_WORKERS,
#                              pin_memory=True)

# build batch sampler
sampler_val = torch.utils.data.SequentialSampler(dataset_val)


# the valid dataloder uses a sequential sampler
data_loader_val = DataLoader(dataset_val, cfg.TRAIN.BATCH_SIZE, sampler=sampler_val,
                             drop_last=False, collate_fn=utils.collate_fn, num_workers=cfg.NUM_WORKERS,
                             pin_memory=True)

## build data loader subset

In [14]:
# get subset
indices = torch.arange(10)
dataset_train_subset = torch.utils.data.Subset(dataset_train,indices)

# remember to divide batch size by 2
data_loader_train_subset = DataLoader(dataset_train_subset, cfg.TRAIN.BATCH_SIZE//2,
                            collate_fn=DAOD.collate_fn, num_workers=cfg.NUM_WORKERS,
                             pin_memory=True)  

In [22]:
# check number of batches
len(data_loader_train_subset)

NameError: name 'data_loader_train_subset' is not defined

## load pretrained models

In [26]:
model_path = 'exps_bs4_retrain/AQT_pretrain_multi_scale/checkpoint0095.pth'
# load model weights
checkpoint = torch.load(model_path, map_location='cpu')
model_without_ddp.load_state_dict(checkpoint['model'], strict=False)

<All keys matched successfully>

In [27]:
# choose to capture outputs at train or test mode
model.eval()
# model.train()

DeformableDETR(
  (memory): Memory()
  (transformer): DeformableTransformer(
    (encoder): DeformableTransformerEncoder(
      (layers): ModuleList(
        (0): DeformableTransformerEncoderLayer(
          (space_attn): DomainAttention(
            (grl): GradientReversal()
            (cross_attn): MultiheadAttention(
              (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
            )
            (dropout1): Dropout(p=0.1, inplace=False)
            (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
            (linear): Linear(in_features=256, out_features=256, bias=True)
            (dropout2): Dropout(p=0.1, inplace=False)
            (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
          )
          (channel_attn): DomainAttention(
            (grl): GradientReversal()
            (cross_attn): MultiheadAttention(
              (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features

## collect backbone features

In [26]:
from util import box_ops
import torchvision
from datasets.data_prefetcher import data_prefetcher
from models.utils import weighted_aggregate_tmp

In [27]:
# data_iter = iter(data_loader_train)

In [28]:
B = cfg.TRAIN.BATCH_SIZE
scale = 1/32.

In [29]:
prefetcher = data_prefetcher(data_loader_train, device, prefetch=True)
samples, targets = prefetcher.next() # samples have been transformed at this stage

In [None]:
# we need the input projection layers 
# different layer features
for l, feat in enumerate(features):
    src, mask = feat.decompose()
    srcs.append(self.input_proj[l](src))
    masks.append(mask)
    assert mask is not None

if self.num_feature_levels > len(srcs):
    _len_srcs = len(srcs)
    # num_feature_levels = 4 by defualt
    for l in range(_len_srcs, self.num_feature_levels):

        # one feature level
        if l == _len_srcs:
            src = self.input_proj[l](features[-1].tensors)
        else:
            src = self.input_proj[l](srcs[-1])
        m = samples.mask
        mask = F.interpolate(m[None].float(), size=src.shape[-2:]).to(torch.bool)[0]
        pos_l = self.backbone[1](NestedTensor(src, mask)).to(src.dtype)
        srcs.append(src)
        masks.append(mask)
        pos.append(pos_l)

In [24]:
# rois_all = []
# src_labels = []
# src_scores = []
with torch.no_grad():
    for _ in tqdm(data_loader_train, desc = 'preprocess src rois'):

    #     samples = samples.to(device)
    #     targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        rescaled_boxes_enc = []
        list_of_labels_enc = []
        list_of_scores_enc = []

        # collect boxes
        for batch_idx in range(0, B//2, 1):
            source_boxes = targets[batch_idx]['boxes']
            source_labels = targets[batch_idx]['labels'].tolist()
            source_scores = torch.ones(source_boxes.shape[0]).cuda()
            # source_scores, _ = torch.max(outputs_class_conf[batch_idx][keep_tmp], dim=1)

            boxes_rescaled = box_ops.box_cxcywh_to_xyxy(source_boxes) # src only, batch size = 1
            # and from relative [0, 1] to absolute [0, height] coordinates
            # img_sizes = torch.stack([t["size"] for t in targets], dim=0)
            img_sizes = targets[batch_idx]["size"]
            img_h, img_w = img_sizes.unbind(0)
            scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=0) # different scale factor for different images

            # scale all boxes with the corresponding image sizes
            for b in range(boxes_rescaled.shape[0]):
                boxes_rescaled[b] *= scale_fct

            rescaled_boxes_enc.append(boxes_rescaled) # delist
            list_of_labels_enc.append(source_labels)
            list_of_scores_enc.append(source_scores)

        # (B, feat_dim, h, w)
        backbone_feat = model.backbone(samples)[0]

        # single scale
        backbone_feat_single = backbone_feat[0].tensors

        # get src boxes
        src_boxes = rescaled_boxes_enc[:B//2]
        src_labels = list_of_labels_enc[:B//2]
        src_scores = list_of_scores_enc[:B//2]

        rois_list_per_sample = []
        for batch_idx in range(0, B//2, 1):
            # input dim: (N, C, H, W)
            rois = torchvision.ops.roi_align(backbone_feat_single[batch_idx].unsqueeze(0), [src_boxes[batch_idx]], output_size=(7, 7), spatial_scale=scale, aligned=True).mean(3).mean(2)
            rois_list_per_sample.append(rois)

            
#         rois_all.append(rois_list_per_sample)

        list_of_src_prototype = [] # [scale], (num_classes, feat_dim)
    
        # along scale
        for roi_group in rois_all:
            # batch dim reduced after aggregation
            # (e.g scale 1, bs 2 --> all reduced)
            # (e.g scale 4, bs 2 --> 4, bs reduced)
            
            # (1,8,256)
            src_prototypes_enc, _ = weighted_aggregate_tmp(B, src_labels, roi_group, src_scores, None, 8, 2048)
            list_of_src_prototype.append(src_prototypes_enc)


        samples, targets = prefetcher.next()

NameError: name 'data_loader_train' is not defined

In [19]:
rois_all.__len__()

1487

In [22]:
rois_all[2].__len__()

2

In [23]:
torch.save(rois_all, 'preprocessed_src_rois.pt')

In [10]:
rois_all = torch.load('preprocessed_src_rois.pt')

In [12]:
B = cfg.TRAIN.BATCH_SIZE
list_of_src_prototype = [] # [scale], (num_classes, feat_dim)
for roi_group in rois_all:
    # batch dim reduced after aggregation
    # (e.g scale 1, bs 2 --> all reduced)
    # (e.g scale 4, bs 2 --> 4, bs reduced)
    src_prototypes_enc, _ = weighted_aggregate_tmp(B, src_labels, roi_group, src_scores, None, self.num_classes, self.hidden_dim)
    list_of_src_prototype.append(src_prototypes_enc)

NameError: name 'src_labels' is not defined

## gradcam visualization

In [None]:
from torch.nn import functional as F
import numpy as np
from util.plot_utils import inverse_transform
from datasets.data_prefetcher import data_prefetcher

In [None]:
data_iter = iter(data_loader_train_subset)
data = next(data_iter) # list

In [None]:
# samples are always from the first index, then targets
samples = data[0]
targets = data[1]

# samples = data[0].tensors.shape
# targets.__len__()

In [None]:
# for samples, targets in tqdm(data_loader_val, desc='running inference'):
samples = samples.to(device)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

outputs = model(samples, targets, None, None, None)

out, features, memory, hs, ema_prototypes = outputs

In [None]:
ema_prototypes[1].backward()
memory_gradients = model.get_activations_gradient() # in cam viz mode, this will compute the gradients of the desired feature activation
memory_activations = memory

In [None]:
# for samples, targets in tqdm(data_loader_val, desc='running inference'):
# samples = samples.to(device)
# targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
# outputs = model(samples, targets, None, None, None)
# out, features, memory, hs, ema_prototypes = outputs

ema_prototypes[1].backward()
memory_gradients = model.get_activations_gradient() # in cam viz mode, this will compute the gradients of the desired feature activation
memory_activations = memory

# pool gradients
pooled_memory_gradients = torch.mean(memory_gradients, dim=[0,1])
# weight activations with mean gradients
memory_activations= memory_activations*pooled_memory_gradients.unsqueeze(-1)
# mean across channel dim for visualization
memory_activations = torch.mean(memory_activations, dim=-1)
# last layer features shape
B, c, h, w = features[-1].tensors.shape
memory_activations = memory_activations.reshape(B,h,w)

# relu on heatmap
heatmap = np.maximum(memory_activations, 0)

# normalize the heatmap
heatmap /= torch.max(heatmap)

# draw the heatmap
plt.matshow(heatmap.squeeze())

In [None]:
# visualize heatmap on original image
import cv2
img = cv2.imread('./data/Elephant/data/05fig34.jpg')
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
heatmap = np.uint8(255 * heatmap)
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
superimposed_img = heatmap * 0.4 + img
cv2.imwrite('./map.jpg', superimposed_img)

## simple attention map visualization

In [None]:
from torch.nn import functional as F
import numpy as np
from util.plot_utils import inverse_transform
from datasets.data_prefetcher import data_prefetcher

In [None]:
def visulize_attention_ratio(img_path, attention_mask, ratio=0.5, cmap="jet"):
    """
    img_path: 读取图片的位置
    attention_mask: 2-D 的numpy矩阵
    ratio:  放大或缩小图片的比例，可选
    cmap:   attention map的style，可选
    """
    print("load image from: ", img_path)
    # load the image
    img = Image.open(img_path, mode='r')
    img_h, img_w = img.size[0], img.size[1]
    plt.subplots(nrows=1, ncols=1, figsize=(0.02 * img_h, 0.02 * img_w))

    # scale the image
    img_h, img_w = int(img.size[0] * ratio), int(img.size[1] * ratio)
    img = img.resize((img_h, img_w))
    plt.imshow(img, alpha=1)
    plt.axis('off')
    
    # normalize the attention mask
    mask = cv2.resize(attention_mask, (img_h, img_w))
    normed_mask = mask / mask.max()
    normed_mask = (normed_mask * 255).astype('uint8')
    plt.imshow(normed_mask, alpha=0.5, interpolation='nearest', cmap=cmap)

In [None]:
def plot_results(feature_map, boxes, labels):
    '''
    feature_map: encoder feature map (feat_dim, h, w)
    
    boxes: gt bounding boxes [bs] (num_proposals, 4)
    
    labels: gt labels [bs][num_proposals]
    
    '''
    CLASSES = ['person','car','train','rider','truck','motorcycle','bicycle', 'bus'] # whole set

    plt.figure(figsize=(feature_map.shape[-2],feature_map.shape[-1]))
    plt.imshow(feature_map[0].max()[0])

    ax = plt.gca()
    
    for box, label in zip(boxes, labels):
        class_idx = label
        
        for (xmin, ymin, xmax, ymax) in box:

            ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                    fill=False, color='g', linewidth=1))

            text = f'{CLASSES[class_idx-1]}: {class_idx:0.2f}'

            ax.text(xmin, ymin, text, fontsize=15,
                    bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    plt.close()

In [None]:
B = cfg.TRAIN.BATCH_SIZE

# get prototypes
# file_name = 'exps_bs4_retrain/test1_bg_and_fixed_thresh_calibrated_balanced_weight/memory_prototypes/ema_prototypes_epoch_0090.pt'
file_name = 'exps_bs4_retrain/test1_bg_and_fixed_thresh_calibrated_balanced_weight/memory_prototypes/ema_prototypes_epoch_0118.pt'


prototypes = torch.load(file_name) # (num_class, feat_dim)
prototypes = prototypes.cuda()
src_prototype = prototypes[0] # (8, 256)
tgt_prototype = prototypes[1] # (8, 256)

In [None]:
### BUG: DAOD dataset makes batch index a list type when applying iter() on the dataloader
# prefetch data
# prefetcher = data_prefetcher(data_loader_train, device, prefetch=True)
# samples, targets = prefetcher.next() # samples have been transformed at this stage

In [None]:
# subset
data_loader_train = iter(data_loader_train_subset)
samples_and_targets = next(data_loader_train)
samples = samples_and_targets[0]
targets = samples_and_targets[1]

In [None]:
samples.tensors.shape, targets.__len__()

In [None]:
### send data to cuda devices
sample = samples.to(device) # tensor, need to be a nested tensor before foward pass
target = [{k: v.to(device) for k, v in t.items()} for t in targets]

In [None]:
### get target gt boxes for visualization
from util import box_ops

target_boxes_list = []
target_labels_list = []
### get proposal boxes
for batch_idx in range(B//2, B, 1):
    target_gt_boxes = target[batch_idx]['boxes']
    target_gt_labels = target[batch_idx]['labels'].tolist()
    target_gt_scores = torch.ones(target_gt_boxes.shape[0]).cuda()
    
    boxes_rescaled = box_ops.box_cxcywh_to_xyxy(target_gt_boxes)
    # and from relative [0, 1] to absolute [0, height] coordinates
    img_sizes = target[batch_idx]["size"]
    img_h, img_w = img_sizes.unbind(0)

    # since box tensor is (x,y,x,y)
    scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=0)
    
    # scale boxes back to original sizes
    for b in range(boxes_rescaled.shape[0]):
        boxes_rescaled[b] *= scale_fct[0] # batch_size = 1, one image

    target_boxes_list.append(boxes_rescaled)
    target_labels_list.append(target_gt_labels)
#     target_scores, _ = torch.max(outputs_class_conf[batch_idx][keep_tmp], dim=1)
#     target_scores_list.append(target_scores)

In [None]:
### forward
outputs = model(sample, target, 1, 1, 1)
out, features, memory, hs, _ = outputs

In [None]:
memory.shape

In [None]:
memory_feat = memory[-1] # use last sample
memory_feat = memory_feat.unsqueeze(0)

In [None]:
memory_feat.shape

In [None]:
# class_prototype_src = src_prototype[1] # (,256) car prototype
# class_prototype_tgt = tgt_prototype[1]

class_prototype_src = src_prototype[0] # (,256) person prototype
class_prototype_tgt = tgt_prototype[0] 

In [None]:
### compute similarity
# class_prototype_src_reshaped = class_prototype_src.expand(2,1,class_prototype_src.shape[-1]) # (bs, feat_dim, token_num)
# memory_reshaped = memory.flatten(2,3).permute(0,2,1)
# attn_mask = torch.matmul(memory_reshaped, class_prototype_src_reshaped.transpose(2,1)) # memory: (bs, feat_dim, token_num)
# attn_mask = attn_mask.squeeze(-1)

In [None]:
### use conv2d to compute similarity

class_filter_src = class_prototype_src.view(1, class_prototype_src.shape[-1], 1, 1) # (num_class, feat, h, w)
class_filter_tgt = class_prototype_tgt.view(1, class_prototype_tgt.shape[-1], 1, 1) # (num_class, feat, h, w)

# memory = memory.squeeze(-1) #  (bs, token_num, feat_dim, 1)

attn_mask_src = F.conv2d(memory_feat, class_filter_src) # (1, h, w)
attn_mask_tgt = F.conv2d(memory_feat, class_filter_tgt) # (1, h, w)

attn_mask_src = attn_mask_src.squeeze(0).squeeze(0) # (h,w)
attn_mask_tgt = attn_mask_tgt.squeeze(0).squeeze(0) # (h,w)

# # w/ src prototype
# # attn_mask_source_image = attn_mask_src[0].squeeze()
# attn_mask_target_image = attn_mask_src[1].squeeze()

# # w/ target prototype
# # attn_mask_source_image_tgt = attn_mask_tgt[0].squeeze()
# attn_mask_target_image_tgt = attn_mask_tgt[1].squeeze()

In [None]:
attn_mask_src.max(), attn_mask_src.min()

In [None]:
attn_mask_tgt.shape, attn_mask_src.shape

In [None]:
import cv2

# plot preprocessed train images
unmasked_samples = sample.tensors[-1][:, :target[1]['size'][0], :target[1]['size'][1]]
inverted_image_tensors = inverse_transform(unmasked_samples)
inverted_image_tensors = inverted_image_tensors.permute(1,2,0)
inverted_image_tensors = inverted_image_tensors.cpu().detach().numpy() # ok

In [None]:
# convert to numpy
attn_mask_src_numpy = attn_mask_src.detach().cpu().numpy()
attn_mask_tgt_numpy = attn_mask_tgt.detach().cpu().numpy()

In [None]:
# try uin8
inverted_image_tensors_uin8 = np.uint8(inverted_image_tensors*255)

In [None]:
# resize heat map
heatmap_src = cv2.resize(attn_mask_src_numpy, (inverted_image_tensors.shape[1], inverted_image_tensors.shape[0]))
heatmap_src = np.uint8(255 * heatmap_src)
heatmap_src = cv2.applyColorMap(heatmap_src, cv2.COLORMAP_JET)

In [None]:
# resize heat map
heatmap_tgt = cv2.resize(attn_mask_tgt_numpy, (inverted_image_tensors.shape[1], inverted_image_tensors.shape[0]))
heatmap_tgt = np.uint8(255 * heatmap_tgt)
heatmap_tgt = cv2.applyColorMap(heatmap_tgt, cv2.COLORMAP_JET)

In [None]:
# need to convert both type into numpy array, otherwise there will be a concat error
superimposed_img_src = heatmap_src * 0.4 + inverted_image_tensors_uin8
cv2.imwrite('./target_img_w_src_proto.jpg', superimposed_img_src)

# plt.figure(figsize=(30, 50))
# plt.imshow(superimposed_img)

In [None]:
# need to convert both type into numpy array, otherwise there will be a concat error
superimposed_img_tgt = heatmap_tgt * 0.4 + inverted_image_tensors_uin8
cv2.imwrite('./target_img_w_tgt_proto.jpg', superimposed_img_tgt)

# plt.figure(figsize=(30, 50))
# plt.imshow(superimposed_img)

In [None]:
# visualize heatmap on original image
# import cv2
# img = cv2.imread('./data/Elephant/data/05fig34.jpg')
# heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
# heatmap = np.uint8(255 * heatmap)
# heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
# superimposed_img = heatmap * 0.4 + img
# cv2.imwrite('./map.jpg', superimposed_img)

In [None]:
memory.shape

In [None]:
def visualize_attn(attn_mask):
    # normalize attention mask
    attn_mask_normalized = attn_mask/attn_mask.max()
    attn_mask_visible = attn_mask_normalized.cpu().detach().numpy()
    # attn_mask = (attn_mask * 255).cpu().detach().numpy().astype('uint8')

#     cmap = 'jet'
    plt.figure(figsize=(10,20))
#     plt.imshow(attn_mask_visible, alpha=0.9, interpolation='nearest', cmap=cmap)
    plt.imshow(attn_mask_visible)

In [None]:
visualize_attn(attn_mask[1].reshape(memory.shape[-2], memory.shape[-1]))

In [None]:
# original feature map from pretrained AQT
visualize_attn(memory[1].max(0)[0])

In [None]:
def plot_boxes(feature_map, boxes, labels):
    '''
    feature_map: encoder feature map (feat_dim, h, w)
    
    boxes: gt bounding boxes [bs] (num_proposals, 4)
    
    labels: gt labels [bs][num_proposals]
    
    '''
    CLASSES = ['person','car','train','rider','truck','motorcycle','bicycle', 'bus'] # whole set
    
    # feature_map[0]: source feature map
    # feature_map[1]: target feature map
    
    feature_map_np = feature_map[0].max(0)[0].cpu().detach().numpy() # convert to numpy
    plt.figure(figsize=(feature_map.shape[-2],feature_map.shape[-1]))
    plt.imshow(feature_map_np)

    ax = plt.gca()
    
    for box, label in zip(boxes, labels):
        class_idx = label
        import pdb; pdb.set_trace()
        for box_i in range(box.shape[0]):
            
            xmin = box[box_i][0].cpu().detach().numpy()
            ymin = box[box_i][1].cpu().detach().numpy()
            xmax = box[box_i][2].cpu().detach().numpy()
            ymax = box[box_i][3].cpu().detach().numpy()
            
            ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                    fill=False, color='g', linewidth=1))

            text = f'{CLASSES[class_idx-1]}: {class_idx:0.2f}'

            ax.text(xmin, ymin, text, fontsize=15,
                    bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    plt.close()

In [None]:
plot_boxes(memory, target_boxes_list, target_labels_list)

In [None]:
# original feature map from pretrained AQT
visualize_attn(memory[0].max(0)[0])

## visualize images

In [None]:
# plot preprocessed train images
unmasked_samples = sample.tensors[1][:, :target[1]['size'][0], :target[1]['size'][1]]
inverted_image_tensors = inverse_transform(unmasked_samples)
inverted_image_tensors = inverted_image_tensors.permute(1,2,0)
inverted_image_tensors = inverted_image_tensors.cpu().detach()
plt.figure(figsize=(30, 50))
plt.imshow(inverted_image_tensors)

In [None]:
unmasked_samples = sample.tensors[0][:, :target[1]['size'][0], :target[1]['size'][1]]
inverted_image_tensors = inverse_transform(unmasked_samples)
inverted_image_tensors = inverted_image_tensors.permute(1,2,0)
inverted_image_tensors = inverted_image_tensors.cpu().detach()
plt.figure(figsize=(30, 50))
plt.imshow(inverted_image_tensors)

In [None]:
# source prototype on target image
visualize_attn(attn_mask_target_image)

In [None]:
# target prototype on target image
visualize_attn(attn_mask_target_image_tgt)

In [None]:
sample.shape

In [None]:
unmasked_samples = sample[1][:, :target[1]['size'][0], :target[1]['size'][1]]
inverted_image_tensors = inverse_transform(unmasked_samples)
inverted_image_tensors = inverted_image_tensors.permute(1,2,0)
inverted_image_tensors = inverted_image_tensors.cpu().detach()
plt.figure(figsize=(30, 50))
plt.imshow(inverted_image_tensors)

In [None]:
# view corresponding sample image

unpadded_samples = samples.tensors[0][:, :targets[0]['size'][0], :targets[0]['size'][1]]
average_image = unpadded_samples.mean(0)
inverted_image_tensors = inverse_transform(unpadded_samples)
inverted_image_tensors = inverted_image_tensors.permute(1,2,0)
plt.figure(figsize=(30, 50))
plt.imshow(inverted_image_tensors.detach())
plt.axis('off')
plt.savefig(str('./visualization/image/image_{}.png').format(targets[0]['image_id'].cpu().item()), bbox_inches='tight')
plt.close()

## train TSNE

In [28]:
# get batch index and query index
def _get_src_permutation_idx(indices):
    # permute predictions following indices
    batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
    src_idx = torch.cat([src for (src, _) in indices])
    return batch_idx, src_idx

def _get_tgt_permutation_idx(indices):
    # permute targets following indices
    batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
    tgt_idx = torch.cat([tgt for (_, tgt) in indices])
    return batch_idx, tgt_idx

In [30]:
# model.eval() # set to eval mode to get both source and target labels

total_epoch = 0
cur_iter_num = 0
total_iter_num = 0 
# length of dataloader is 250
target_features_1 = []
target_features_2 = []
target_features_new = []
tgt_label_1 = []
tgt_label_2 = []
tgt_label_new = []

boxes = []

# data and labels for inference
for samples, targets in tqdm(data_loader_val, desc='running inference'):
    samples = samples.to(device)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    outputs = model(samples, targets, cur_iter_num, total_iter_num, total_epoch) # in debug mode, outputs would be a tuple
    
    # out, features, memory, hs = outputs
    out = outputs

    loss_dict, indices = criterion(outputs, targets, mode='test', scale='single')
    
    # get matched query gt indices
    # two sets of indices for train mode
    idx_1 = _get_src_permutation_idx(indices) # get batch and permuted query position
    
    # target_classes: (2,300,9)
    target_classes = torch.full((2,300), 9,
                                dtype=torch.int64, device=out['pred_logits'].device)

    
    # all class labels across all samples within a batch
    target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)])
    
    target_classes[idx_1] = target_classes_o

    # torch.Size([300])
    # for eval mode, only target data is loaded
    target_label_1 = target_classes[0]
    
    # exclude background
    tgt_query_pos_1 = torch.where(target_label_1!=9)
    
    # get correponding labels for each non-background query
    target_gt_1 = target_label_1[tgt_query_pos_1[0]]
    
    # you may visualise subset of classes here by excluding the classes
#     target_gt_1_idx = torch.where(target_gt_1!=1)
#     target_gt_1_new = target_gt_1[target_gt_1_idx]
#     import pdb; pdb.set_trace()
    
    # get src query embeddings
    target_out_1 = torch.index_select(hs[-1][0], 0, tgt_query_pos_1[0]) #tgt_query_pos_1 is a tuple, thus need to index element
#     target_gt_1 = torch.index_select(hs[-1][0], 0, target_gt_1_idx[0])
#     target_gt_1 = torch.index_select(, 0, target_gt_1[0])
#     import pdb; pdb.set_trace()

    # store 
    target_features_1.extend(target_out_1.cpu().detach().numpy())
    target_features_new.extend(target_gt_1.cpu().detach().numpy())
    
    
    # accumulate for all samples
    tgt_label_1.extend(target_gt_1.cpu().detach().numpy())
#     tgt_label_new.extend(target_gt_1_new.cpu().detach().numpy())
    
    # target boxes
#     boxes.append(rescaled_boxes[1].cpu().detach().numpy())

target_features_1 = np.stack(target_features_1)
# target_features_new = np.stack(target_features_new)
tgt_label_1 = np.stack(tgt_label_1)
# tgt_label_new = np.stack(tgt_label_new)

running inference:   0%|                                                   | 0/125 [00:01<?, ?it/s]


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
from itertools import groupby

# check lengths
sum([len(list(group)) for key, group in groupby(sorted(tgt_label_1))])
len([len(list(group)) for key, group in groupby(sorted(tgt_label_1))])
len(target_features_1), len(tgt_label_1)

In [None]:
# train tsne
# tsne_tgt_new = TSNE(n_components=2).fit_transform(new_target_features_1)
tsne_tgt_new = TSNE(n_components=2).fit_transform(target_features_1)

In [None]:
# scale and move the coordinates so they fit [0; 1] range

def scale_to_01_range(x):

    # compute the distribution range
    value_range = (np.max(x) - np.min(x))

   # move the distribution so that it starts from zero
   # by extracting the minimal value from all its values
    starts_from_zero = x - np.min(x)
    # make the distribution fit [0; 1] by dividing by its range

    return starts_from_zero / value_range

## tsne on tgt query embeddings

In [None]:
tsne_tgt_new.shape

In [None]:
# get values for x and y
tx_tgt = tsne_tgt_new[:, 0]
ty_tgt = tsne_tgt_new[:, 1]

# scale to range between 0 and 1
tx_tgt = scale_to_01_range(tx_tgt)
ty_tgt = scale_to_01_range(ty_tgt)

## restrict number of samples

In [None]:
# from itertools import groupby

# number_samples= np.sort([len(list(group)) for key, group in groupby(sorted(tgt_label_1))])[1]

# # mutable, so write in different loops
# new_tgt_label_1 = []
# for key, group in groupby(sorted(tgt_label_1)):
# #     print(list(group)[:number_samples])
# #     length = len(list(group))
# #     new_target_features_1.extend(target_features_1[:length][:number_samples])
#     new_tgt_label_1.extend(list(group)[:number_samples])

# # new_tgt_label_1 = []
# new_target_features_1 = []
# for key, group in groupby(sorted(tgt_label_1)):
# #     print(list(group)[:number_samples])
#     length = len(list(group))
#     new_target_features_1.extend(target_features_1[:length][:number_samples])
# #     new_tgt_label_1.extend(list(group)[:number_samples])

## contrastive learning

In [None]:
# define an extra label for prototype tokens
colors_per_class = {'0': 'r', '1': 'g', '2': 'b',
          '3':'c', '4': 'm', '5': 'y',
                    '6':'k', '7':'pink', '8':'w'}


colors_per_class = {'1': 'g', '2': 'b', '4': 'm', '7':'pink'}

# fir distinuishing decoder embeddings and prototypes 
marker = ['*']

# need to use a different marker for prototype tokens

# labels = []

# need to know what labels are assigned to each feature in order to plot them
fig = plt.figure()
ax = fig.add_subplot(111)

# plot a cluster one by one
for label in colors_per_class:

    indices = [i for i, l in enumerate(tgt_label_1) if l == int(label)]
    # use the indices of the current label to index the corresponding feature embeddings
    current_tx = np.take(tx_tgt, indices)
    current_ty = np.take(ty_tgt, indices)

    # add a scatter plot with the corresponding color and label
    ax.scatter(current_tx, current_ty, marker=marker[0], c=colors_per_class[label], label=label)
        
    
# build a legend using the labels we set previously
ax.legend(loc='best')
# finally, show the plot
plt.show()

## baseline

In [None]:
# define an extra label for prototype tokens
# colors_per_class = {'0': 'r', '1': 'g', '2': 'b',
#           '3':'c', '4': 'm', '5': 'y',
#                     '6':'k', '7':'pink', '8':'w'}


colors_per_class = {'1': 'g', '2': 'b', '4': 'm', '7':'pink'}

# fir distinuishing decoder embeddings and prototypes 
marker = ['*']

# need to use a different marker for prototype tokens

# labels = []

# need to know what labels are assigned to each feature in order to plot them
fig = plt.figure()
ax = fig.add_subplot(111)

# plot a cluster one by one
for label in colors_per_class:

    indices = [i for i, l in enumerate(tgt_label_1) if l == int(label)]
    # use the indices of the current label to index the corresponding feature embeddings
    current_tx = np.take(tx_tgt, indices)
    current_ty = np.take(ty_tgt, indices)

    # add a scatter plot with the corresponding color and label
    ax.scatter(current_tx, current_ty, marker=marker[0], c=colors_per_class[label], label=label)
        
    
# build a legend using the labels we set previously
ax.legend(loc='best')
# finally, show the plot
plt.show()

In [None]:
tgt_label_1.shape, tgt_label_2.shape

## plot memory embeddings

In [None]:
# scale and move the coordinates so they fit [0; 1] range

def scale_to_01_range(x):

    # compute the distribution range
    value_range = (np.max(x) - np.min(x))

   # move the distribution so that it starts from zero
   # by extracting the minimal value from all its values
    starts_from_zero = x - np.min(x)
    # make the distribution fit [0; 1] by dividing by its range

    return starts_from_zero / value_range

In [None]:
# w/o positional update
embeddings = torch.load('exps/200epochs/r50_uda_multi_scale_multi_layer_memory_size_40/keys0169.pt')
embeddings_np = embeddings.cpu().numpy()
# perplexity: how to balance between local/global aspects of your data
# it is also about the number of close neighbors each point has
tsne = TSNE(n_components=2, perplexity=2).fit_transform(embeddings_np)

tx = tsne[:, 0]
ty = tsne[:, 1]
tx = scale_to_01_range(tx)
ty = scale_to_01_range(ty)

# need to know what labels are assigned to each feature in order to plot them
fig = plt.figure()
ax = fig.add_subplot(111)

# add a scatter plot with the corresponding color and label
ax.scatter(tx, ty)
plt.show()

In [None]:
# w/o positional update
embeddings = torch.load('exps/200epochs/r50_uda_multi_scale_multi_layer_memory_partitioned/keys0169.pt')
embeddings_np = embeddings.cpu().numpy()
# perplexity: how to balance between local/global aspects of your data
# it is also about the number of close neighbors each point has
tsne = TSNE(n_components=2, perplexity=2).fit_transform(embeddings_np)

tx = tsne[:, 0]
ty = tsne[:, 1]
tx = scale_to_01_range(tx)
ty = scale_to_01_range(ty)

# need to know what labels are assigned to each feature in order to plot them
fig = plt.figure()
ax = fig.add_subplot(111)

# add a scatter plot with the corresponding color and label
ax.scatter(tx, ty)
plt.show()

In [None]:
# colors_per_class = {'1': np.array([0.000, 0.447, 0.741]), '2': [0.850, 0.325, 0.098], '3': [0.929, 0.694, 0.125],
#           '4': [0.494, 0.184, 0.556], '5': [0.466, 0.674, 0.188], '6': [0.301, 0.745, 0.933],
#                     '7':[0.453, 0.233, 0.763], '8':[0.333, 0.674, 0.000]}


colors_per_class = ['r', 'g', 'b', 'c', 'm', 'y', 'k', 'pink', 'orange']



# supposedly this list should indicate the class id for each sample, but here we have a set
labels = ['1','2','3','4','5','6','7','8']