## Setup

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
import numpy as np
import random
import time

from PIL import Image
import requests
import matplotlib.pyplot as plt
# Mount Google Drive so the project checkout stored there is reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Work from the RelTR project root; the relative paths used later (e.g. 'ckpt/', 'data/vg/')
# and the project-local imports below are resolved against this directory.
%cd "/content/drive/MyDrive/DeepLearning/CS7643 Deep Learning Project/RelTR"
# Enable IPython's autoreload extension in mode 2 so edited modules are re-imported
# before each cell execution.
%load_ext autoreload
%autoreload 2
from models.backbone import Backbone, Joiner
from models.position_encoding import PositionEmbeddingSine
from models.transformer import Transformer
from models.reltr import RelTR

from lib.evaluation.sg_eval import BasicSceneGraphEvaluator
import torch
import torch.nn.functional as F
from torchvision.ops import box_iou
from collections import defaultdict
import numpy as np
from models.matcher import build_matcher
from deecap_for_metrics import DeecapRelTR

# Entity (object) category names. Index 0 is the 'N/A' placeholder.
# NOTE(review): presumably index-aligned with the entity logits of the model built
# below with num_classes=151 — confirm before reordering or editing entries.
CLASSES = [ 'N/A', 'airplane', 'animal', 'arm', 'bag', 'banana', 'basket', 'beach', 'bear', 'bed', 'bench', 'bike',
                'bird', 'board', 'boat', 'book', 'boot', 'bottle', 'bowl', 'box', 'boy', 'branch', 'building',
                'bus', 'cabinet', 'cap', 'car', 'cat', 'chair', 'child', 'clock', 'coat', 'counter', 'cow', 'cup',
                'curtain', 'desk', 'dog', 'door', 'drawer', 'ear', 'elephant', 'engine', 'eye', 'face', 'fence',
                'finger', 'flag', 'flower', 'food', 'fork', 'fruit', 'giraffe', 'girl', 'glass', 'glove', 'guy',
                'hair', 'hand', 'handle', 'hat', 'head', 'helmet', 'hill', 'horse', 'house', 'jacket', 'jean',
                'kid', 'kite', 'lady', 'lamp', 'laptop', 'leaf', 'leg', 'letter', 'light', 'logo', 'man', 'men',
                'motorcycle', 'mountain', 'mouth', 'neck', 'nose', 'number', 'orange', 'pant', 'paper', 'paw',
                'people', 'person', 'phone', 'pillow', 'pizza', 'plane', 'plant', 'plate', 'player', 'pole', 'post',
                'pot', 'racket', 'railing', 'rock', 'roof', 'room', 'screen', 'seat', 'sheep', 'shelf', 'shirt',
                'shoe', 'short', 'sidewalk', 'sign', 'sink', 'skateboard', 'ski', 'skier', 'sneaker', 'snow',
                'sock', 'stand', 'street', 'surfboard', 'table', 'tail', 'tie', 'tile', 'tire', 'toilet', 'towel',
                'tower', 'track', 'train', 'tree', 'truck', 'trunk', 'umbrella', 'vase', 'vegetable', 'vehicle',
                'wave', 'wheel', 'window', 'windshield', 'wing', 'wire', 'woman', 'zebra']

# Relationship (predicate) names. Index 0 is the '__background__' entry.
# NOTE(review): presumably index-aligned with the relation logits (num_rel_classes=51 below) — confirm.
REL_CLASSES = ['__background__', 'above', 'across', 'against', 'along', 'and', 'at', 'attached to', 'behind',
                'belonging to', 'between', 'carrying', 'covered in', 'covering', 'eating', 'flying in', 'for',
                'from', 'growing on', 'hanging from', 'has', 'holding', 'in', 'in front of', 'laying on',
                'looking at', 'lying on', 'made of', 'mounted on', 'near', 'of', 'on', 'on back of', 'over',
                'painted on', 'parked on', 'part of', 'playing', 'riding', 'says', 'sitting on', 'standing on',
                'to', 'under', 'using', 'walking in', 'walking on', 'watching', 'wearing', 'wears', 'with']

from types import SimpleNamespace

# Use the GPU when PyTorch can see one; otherwise run on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Run configuration exposed as attributes (args.lr, args.dataset, ...), built directly
# as a namespace. Keyword order is kept so vars(args) iterates in the same order.
args = SimpleNamespace(
    # loss weights
    bbox_loss_coef=5,
    giou_loss_coef=2,
    rel_loss_coef=1,
    aux_loss=False,
    # matcher costs
    set_cost_class=1,
    set_cost_bbox=5,
    set_cost_giou=2,
    set_iou_threshold=0.7,
    # dataset / runtime
    dataset='vg',
    device=device,
    eos_coef=0.1,
    seed=42,
    # optimiser settings
    lr_backbone=1e-5,
    lr=1e-4,
    lr_drop=200,
    weight_decay=1e-4,
    # data locations (relative to the working directory)
    ann_path='data/vg/',
    img_folder='data/vg/images',
    # evaluation loop settings
    eval=True,
    batch_size=10,
    epochs=10,
    num_workers=2,
)

from models.backbone import build_backbone
from models.matcher import build_matcher
from models.transformer import build_transformer
from models.reltr import SetCriterion, PostProcess
from datasets import build_dataset
import util.misc as utils
from torch.utils.data import DataLoader, DistributedSampler
import datasets
import util.misc as utils
from datasets import build_dataset, get_coco_api_from_dataset
from datetime import datetime
from engine import evaluate_rel_batch
from collections import Counter

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1qHoaQqsa_84wKp4XFuiBiBUnxjcdMWJm/CS7643 Deep Learning Project/RelTR


## Load Model

In [None]:
def build_model_for_eval(args):
  """Build the early-exit model, its loss criterion and post-processors for evaluation.

  Wraps the module-level ``reltr_model`` (which must already exist when this is
  called) in ``DeecapRelTR`` and loads the wrapper weights from
  ``'model weights/avg_mix_50_0.7.pth'``.

  Args:
    args: configuration namespace. Reads ``dataset``, ``device``, ``aux_loss``,
      ``bbox_loss_coef``, ``giou_loss_coef``, ``rel_loss_coef`` and ``eos_coef``;
      it is also passed to ``build_matcher``. ``dec_layers`` is read only when
      ``aux_loss`` is true.

  Returns:
    ``(model, criterion, postprocessors)`` with ``model`` and ``criterion``
    moved to ``args.device`` and ``postprocessors`` equal to
    ``{'bbox': PostProcess()}``.
  """
  num_classes = 151 if args.dataset != 'oi' else 289 # some entity categories in OIV6 are deactivated.
  num_rel_classes = 51 if args.dataset != 'oi' else 31

  device = torch.device(args.device)

  matcher = build_matcher(args)

  model = DeecapRelTR(reltr_model)
  # Map the checkpoint onto the configured device rather than a hard-coded 'cuda',
  # so the weights also load on CPU-only machines.
  state_dict = torch.load('model weights/avg_mix_50_0.7.pth', map_location=device)
  model.load_state_dict(state_dict)

  model.to(device)

  weight_dict = {
      'loss_ce': 1,
      'loss_bbox': args.bbox_loss_coef,
      'loss_giou': args.giou_loss_coef,
      'loss_rel': args.rel_loss_coef,
  }

  # TODO this is a hack
  # Replicate every loss weight once per auxiliary decoder layer, suffixed with the layer index.
  # NOTE(review): the notebook's args does not define `dec_layers`; this branch is only
  # reachable with aux_loss=True, in which case dec_layers must be supplied.
  if args.aux_loss:
      aux_weight_dict = {}
      for i in range(args.dec_layers - 1):
          aux_weight_dict.update({k + f'_{i}': v for k, v in weight_dict.items()})
      weight_dict.update(aux_weight_dict)

  losses = ['labels', 'boxes', 'cardinality', "relations"]

  criterion = SetCriterion(num_classes, num_rel_classes, matcher=matcher, weight_dict=weight_dict,
                            eos_coef=args.eos_coef, losses=losses)
  criterion.to(device)
  postprocessors = {'bbox': PostProcess()}
  return model, criterion, postprocessors

# Assemble the RelTR network by hand: positional encoding + ResNet-50 backbone,
# joined into a single feature extractor.
position_embedding = PositionEmbeddingSine(128, normalize=True)
# NOTE(review): the three positional booleans are not named here; check Backbone's
# signature for their meaning.
backbone = Backbone('resnet50', False, False, False)
backbone = Joiner(backbone, position_embedding)
backbone.num_channels = 2048

# 6-layer encoder / 6-layer decoder transformer; intermediate decoder outputs are
# requested via return_intermediate_dec=True.
transformer = Transformer(d_model=256, dropout=0.1, nhead=8,
                          dim_feedforward=2048,
                          num_encoder_layers=6,
                          num_decoder_layers=6,
                          normalize_before=False,
                          return_intermediate_dec=True)

reltr_model = RelTR(backbone, transformer, num_classes=151, num_rel_classes = 51,
              num_entities=100, num_triplets=200)

# The checkpoint is pretrained on Visual Genome
device = "cuda" if torch.cuda.is_available() else "cpu"
# weights_only=False lets torch.load unpickle arbitrary Python objects; only use it
# with checkpoint files from a trusted source.
ckpt = torch.load('ckpt/checkpoint0149.pth', map_location=torch.device(device), weights_only=False)
reltr_model.load_state_dict(ckpt['model'])
# Freeze the base model: eval mode and no gradients for any of its parameters.
reltr_model.eval()
for param in reltr_model.parameters():
    param.requires_grad = False

utils.init_distributed_mode(args)

# Rebind `device` from the plain string above to a torch.device taken from args.
device = torch.device(args.device)

# fix the seed for reproducibility
# The process rank is added so each process gets a different seed.
seed = args.seed + utils.get_rank()
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# build_model_for_eval wraps the global `reltr_model` defined above.
model, criterion, postprocessors = build_model_for_eval(args)
model.to(device)

Not using distributed mode
loading annotations into memory...
Done (t=0.75s)
creating index...
index created!


# Load Dataset

In [None]:
# Build the 'val' split described by args and iterate it in a fixed, sequential order.
dataset_test = build_dataset(image_set='val', args=args)

sampler_test = torch.utils.data.SequentialSampler(dataset_test)

# small_indices = list(range(20))  # <-- 20 = no of samples to test.
# sampler_test = torch.utils.data.SubsetRandomSampler(small_indices)

# drop_last=False keeps the final, possibly smaller, batch so every sample is evaluated.
data_loader_test = DataLoader(dataset_test, args.batch_size, sampler=sampler_test,
                             drop_last=False, collate_fn=utils.collate_fn, num_workers=args.num_workers)

# Ground-truth API object; passed to evaluate() as `base_ds`.
base_ds = get_coco_api_from_dataset(dataset_test)

In [None]:
@torch.no_grad()  # modification of engine.py in https://github.com/yrcong/RelTR
def evaluate(model, criterion, postprocessors, data_loader, base_ds, device, args, imitate=True):
    """Run one evaluation pass over ``data_loader`` and collect early-exit statistics.

    Args:
        model: callable returning ``(outputs, entropy, batch_exit_layers)``. It is
            called with ``layer_exit=``/``imitate=`` when ``args.exit`` exists, with
            ``confidence_threshold=``/``imitate=`` when only
            ``args.confidence_threshold`` exists, and with the samples alone otherwise.
        criterion: loss module; its ``weight_dict`` scales the logged losses.
        postprocessors: mapping whose ``'bbox'`` entry converts outputs for COCO evaluation.
        data_loader: yields ``(samples, targets)`` batches; for ``dataset == 'vg'`` with
            ``args.eval`` set, ``data_loader.dataset.rel_categories`` is also read.
        base_ds: ground-truth object handed to ``CocoEvaluator``.
        device: device the samples and target tensors are moved to.
        args: namespace; reads ``dataset``, ``eval`` and the optional ``exit`` and
            ``confidence_threshold`` attributes (``exit`` wins when both are present).
        imitate: forwarded to the model whenever an exit criterion is supplied.

    Returns:
        ``(stats, coco_evaluator, avg_entropy, all_exit_layers)``: the global averages
        of all logged meters (plus ``'coco_eval_bbox'`` when a bbox post-processor is
        present), the COCO evaluator, the mean of the per-batch ``entropy`` values
        returned by the model (``0`` if the loader is empty), and the concatenation
        of every batch's ``batch_exit_layers``.
    """
    # These names are not imported by the notebook's setup cell, so bring them into
    # scope here. NOTE(review): module paths follow the upstream RelTR engine.py —
    # confirm they match this checkout.
    from datasets.coco_eval import CocoEvaluator
    from lib.evaluation.sg_eval import calculate_mR_from_evaluator_list

    model.eval()
    criterion.eval()

    if hasattr(args, 'exit'):
      print("Evaluation for exit at layer-", args.exit)

    metric_logger = utils.MetricLogger(delimiter="  ")
    for meter_name in ('class_error', 'sub_error', 'obj_error', 'rel_error'):
        metric_logger.add_meter(meter_name, utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'

    # initialize evaluator
    if args.dataset == 'vg':
        evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=False)
        if args.eval:
            # One evaluator per predicate (index 0 is skipped) for the mean-recall report.
            evaluator_list = [
                (index, name, BasicSceneGraphEvaluator.all_modes())
                for index, name in enumerate(data_loader.dataset.rel_categories)
                if index != 0
            ]
        else:
            evaluator_list = None
    else:
        # Only needed on the non-'vg' path; imported lazily for that reason.
        # NOTE(review): paths taken from upstream RelTR — confirm for this checkout.
        from engine import evaluate_rel_batch_oi
        from lib.openimages_evaluation import task_evaluation_sg
        all_results = []

    iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors)
    coco_evaluator = CocoEvaluator(base_ds, iou_types)
    all_exit_layers = []
    # Running sum and count so that avg_entropy is a true mean over batches for every
    # calling mode, including the fixed-exit-layer mode.
    entropy_total = 0
    num_batches = 0
    for samples, targets in metric_logger.log_every(data_loader, 100, header):

        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        if hasattr(args, 'exit'):
            outputs, entropy, batch_exit_layers = model(samples, layer_exit = args.exit, imitate=imitate)
        elif hasattr(args, 'confidence_threshold'):
            outputs, entropy, batch_exit_layers = model(samples, confidence_threshold = args.confidence_threshold, imitate=imitate)
        else:
            outputs, entropy, batch_exit_layers = model(samples)
        # NOTE(review): assumes `entropy` is a scalar (number or 0-dim tensor) — confirm.
        entropy_total += entropy
        num_batches += 1
        all_exit_layers.extend(batch_exit_layers)

        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {k: v * weight_dict[k]
                                    for k, v in loss_dict_reduced.items() if k in weight_dict}
        loss_dict_reduced_unscaled = {f'{k}_unscaled': v
                                      for k, v in loss_dict_reduced.items()}
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        metric_logger.update(sub_error=loss_dict_reduced['sub_error'])
        metric_logger.update(obj_error=loss_dict_reduced['obj_error'])
        metric_logger.update(rel_error=loss_dict_reduced['rel_error'])

        if args.dataset == 'vg':
            evaluate_rel_batch(outputs, targets, evaluator, evaluator_list)
        else:
            evaluate_rel_batch_oi(outputs, targets, all_results)

        orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
        results = postprocessors['bbox'](outputs, orig_target_sizes)

        res = {target['image_id'].item(): output for target, output in zip(targets, results)}
        if coco_evaluator is not None:
            coco_evaluator.update(res)

    avg_entropy = entropy_total / num_batches if num_batches else 0

    if args.dataset == 'vg':
        evaluator['sgdet'].print_stats()
        # evaluator['sgcls'].print_stats()
        # evaluator['predcls'].print_stats()
    else:
        task_evaluation_sg.eval_rel_results(all_results, 100, do_val=True, do_vis=False)

    if args.eval and args.dataset == 'vg':
        calculate_mR_from_evaluator_list(evaluator_list, 'sgdet')
        calculate_mR_from_evaluator_list(evaluator_list, 'sgcls')
        calculate_mR_from_evaluator_list(evaluator_list, 'predcls')

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)

    stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        if 'bbox' in postprocessors:
            stats['coco_eval_bbox'] = coco_evaluator.coco_eval['bbox'].stats.tolist()

    return stats, coco_evaluator, avg_entropy, all_exit_layers

# Evaluate Average Entropy and Inference Time per Exit Layer

In [None]:
import time
import matplotlib.pyplot as plt

# `args` is a SimpleNamespace, which supports neither item assignment nor item
# deletion. Set the attribute directly and drop `confidence_threshold` only if it is
# present, so this cell can run in any order. evaluate() switches to fixed-layer exit
# when `args.exit` exists.
args.exit = 0
vars(args).pop('confidence_threshold', None)
times, avg_entropies = [], []
layers = [0, 1, 2, 3, 4, 5]

# Time one full evaluation pass per forced exit layer and record the entropy it reports.
for layer in layers:
    if args.eval:
        args.exit = layer
        print(f"Entropy Level: {layer}")
        start = time.time()
        test_stats, coco_evaluator, avg_entropy, all_exit_layers = evaluate(
            model, criterion, postprocessors, data_loader_test, base_ds, device, args
        )
        # Wall-clock time for the whole evaluate() call, not just the model forward pass.
        times.append(time.time() - start)
        avg_entropies.append(avg_entropy)

plt.plot(layers, times, marker='o')
plt.xlabel('Layer'); plt.ylabel('Time (s)')
plt.title('Inference Time vs Exit Layer')
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()

plt.plot(layers, avg_entropies, marker='o', color='orange')
plt.xlabel('Layer'); plt.ylabel('Average Entropy')
plt.title('Average Entropy vs Exit Layer')
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()

# Evaluate Exit Layer Distribution across Entropy Thresholds

In [None]:
import time
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter

# `args` is a SimpleNamespace: use attribute access, and remove `exit` only if present
# so evaluate() takes the confidence-threshold path instead of the fixed-layer path.
args.confidence_threshold = 0
vars(args).pop('exit', None)
exit_distributions = {}
avg_exit_layers = {}
entropies = [0.4, 0.45, 0.5, 0.55, 0.6]

# For each threshold, record the percentage of samples leaving at each of the six
# exit layers and the mean exit layer.
for entropy in entropies:
    if args.eval:
        args.confidence_threshold = entropy
        print(f"Entropy Threshold: {entropy}")
        test_stats, coco_evaluator, avg_entropy, all_exit_layers = evaluate(
            model, criterion, postprocessors, data_loader_test, base_ds, device, args
        )
        counts = Counter(all_exit_layers)
        total = sum(counts.values())
        exit_distributions[entropy] = {layer: counts.get(layer, 0) / total * 100 for layer in range(6)}
        avg_exit_layers[entropy] = sum(layer * count for layer, count in counts.items()) / total

layers = list(range(6))
x = np.arange(len(entropies))
width = 0.5
# Running top of each stacked bar; one entry per threshold.
bottom = np.zeros(len(entropies))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']

fig, ax = plt.subplots(figsize=(7,5))

for idx, layer in enumerate(layers):
    values = [exit_distributions[e].get(layer, 0) for e in entropies]
    ax.bar(x, values, bottom=bottom, width=width, label=f'Layer {layer}', color=colors[idx])
    bottom += values

ax.set_xlabel('Entropy Threshold')
ax.set_ylabel('Percentage of Samples')
ax.set_title('Exit Layer Distribution Across Entropy Thresholds')
ax.set_xticks(x)
ax.set_xticklabels(entropies)
ax.legend(title='Exit Layer')
ax.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

plt.figure(figsize=(6,4))
plt.plot(entropies, [avg_exit_layers[e] for e in entropies], marker='o', color='black')
plt.xlabel('Entropy Threshold')
plt.ylabel('Average Exit Layer')
plt.title('Average Exit Layer vs Entropy Threshold')
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()


# Evaluate Inference Time: Early Exit vs. No Exit

In [None]:
import time

# `args` is a SimpleNamespace, so item deletion raises TypeError; remove the optional
# attributes only if they exist so this cell can run in any order.
vars(args).pop('exit', None)
vars(args).pop('confidence_threshold', None)

# Uses `entropies` defined by the exit-distribution cell above.
for entropy in entropies:
    if args.eval:
        print(f"Entropy Threshold: {entropy}")

        # Evaluate at the threshold being reported (previously a fixed 0.6 was used
        # on every iteration, so all iterations measured the same configuration).
        args.confidence_threshold = entropy
        start = time.time()
        test_stats_exit, coco_evaluator_exit, avg_entropy_exit, all_exit_layers_exit = evaluate(
            model, criterion, postprocessors, data_loader_test, base_ds, device, args
        )
        time_exit = time.time() - start
        print(f"Early Exit Inference Time: {time_exit:.2f} seconds")

        # Baseline run with the threshold set to 0.
        # NOTE(review): presumably a threshold of 0 never triggers an early exit — confirm
        # against DeecapRelTR. This run does not depend on `entropy` and could be hoisted.
        args.confidence_threshold = 0
        start = time.time()
        test_stats_full, coco_evaluator_full, avg_entropy_full, all_exit_layers_full = evaluate(
            model, criterion, postprocessors, data_loader_test, base_ds, device, args
        )
        time_full = time.time() - start
        print(f"Full Model Inference Time: {time_full:.2f} seconds")


# Evaluate Performance With and Without Imitation

In [None]:
import time

# `args` is a SimpleNamespace, so item deletion raises TypeError; remove the optional
# attributes only if they exist so this cell can run in any order.
vars(args).pop('exit', None)
vars(args).pop('confidence_threshold', None)

# Uses `entropies` defined by the exit-distribution cell above.
# Both runs in an iteration share the same threshold so that `imitate` is the only
# difference between them.
for entropy in entropies:
    if args.eval:
        print(f"Entropy Threshold: {entropy}")
        args.confidence_threshold = entropy

        start = time.time()
        test_stats_exit, coco_evaluator_exit, avg_entropy_exit, all_exit_layers_exit = evaluate(
            model, criterion, postprocessors, data_loader_test, base_ds, device, args, imitate=True
        )
        time_exit = time.time() - start
        print(f"With Imitation Inference Time: {time_exit:.2f} seconds")

        # The *_full names are kept from the original cell; they hold the imitate=False run.
        start = time.time()
        test_stats_full, coco_evaluator_full, avg_entropy_full, all_exit_layers_full = evaluate(
            model, criterion, postprocessors, data_loader_test, base_ds, device, args, imitate = False
        )
        time_full = time.time() - start
        print(f"Without Imitation Inference Time: {time_full:.2f} seconds")
