# ResNet50 with HDC
- Here we use the vanilla, unquantized ResNet50 to see how it fares with HDC
- It's an adaptation of the CNN+FSL but with the ResNet50 architecture for the CNN block

# Importing Packages

In [73]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision.models.resnet import ResNet, Bottleneck
from torch.quantization import QuantStub, DeQuantStub
import torch.quantization
import torch.optim as optim
from torchinfo import summary
import torch.nn.functional as F
from collections import defaultdict
from tqdm import tqdm

# GPU Setting

In [3]:
# Pick the compute device: CUDA when available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Build the network (QuantizableResNet50 is defined outside the visible cells).
model = QuantizableResNet50()

# With more than one GPU, wrap the network in DataParallel. Note that the
# wrapper is only applied in this case, so `model` may or may not expose
# `.module` afterwards.
if torch.cuda.device_count() > 1:
    print(f'Using {torch.cuda.device_count()} GPUs')
    model = nn.DataParallel(model)

model = model.to(device)

Using 4 GPUs


# Loading Presaved Model

In [4]:
# Load the saved CIFAR-100 weights, mapping tensors onto the active device.
state_dict = torch.load("vanilla_resnet50_cifar100.pth", map_location=device)

# nn.DataParallel stores the network under a "module." key prefix, while a bare
# model does not. `model` is only wrapped when more than one GPU is present, so
# strip the prefix (a no-op if it is absent) and load into the underlying
# network. That way the same checkpoint loads on multi-GPU, single-GPU and
# CPU-only hosts alike.
nn.modules.utils.consume_prefix_in_state_dict_if_present(state_dict, "module.")
(model.module if isinstance(model, nn.DataParallel) else model).load_state_dict(state_dict)

<All keys matched successfully>

In [7]:
# Switch to evaluation mode before extracting features; this matters for layers
# such as the BatchNorm2d blocks in this network, which behave differently in training.
model.eval()

DataParallel(
  (module): QuantizableResNet50(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): Identity()
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride

# Hook to Extract Avgpool Outputs

In [8]:
# Registry of captured layer outputs, keyed by the name passed to get_activation
activation = {}


def get_activation(name):
    """Build a forward hook that records a layer's output under ``name``.

    The returned callable takes the (module, inputs, output) arguments of a
    forward hook and stores ``output.detach()`` in the module-level
    ``activation`` dict, overwriting any earlier entry for the same name.
    """
    def _capture(_module, _inputs, output):
        # Detach so the stored tensor is not tied to the autograd graph.
        activation[name] = output.detach()

    return _capture

In [None]:
# Capture the output of the `avgpool` layer on every forward pass.
# `model` is only wrapped in nn.DataParallel when several GPUs are present, so
# reach through `.module` only when the wrapper actually exists; otherwise the
# bare model would raise AttributeError on `.module`.
(model.module if isinstance(model, nn.DataParallel) else model).avgpool.register_forward_hook(
    get_activation('avgpool')
)

# Extract Data

In [20]:
# 2. Dataset setup (no batching): samples are pulled one at a time from the
#    CIFAR-100 test split, converted to tensors and nothing else.
# NOTE(review): no Normalize step is applied here — confirm this matches the
# preprocessing the checkpoint was trained with.
transform_test = transforms.Compose([transforms.ToTensor()])
testset = torchvision.datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)

In [21]:
# NOTE(review): none of these three names is read by the cells visible below —
# possibly left over from an earlier extraction approach; confirm before removing.
pooled_outputs = []
n_collected = 0
max_samples = 100

In [47]:
# === 3. Dictionary: class_label -> list of feature tensors ===
N = 100  # number of items per class (cap used by the extraction loop)
# defaultdict(list) creates an empty list the first time a label is accessed,
# so the loop can append without checking whether the key exists.
class_features = defaultdict(list)

In [48]:
# === 4. Walk the test set and bucket avgpool features by class label ===
with torch.no_grad():  # inference only, no autograd bookkeeping needed
    for idx in range(len(testset)):
        image, label = testset[idx]

        # Only run the network while this class still needs samples.
        if len(class_features[label]) < N:
            input_tensor = image.unsqueeze(0).to(device)  # add batch dim: [1, 3, 32, 32]
            _ = model(input_tensor)  # forward pass; the avgpool hook fills `activation`

            pooled = activation['avgpool'].squeeze().cpu()  # [2048]
            class_features[label].append(pooled)

            # Stop early once all 100 classes are present and each holds N vectors
            if len(class_features) == 100 and all(len(v) >= N for v in class_features.values()):
                break


In [49]:
# === 5. Check and use ===
# Sanity check of the extraction: number of classes seen, how many vectors
# class 0 holds, and the shape of a single stored feature vector.
print(f"Collected features for {len(class_features)} classes.")
print(f"Example: Class 0 has {len(class_features[0])} feature vectors")
print(f"Feature vector shape: {class_features[0][0].shape}")

Collected features for 100 classes.
Example: Class 0 has 100 feature vectors
Feature vector shape: torch.Size([2048])


In [26]:
# NOTE(review): `classes` is not defined in any cell visible here — presumably
# a list of class names from an earlier session; confirm before re-running.
print(len(classes))

100


In [52]:
# Length of the first feature vector stored for class 99 (the feature dimension).
print(len(class_features[99][0]))

2048


# HDC training Stuff

In [53]:
# Set the dimensions
input_dim = len(class_features[0][0])   # length of one extracted avgpool feature vector
output_dim = 512    # hypervector (projected) size

# Random bipolar projection: draw integers from {0, 1}, then map them to {-1, +1}.
# NOTE(review): no seed is set in the visible cells, so this matrix (and the
# reported accuracies) can differ between runs — confirm that is intended.
projection_matrix = (torch.randint(0, 2, (input_dim, output_dim)) * 2 - 1).float()
projection_matrix.shape

torch.Size([2048, 512])

In [66]:
# Fixed experiment parameters
NUM_CLASSES = 100
HV_DIM = 512             # must equal the projection's output width
NUM_TRAIN_SAMPLES = 50   # first 50 stored vectors of each class build its prototype

# Per-class prototypes: accumulated ("int") and sign-binarized ("bin")
class_hvs_int = {}
class_hvs_bin = {}

# One prototype per class, with a tqdm progress bar
for class_item in tqdm(range(NUM_CLASSES), desc="Training HDC"):
    label = class_item

    # Bundle: sum the projected feature vectors of the training samples
    class_hv = torch.zeros(HV_DIM)
    for item in range(NUM_TRAIN_SAMPLES):
        query_v = class_features[class_item][item] @ projection_matrix
        class_hv += query_v
    class_hvs_int[label] = class_hv

    # Binarize with the element-wise sign (an exact 0 stays 0)
    class_hv_bin = torch.sign(class_hv)
    class_hvs_bin[label] = class_hv_bin
    

Training HDC: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 185.53it/s]


# HDC Testing on Trained Samples

In [94]:
def sim_search(class_hvs, q_hv):
    """Return the class hypervector most similar to a query hypervector.

    Args:
        class_hvs: Class hypervectors, either a mapping ``label -> tensor``
            (labels may be any keys, not only ``0..n-1``) or a sequence of
            tensors, in which case the position is used as the label.
        q_hv: Query tensor; compared with each class hypervector using
            cosine similarity along ``dim=0``.

    Returns:
        A ``(sim_score, target_label)`` tuple for the best match. Ties go to
        the candidate seen first in iteration order. If ``class_hvs`` is empty
        (or no score beats the sentinel), ``(-2, -9)`` is returned.
    """
    # Walk mappings by their own keys instead of assuming keys 0..n-1, which
    # raised KeyError for any other labelling; sequences fall back to
    # positional labels, matching the previous index-based behaviour.
    if hasattr(class_hvs, "items"):
        candidates = class_hvs.items()
    else:
        candidates = enumerate(class_hvs)

    sim_score = -2      # below the cosine range [-1, 1], so any real score wins
    target_label = -9   # sentinel meaning "no match found"
    for label, class_hv in candidates:
        cos_sim = F.cosine_similarity(q_hv, class_hv, dim=0)
        if cos_sim > sim_score:
            sim_score = cos_sim
            target_label = label
    return sim_score, target_label

In [96]:
START_NUM = NUM_TRAIN_SAMPLES
MAX_SAMPLES = 100

# Running counts of correct top-1 predictions for each encoding
correct_qhv_int = 0
correct_qhv_bin = 0

# Re-classify the same samples that were bundled into the class hypervectors
for class_set in tqdm(range(NUM_CLASSES), desc="Evaluating classes"):
    for item in range(NUM_TRAIN_SAMPLES):
        # Project the feature vector, then derive its sign-binarized form
        query_v = class_features[class_set][item] @ projection_matrix
        projected_bin = torch.sign(query_v)

        # Nearest class under each encoding (integer first, binary second)
        _, target_label_int = sim_search(class_hvs_int, query_v)
        _, target_label_bin = sim_search(class_hvs_bin, projected_bin)

        correct_qhv_int += int(target_label_int == class_set)
        correct_qhv_bin += int(target_label_bin == class_set)


Evaluating classes: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:51<00:00,  1.96it/s]


In [97]:
# Queries evaluated above: NUM_TRAIN_SAMPLES per class across all classes
TOTAL_TEST = NUM_TRAIN_SAMPLES * NUM_CLASSES

# Fraction of correct top-1 predictions for each encoding
accuracy_int = correct_qhv_int / TOTAL_TEST
accuracy_bin = correct_qhv_bin / TOTAL_TEST

print(f"Accuracy (int): {accuracy_int:.2f}")
print(f"Accuracy (bin): {accuracy_bin:.2f}")

Accuracy (int): 0.78
Accuracy (bin): 0.75


# HDC Testing on Untrained Samples

In [98]:
# Evaluate on the stored samples that were NOT used to build the class
# hypervectors: indices START_NUM .. MAX_SAMPLES - 1 of every class.
START_NUM = NUM_TRAIN_SAMPLES
MAX_SAMPLES = 100

# Running counts of correct top-1 predictions for each encoding
correct_qhv_int = 0
correct_qhv_bin = 0

for class_set in tqdm(range(NUM_CLASSES), desc="Evaluating classes"):
    for item in range(START_NUM, MAX_SAMPLES):
        # Project the feature vector, then derive its sign-binarized form
        query_v = class_features[class_set][item] @ projection_matrix
        projected_bin = torch.sign(query_v)

        # Nearest class under each encoding (integer first, binary second)
        _, target_label_int = sim_search(class_hvs_int, query_v)
        _, target_label_bin = sim_search(class_hvs_bin, projected_bin)

        correct_qhv_int += int(target_label_int == class_set)
        correct_qhv_bin += int(target_label_bin == class_set)


Evaluating classes: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:42<00:00,  2.36it/s]


In [99]:
# Queries evaluated above: the held-out samples of every class
TOTAL_TEST = NUM_CLASSES * (MAX_SAMPLES - NUM_TRAIN_SAMPLES)

# Fraction of correct top-1 predictions for each encoding
accuracy_int = correct_qhv_int / TOTAL_TEST
accuracy_bin = correct_qhv_bin / TOTAL_TEST

print(f"Accuracy (int): {accuracy_int:.2f}")
print(f"Accuracy (bin): {accuracy_bin:.2f}")

Accuracy (int): 0.76
Accuracy (bin): 0.72


# Notes
- It is interesting to see that ResNet is the better feature extractor, and hence the vanilla ResNet50 baseline (top-1 accuracy of 76.56 %) is not too far from the CNN+FSL version (top-1 accuracy of 78 % int and 75 % binary on the trained set, and 76 % int and 72 % binary on the untrained set)
- I guess the argument here is that, unlike VGG, the ResNet CNN architecture is already a very nice feature extractor