In [167]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, TensorDataset
import random
import numpy as np
from typing import Tuple
from tqdm import tqdm
from scipy.spatial import distance_matrix
import pandas as pd

In [168]:
root = "C:/Users/xiaoy/OneDrive/Desktop/P7/p7 project/DVP7/"
# load data
X_train = torch.tensor( np.load(root + "Features/train_features_vgg16_cifar10.npy" ) )
y_train = np.load(root + "Features/train_labels_vgg16_cifar10.npy" )

X_test = torch.tensor( np.load(root + "Features/test_features_vgg16_cifar10.npy" ) )
y_test = np.load(root + "Features/test_labels_vgg16_cifar10.npy")

y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [169]:
def meanAveragePrecision(test_hashes, training_hashes, test_labels, training_labels):
    aps = []
    num_queries = len(test_hashes)
    for i in tqdm(range(num_queries)):
        label = test_labels[i]
        distances = (training_hashes != test_hashes[i]).sum(axis=1)  # Hamming distance
        tp = (training_labels == label).long()  # True positive indicator
        hash_df = pd.DataFrame({"distances": distances, "tp": tp.cpu().numpy()})
        hash_df = hash_df.sort_values(by="distances").reset_index(drop=True)
        hash_df["tp_cumsum"] = hash_df["tp"].cumsum()
        hash_df["precision"] = hash_df["tp_cumsum"] / (np.arange(len(hash_df)) + 1)
        ap = hash_df["precision"].where(hash_df["tp"] == 1).mean() if hash_df["tp"].sum() > 0 else 0
        aps.append(ap)

    return np.mean(aps)


In [215]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SUBIC_encoder(nn.Module): 
    def __init__(self, bits=48, num_classes=10, num_blocks=8, block_size=8):
        super(SUBIC_encoder, self).__init__()
       
        assert bits % num_blocks == 0, "Bits must be divisible by num_blocks"

        self.bits = bits 
        self.num_blocks = num_blocks
        self.block_size = block_size

        # Define the encoder structure
        self.encoder = nn.Sequential(
            nn.Linear(4096, 256), 
            nn.ReLU(),
            nn.Linear(256, bits)
        )  # Outputs binary feature vectors
        
        self.fc3 = nn.Linear(bits, num_classes)  # Logits for num_classes
    
    def block_softmax(self, x):
        
        batch_size = x.shape[0]
        block_size = x.shape[1] // self.num_blocks
        
        # Ensure that x has the expected shape
        assert x.shape[1] == self.bits, f"Expected shape [batch_size, {self.bits}], got {x.shape}"
        
        # Reshape and apply softmax
        x = x.view(batch_size, self.num_blocks, block_size)
        x = F.softmax(x, dim=-1) 
        return x.view(batch_size, -1) #-1 refers to the value that will match the original elements 
    
    def block_one_hot(self, x):
        batch_size = x.shape[0]

        x = x.view(batch_size, self.num_blocks, self.block_size)
        max_indices = x.argmax(dim=-1, keepdim=True)
        
        # Create one-hot encoding
        one_hot = torch.zeros_like(x).scatter_(-1, max_indices, 1)

        return one_hot.view(batch_size, self.bits)
    
    def forward(self, x, use_one_hot=False):
        # Ensure x is a flat tensor before passing to encoder
        batch_size = x.shape[0]
        x = x.view(batch_size, -1)  # Flatten if necessary

        z = self.encoder(x)

        if use_one_hot:
            binary_codes = self.block_one_hot(z)
        else:
            binary_codes = self.block_softmax(z)

        class_probs = F.softmax(self.fc3(binary_codes), dim=-1) 

        return class_probs, binary_codes


In [225]:
binary_codes

tensor([[0.0980, 0.2045, 0.1069,  ..., 0.1427, 0.1689, 0.2128],
        [0.0956, 0.2492, 0.2026,  ..., 0.1155, 0.1896, 0.1899],
        [0.1116, 0.2078, 0.1420,  ..., 0.1291, 0.1221, 0.2132],
        ...,
        [0.0921, 0.1672, 0.0834,  ..., 0.1116, 0.1655, 0.2510],
        [0.0969, 0.1438, 0.0821,  ..., 0.1126, 0.1187, 0.2859],
        [0.1001, 0.1743, 0.1019,  ..., 0.1006, 0.1254, 0.2441]],
       grad_fn=<ViewBackward0>)

In [217]:
def compute_total_loss(class_probs, target, binary_codes, num_blocks, block_size, gamma=0.5, mu=0.5):
    """
    Computes the total loss, which includes:
    - Cross-entropy classification loss
    - Mean entropy loss (encouraging one-hot encoding within each block)
    - Batch entropy loss (encouraging uniform distribution across blocks)
    
    Parameters:
    - class_probs: The class probabilities from the classification layer.
    - target: The true labels.
    - binary_codes: The binary codes generated by the encoder.
    - num_blocks: The number of blocks in the binary codes.
    - block_size: The size of each block in the binary codes.
    - gamma: Weight for the mean entropy loss.
    - mu: Weight for the batch entropy loss.
    
    """

    def cross_entropy(class_prob, target):
        s = class_prob[torch.arange(len(target)), target]
        return -torch.log2(s)/torch.log2(torch.FloatTensor([class_prob.shape[1]]))

    classification_loss = cross_entropy(class_probs, target)

    batch_size = binary_codes.shape[0]
    binary_codes = binary_codes.view(batch_size, num_blocks, block_size) #used in structure encoding

    def entropy(p):
        entropy_result = -torch.sum(p * torch.log2(p + 1e-30), dim=-1)
        return entropy_result

    #Mean Entropy Loss (encourages each block to resemble a one-hot vector) using softmax binary code
    mean_entropy_loss = entropy(binary_codes).mean(dim=1)

    #Batch Entropy Loss (encourages uniform distribution across blocks)
    average_support = binary_codes.mean(dim=0)  
    batch_entropy_loss = entropy(average_support).mean(dim=0)

    #Combine losses with weights gamma and mu
    entropy_loss = (gamma * mean_entropy_loss - mu * batch_entropy_loss)/torch.log2(torch.FloatTensor([block_size]))
    total_loss = (classification_loss + entropy_loss).mean()
    
    
    return total_loss

#logits, binary_codes = model(X_train, use_one_hot=False)
#loss = compute_total_loss(logits, y_train_tensor, binary_codes, num_blocks=8, block_size=4, gamma=0.5, mu=0.05)

In [239]:
model = SUBIC_encoder(bits = 48, num_classes = 10, num_blocks = 8, block_size = 6)
optimizer = optim.Adam(model.parameters(), lr=0.001)
class_probs, binary_codes = model.forward(X_train, use_one_hot=False)

In [None]:
entropy_loss = (gamma * mean_entropy_loss - mu * batch_entropy_loss)/torch.log2(torch.FloatTensor([block_size]))

In [240]:
epochs = 10

train_dataset = TensorDataset(X_train, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataset = TensorDataset(X_test, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for epoch in range(epochs):
    model.train()
    total_loss = 0.0

    for images, labels in train_loader:
        
        images, labels = images.to(device), labels.to(device)
        class_probs, binary_codes = model.forward(images, use_one_hot=False)


        # Compute loss and update model
        loss = compute_total_loss(class_probs, labels, binary_codes, num_blocks=8, block_size=6, gamma=0.05, mu=0.5)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(binary_codes[0])
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / len(train_loader):.4f}")


tensor([1.2502e-04, 3.1845e-07, 5.0873e-02, 8.5722e-05, 2.8362e-04, 9.4863e-01,
        4.7419e-03, 7.2259e-06, 2.6153e-02, 9.6746e-01, 5.7432e-04, 1.0613e-03,
        2.6782e-03, 9.9721e-01, 1.8306e-06, 1.0693e-04, 1.2051e-06, 4.1100e-07,
        4.3297e-05, 9.9886e-01, 1.0261e-04, 2.3200e-08, 3.4048e-09, 9.9351e-04,
        4.6623e-05, 3.0452e-06, 9.6816e-01, 1.0828e-03, 6.6856e-07, 3.0703e-02,
        2.7300e-04, 2.4654e-07, 8.3684e-01, 1.6287e-01, 2.1520e-06, 1.2030e-05,
        2.3747e-02, 1.6056e-03, 1.4271e-03, 1.2888e-07, 1.1019e-06, 9.7322e-01,
        1.0758e-05, 1.0098e-07, 7.4191e-08, 3.7767e-03, 9.9621e-01, 6.9524e-06],
       grad_fn=<SelectBackward0>)
Epoch [1/10], Loss: -0.0794
tensor([8.1866e-08, 9.7812e-01, 4.1501e-09, 2.1877e-02, 3.1813e-06, 1.5997e-06,
        2.6571e-07, 1.0000e+00, 1.5810e-08, 9.1155e-09, 9.5968e-12, 2.8679e-07,
        5.5497e-09, 4.3964e-10, 1.0000e+00, 1.8760e-08, 4.1262e-06, 5.9109e-15,
        1.3070e-09, 2.9150e-06, 2.9511e-10, 9.9999e-01, 3

KeyboardInterrupt: 

<torch.utils.data.dataloader.DataLoader at 0x268a0a13e20>

In [261]:
model.eval()
all_query_codes, all_query_labels = [], []
all_db_codes, all_db_labels = [], []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        _, binary_codes = model(images, use_one_hot=True)

        # Ensure binary_codes is a tensor
        if not isinstance(binary_codes, torch.Tensor):
            raise TypeError("Expected binary_codes to be a tensor.")
        
        all_db_codes.append(binary_codes)
        all_db_labels.append(labels)

        if len(all_query_codes) == 0:  
            all_query_codes.append(binary_codes.clone())  
            all_query_labels.append(labels.clone())

# Concatenate all tensors
all_query_codes = torch.cat(all_query_codes, dim=0)
all_query_labels = torch.cat(all_query_labels, dim=0)
all_db_codes = torch.cat(all_db_codes, dim=0)
all_db_labels = torch.cat(all_db_labels, dim=0)

# Calculate MAP Score
map_score = meanAveragePrecision(
    all_query_codes,
    all_db_codes,
    all_query_labels,
    all_db_labels
    )

print(f"MAP Score: {map_score:.5f}")

100%|██████████| 64/64 [00:00<00:00, 240.21it/s]

MAP Score: 0.52585





In [87]:
def entropy(p):
    entropy_result = torch.sum(p * torch.log2(p + 1e-30), dim=1)
    return entropy_result

batch_size = binary_codes.shape[0]
binary_codes = binary_codes.view(batch_size, 8, 6)    

#Mean Entropy Loss (encourages each block to resemble a one-hot vector) using softmax binary codes
mean_entropy_loss = entropy(binary_codes).mean()

In [90]:
test = entropy(binary_codes)[0]
test.mean()

tensor(-3.3904, grad_fn=<MeanBackward0>)

In [94]:
entropy(binary_codes)[4].mean()

tensor(-3.3979, grad_fn=<MeanBackward0>)