In [None]:
import os
from itertools import product
from random import shuffle

import numpy as np
import torch
from tqdm import tqdm

from KNN_Embeddings import *

In [None]:
from sklearn.metrics import precision_recall_curve
from sklearn.preprocessing import label_binarize

def calculate_map(y_true, y_pred, n_classes):
    """Mean of the per-class one-vs-rest average precision for hard label predictions.

    For every class ``i`` in ``range(n_classes)`` the true labels and the
    predicted labels are turned into 0/1 indicators, a precision-recall curve
    is computed from them, and the area under that curve is taken as the
    class's average precision (AP). The returned value is the mean of the APs.

    Args:
        y_true: 1-D array-like of true class indices.
        y_pred: 1-D array-like of predicted class indices (hard labels, not
            scores), aligned element-wise with ``y_true``.
        n_classes: Number of classes; classes are assumed to be labelled
            ``0 .. n_classes - 1``.

    Returns:
        The mean of the per-class average precisions.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    class_ids = np.arange(n_classes)

    # One indicator column per class. Built by direct comparison because
    # label_binarize collapses the two-class case into a single column, which
    # would make the per-class column lookup below fail.
    y_true_binarized = (y_true[:, None] == class_ids[None, :]).astype(int)

    average_precisions = []
    for i in class_ids:
        precision, recall, _ = precision_recall_curve(
            y_true_binarized[:, i], (y_pred == i).astype(int)
        )
        # precision_recall_curve returns recall in DECREASING order, so
        # np.diff(recall) is <= 0; negate the sum to obtain a positive area.
        ap = -np.sum(np.diff(recall) * np.asarray(precision)[:-1])
        average_precisions.append(ap)

    # MAP is the unweighted mean over classes
    return np.mean(average_precisions)

# The class count is hard-coded; 10 matches e.g. CIFAR-10.
# NOTE(review): y_test and y_pred are not assigned anywhere above this cell;
# presumably they come from the KNN_Embeddings star import — confirm.
n_classes = 10
map_score = calculate_map(y_true=y_test, y_pred=y_pred, n_classes=n_classes)
print('Mean Average Precision (MAP):', map_score)


In [None]:
# Feature normalization
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from torch.utils.data import DataLoader, TensorDataset

# Standardise the features: the scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Training split: tensors -> dataset -> shuffled mini-batches of 64
X_train_tensor = torch.tensor(X_train_normalized, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.int64)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Test split: same pipeline, but batches keep the original sample order
X_test_tensor = torch.tensor(X_test_normalized, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.int64)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

## CNNH Model 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNH(nn.Module):
    """Fully connected hashing head: Linear -> ReLU -> Dropout -> Linear.

    Despite the name, the module contains no convolutional layers; it maps a
    flat feature vector of size ``input_dim`` to ``num_bits`` real-valued
    outputs. No ``tanh``/``sign`` is applied inside the module, so callers
    squash or binarise the outputs themselves.

    Args:
        input_dim: Size of each input feature vector.
        num_bits: Number of outputs, i.e. the length of the hash code.
        hidden_dim: Width of the hidden layer (default 512).
        dropout: Dropout probability applied after the hidden ReLU
            (default 0.5).
    """

    def __init__(self, input_dim, num_bits, hidden_dim=512, dropout=0.5):
        super().__init__()
        # Attribute names are kept stable so existing state_dicts still load.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_dim, num_bits)  # Outputs hash codes

    def forward(self, x):
        """Return the un-squashed code activations for a batch ``x``."""
        x = self.relu1(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# 48-bit hashing network sized to the feature dimension of X_train
model = CNNH(input_dim=X_train.shape[1], num_bits=48)
model = model.to(device)

# Adam with a small learning rate; the loss compares similarity matrices by MSE
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)
criterion = nn.MSELoss()


## Train CNNH Model

In [None]:
# Training function
def train(model, train_loader, optimizer, criterion, device, num_epochs=10):
    """Train ``model`` with a pairwise-similarity loss on its tanh-squashed outputs.

    For each batch the model outputs are passed through ``tanh``, the matrix
    of pairwise inner products between the resulting codes is computed, and
    ``criterion`` compares it with a target matrix that is +1 where two
    samples share a label and -1 otherwise.

    Args:
        model: Module mapping a batch of inputs to a 2-D batch of code activations.
        train_loader: Iterable yielding ``(data, target)`` tensor pairs. It is
            iterated once per epoch and does not need to define ``__len__``.
        optimizer: Optimizer over ``model``'s parameters.
        criterion: Loss called as ``criterion(similarity_matrix, target_similarity)``.
        device: Device the batches are moved to.
        num_epochs: Number of passes over ``train_loader`` (default 10).

    Returns:
        None. The mean batch loss of every epoch is printed; an epoch that
        yielded no batches prints ``nan`` instead of dividing by zero.
    """
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            hash_codes = torch.tanh(outputs)  # Ensuring outputs are between -1 and 1
            # NOTE(review): entries of this product lie in (-num_bits, num_bits)
            # while the targets are +/-1; similarity-preserving hashing losses
            # commonly divide the inner product by the code length — confirm
            # that the unnormalised form is intended.
            similarity_matrix = torch.mm(hash_codes, hash_codes.t())
            # +1 for same-label pairs, -1 for different-label pairs
            target_similarity = (target[:, None] == target[None, :]).float() * 2 - 1
            loss = criterion(similarity_matrix, target_similarity)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        # Count batches while iterating instead of calling len(train_loader):
        # this works for loaders without __len__ and avoids 0/0 on empty ones.
        mean_loss = total_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch+1}, Loss: {mean_loss}')

# Training the model: runs the loop defined in train() on the module-level
# model / train_loader / optimizer / criterion / device and prints the mean
# loss after every epoch.
train(model, train_loader, optimizer, criterion, device)



## KNN on hashed embeddings

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import label_binarize


def get_binary_hash_codes(model, loader, device):
    """Run ``model`` over ``loader`` and return its outputs binarised to -1/+1.

    The model is switched to eval mode (and left in it) and evaluated without
    gradient tracking. Each output is mapped to +1 when it is >= 0 and to -1
    otherwise, so the codes only ever contain two symbols.

    Args:
        model: Module mapping a batch of inputs to a 2-D batch of activations.
        loader: Iterable yielding ``(data, target)`` tensor pairs.
        device: Device the input batches are moved to before the forward pass.

    Returns:
        Tuple ``(codes, labels)`` of NumPy arrays: ``codes`` stacks the
        binarised outputs of all batches along axis 0 and ``labels`` holds the
        corresponding targets in the order the loader produced them. Both are
        empty 1-D arrays when the loader yields nothing.
    """
    model.eval()
    hash_codes = []
    labels = []
    with torch.no_grad():
        for data, target in loader:
            outputs = model(data.to(device))
            # torch.sign would map an exact 0 to 0 and introduce a third
            # symbol into the "binary" code; send zeros to +1 instead.
            binary_codes = torch.where(
                outputs >= 0, torch.ones_like(outputs), -torch.ones_like(outputs)
            )
            hash_codes.append(binary_codes.cpu().numpy())
            # .cpu() keeps this working when the loader yields non-CPU targets
            labels.append(target.cpu().numpy())

    if not hash_codes:
        return np.array([]), np.array([])
    return np.concatenate(hash_codes, axis=0), np.concatenate(labels, axis=0)

# Extract hash codes (labels are returned in the order each loader produced them)
train_codes, train_labels = get_binary_hash_codes(model, train_loader, device)
test_codes, test_labels = get_binary_hash_codes(model, test_loader, device)

# Classification with KNN over Hamming distance between codes
knn = KNeighborsClassifier(n_neighbors=5, metric='hamming')
knn.fit(train_codes, train_labels)
predictions = knn.predict(test_codes)
y_pred_proba = knn.predict_proba(test_codes)

print(classification_report(test_labels, predictions))


# One-vs-rest indicator matrix for the test labels. It is built from
# test_labels (the labels that travelled through the loader together with
# test_codes) and has one column per entry of knn.classes_, i.e. the same
# column order as y_pred_proba. A direct comparison is used instead of
# label_binarize because the latter returns a single column for two classes,
# which would no longer line up with the two probability columns.
y_test_binarized = (test_labels[:, None] == knn.classes_[None, :]).astype(int)

# Average precision of each class from its predicted neighbour-vote share
average_precisions = [
    average_precision_score(y_test_binarized[:, i], y_pred_proba[:, i])
    for i in range(y_test_binarized.shape[1])
]

# Compute the mean of the average precisions
map_score = np.mean(average_precisions)
print(f'Mean Average Precision (MAP): {map_score}')






In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import matplotlib

def visualize_embeddings(embeddings, labels, num_classes):
    """Project ``embeddings`` to 2-D with t-SNE and show a label-coloured scatter plot.

    Args:
        embeddings: 2-D array-like with one row per sample.
        labels: Per-sample class labels used as colours; a NumPy array, a
            torch tensor (any device) or any other array-like.
        num_classes: Number of discrete colours taken from the ``tab20b``
            colormap and number of ticks on the colour bar.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Accept tensors on any device as well as plain sequences
    if isinstance(labels, torch.Tensor):
        labels = labels.detach().cpu().numpy()
    else:
        labels = np.asarray(labels)

    # t-SNE requires perplexity < n_samples; keep 40 whenever the data allows it
    n_samples = len(embeddings)
    perplexity = min(40, max(n_samples - 1, 1))

    # Initialize t-SNE. scikit-learn 1.5 renamed n_iter to max_iter and later
    # removed the old name, so try the new keyword first and fall back.
    tsne_kwargs = dict(n_components=2, verbose=1, perplexity=perplexity)
    try:
        tsne = TSNE(max_iter=300, **tsne_kwargs)
    except TypeError:
        tsne = TSNE(n_iter=300, **tsne_kwargs)
    tsne_results = tsne.fit_transform(embeddings)

    # Create a figure with high resolution
    fig, ax = plt.subplots(figsize=(10, 7), dpi=300)
    # plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap still
    # accepts the (name, lut) form and works on older releases too.
    cmap = plt.get_cmap('tab20b', num_classes)

    # Plotting the results of t-SNE
    scatter = ax.scatter(
        tsne_results[:, 0], tsne_results[:, 1],
        c=labels, cmap=cmap, marker='o', edgecolor='k', alpha=0.6,
    )
    fig.colorbar(scatter, ax=ax, ticks=range(num_classes))
    ax.set_title('t-SNE visualization of Embeddings', fontsize=14, fontweight='bold')
    ax.set_xlabel('t-SNE axis 1', fontsize=12)
    ax.set_ylabel('t-SNE axis 2', fontsize=12)
    ax.grid(True, linestyle='--', linewidth=0.5)  # Lighter grid
    plt.show()

# Usage example
# NOTE(review): the variables are named train_* but the codes are extracted
# from test_loader, and this assignment rebinds the train_labels created in
# the KNN cell — confirm which split is meant to be visualised.
train_embeddings, train_labels = get_binary_hash_codes(model, test_loader, device)  # returns NumPy arrays (codes, labels)
visualize_embeddings(train_embeddings, train_labels, len(np.unique(train_labels)))
