# Importing libraries

In [5]:
import os

import pickle
# import numpy as np
import numpy
from itertools import product

from collections import Counter
from tqdm import tqdm
import numpy as np

import torch
import faiss

from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings('ignore')

In [6]:
res = faiss.StandardGpuResources()  # use a single GPU

# Loading the trained embeddings

In [7]:
def load_embeddings(model_name, city):
    if model_name == "declutr":
        emb_dir = os.path.join(os.getcwd(), "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/pretrained_declutr")
    else:
        emb_dir = os.path.join(os.getcwd(), "../models/pickled/embeddings/pretrained_styleEmbedding")
        
    train_label_filename = "pretrained_checkpoint_" + model_name + "_" + city + "_labels_train.pt"
    train_data_filename = "pretrained_checkpoint_" + model_name  + "_" + city + "_data_train.pt"
    test_label_filename = "pretrained_checkpoint_" + model_name + "_" + city + "_labels_test.pt"
    test_data_filename = "pretrained_checkpoint_" + model_name  + "_" + city + "_data_test.pt"
    
    train_emb = torch.load(os.path.join(emb_dir, train_data_filename), map_location=torch.device('cpu'))
    train_labels = torch.load(os.path.join(emb_dir, train_label_filename), map_location=torch.device('cpu'))
    
    test_emb = torch.load(os.path.join(emb_dir, test_data_filename), map_location=torch.device('cpu'))
    test_labels = torch.load(os.path.join(emb_dir, test_label_filename), map_location=torch.device('cpu'))
    
    return train_emb, train_labels, test_emb, test_labels    

# R-Precision metrics

In [8]:
def recall_at_k(actual, predicted, k):
    recall_list = []
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())  # Convert PyTorch tensor to NumPy array, then to set
        pred_set = set(pred[:k].numpy())  # Convert PyTorch tensor to NumPy array, then to set
        recall_list.append(round(len(act_set & pred_set) / float(len(act_set)), 2))
    return recall_list

def generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index_flat.add(train_embeddings)

    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels.numpy()

    vendor_dict = {int(vendor_id): (train_labels == vendor_id).sum().item() for vendor_id in unique_vendors}
    
    r_precision_score = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        # Skip if there are no test embeddings for this vendor_id
        if len(test_adsidx) == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        
        # Ensure test_vendor_embeddings is a 2D array
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings[np.newaxis, :]

        k = vendor_dict[vendor_id]
        if k == 0:  # Skip if k is 0, meaning there are no relevant embeddings to search for
            continue

        try:
            _, I = gpu_index_flat.search(test_vendor_embeddings, k)
        except ValueError as e:
            print(f"Error searching for vendor_id {vendor_id} with shape {test_vendor_embeddings.shape}: {e}")
            continue

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        r_precision_score[vendor_id] = np.mean(recall_at_k(true_label_list, predicted_label_list, k))
    
    r_precision_mean = np.mean(list(r_precision_score.values()))
    r_precision_std = np.std(list(r_precision_score.values()))

    print(f"R precision mean: {round(r_precision_mean, 4)} ± {round(r_precision_std, 2)}")
    
    return r_precision_mean, r_precision_std

In [9]:
def calculate_mrr_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=10):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)  # Inner Product (cosine similarity)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index_flat.add(train_embeddings)  # Directly use numpy array

    mrr_scores = []

    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels.numpy()

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc=f"Calculating MRR@{k}"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]
        if len(test_adsidx) == 0:
            continue
        
        test_vendor_embeddings = test_embeddings[test_adsidx]
        
        # Ensure test_vendor_embeddings is a 2D array
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings[np.newaxis, :]

        try:
            D, I = gpu_index_flat.search(test_vendor_embeddings, k)  # Retrieve the top k results
        except ValueError as e:
            print(f"Error searching for vendor_id {vendor_id} with shape {test_vendor_embeddings.shape}: {e}")
            continue

        # I is the indices of the nearest neighbors
        for idx, indices in enumerate(I):
            correct_indices = np.where(train_labels_np == vendor_id)[0]
            for rank, index in enumerate(indices, start=1):
                if index in correct_indices:
                    mrr_scores.append(1.0 / rank)
                    break
            else:
                mrr_scores.append(0.0)

    mrr_mean = np.mean(mrr_scores) if mrr_scores else 0
    mrr_std = np.std(mrr_scores) if mrr_scores else 0
    print(f"MRR@{k} mean: {round(mrr_mean, 4)} ± {round(mrr_std, 2)}")

    return mrr_mean, mrr_std

In [10]:
def calculate_micro_f1_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=10):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)  # Inner Product (cosine similarity)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index_flat.add(train_embeddings)  # Directly use numpy array

    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels.numpy()

    y_true = []
    y_pred = []

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc=f"Calculating Macro-F1@{k}"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]
        if len(test_adsidx) == 0:
            continue
        
        test_vendor_embeddings = test_embeddings[test_adsidx]
        
        # Ensure test_vendor_embeddings is a 2D array
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings[np.newaxis, :]

        try:
            D, I = gpu_index_flat.search(test_vendor_embeddings, k)  # Retrieve the top k results
        except ValueError as e:
            print(f"Error searching for vendor_id {vendor_id} with shape {test_vendor_embeddings.shape}: {e}")
            continue

        # Collect true labels and predicted labels for micro-F1 calculation
        for indices in I:
            true_labels = [vendor_id] * k  # True labels are the same vendor_id for the current test sample
            predicted_labels = train_labels_np[indices]  # Predicted labels are the labels of the top k retrieved samples

            y_true.extend(true_labels)
            y_pred.extend(predicted_labels)

    # Calculate micro-F1 score
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    macro_f1 = f1_score(y_true, y_pred, average='macro')

    print(f"Macro-F1@{k}: {macro_f1}")
    
    return macro_f1

In [11]:
def generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    
    # train_embeddings are already NumPy arrays
    gpu_index_flat.add(train_embeddings)
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels.numpy()

    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}

    r_precision_score = {}
    total_samples = 0  # Total number of samples for all classes

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx.numpy()]
        
        # Ensure test_vendor_embeddings is a 2D array
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = np.expand_dims(test_vendor_embeddings, axis=0)

        k = vendor_dict[vendor_id]
        
        # Check if k is within the valid range
        if k > train_embeddings.shape[0]:
            print(f"Warning: k ({k}) is greater than the number of training samples ({train_embeddings.shape[0]}), adjusting k to maximum possible.")
            k = train_embeddings.shape[0]

        try:
            _, I = gpu_index_flat.search(test_vendor_embeddings, int(k))
        except Exception as e:
            print(f"Error during FAISS search for vendor_id {vendor_id}: {e}")
            continue

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        r_precision_score[vendor_id] = np.mean(recall_at_k(true_label_list, predicted_label_list, k))
        total_samples += len(test_adsidx)  # Add the number of samples for this class

    # Calculate the weighted mean of R-Precision
    weighted_r_precision_sum = sum(r_precision_score[vendor_id] * len((test_labels == vendor_id).nonzero(as_tuple=True)[0]) for vendor_id in r_precision_score.keys())
    macro_r_precision = weighted_r_precision_sum / total_samples

    print(f"Macro R-precision: {macro_r_precision}")
    
    return macro_r_precision

In [12]:
def precision_at_k(actual, predicted, k):
    precision_list = []
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())
        pred_set = set(pred[:k].numpy())
        precision_list.append(len(act_set & pred_set) / float(k))
    return precision_list

def f1_at_k(actual, predicted, k):
    f1_list = []
    precision_list = precision_at_k(actual, predicted, k)
    recall_list = recall_at_k(actual, predicted, k)
    for precision, recall in zip(precision_list, recall_list):
        if precision + recall > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0
        f1_list.append(f1)
    return f1_list

def generate_macro_f1_at_x_results(train_embeddings, train_labels, test_embeddings, test_labels):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index_flat.add(train_embeddings)

    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels.numpy()

    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    f1_score_list = []

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating Macro-F1@X"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if isinstance(test_vendor_embeddings, torch.Tensor):
            test_vendor_embeddings = test_vendor_embeddings.numpy()

        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = np.expand_dims(test_vendor_embeddings, axis=0)

        k = vendor_dict[vendor_id]
        if k > train_embeddings.shape[0]:
            k = train_embeddings.shape[0]

        _, I = gpu_index_flat.search(test_vendor_embeddings, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        f1_score_list.extend(f1_at_k(true_label_list, predicted_label_list, k))

    macro_f1_mean = np.mean(f1_score_list)
    macro_f1_std = np.std(f1_score_list)

    print(f"Macro F1@X: {round(macro_f1_mean, 4)} ± {round(macro_f1_std, 2)}")
    
    return macro_f1_mean, macro_f1_std

# Declutr model

In [13]:
import torch
import hashlib

def remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels):
    """
    Removes duplicate embeddings between train and test sets using hashing.

    Args:
        train_embeddings (torch.Tensor): Embeddings from the training set.
        train_labels (torch.Tensor): Labels corresponding to the training embeddings.
        test_embeddings (torch.Tensor): Embeddings from the test set.
        test_labels (torch.Tensor): Labels corresponding to the test embeddings.

    Returns:
        unique_train_embeddings (torch.Tensor): Unique embeddings from the training set.
        unique_train_labels (torch.Tensor): Labels corresponding to the unique training embeddings.
        unique_test_embeddings (torch.Tensor): Unique embeddings from the test set (excluding duplicates with train set).
        unique_test_labels (torch.Tensor): Labels corresponding to the unique test embeddings.
    """

    def hash_embedding(embedding):
        # Ensure the embedding is contiguous in memory and of type float32
        embedding = embedding.contiguous().view(-1).float()
        # Round to reduce the impact of floating-point precision errors
        embedding = torch.round(embedding * 1e6) / 1e6  # Adjust precision as needed
        # Convert the embedding to bytes
        emb_bytes = embedding.numpy().tobytes()
        # Compute MD5 hash
        return hashlib.md5(emb_bytes).hexdigest()

    # Create dictionaries mapping hashes to embeddings and labels for the train set
    train_hash_embedding_label_dict = {}
    for emb, label in zip(train_embeddings, train_labels):
        emb_hash = hash_embedding(emb)
        # Store the embedding and label only if the hash is not already in the dictionary
        if emb_hash not in train_hash_embedding_label_dict:
            train_hash_embedding_label_dict[emb_hash] = (emb, label.item())

    # Create dictionaries mapping hashes to embeddings and labels for the test set
    test_hash_embedding_label_dict = {}
    for emb, label in zip(test_embeddings, test_labels):
        emb_hash = hash_embedding(emb)
        # Store the embedding and label only if the hash is not already in the dictionary
        if emb_hash not in test_hash_embedding_label_dict:
            test_hash_embedding_label_dict[emb_hash] = (emb, label.item())

    # Identify common hashes between train and test sets
    common_hashes = set(train_hash_embedding_label_dict.keys()).intersection(set(test_hash_embedding_label_dict.keys()))

    # Remove duplicates from the test set
    unique_test_hashes = set(test_hash_embedding_label_dict.keys()) - common_hashes

    # Reconstruct unique embeddings and labels for the train set
    unique_train_embeddings_list = [emb_label[0] for emb_label in train_hash_embedding_label_dict.values()]
    unique_train_labels_list = [emb_label[1] for emb_label in train_hash_embedding_label_dict.values()]

    # Reconstruct unique embeddings and labels for the test set
    unique_test_embeddings_list = [test_hash_embedding_label_dict[h][0] for h in unique_test_hashes]
    unique_test_labels_list = [test_hash_embedding_label_dict[h][1] for h in unique_test_hashes]

    # Convert lists to tensors
    unique_train_embeddings = torch.stack(unique_train_embeddings_list)
    unique_train_labels = torch.tensor(unique_train_labels_list)

    unique_test_embeddings = torch.stack(unique_test_embeddings_list)
    unique_test_labels = torch.tensor(unique_test_labels_list)

    return unique_train_embeddings, unique_train_labels, unique_test_embeddings, unique_test_labels

In [14]:
def find_members(train_embeddings, train_labels, test_embeddings, test_labels):
    """
    Filters test_embeddings and test_labels by removing entries whose labels do not exist in train_labels.

    Parameters:
    - train_embeddings (torch.Tensor): Embeddings for the training data.
    - train_labels (torch.Tensor): Labels for the training data.
    - test_embeddings (torch.Tensor): Embeddings for the test data.
    - test_labels (torch.Tensor): Labels for the test data.

    Returns:
    - train_embeddings (torch.Tensor): Original training embeddings (unchanged).
    - train_labels (torch.Tensor): Original training labels (unchanged).
    - filtered_test_embeddings (torch.Tensor): Filtered test embeddings.
    - filtered_test_labels (torch.Tensor): Filtered test labels.
    """

    # Ensure labels are on the same device
    if train_labels.device != test_labels.device:
        train_labels = train_labels.to(test_labels.device)

    # Use torch.isin to create a mask of test labels that exist in train labels
    if hasattr(torch, 'isin'):
        # torch.isin is available in PyTorch 1.10 and later
        mask = torch.isin(test_labels, train_labels)
    else:
        # For older versions of PyTorch, convert to NumPy arrays
        train_labels_np = train_labels.cpu().numpy()
        test_labels_np = test_labels.cpu().numpy()
        mask_np = np.isin(test_labels_np, train_labels_np)
        mask = torch.from_numpy(mask_np).to(test_labels.device)

    # Apply the mask to filter test embeddings and labels
    filtered_test_embeddings = test_embeddings[mask]
    filtered_test_labels = test_labels[mask]

    return train_embeddings, train_labels, filtered_test_embeddings, filtered_test_labels

In [12]:
r_precision_dict, mrr_dict = ({} for i in range(2))
for city_ in ["south", "midwest", "west", "northeast"]:    
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr", city=city_)
    
    train_embeddings = torch.tensor(train_embeddings)
    train_labels = torch.tensor(train_labels)
    test_embeddings = torch.tensor(test_embeddings)
    test_labels = torch.tensor(test_labels)
    
    train_embeddings, train_labels, test_embeddings, test_labels = remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels)
    train_embeddings, train_labels, test_embeddings, test_labels = find_members(train_embeddings, train_labels, test_embeddings, test_labels)
    
    train_embeddings = train_embeddings.detach().cpu().numpy()
    # train_labels = train_labels.detach().cpu().numpy()
    test_embeddings = test_embeddings.detach().cpu().numpy()
    # test_labels = test_labels.detach().cpu().numpy()
    
    r_precision_mean, r_precision_std = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    mrr_mean, mrr_std = calculate_mrr_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # macro_f1_1 = calculate_micro_f1_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # macro_f1_10 = calculate_micro_f1_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=10)
    # macro_f1_100 = calculate_micro_f1_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=100)
    # macro_average_r_precision = generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    _, _  = generate_macro_f1_at_x_results(train_embeddings, train_labels, test_embeddings, test_labels)
     
    # r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    # mrr_dict[city_] = (mrr_mean, mrr_std)
    
#with open('../models/pickled/embeddings/pretrained_declutr/rprecision.pickle', 'wb') as handle:
#    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
#with open('../models/pickled/embeddings/pretrained_declutr/mrr.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

--------------------------------------------------
City: south


Calculating R-precision: 100%|██████████| 964/964 [00:00<00:00, 2571.97it/s]


R precision mean: 0.2248 ± 0.3


Calculating MRR@1: 100%|██████████| 964/964 [00:00<00:00, 4572.49it/s]


MRR@1 mean: 0.3265 ± 0.47


Calculating Macro-F1@X: 100%|██████████| 964/964 [00:00<00:00, 2529.49it/s]


Macro F1@X: 0.2223 ± 0.3
--------------------------------------------------
City: midwest


Calculating R-precision: 100%|██████████| 624/624 [00:00<00:00, 3194.88it/s]


R precision mean: 0.2866 ± 0.36


Calculating MRR@1: 100%|██████████| 624/624 [00:00<00:00, 5686.55it/s]


MRR@1 mean: 0.3943 ± 0.49


Calculating Macro-F1@X: 100%|██████████| 624/624 [00:00<00:00, 3129.27it/s]


Macro F1@X: 0.2804 ± 0.36
--------------------------------------------------
City: west


Calculating R-precision: 100%|██████████| 259/259 [00:00<00:00, 3848.44it/s]


R precision mean: 0.3479 ± 0.41


Calculating MRR@1: 100%|██████████| 259/259 [00:00<00:00, 6851.67it/s]


MRR@1 mean: 0.3139 ± 0.46


Calculating Macro-F1@X: 100%|██████████| 259/259 [00:00<00:00, 3633.76it/s]


Macro F1@X: 0.2731 ± 0.36
--------------------------------------------------
City: northeast


Calculating R-precision: 100%|██████████| 293/293 [00:00<00:00, 4470.79it/s]


R precision mean: 0.3385 ± 0.38


Calculating MRR@1: 100%|██████████| 293/293 [00:00<00:00, 7374.77it/s]


MRR@1 mean: 0.4037 ± 0.49


Calculating Macro-F1@X: 100%|██████████| 293/293 [00:00<00:00, 4735.45it/s]

Macro F1@X: 0.3801 ± 0.39





In [15]:
r_precision_dict, mrr_dict = ({} for i in range(2))
for city_ in ["south", "midwest", "west", "northeast"]:    
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr", city=city_)
    
    train_embeddings = torch.tensor(train_embeddings)
    train_labels = torch.tensor(train_labels)
    test_embeddings = torch.tensor(test_embeddings)
    test_labels = torch.tensor(test_labels)
    
    train_embeddings, train_labels, test_embeddings, test_labels = remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels)
    train_embeddings, train_labels, test_embeddings, test_labels = find_members(train_embeddings, train_labels, test_embeddings, test_labels)
    
    train_embeddings = train_embeddings.detach().cpu().numpy()
    # train_labels = train_labels.detach().cpu().numpy()
    test_embeddings = test_embeddings.detach().cpu().numpy()
    # test_labels = test_labels.detach().cpu().numpy()
    
    # Normalize training embeddings
    faiss.normalize_L2(train_embeddings)
    # Normalize test embeddings
    faiss.normalize_L2(test_embeddings)

    r_precision_mean, r_precision_std = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    mrr_mean, mrr_std = calculate_mrr_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # macro_f1_1 = calculate_micro_f1_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # macro_f1_10 = calculate_micro_f1_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=10)
    # macro_f1_100 = calculate_micro_f1_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=100)
    # macro_average_r_precision = generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    _, _  = generate_macro_f1_at_x_results(train_embeddings, train_labels, test_embeddings, test_labels)
     
    # r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    # mrr_dict[city_] = (mrr_mean, mrr_std)
    
#with open('../models/pickled/embeddings/pretrained_declutr/rprecision.pickle', 'wb') as handle:
#    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
#with open('../models/pickled/embeddings/pretrained_declutr/mrr.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

--------------------------------------------------
City: south


Calculating R-precision: 100%|██████████| 964/964 [00:00<00:00, 1629.82it/s]


R precision mean: 0.5318 ± 0.35


Calculating MRR@1: 100%|██████████| 964/964 [00:00<00:00, 2699.69it/s]


MRR@1 mean: 0.7532 ± 0.43


Calculating Macro-F1@X: 100%|██████████| 964/964 [00:00<00:00, 1612.86it/s]


Macro F1@X: 0.4887 ± 0.37
--------------------------------------------------
City: midwest


Calculating R-precision: 100%|██████████| 624/624 [00:00<00:00, 2260.10it/s]


R precision mean: 0.6207 ± 0.39


Calculating MRR@1: 100%|██████████| 624/624 [00:00<00:00, 3930.78it/s]


MRR@1 mean: 0.6954 ± 0.46


Calculating Macro-F1@X: 100%|██████████| 624/624 [00:00<00:00, 2244.76it/s]


Macro F1@X: 0.5397 ± 0.42
--------------------------------------------------
City: west


Calculating R-precision: 100%|██████████| 259/259 [00:00<00:00, 3126.23it/s]


R precision mean: 0.749 ± 0.37


Calculating MRR@1: 100%|██████████| 259/259 [00:00<00:00, 5373.38it/s]


MRR@1 mean: 0.6104 ± 0.49


Calculating Macro-F1@X: 100%|██████████| 259/259 [00:00<00:00, 2979.37it/s]


Macro F1@X: 0.568 ± 0.43
--------------------------------------------------
City: northeast


Calculating R-precision: 100%|██████████| 293/293 [00:00<00:00, 3704.18it/s]


R precision mean: 0.6499 ± 0.41


Calculating MRR@1: 100%|██████████| 293/293 [00:00<00:00, 5853.28it/s]


MRR@1 mean: 0.7807 ± 0.41


Calculating Macro-F1@X: 100%|██████████| 293/293 [00:00<00:00, 3774.57it/s]

Macro F1@X: 0.6694 ± 0.4





# Style Embedding model

In [50]:
r_precision_dict, mrr_dict = ({} for i in range(2))

for city_ in ["chicago", "atlanta", "detroit", "houston", "dallas", "NY", "SF", "canada"]:
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="styleEmbedding", city=city_)
    r_precision_mean, r_precision_std = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    mrr_mean, mrr_std = calculate_mrr_at_1(train_embeddings, train_labels, test_embeddings, test_labels)
    r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    mrr_dict[city_] = (mrr_mean, mrr_std)
    
with open('../models/pickled/embeddings/pretrained_styleEmbedding/rprecision.pickle', 'wb') as handle:
    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
with open('../models/pickled/embeddings/pretrained_styleEmbedding/mrr.pickle', 'wb') as handle:
    pickle.dump(mrr_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

City: chicago


Calculating R-precision: 100%|██████████| 490/490 [00:00<00:00, 3255.38it/s]


R precision mean: 0.04380285181322671
R precision std: 0.14621349882076917


Calculating MRR@1: 100%|██████████| 490/490 [00:00<00:00, 6089.40it/s]


MRR@1 Mean: 0.07626514611546685
MRR@1 Std: 0.26542187853199545
City: atlanta


Calculating R-precision: 100%|██████████| 471/471 [00:00<00:00, 3946.74it/s]


R precision mean: 0.04968652458652459
R precision std: 0.1567127507156519


Calculating MRR@1: 100%|██████████| 471/471 [00:00<00:00, 6467.33it/s]


MRR@1 Mean: 0.07265388496468214
MRR@1 Std: 0.25956752101181857
City: detroit


Calculating R-precision: 100%|██████████| 153/153 [00:00<00:00, 4600.44it/s]


R precision mean: 0.10399391534391535
R precision std: 0.20905570292954856


Calculating MRR@1: 100%|██████████| 153/153 [00:00<00:00, 7412.23it/s]


MRR@1 Mean: 0.1157556270096463
MRR@1 Std: 0.31993165180277167
City: houston


Calculating R-precision: 100%|██████████| 422/422 [00:00<00:00, 3842.40it/s]


R precision mean: 0.06559834893975931
R precision std: 0.1840278621241181


Calculating MRR@1: 100%|██████████| 422/422 [00:00<00:00, 6554.18it/s]


MRR@1 Mean: 0.07065750736015702
MRR@1 Std: 0.2562518761176323
City: dallas


Calculating R-precision: 100%|██████████| 377/377 [00:00<00:00, 4237.39it/s]


R precision mean: 0.06683864083108589
R precision std: 0.18458040934132475


Calculating MRR@1: 100%|██████████| 377/377 [00:00<00:00, 6934.19it/s]


MRR@1 Mean: 0.06427688504326329
MRR@1 Std: 0.24524552410268038
City: NY


Calculating R-precision: 100%|██████████| 309/309 [00:00<00:00, 4933.76it/s]


R precision mean: 0.0815415244596132
R precision std: 0.2264429605299262


Calculating MRR@1: 100%|██████████| 309/309 [00:00<00:00, 7593.08it/s]


MRR@1 Mean: 0.09038461538461538
MRR@1 Std: 0.28673199452867226
City: SF


Calculating R-precision: 100%|██████████| 268/268 [00:00<00:00, 3969.44it/s]


R precision mean: 0.09614669797428418
R precision std: 0.22998653947544206


Calculating MRR@1: 100%|██████████| 268/268 [00:00<00:00, 6576.88it/s]


MRR@1 Mean: 0.09494640122511486
MRR@1 Std: 0.2931408912443202
City: canada


Calculating R-precision: 100%|██████████| 149/149 [00:00<00:00, 5323.81it/s]


R precision mean: 0.1725718112244898
R precision std: 0.3262201418683189


Calculating MRR@1: 100%|██████████| 149/149 [00:00<00:00, 7852.53it/s]


MRR@1 Mean: 0.20353982300884957
MRR@1 Std: 0.40263055455140984


# Modified scripts with class frequencies and returned dictionaries

In [16]:
def recall_at_k(actual, predicted, k):
    recall_list = []
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())  # Convert PyTorch tensor to NumPy array, then to set
        pred_set = set(pred[:k].numpy())  # Convert PyTorch tensor to NumPy array, then to set
        
        # Check if the actual set is empty to avoid division by zero
        if len(act_set) == 0:
            recall_list.append(0.0)  # Assign recall as 0 if there are no true labels
        else:
            recall_list.append(round(len(act_set & pred_set) / float(len(act_set)), 2))
    
    return recall_list


def generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index_flat.add(train_embeddings)

    # Combine train and test labels to compute class frequency across the entire dataset
    combined_labels = torch.cat([train_labels, test_labels], dim=0)
    unique_vendors = torch.unique(combined_labels)
    
    # Compute class frequencies from the entire dataset (train + test)
    vendor_freq_dict = {int(vendor_id): (combined_labels == vendor_id).sum().item() for vendor_id in unique_vendors}

    # Initialize a dict to store scores by frequency
    frequency_score_dict = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]

        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings[np.newaxis, :]

        k = vendor_freq_dict[vendor_id]  # Use the frequency from the combined dataset
        if k == 0:
            continue

        try:
            _, I = gpu_index_flat.search(test_vendor_embeddings, k)
        except ValueError as e:
            print(f"Error searching for vendor_id {vendor_id} with shape {test_vendor_embeddings.shape}: {e}")
            continue

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels == vendor_id)[0] for _ in range(len(test_adsidx))]

        score = np.mean(recall_at_k(true_label_list, predicted_label_list, k))

        # Track performance score by frequency of instances
        freq = vendor_freq_dict[vendor_id]  # Get the frequency from the combined dataset
        if freq not in frequency_score_dict:
            frequency_score_dict[freq] = []
        frequency_score_dict[freq].append(score)

    # Calculate average score for each frequency
    frequency_avg_score_dict = {freq: np.mean(scores) for freq, scores in frequency_score_dict.items()}

    return frequency_avg_score_dict


def calculate_mrr_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=10):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index_flat.add(train_embeddings)

    # Combine train and test labels to compute class frequency across the entire dataset
    combined_labels = torch.cat([train_labels, test_labels], dim=0)
    unique_vendors = torch.unique(combined_labels)

    # Compute class frequencies from the entire dataset (train + test)
    vendor_freq_dict = {int(vendor_id): (combined_labels == vendor_id).sum().item() for vendor_id in unique_vendors}

    # Initialize a dict to store scores by frequency
    frequency_score_dict = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc=f"Calculating MRR@{k}"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]
        if len(test_adsidx) == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]

        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings[np.newaxis, :]

        try:
            D, I = gpu_index_flat.search(test_vendor_embeddings, k)
        except ValueError as e:
            print(f"Error searching for vendor_id {vendor_id} with shape {test_vendor_embeddings.shape}: {e}")
            continue

        scores = []
        for idx, indices in enumerate(I):
            correct_indices = np.where(train_labels.numpy() == vendor_id)[0]
            for rank, index in enumerate(indices, start=1):
                if index in correct_indices:
                    scores.append(1.0 / rank)
                    break
            else:
                scores.append(0.0)

        # Track performance score by frequency of instances
        freq = vendor_freq_dict[vendor_id]  # Use the frequency from the combined dataset
        if freq not in frequency_score_dict:
            frequency_score_dict[freq] = []
        frequency_score_dict[freq].extend(scores)

    # Calculate average MRR score for each frequency
    frequency_avg_mrr_dict = {freq: np.mean(scores) for freq, scores in frequency_score_dict.items()}

    return frequency_avg_mrr_dict

def generate_macro_f1_at_x_results(train_embeddings, train_labels, test_embeddings, test_labels):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index_flat.add(train_embeddings)

    # Combine train and test labels to compute class frequency across the entire dataset
    combined_labels = torch.cat([train_labels, test_labels], dim=0)
    unique_vendors = torch.unique(combined_labels)

    # Compute class frequencies from the entire dataset (train + test)
    vendor_freq_dict = {int(vendor_id): (combined_labels == vendor_id).sum().item() for vendor_id in unique_vendors}

    # Initialize a dict to store scores by frequency
    frequency_score_dict = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating Macro-F1@X"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]

        if isinstance(test_vendor_embeddings, torch.Tensor):
            test_vendor_embeddings = test_vendor_embeddings.numpy()

        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = np.expand_dims(test_vendor_embeddings, axis=0)

        k = vendor_freq_dict[vendor_id]  # Use the frequency from the combined dataset
        if k > train_embeddings.shape[0]:
            k = train_embeddings.shape[0]

        _, I = gpu_index_flat.search(test_vendor_embeddings, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels == vendor_id)[0] for _ in range(len(test_adsidx))]

        f1_scores = f1_at_k(true_label_list, predicted_label_list, k)

        # Track performance score by frequency of instances
        freq = vendor_freq_dict[vendor_id]  # Use the frequency from the combined dataset
        if freq not in frequency_score_dict:
            frequency_score_dict[freq] = []
        frequency_score_dict[freq].extend(f1_scores)

    # Calculate average F1 score for each frequency
    frequency_avg_f1_dict = {freq: np.mean(scores) for freq, scores in frequency_score_dict.items()}

    return frequency_avg_f1_dict

In [17]:
r_precision_dict, mrr_dict = ({} for i in range(2))
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr", city=city_)
    mrr = calculate_mrr_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=10)
    rprecision = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    # macro_f1_1 = calculate_micro_f1_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # macro_f1_10 = calculate_micro_f1_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=10)
    # macro_f1_100 = calculate_micro_f1_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=100)
    # macro_average_r_precision = generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    macro  = generate_macro_f1_at_x_results(train_embeddings, train_labels, test_embeddings, test_labels)

    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/mrr/zs_mrr_{city_}.pickle', 'wb') as handle:
        pickle.dump(mrr, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/rprecision/zs_rprecision_{city_}.pickle', 'wb') as handle:
        pickle.dump(rprecision, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/macro-f1/zs_macro_{city_}.pickle', 'wb') as handle:
        pickle.dump(macro, handle, protocol=pickle.HIGHEST_PROTOCOL)
     
    # r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    # mrr_dict[city_] = (mrr_mean, mrr_std)
    
#with open('../models/pickled/embeddings/pretrained_declutr/rprecision.pickle', 'wb') as handle:
#    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
#with open('../models/pickled/embeddings/pretrained_declutr/mrr.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

--------------------------------------------------
City: south


Calculating MRR@10: 100%|██████████| 1463/1463 [00:00<00:00, 5056.92it/s]
Calculating R-precision: 100%|██████████| 1463/1463 [00:00<00:00, 3838.33it/s]
Calculating Macro-F1@X: 100%|██████████| 1463/1463 [00:00<00:00, 3769.78it/s]


--------------------------------------------------
City: midwest


Calculating MRR@10: 100%|██████████| 1033/1033 [00:00<00:00, 6895.44it/s]
Calculating R-precision: 100%|██████████| 1033/1033 [00:00<00:00, 5225.17it/s]
Calculating Macro-F1@X: 100%|██████████| 1033/1033 [00:00<00:00, 5057.24it/s]


--------------------------------------------------
City: west


Calculating MRR@10: 100%|██████████| 520/520 [00:00<00:00, 9378.43it/s]
Calculating R-precision: 100%|██████████| 520/520 [00:00<00:00, 7107.28it/s]
Calculating Macro-F1@X: 100%|██████████| 520/520 [00:00<00:00, 6714.77it/s]


--------------------------------------------------
City: northeast


Calculating MRR@10: 100%|██████████| 591/591 [00:00<00:00, 10341.75it/s]
Calculating R-precision: 100%|██████████| 591/591 [00:00<00:00, 8541.63it/s]
Calculating Macro-F1@X: 100%|██████████| 591/591 [00:00<00:00, 8624.70it/s]


In [18]:
with open('/workspace/persistent/HTClipper/results/retrieval/text/mrr/zs_mrr_northeast.pickle', 'rb') as handle:
    mrr_zs = pickle.load(handle)
    
with open('/workspace/persistent/HTClipper/results/retrieval/text/rprecision/zs_rprecision_northeast.pickle', 'rb') as handle:
    rprecision_zs = pickle.load(handle)
    
with open('/workspace/persistent/HTClipper/results/retrieval/text/macro-f1/zs_macro_northeast.pickle', 'rb') as handle:
    macro_zs = pickle.load(handle)

In [19]:
print(sorted({k:round(v,4) for k, v in mrr_zs.items()}.items()))
print(sorted({k:round(v,4) for k, v in rprecision_zs.items()}.items()))
print(sorted({k:round(v,4) for k, v in macro_zs.items()}.items()))

[(2, 0.2217), (3, 0.3012), (4, 0.4056), (5, 0.5215), (6, 0.4979), (7, 0.7466), (8, 0.4358), (9, 0.8819), (10, 0.5841), (11, 0.3368), (12, 0.7432), (13, 1.0), (14, 1.0), (15, 0.5833), (16, 0.5), (17, 0.28), (18, 0.7381), (19, 0.4667), (21, 1.0), (25, 0.2292), (26, 0.5444), (30, 1.0), (34, 1.0), (35, 0.4583), (43, 1.0)]
[(2, 0.2778), (3, 0.2727), (4, 0.4165), (5, 0.4299), (6, 0.4115), (7, 0.6174), (8, 0.3675), (9, 0.6419), (10, 0.43), (11, 0.5833), (12, 0.6826), (13, 0.5), (14, 0.98), (15, 0.4762), (16, 0.62), (17, 0.4667), (18, 0.2958), (19, 0.93), (21, 0.324), (25, 0.0662), (26, 0.4371), (30, 0.94), (34, 0.788), (35, 0.235), (43, 0.5267)]
[(2, 0.1572), (3, 0.1938), (4, 0.3125), (5, 0.3116), (6, 0.3195), (7, 0.5215), (8, 0.2901), (9, 0.5876), (10, 0.3912), (11, 0.3646), (12, 0.6038), (13, 0.48), (14, 0.8107), (15, 0.4753), (16, 0.5536), (17, 0.484), (18, 0.2604), (19, 0.7884), (21, 0.2807), (25, 0.0536), (26, 0.3567), (30, 0.8343), (34, 0.7246), (35, 0.2133), (43, 0.4851)]
