# Importing libraries

In [2]:
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    import os
import gc

import pickle
import numpy as np
from itertools import product

from collections import Counter
from tqdm import tqdm
import numpy as np

import torch
import faiss
import time

import warnings
warnings.filterwarnings('ignore')   

In [3]:
res = faiss.StandardGpuResources()  # use a single GPU

# Loading the trained embeddings

In [4]:
def load_embeddings(model_name, city):
    if model_name == "declutr_ce":
        emb_dir = "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_all/"
    elif model_name == "declutr_supcon":
        emb_dir = "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_SupCononly_all_all/"
    elif model_name == "declutr_triplet":
        emb_dir = "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_tripletonly_all/"
    else:
        emb_dir = "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_CE+contra_all/"
        
    train_label_filename = city + "_labels_train.pt"
    train_data_filename = city + "_data_train.pt"
    test_label_filename = city + "_labels_test.pt"
    test_data_filename = city + "_data_test.pt"
    
    train_emb = torch.load(os.path.join(emb_dir, train_data_filename), map_location=torch.device('cpu'))
    train_labels = torch.load(os.path.join(emb_dir, train_label_filename), map_location=torch.device('cpu'))

    test_emb = torch.load(os.path.join(emb_dir, test_data_filename), map_location=torch.device('cpu'))
    test_labels = torch.load(os.path.join(emb_dir, test_label_filename), map_location=torch.device('cpu'))
    
    return train_emb, train_labels, test_emb, test_labels    

In [5]:
def load_and_combine_embeddings(cities, model_name):
    combined_train_emb, combined_train_labels = [], []
    combined_test_emb, combined_test_labels = [], []

    for city in cities:
        if city == "all":
            continue
        train_emb, train_labels, test_emb, test_labels = load_embeddings(model_name, city)
        combined_train_emb.append(train_emb)
        combined_train_labels.append(train_labels)
        combined_test_emb.append(test_emb)
        combined_test_labels.append(test_labels)

    combined_train_emb = torch.cat(combined_train_emb)
    combined_train_labels = torch.cat(combined_train_labels)
    combined_test_emb = torch.cat(combined_test_emb)
    combined_test_labels = torch.cat(combined_test_labels)

    return combined_train_emb, combined_train_labels, combined_test_emb, combined_test_labels

# R-Precision metrics

In [6]:
def recall_at_k(actual, predicted, k):
    recall_list = []
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())  # Convert PyTorch tensor to NumPy array, then to set
        pred_set = set(pred[:k].numpy())  # Convert PyTorch tensor to NumPy array, then to set
        recall_list.append(round(len(act_set & pred_set) / float(len(act_set)), 2))
    return recall_list

def generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    
    train_embeddings_np = train_embeddings.numpy()
    gpu_index_flat.add(train_embeddings_np)
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels.numpy()

    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}

    r_precision_score = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        
        # Ensure test_vendor_embeddings is a 2D array and convert to numpy
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        
        # Check if k is within the valid range
        if k > train_embeddings_np.shape[0]:
            print(f"Warning: k ({k}) is greater than the number of training samples ({train_embeddings_np.shape[0]}), adjusting k to maximum possible.")
            k = train_embeddings_np.shape[0]

        try:
            # Verify k is an integer and not numpy integer
            _, I = gpu_index_flat.search(test_vendor_embeddings_np, int(k))
        except Exception as e:
            print(f"Error during FAISS search for vendor_id {vendor_id}: {e}")
            continue

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        r_precision_score[vendor_id] = np.mean(recall_at_k(true_label_list, predicted_label_list, k))
    
    r_precision_mean = np.mean(list(r_precision_score.values()))
    r_precision_std = np.std(list(r_precision_score.values()))

    print(f"R precision mean: {round(r_precision_mean, 4)} ± {round(r_precision_std, 2)}")
    
    return r_precision_mean, r_precision_std

In [7]:
def mrr_at_k(actual, predicted, k_):
    """
    Calculate the Mean Reciprocal Rank (MRR) at K_.
    
    Parameters:
        actual (list of Tensors): List of tensors containing the actual labels.
        predicted (list of Tensors): List of tensors containing the predicted label indices.
        k_ (int): The number of top predictions to consider for computing MRR.
    
    Returns:
        float: The MRR@k.
    """
    mrr = 0.0
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())
        pred_set = pred[:k_].numpy()
        for i, p in enumerate(pred_set, 1):
            if p in act_set:
                mrr += 1 / i
                break
    return mrr / len(actual) if actual else 0

def generate_mrr_at_1_results(train_embeddings, train_labels, test_embeddings, test_labels, k):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)

    train_embeddings_np = train_embeddings.numpy()
    gpu_index_flat.add(train_embeddings_np)

    unique_labels = torch.unique(test_labels)
    train_labels_np = train_labels.numpy()

    mrr_score = []

    for label in tqdm(unique_labels, total=len(unique_labels), desc=f"Calculating MRR@{k}"):
        label_id = int(label)
        test_idx = (test_labels == label_id).nonzero(as_tuple=True)[0]

        if len(test_idx) == 0:
            continue

        test_embeddings_np = test_embeddings[test_idx].numpy()

        D, I = gpu_index_flat.search(test_embeddings_np, 1)  # Search for top-1 nearest neighbors

        predicted_label_list = [train_labels[I[index]] for index in range(len(test_idx))]
        true_label_list = [train_labels[np.where(train_labels == label_id)[0]] for _ in range(len(test_idx))]
        
        mrr_score.append(mrr_at_k(true_label_list, predicted_label_list, k_=k))

    mrr_mean = np.mean(mrr_score)
    mrr_std = np.std(mrr_score)
    print(f"MRR@{k} mean: {round(mrr_mean, 4)} ± {round(mrr_std, 2)}")
    
    return mrr_mean, mrr_std

In [8]:
def generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels):
    dim = train_embeddings.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    
    train_embeddings_np = train_embeddings.numpy()
    gpu_index_flat.add(train_embeddings_np)
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels.numpy()

    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}

    r_precision_score = {}
    total_samples = 0  # Total number of samples for all classes

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        
        # Ensure test_vendor_embeddings is a 2D array and convert to numpy
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        
        # Check if k is within the valid range
        if k > train_embeddings_np.shape[0]:
            print(f"Warning: k ({k}) is greater than the number of training samples ({train_embeddings_np.shape[0]}), adjusting k to maximum possible.")
            k = train_embeddings_np.shape[0]

        try:
            # Verify k is an integer and not numpy integer
            _, I = gpu_index_flat.search(test_vendor_embeddings_np, int(k))
        except Exception as e:
            print(f"Error during FAISS search for vendor_id {vendor_id}: {e}")
            continue

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        r_precision_score[vendor_id] = np.mean(recall_at_k(true_label_list, predicted_label_list, k))
        total_samples += len(test_adsidx)  # Add the number of samples for this class

    # Calculate the weighted mean of R-Precision
    weighted_r_precision_sum = sum(r_precision_score[vendor_id] * len((test_labels == vendor_id).nonzero(as_tuple=True)[0]) for vendor_id in r_precision_score.keys())
    macro_r_precision = weighted_r_precision_sum / total_samples

    print(f"Macro R-precision: {macro_r_precision}")
    
    return macro_r_precision

In [9]:
def precision_at_k(actual, predicted, k):
    precision_list = []
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())
        pred_set = set(pred[:k].numpy())
        precision_list.append(len(act_set & pred_set) / float(k))
    return precision_list

def f1_at_k(actual, predicted, k):
    f1_list = []
    precision_list = precision_at_k(actual, predicted, k)
    recall_list = recall_at_k(actual, predicted, k)
    for precision, recall in zip(precision_list, recall_list):
        if precision + recall > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0
        f1_list.append(f1)
    return f1_list

def generate_macro_f1_at_x_results(test_embeddings, test_labels):
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels_tensor.numpy()
    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    f1_score_list = []

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating Macro-F1@X"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = index.search(test_vendor_embeddings_np, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels_tensor == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        f1_score_list.extend(f1_at_k(true_label_list, predicted_label_list, k))

    macro_f1_mean = np.mean(f1_score_list)
    macro_f1_std = np.std(f1_score_list)

    print(f"Macro F1@X: {round(macro_f1_mean, 4)} ± {round(macro_f1_std, 2)}")
    
    return macro_f1_mean, macro_f1_std

In [10]:
# Global variables
dim = None
index = None
train_embeddings_np = None
train_labels_np = None

# @profile
def initialize_globals(train_embeddings, train_labels):
    global dim, index, train_embeddings_np, train_labels_tensor
    start_time = time.time()
    dim = train_embeddings.shape[1]
    index = faiss.IndexFlatIP(dim)
    
    train_embeddings_np = train_embeddings.numpy()
    batch_size = 10000
    num_batches = (train_embeddings_np.shape[0] + batch_size - 1) // batch_size

    for i in range(num_batches):
        start_idx = i * batch_size
        end_idx = min((i + 1) * batch_size, train_embeddings_np.shape[0])
        batch_embeddings = train_embeddings_np[start_idx:end_idx]
        index.add(batch_embeddings)
        print(f"Added batch {i+1}/{num_batches} to index.")
    
    train_labels_tensor = train_labels
    elapsed_time = time.time() - start_time
    print(f"Global variables initialized in {elapsed_time:.2f} seconds.")

def recall_at_k(actual, predicted, k):
    recall_list = []
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())
        pred_set = set(pred[:k].numpy())
        recall_list.append(len(act_set & pred_set) / float(len(act_set)))
    return recall_list

def generate_rprecision_results(test_embeddings, test_labels):
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels_tensor.numpy()
    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    r_precision_score = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = index.search(test_vendor_embeddings_np, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels_tensor == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        r_precision_score[vendor_id] = np.mean(recall_at_k(true_label_list, predicted_label_list, k))
    
    r_precision_mean = np.mean(list(r_precision_score.values()))
    r_precision_std = np.std(list(r_precision_score.values()))

    print(f"R precision mean: {round(r_precision_mean, 4)} ± {round(r_precision_std, 2)}")
    # print(f"R precision std: {r_precision_std}")
    
    return r_precision_mean, r_precision_std

def mrr_at_k(actual, predicted, k_):
    mrr = 0.0
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())
        for i, p in enumerate(pred[:k_].numpy(), 1):
            if p in act_set:
                mrr += 1 / i
                break
    return mrr / len(actual) if actual else 0

def generate_mrr_at_1_results(test_embeddings, test_labels, k):
    unique_labels = torch.unique(test_labels)
    mrr_score = []

    for label in tqdm(unique_labels, total=len(unique_labels), desc=f"Calculating MRR@{k}"):
        label_id = int(label)
        test_idx = (test_labels == label_id).nonzero(as_tuple=True)[0]

        if len(test_idx) == 0:
            continue

        test_embeddings_np = test_embeddings[test_idx].numpy()

        D, I = index.search(test_embeddings_np, 1)

        predicted_label_list = [train_labels_tensor[I[index]] for index in range(len(test_idx))]
        true_label_list = [train_labels_tensor[torch.where(train_labels_tensor == label_id)[0]] for _ in range(len(test_idx))]
        
        mrr_score.append(mrr_at_k(true_label_list, predicted_label_list, k_=k))

    mrr_mean = np.mean(mrr_score)
    mrr_std = np.std(mrr_score)
    print(f"MRR@{k} mean: {round(mrr_mean, 4)} ± {round(mrr_std, 2)}")
    # print(f"MRR@{k} std: {mrr_std}")
    
    return mrr_mean, mrr_std

def generate_macro_rprecision_results(test_embeddings, test_labels):
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels_tensor.numpy()
    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    r_precision_score = {}
    total_samples = 0

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = index.search(test_vendor_embeddings_np, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels_tensor == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        r_precision_score[vendor_id] = np.mean(recall_at_k(true_label_list, predicted_label_list, k))
        total_samples += len(test_adsidx)

    weighted_r_precision_sum = sum(r_precision_score[vendor_id] * len((test_labels == vendor_id).nonzero(as_tuple=True)[0]) for vendor_id in r_precision_score.keys())
    macro_r_precision = weighted_r_precision_sum / total_samples

    print(f"Macro R-precision: {macro_r_precision}")
    
    return macro_r_precision

In [11]:
import torch
import hashlib

def remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels):
    """
    Removes duplicate embeddings between train and test sets using hashing.

    Args:
        train_embeddings (torch.Tensor): Embeddings from the training set.
        train_labels (torch.Tensor): Labels corresponding to the training embeddings.
        test_embeddings (torch.Tensor): Embeddings from the test set.
        test_labels (torch.Tensor): Labels corresponding to the test embeddings.

    Returns:
        unique_train_embeddings (torch.Tensor): Unique embeddings from the training set.
        unique_train_labels (torch.Tensor): Labels corresponding to the unique training embeddings.
        unique_test_embeddings (torch.Tensor): Unique embeddings from the test set (excluding duplicates with train set).
        unique_test_labels (torch.Tensor): Labels corresponding to the unique test embeddings.
    """

    def hash_embedding(embedding):
        # Ensure the embedding is contiguous in memory and of type float32
        embedding = embedding.contiguous().view(-1).float()
        # Round to reduce the impact of floating-point precision errors
        embedding = torch.round(embedding * 1e6) / 1e6  # Adjust precision as needed
        # Convert the embedding to bytes
        emb_bytes = embedding.numpy().tobytes()
        # Compute MD5 hash
        return hashlib.md5(emb_bytes).hexdigest()

    # Create dictionaries mapping hashes to embeddings and labels for the train set
    train_hash_embedding_label_dict = {}
    for emb, label in zip(train_embeddings, train_labels):
        emb_hash = hash_embedding(emb)
        # Store the embedding and label only if the hash is not already in the dictionary
        if emb_hash not in train_hash_embedding_label_dict:
            train_hash_embedding_label_dict[emb_hash] = (emb, label.item())

    # Create dictionaries mapping hashes to embeddings and labels for the test set
    test_hash_embedding_label_dict = {}
    for emb, label in zip(test_embeddings, test_labels):
        emb_hash = hash_embedding(emb)
        # Store the embedding and label only if the hash is not already in the dictionary
        if emb_hash not in test_hash_embedding_label_dict:
            test_hash_embedding_label_dict[emb_hash] = (emb, label.item())

    # Identify common hashes between train and test sets
    common_hashes = set(train_hash_embedding_label_dict.keys()).intersection(set(test_hash_embedding_label_dict.keys()))

    # Remove duplicates from the test set
    unique_test_hashes = set(test_hash_embedding_label_dict.keys()) - common_hashes

    # Reconstruct unique embeddings and labels for the train set
    unique_train_embeddings_list = [emb_label[0] for emb_label in train_hash_embedding_label_dict.values()]
    unique_train_labels_list = [emb_label[1] for emb_label in train_hash_embedding_label_dict.values()]

    # Reconstruct unique embeddings and labels for the test set
    unique_test_embeddings_list = [test_hash_embedding_label_dict[h][0] for h in unique_test_hashes]
    unique_test_labels_list = [test_hash_embedding_label_dict[h][1] for h in unique_test_hashes]

    # Convert lists to tensors
    unique_train_embeddings = torch.stack(unique_train_embeddings_list)
    unique_train_labels = torch.tensor(unique_train_labels_list)

    unique_test_embeddings = torch.stack(unique_test_embeddings_list)
    unique_test_labels = torch.tensor(unique_test_labels_list)

    return unique_train_embeddings, unique_train_labels, unique_test_embeddings, unique_test_labels

In [12]:
def find_members(train_embeddings, train_labels, test_embeddings, test_labels):
    """
    Filters test_embeddings and test_labels by removing entries whose labels do not exist in train_labels.

    Parameters:
    - train_embeddings (torch.Tensor): Embeddings for the training data.
    - train_labels (torch.Tensor): Labels for the training data.
    - test_embeddings (torch.Tensor): Embeddings for the test data.
    - test_labels (torch.Tensor): Labels for the test data.

    Returns:
    - train_embeddings (torch.Tensor): Original training embeddings (unchanged).
    - train_labels (torch.Tensor): Original training labels (unchanged).
    - filtered_test_embeddings (torch.Tensor): Filtered test embeddings.
    - filtered_test_labels (torch.Tensor): Filtered test labels.
    """

    # Ensure labels are on the same device
    if train_labels.device != test_labels.device:
        train_labels = train_labels.to(test_labels.device)

    # Use torch.isin to create a mask of test labels that exist in train labels
    if hasattr(torch, 'isin'):
        # torch.isin is available in PyTorch 1.10 and later
        mask = torch.isin(test_labels, train_labels)
    else:
        # For older versions of PyTorch, convert to NumPy arrays
        train_labels_np = train_labels.cpu().numpy()
        test_labels_np = test_labels.cpu().numpy()
        mask_np = np.isin(test_labels_np, train_labels_np)
        mask = torch.from_numpy(mask_np).to(test_labels.device)

    # Apply the mask to filter test embeddings and labels
    filtered_test_embeddings = test_embeddings[mask]
    filtered_test_labels = test_labels[mask]

    return train_embeddings, train_labels, filtered_test_embeddings, filtered_test_labels

# Declutr model

In [None]:
# All
r_precision_dict, mrr_dict1, mrr_dict10, mrr_dict100 = ({} for i in range(4))
mean_total, std_total = 0, 0
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr_triplet", city=city_)
    
    train_embeddings = torch.tensor(train_embeddings)
    train_labels = torch.tensor(train_labels)
    test_embeddings = torch.tensor(test_embeddings)
    test_labels = torch.tensor(test_labels)
    
    train_embeddings, train_labels, test_embeddings, test_labels = remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels)
    train_embeddings, train_labels, test_embeddings, test_labels = find_members(train_embeddings, train_labels, test_embeddings, test_labels)
    
    initialize_globals(train_embeddings, train_labels)
    try:
        mean, std = generate_mrr_at_1_results(test_embeddings, test_labels, 1)
        mean, std = generate_rprecision_results(test_embeddings, test_labels)
        # _, _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean, std  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        mean_total += mean
        std_total += std
    except Exception as e:
        print(f"Error processing {city}: {e}")
        
    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()
    
    # _, _ = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    # mrr_mean_1, mrr_std_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # _, _ = generate_mrr_at_1_results(train_embeddings, train_labels, test_embeddings, test_labels, k=10)
    # mrr_mean_100, mrr_std_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=100)
    # macro_f_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 1)
    # macro_f_10 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 10)
    # macro_f_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 100)
    # _ = generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    
    # r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    # mrr_dict1[city_] = (mrr_mean_1, mrr_std_1)
    # mrr_dict10[city_] = (mrr_mean_10, mrr_std_10)
    # mrr_dict100[city_] = (mrr_mean_100, mrr_std_100)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/rprecision.pickle', 'wb') as handle:
#    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr1.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict1, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr10.pickle', 'wb') as handle:
#     pickle.dump(mrr_dict10, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr100.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict100, handle, protocol=pickle.HIGHEST_PROTOCOL)

print(f"Mean average: {round(float(mean_total / 7), 4)} ± {round(float(std_total / 7), 2)}")

In [18]:
# All
r_precision_dict, mrr_dict1, mrr_dict10, mrr_dict100 = ({} for i in range(4))
mean_total, std_total = 0, 0
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr_supcon", city=city_)
    
    train_embeddings = torch.tensor(train_embeddings)
    train_labels = torch.tensor(train_labels)
    test_embeddings = torch.tensor(test_embeddings)
    test_labels = torch.tensor(test_labels)
    
    train_embeddings, train_labels, test_embeddings, test_labels = remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels)
    train_embeddings, train_labels, test_embeddings, test_labels = find_members(train_embeddings, train_labels, test_embeddings, test_labels)
    
    # Normalize training embeddings
    faiss.normalize_L2(train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    faiss.normalize_L2(test_embeddings.detach().cpu().numpy())
    
    initialize_globals(train_embeddings, train_labels)
    try:
        mean, std = generate_mrr_at_1_results(test_embeddings, test_labels, 1)
        mean, std = generate_rprecision_results(test_embeddings, test_labels)
        # _, _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean, std  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        mean_total += mean
        std_total += std
    except Exception as e:
        print(f"Error processing {city}: {e}")
        
    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()
    
    # _, _ = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    # mrr_mean_1, mrr_std_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # _, _ = generate_mrr_at_1_results(train_embeddings, train_labels, test_embeddings, test_labels, k=10)
    # mrr_mean_100, mrr_std_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=100)
    # macro_f_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 1)
    # macro_f_10 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 10)
    # macro_f_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 100)
    # _ = generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    
    # r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    # mrr_dict1[city_] = (mrr_mean_1, mrr_std_1)
    # mrr_dict10[city_] = (mrr_mean_10, mrr_std_10)
    # mrr_dict100[city_] = (mrr_mean_100, mrr_std_100)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/rprecision.pickle', 'wb') as handle:
#    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr1.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict1, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr10.pickle', 'wb') as handle:
#     pickle.dump(mrr_dict10, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr100.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict100, handle, protocol=pickle.HIGHEST_PROTOCOL)

print(f"Mean average: {round(float(mean_total / 7), 4)} ± {round(float(std_total / 7), 2)}")

--------------------------------------------------
City: south
Added batch 1/2 to index.
Added batch 2/2 to index.
Global variables initialized in 0.04 seconds.


Calculating MRR@1: 100%|██████████| 964/964 [00:10<00:00, 96.24it/s] 


MRR@1 mean: 0.9163 ± 0.25


Calculating R-precision: 100%|██████████| 964/964 [00:10<00:00, 95.11it/s] 


R precision mean: 0.8099 ± 0.26


Calculating Macro-F1@X: 100%|██████████| 964/964 [00:10<00:00, 94.17it/s] 


Macro F1@X: 0.8446 ± 0.24
--------------------------------------------------
City: midwest
Added batch 1/1 to index.
Global variables initialized in 0.01 seconds.


Calculating MRR@1: 100%|██████████| 624/624 [00:04<00:00, 145.48it/s]


MRR@1 mean: 0.7932 ± 0.37


Calculating R-precision: 100%|██████████| 624/624 [00:04<00:00, 142.14it/s]


R precision mean: 0.6615 ± 0.36


Calculating Macro-F1@X: 100%|██████████| 624/624 [00:04<00:00, 143.23it/s]


Macro F1@X: 0.6544 ± 0.36
--------------------------------------------------
City: west
Added batch 1/1 to index.
Global variables initialized in 0.01 seconds.


Calculating MRR@1: 100%|██████████| 259/259 [00:00<00:00, 333.51it/s]


MRR@1 mean: 0.868 ± 0.32


Calculating R-precision: 100%|██████████| 259/259 [00:00<00:00, 336.69it/s]


R precision mean: 0.8013 ± 0.33


Calculating Macro-F1@X: 100%|██████████| 259/259 [00:00<00:00, 329.07it/s]


Macro F1@X: 0.766 ± 0.3
--------------------------------------------------
City: northeast
Added batch 1/1 to index.
Global variables initialized in 0.01 seconds.


Calculating MRR@1: 100%|██████████| 293/293 [00:00<00:00, 443.44it/s]


MRR@1 mean: 0.776 ± 0.4


Calculating R-precision: 100%|██████████| 293/293 [00:00<00:00, 441.37it/s]


R precision mean: 0.707 ± 0.4


Calculating Macro-F1@X: 100%|██████████| 293/293 [00:00<00:00, 440.76it/s]


Macro F1@X: 0.7279 ± 0.38
Mean average: 0.4276 ± 0.18


In [14]:
# All
r_precision_dict, mrr_dict1, mrr_dict10, mrr_dict100 = ({} for i in range(4))
mean_total, std_total = 0, 0
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr_ce", city=city_)
    
    train_embeddings = torch.tensor(train_embeddings)
    train_labels = torch.tensor(train_labels)
    test_embeddings = torch.tensor(test_embeddings)
    test_labels = torch.tensor(test_labels)
    
    train_embeddings, train_labels, test_embeddings, test_labels = remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels)
    
    initialize_globals(train_embeddings, train_labels)
    try:
        mean, std = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean, std = generate_rprecision_results(test_embeddings, test_labels)
        # _, _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean, std  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        mean_total += mean
        std_total += std
    except Exception as e:
        print(f"Error processing {city}: {e}")
        
    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()
    
    # _, _ = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    # mrr_mean_1, mrr_std_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # _, _ = generate_mrr_at_1_results(train_embeddings, train_labels, test_embeddings, test_labels, k=10)
    # mrr_mean_100, mrr_std_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=100)
    # macro_f_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 1)
    # macro_f_10 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 10)
    # macro_f_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 100)
    # _ = generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    
    # r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    # mrr_dict1[city_] = (mrr_mean_1, mrr_std_1)
    # mrr_dict10[city_] = (mrr_mean_10, mrr_std_10)
    # mrr_dict100[city_] = (mrr_mean_100, mrr_std_100)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/rprecision.pickle', 'wb') as handle:
#    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr1.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict1, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr10.pickle', 'wb') as handle:
#     pickle.dump(mrr_dict10, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr100.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict100, handle, protocol=pickle.HIGHEST_PROTOCOL)

print(f"Mean average: {round(float(mean_total / 7), 4)} ± {round(float(std_total / 7), 2)}")

--------------------------------------------------
City: south
Added batch 1/2 to index.
Added batch 2/2 to index.
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 975/975 [00:10<00:00, 93.14it/s] 


MRR@10 mean: 0.7361 ± 0.4


Calculating R-precision: 100%|██████████| 975/975 [00:10<00:00, 93.71it/s] 


R precision mean: 0.5557 ± 0.36


Calculating Macro-F1@X: 100%|██████████| 975/975 [00:10<00:00, 95.81it/s] 


Macro F1@X: 0.6098 ± 0.35
--------------------------------------------------
City: midwest
Added batch 1/1 to index.
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 633/633 [00:04<00:00, 146.65it/s]


MRR@10 mean: 0.5622 ± 0.46


Calculating R-precision: 100%|██████████| 633/633 [00:04<00:00, 148.89it/s]


R precision mean: 0.4596 ± 0.4


Calculating Macro-F1@X: 100%|██████████| 633/633 [00:04<00:00, 149.27it/s]


Macro F1@X: 0.476 ± 0.38
--------------------------------------------------
City: west
Added batch 1/1 to index.
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 268/268 [00:00<00:00, 350.07it/s]


MRR@10 mean: 0.6179 ± 0.46


Calculating R-precision: 100%|██████████| 268/268 [00:00<00:00, 347.56it/s]


R precision mean: 0.5842 ± 0.41


Calculating Macro-F1@X: 100%|██████████| 268/268 [00:00<00:00, 314.05it/s]


Macro F1@X: 0.6123 ± 0.35
--------------------------------------------------
City: northeast
Added batch 1/1 to index.
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 309/309 [00:00<00:00, 448.55it/s]


MRR@10 mean: 0.5558 ± 0.49


Calculating R-precision: 100%|██████████| 309/309 [00:00<00:00, 467.77it/s]


R precision mean: 0.4944 ± 0.43


Calculating Macro-F1@X: 100%|██████████| 309/309 [00:00<00:00, 462.73it/s]

Macro F1@X: 0.5042 ± 0.42
Mean average: 0.3146 ± 0.22





In [19]:
# All
r_precision_dict, mrr_dict1, mrr_dict10, mrr_dict100 = ({} for i in range(4))
mean_total, std_total = 0, 0
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr_ce", city=city_)
    
    train_embeddings = torch.tensor(train_embeddings)
    train_labels = torch.tensor(train_labels)
    test_embeddings = torch.tensor(test_embeddings)
    test_labels = torch.tensor(test_labels)
    
    train_embeddings, train_labels, test_embeddings, test_labels = remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels)
    train_embeddings, train_labels, test_embeddings, test_labels = find_members(train_embeddings, train_labels, test_embeddings, test_labels)
    
    # Normalize training embeddings
    faiss.normalize_L2(train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    faiss.normalize_L2(test_embeddings.detach().cpu().numpy())
    
    initialize_globals(train_embeddings, train_labels)
    try:
        mean, std = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean, std = generate_rprecision_results(test_embeddings, test_labels)
        # _, _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean, std  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        mean_total += mean
        std_total += std
    except Exception as e:
        print(f"Error processing {city}: {e}")
        
    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()
    
    # _, _ = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    # mrr_mean_1, mrr_std_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # _, _ = generate_mrr_at_1_results(train_embeddings, train_labels, test_embeddings, test_labels, k=10)
    # mrr_mean_100, mrr_std_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=100)
    # macro_f_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 1)
    # macro_f_10 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 10)
    # macro_f_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 100)
    # _ = generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    
    # r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    # mrr_dict1[city_] = (mrr_mean_1, mrr_std_1)
    # mrr_dict10[city_] = (mrr_mean_10, mrr_std_10)
    # mrr_dict100[city_] = (mrr_mean_100, mrr_std_100)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/rprecision.pickle', 'wb') as handle:
#    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr1.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict1, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr10.pickle', 'wb') as handle:
#     pickle.dump(mrr_dict10, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr100.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict100, handle, protocol=pickle.HIGHEST_PROTOCOL)

print(f"Mean average: {round(float(mean_total / 7), 4)} ± {round(float(std_total / 7), 2)}")

--------------------------------------------------
City: south
Added batch 1/2 to index.
Added batch 2/2 to index.
Global variables initialized in 0.04 seconds.


Calculating MRR@10: 100%|██████████| 964/964 [00:10<00:00, 96.15it/s] 


MRR@10 mean: 0.8759 ± 0.29


Calculating R-precision: 100%|██████████| 964/964 [00:10<00:00, 94.98it/s] 


R precision mean: 0.685 ± 0.32


Calculating Macro-F1@X: 100%|██████████| 964/964 [00:10<00:00, 93.85it/s] 


Macro F1@X: 0.7171 ± 0.31
--------------------------------------------------
City: midwest
Added batch 1/1 to index.
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 624/624 [00:04<00:00, 141.06it/s]


MRR@10 mean: 0.7303 ± 0.41


Calculating R-precision: 100%|██████████| 624/624 [00:04<00:00, 142.15it/s]


R precision mean: 0.6074 ± 0.39


Calculating Macro-F1@X: 100%|██████████| 624/624 [00:04<00:00, 142.09it/s]


Macro F1@X: 0.6012 ± 0.38
--------------------------------------------------
City: west
Added batch 1/1 to index.
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 259/259 [00:00<00:00, 342.06it/s]


MRR@10 mean: 0.8155 ± 0.37


Calculating R-precision: 100%|██████████| 259/259 [00:00<00:00, 328.68it/s]


R precision mean: 0.7463 ± 0.37


Calculating Macro-F1@X: 100%|██████████| 259/259 [00:00<00:00, 332.38it/s]


Macro F1@X: 0.7242 ± 0.32
--------------------------------------------------
City: northeast
Added batch 1/1 to index.
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 293/293 [00:00<00:00, 460.86it/s]


MRR@10 mean: 0.7332 ± 0.43


Calculating R-precision: 100%|██████████| 293/293 [00:00<00:00, 439.45it/s]


R precision mean: 0.6401 ± 0.41


Calculating Macro-F1@X: 100%|██████████| 293/293 [00:00<00:00, 448.57it/s]

Macro F1@X: 0.6542 ± 0.4
Mean average: 0.3852 ± 0.2





In [15]:
# All
r_precision_dict, mrr_dict1, mrr_dict10, mrr_dict100 = ({} for i in range(4))
mean_total, std_total = 0, 0
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr_supcon", city=city_)
    
    train_embeddings = torch.tensor(train_embeddings)
    train_labels = torch.tensor(train_labels)
    test_embeddings = torch.tensor(test_embeddings)
    test_labels = torch.tensor(test_labels)
    
    train_embeddings, train_labels, test_embeddings, test_labels = remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels)
    
    initialize_globals(train_embeddings, train_labels)
    try:
        mean, std = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean, std = generate_rprecision_results(test_embeddings, test_labels)
        # _, _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean, std  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        mean_total += mean
        std_total += std
    except Exception as e:
        print(f"Error processing {city}: {e}")
        
    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()
    
    # _, _ = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    # mrr_mean_1, mrr_std_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # _, _ = generate_mrr_at_1_results(train_embeddings, train_labels, test_embeddings, test_labels, k=10)
    # mrr_mean_100, mrr_std_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=100)
    # macro_f_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 1)
    # macro_f_10 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 10)
    # macro_f_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 100)
    # _ = generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    
    # r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    # mrr_dict1[city_] = (mrr_mean_1, mrr_std_1)
    # mrr_dict10[city_] = (mrr_mean_10, mrr_std_10)
    # mrr_dict100[city_] = (mrr_mean_100, mrr_std_100)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/rprecision.pickle', 'wb') as handle:
#    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr1.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict1, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr10.pickle', 'wb') as handle:
#     pickle.dump(mrr_dict10, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr100.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict100, handle, protocol=pickle.HIGHEST_PROTOCOL)

print(f"Mean average: {round(float(mean_total / 7), 4)} ± {round(float(std_total / 7), 2)}")

--------------------------------------------------
City: south
Added batch 1/2 to index.
Added batch 2/2 to index.
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 975/975 [00:10<00:00, 90.30it/s] 


MRR@10 mean: 0.8729 ± 0.3


Calculating R-precision: 100%|██████████| 975/975 [00:10<00:00, 90.07it/s] 


R precision mean: 0.7673 ± 0.29


Calculating Macro-F1@X: 100%|██████████| 975/975 [00:10<00:00, 90.85it/s] 


Macro F1@X: 0.8157 ± 0.27
--------------------------------------------------
City: midwest
Added batch 1/1 to index.
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 633/633 [00:04<00:00, 150.20it/s]


MRR@10 mean: 0.7527 ± 0.4


Calculating R-precision: 100%|██████████| 633/633 [00:04<00:00, 150.80it/s]


R precision mean: 0.6346 ± 0.37


Calculating Macro-F1@X: 100%|██████████| 633/633 [00:04<00:00, 151.71it/s]


Macro F1@X: 0.6333 ± 0.36
--------------------------------------------------
City: west
Added batch 1/1 to index.
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 268/268 [00:00<00:00, 344.82it/s]


MRR@10 mean: 0.8051 ± 0.38


Calculating R-precision: 100%|██████████| 268/268 [00:00<00:00, 342.92it/s]


R precision mean: 0.7612 ± 0.35


Calculating Macro-F1@X: 100%|██████████| 268/268 [00:00<00:00, 349.95it/s]


Macro F1@X: 0.7408 ± 0.31
--------------------------------------------------
City: northeast
Added batch 1/1 to index.
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 309/309 [00:00<00:00, 484.23it/s]


MRR@10 mean: 0.7131 ± 0.44


Calculating R-precision: 100%|██████████| 309/309 [00:00<00:00, 479.03it/s]


R precision mean: 0.6707 ± 0.41


Calculating Macro-F1@X: 100%|██████████| 309/309 [00:00<00:00, 491.58it/s]


Macro F1@X: 0.695 ± 0.39
Mean average: 0.4121 ± 0.19


In [25]:
# All
r_precision_dict, mrr_dict1, mrr_dict10, mrr_dict100 = ({} for i in range(4))
mean_total, std_total = 0, 0
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr_supcon", city=city_)
    
    train_embeddings = torch.tensor(train_embeddings)
    train_labels = torch.tensor(train_labels)
    test_embeddings = torch.tensor(test_embeddings)
    test_labels = torch.tensor(test_labels)
    
    train_embeddings, train_labels, test_embeddings, test_labels = remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels)
    train_embeddings, train_labels, test_embeddings, test_labels = find_members(train_embeddings, train_labels, test_embeddings, test_labels)
    
    # Normalize training embeddings
    faiss.normalize_L2(train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    faiss.normalize_L2(test_embeddings.detach().cpu().numpy())
    
    initialize_globals(train_embeddings, train_labels)
    try:
        mean, std = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean, std = generate_rprecision_results(test_embeddings, test_labels)
        # _, _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean, std  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        mean_total += mean
        std_total += std
    except Exception as e:
        print(f"Error processing {city}: {e}")
        
    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()
    
    # _, _ = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    # mrr_mean_1, mrr_std_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=1)
    # _, _ = generate_mrr_at_1_results(train_embeddings, train_labels, test_embeddings, test_labels, k=10)
    # mrr_mean_100, mrr_std_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, k=100)
    # macro_f_1 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 1)
    # macro_f_10 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 10)
    # macro_f_100 = generate_macro_f1_at_k_results(train_embeddings, train_labels, test_embeddings, test_labels, 100)
    # _ = generate_macro_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    
    # r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    # mrr_dict1[city_] = (mrr_mean_1, mrr_std_1)
    # mrr_dict10[city_] = (mrr_mean_10, mrr_std_10)
    # mrr_dict100[city_] = (mrr_mean_100, mrr_std_100)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/rprecision.pickle', 'wb') as handle:
#    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr1.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict1, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr10.pickle', 'wb') as handle:
#     pickle.dump(mrr_dict10, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr100.pickle', 'wb') as handle:
#    pickle.dump(mrr_dict100, handle, protocol=pickle.HIGHEST_PROTOCOL)

print(f"Mean average: {round(float(mean_total / 7), 4)} ± {round(float(std_total / 7), 2)}")

--------------------------------------------------
City: south
Added batch 1/2 to index.
Added batch 2/2 to index.
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 964/964 [00:10<00:00, 95.11it/s] 


MRR@10 mean: 0.9163 ± 0.25


Calculating R-precision: 100%|██████████| 964/964 [00:10<00:00, 93.82it/s] 


R precision mean: 0.8099 ± 0.26


Calculating Macro-F1@X: 100%|██████████| 964/964 [00:10<00:00, 92.13it/s] 


Macro F1@X: 0.8446 ± 0.24
--------------------------------------------------
City: midwest
Added batch 1/1 to index.
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 624/624 [00:04<00:00, 144.98it/s]


MRR@10 mean: 0.7932 ± 0.37


Calculating R-precision: 100%|██████████| 624/624 [00:04<00:00, 146.21it/s]


R precision mean: 0.6615 ± 0.36


Calculating Macro-F1@X: 100%|██████████| 624/624 [00:04<00:00, 144.74it/s]


Macro F1@X: 0.6544 ± 0.36
--------------------------------------------------
City: west
Added batch 1/1 to index.
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 259/259 [00:00<00:00, 341.05it/s]


MRR@10 mean: 0.868 ± 0.32


Calculating R-precision: 100%|██████████| 259/259 [00:00<00:00, 326.25it/s]


R precision mean: 0.8013 ± 0.33


Calculating Macro-F1@X: 100%|██████████| 259/259 [00:00<00:00, 327.71it/s]


Macro F1@X: 0.766 ± 0.3
--------------------------------------------------
City: northeast
Added batch 1/1 to index.
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 293/293 [00:00<00:00, 448.98it/s]


MRR@10 mean: 0.776 ± 0.4


Calculating R-precision: 100%|██████████| 293/293 [00:00<00:00, 435.56it/s]


R precision mean: 0.707 ± 0.4


Calculating Macro-F1@X: 100%|██████████| 293/293 [00:00<00:00, 442.81it/s]

Macro F1@X: 0.7279 ± 0.38
Mean average: 0.4276 ± 0.18





In [33]:
# all
r_precision_dict, mrr_dict = ({} for i in range(2))
for city_ in ["chicago", "atlanta", "detroit", "houston", "dallas", "ny", "df", "canada"]:
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr", city=city_)
    r_precision_mean, r_precision_std = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    mrr_mean, mrr_std = generate_mrr_at_1_results(train_embeddings, train_labels, test_embeddings, test_labels)
    
    r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    mrr_dict[city_] = (mrr_mean, mrr_std)
    
with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/rprecision.pickle', 'wb') as handle:
    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
with open('../models/pickled/embeddings/pooled_trained_declutr_chicago/mrr.pickle', 'wb') as handle:
    pickle.dump(mrr_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

City: chicago


Calculating R-precision: 100%|██████████| 490/490 [00:00<00:00, 2575.61it/s]


R precision mean: 0.9616027318499994
R precision std: 0.17722079615936118


Calculating MRR@1: 100%|██████████| 490/490 [00:00<00:00, 3591.85it/s]


MRR@1 mean: 0.9472761037539362
MRR@1 std: 0.2189324097600345
City: atlanta


Calculating R-precision: 100%|██████████| 471/471 [00:00<00:00, 3663.08it/s]


R precision mean: 0.9577687978687979
R precision std: 0.177517277763828


Calculating MRR@1: 100%|██████████| 471/471 [00:00<00:00, 4254.47it/s]


MRR@1 mean: 0.936395805185614
MRR@1 std: 0.24103923617661122
City: detroit


Calculating R-precision: 100%|██████████| 153/153 [00:00<00:00, 3908.57it/s]


R precision mean: 0.9662666666666666
R precision std: 0.14521499769498866


Calculating MRR@1: 100%|██████████| 153/153 [00:00<00:00, 4500.04it/s]


MRR@1 mean: 0.9673202614379085
MRR@1 std: 0.1777970001141888
City: houston


Calculating R-precision: 100%|██████████| 422/422 [00:00<00:00, 3042.14it/s]


R precision mean: 0.9538112315672145
R precision std: 0.18651454272879214


Calculating MRR@1: 100%|██████████| 422/422 [00:00<00:00, 4032.50it/s]


MRR@1 mean: 0.9395734597156398
MRR@1 std: 0.23226006208877056
City: dallas


Calculating R-precision: 100%|██████████| 377/377 [00:00<00:00, 3775.97it/s]


R precision mean: 0.9497255440637793
R precision std: 0.20059073886018863


Calculating MRR@1: 100%|██████████| 377/377 [00:00<00:00, 4337.93it/s]


MRR@1 mean: 0.925287356321839
MRR@1 std: 0.25940053363222754
City: ny


Calculating R-precision: 100%|██████████| 309/309 [00:00<00:00, 4381.65it/s]


R precision mean: 0.9736405005688281
R precision std: 0.13597950278647924


Calculating MRR@1: 100%|██████████| 309/309 [00:00<00:00, 4759.92it/s]


MRR@1 mean: 0.9352750809061489
MRR@1 std: 0.24603984218444291
City: df


Calculating R-precision: 100%|██████████| 268/268 [00:00<00:00, 3575.32it/s]

R precision mean: 0.9575560506441259
R precision std: 0.19128800864810627



Calculating MRR@1: 100%|██████████| 268/268 [00:00<00:00, 4049.18it/s]


MRR@1 mean: 0.9291044776119403
MRR@1 std: 0.2566502431196663
City: canada


Calculating R-precision: 100%|██████████| 149/149 [00:00<00:00, 3611.77it/s]


R precision mean: 0.8974538690476191
R precision std: 0.27297316875569755


Calculating MRR@1: 100%|██████████| 149/149 [00:00<00:00, 4639.79it/s]


MRR@1 mean: 0.8559843400447427
MRR@1 std: 0.34306553343642904


# Style Embedding model

In [7]:
r_precision_dict, mrr_dict = ({} for i in range(2))
for city_ in ["chicago", "atlanta", "detroit", "houston", "dallas", "ny", "df", "canada"]:
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="styleEmbedding", city=city_)
    r_precision_mean, r_precision_std = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
    mrr_mean, mrr_std = generate_mrr_at_1_results(train_embeddings, train_labels, test_embeddings, test_labels)
    
    r_precision_dict[city_] = (r_precision_mean, r_precision_std)
    mrr_dict[city_] = (mrr_mean, mrr_std)
    
with open('../models/pickled/embeddings/trained_styleEmbedding_chicago/rprecision.pickle', 'wb') as handle:
    pickle.dump(r_precision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
with open('../models/pickled/embeddings/trained_styleEmbedding_chicago/mrr.pickle', 'wb') as handle:
    pickle.dump(mrr_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

City: chicago


Calculating R-precision: 100%|██████████| 490/490 [00:00<00:00, 2650.47it/s]


R precision mean: 0.23570085502060392
R precision std: 0.3501274544330388


Calculating MRR@1: 100%|██████████| 490/490 [00:00<00:00, 3643.15it/s]


MRR@1 mean: 0.26969009826152684
MRR@1 std: 0.4225519481173043
City: atlanta


Calculating R-precision: 100%|██████████| 471/471 [00:00<00:00, 3690.95it/s]


R precision mean: 0.17019464305535734
R precision std: 0.28286919583435144


Calculating MRR@1: 100%|██████████| 471/471 [00:00<00:00, 4248.89it/s]


MRR@1 mean: 0.1995018428139447
MRR@1 std: 0.36392880819794504
City: detroit


Calculating R-precision: 100%|██████████| 153/153 [00:00<00:00, 4009.73it/s]


R precision mean: 0.18867699134199134
R precision std: 0.29214815049472875


Calculating MRR@1: 100%|██████████| 153/153 [00:00<00:00, 4530.57it/s]


MRR@1 mean: 0.22020249177111925
MRR@1 std: 0.38491030684397226
City: houston


Calculating R-precision: 100%|██████████| 422/422 [00:00<00:00, 3339.72it/s]


R precision mean: 0.12468577303016073
R precision std: 0.2483092055923549


Calculating MRR@1: 100%|██████████| 422/422 [00:00<00:00, 4005.73it/s]


MRR@1 mean: 0.15329012746074355
MRR@1 std: 0.32737294316489385
City: dallas


Calculating R-precision: 100%|██████████| 377/377 [00:00<00:00, 3614.85it/s]


R precision mean: 0.15565606686461567
R precision std: 0.2659244798996246


Calculating MRR@1: 100%|██████████| 377/377 [00:00<00:00, 4307.46it/s]


MRR@1 mean: 0.20627963202603875
MRR@1 std: 0.3736196093193114
City: ny


Calculating R-precision: 100%|██████████| 309/309 [00:00<00:00, 4340.68it/s]


R precision mean: 0.18537437022590605
R precision std: 0.30974707348257785


Calculating MRR@1: 100%|██████████| 309/309 [00:00<00:00, 4853.14it/s]


MRR@1 mean: 0.18756356911696717
MRR@1 std: 0.3691417264328915
City: df


Calculating R-precision: 100%|██████████| 268/268 [00:00<00:00, 2366.71it/s]


R precision mean: 0.21147590539446448
R precision std: 0.3147223322755013


Calculating MRR@1: 100%|██████████| 268/268 [00:00<00:00, 3244.06it/s]


MRR@1 mean: 0.21212465651117976
MRR@1 std: 0.3843673084707061
City: canada


Calculating R-precision: 100%|██████████| 149/149 [00:00<00:00, 4351.00it/s]


R precision mean: 0.07708698979591837
R precision std: 0.25209573579789857


Calculating MRR@1: 100%|██████████| 149/149 [00:00<00:00, 5080.04it/s]


MRR@1 mean: 0.07088926174496644
MRR@1 std: 0.24583972547768784


# Modified scripts with class frequencies and returned dictionaries

In [75]:
def recall_at_k(actual, predicted, k):
    recall_list = []
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())  # Convert PyTorch tensor to NumPy array, then to set
        pred_set = set(pred[:k].numpy())  # Convert PyTorch tensor to NumPy array, then to set
        
        # Check if the actual set is empty to avoid division by zero
        if len(act_set) == 0:
            recall_list.append(0.0)  # Assign recall as 0 if there are no true labels
        else:
            recall_list.append(round(len(act_set & pred_set) / float(len(act_set)), 2))
    
    return recall_list


def generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels):
    # Convert embeddings from PyTorch tensors to NumPy arrays
    train_embeddings_np = train_embeddings.cpu().numpy()
    test_embeddings_np = test_embeddings.cpu().numpy()
    
    dim = train_embeddings_np.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index_flat.add(train_embeddings_np)

    # Combine train and test labels to compute class frequency across the entire dataset
    combined_labels = torch.cat([train_labels, test_labels], dim=0)
    unique_vendors = torch.unique(combined_labels)
    
    # Compute class frequencies from the entire dataset (train + test)
    vendor_freq_dict = {int(vendor_id): (combined_labels == vendor_id).sum().item() for vendor_id in unique_vendors}

    # Initialize a dict to store scores by frequency
    frequency_score_dict = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0:
            continue

        test_vendor_embeddings = test_embeddings_np[test_adsidx]

        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = np.expand_dims(test_vendor_embeddings, axis=0)

        k = vendor_freq_dict[vendor_id]  # Use the frequency from the combined dataset
        if k == 0:
            continue

        try:
            _, I = gpu_index_flat.search(test_vendor_embeddings, k)
        except ValueError as e:
            print(f"Error searching for vendor_id {vendor_id} with shape {test_vendor_embeddings.shape}: {e}")
            continue

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels == vendor_id)[0] for _ in range(len(test_adsidx))]

        score = np.mean(recall_at_k(true_label_list, predicted_label_list, k))

        # Track performance score by frequency of instances
        freq = vendor_freq_dict[vendor_id]  # Get the frequency from the combined dataset
        if freq not in frequency_score_dict:
            frequency_score_dict[freq] = []
        frequency_score_dict[freq].append(score)

    # Calculate average score for each frequency
    frequency_avg_score_dict = {freq: np.mean(scores) for freq, scores in frequency_score_dict.items()}

    return frequency_avg_score_dict


def calculate_mrr_at_k(train_embeddings, train_labels, test_embeddings, test_labels, k=10):
    # Convert embeddings from PyTorch tensors to NumPy arrays
    train_embeddings_np = train_embeddings.cpu().numpy()
    test_embeddings_np = test_embeddings.cpu().numpy()
    
    dim = train_embeddings_np.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index_flat.add(train_embeddings_np)

    # Combine train and test labels to compute class frequency across the entire dataset
    combined_labels = torch.cat([train_labels, test_labels], dim=0)
    unique_vendors = torch.unique(combined_labels)

    # Compute class frequencies from the entire dataset (train + test)
    vendor_freq_dict = {int(vendor_id): (combined_labels == vendor_id).sum().item() for vendor_id in unique_vendors}

    # Initialize a dict to store scores by frequency
    frequency_score_dict = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc=f"Calculating MRR@{k}"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]
        if len(test_adsidx) == 0:
            continue

        test_vendor_embeddings = test_embeddings_np[test_adsidx]

        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = np.expand_dims(test_vendor_embeddings, axis=0)

        try:
            D, I = gpu_index_flat.search(test_vendor_embeddings, k)
        except ValueError as e:
            print(f"Error searching for vendor_id {vendor_id} with shape {test_vendor_embeddings.shape}: {e}")
            continue

        scores = []
        for idx, indices in enumerate(I):
            correct_indices = np.where(train_labels.numpy() == vendor_id)[0]
            for rank, index in enumerate(indices, start=1):
                if index in correct_indices:
                    scores.append(1.0 / rank)
                    break
            else:
                scores.append(0.0)

        # Track performance score by frequency of instances
        freq = vendor_freq_dict[vendor_id]  # Use the frequency from the combined dataset
        if freq not in frequency_score_dict:
            frequency_score_dict[freq] = []
        frequency_score_dict[freq].extend(scores)

    # Calculate average MRR score for each frequency
    frequency_avg_mrr_dict = {freq: np.mean(scores) for freq, scores in frequency_score_dict.items()}

    return frequency_avg_mrr_dict


def generate_macro_f1_at_x_results(train_embeddings, train_labels, test_embeddings, test_labels):
    # Convert embeddings from PyTorch tensors to NumPy arrays
    train_embeddings_np = train_embeddings.cpu().numpy()
    test_embeddings_np = test_embeddings.cpu().numpy()
    
    dim = train_embeddings_np.shape[1]
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatIP(dim)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index_flat.add(train_embeddings_np)

    # Combine train and test labels to compute class frequency across the entire dataset
    combined_labels = torch.cat([train_labels, test_labels], dim=0)
    unique_vendors = torch.unique(combined_labels)

    # Compute class frequencies from the entire dataset (train + test)
    vendor_freq_dict = {int(vendor_id): (combined_labels == vendor_id).sum().item() for vendor_id in unique_vendors}

    # Initialize a dict to store scores by frequency
    frequency_score_dict = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating Macro-F1@X"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0:
            continue

        test_vendor_embeddings = test_embeddings_np[test_adsidx]

        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = np.expand_dims(test_vendor_embeddings, axis=0)

        k = vendor_freq_dict[vendor_id]  # Use the frequency from the combined dataset
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = gpu_index_flat.search(test_vendor_embeddings, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels == vendor_id)[0] for _ in range(len(test_adsidx))]

        f1_scores = f1_at_k(true_label_list, predicted_label_list, k)

        # Track performance score by frequency of instances
        freq = vendor_freq_dict[vendor_id]  # Use the frequency from the combined dataset
        if freq not in frequency_score_dict:
            frequency_score_dict[freq] = []
        frequency_score_dict[freq].extend(f1_scores)

    # Calculate average F1 score for each frequency
    frequency_avg_f1_dict = {freq: np.mean(scores) for freq, scores in frequency_score_dict.items()}

    return frequency_avg_f1_dict

In [76]:
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr_ce", city=city_)
    
    try:
        mrr = calculate_mrr_at_k(train_embeddings, train_labels, test_embeddings, test_labels, 10)
        rprecision = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
        # _, _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        macro  = generate_macro_f1_at_x_results(train_embeddings, train_labels, test_embeddings, test_labels)
    except Exception as e:
        print(f"Error processing {city_}: {e}")
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/mrr/trained_ce_mrr_{city_}.pickle', 'wb') as handle:
        pickle.dump(mrr, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/rprecision/trained_ce_rprecision_{city_}.pickle', 'wb') as handle:
        pickle.dump(rprecision, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/macro-f1/trained_ce_macro_{city_}.pickle', 'wb') as handle:
        pickle.dump(macro, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()

--------------------------------------------------
City: south


Calculating MRR@10: 100%|██████████| 1463/1463 [00:00<00:00, 5327.20it/s]
Calculating R-precision: 100%|██████████| 1463/1463 [00:00<00:00, 3773.61it/s]
Calculating Macro-F1@X: 100%|██████████| 1463/1463 [00:00<00:00, 3746.13it/s]


--------------------------------------------------
City: midwest


Calculating MRR@10: 100%|██████████| 1033/1033 [00:00<00:00, 6908.93it/s]
Calculating R-precision: 100%|██████████| 1033/1033 [00:00<00:00, 5056.07it/s]
Calculating Macro-F1@X: 100%|██████████| 1033/1033 [00:00<00:00, 5006.04it/s]


--------------------------------------------------
City: west


Calculating MRR@10: 100%|██████████| 520/520 [00:00<00:00, 9556.94it/s]
Calculating R-precision: 100%|██████████| 520/520 [00:00<00:00, 6788.25it/s]
Calculating Macro-F1@X: 100%|██████████| 520/520 [00:00<00:00, 6729.44it/s]


--------------------------------------------------
City: northeast


Calculating MRR@10: 100%|██████████| 591/591 [00:00<00:00, 10672.07it/s]
Calculating R-precision: 100%|██████████| 591/591 [00:00<00:00, 8218.40it/s]
Calculating Macro-F1@X: 100%|██████████| 591/591 [00:00<00:00, 8771.81it/s]


In [85]:
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr_supcon", city=city_)
    
    try:
        mrr = calculate_mrr_at_k(train_embeddings, train_labels, test_embeddings, test_labels, 10)
        rprecision = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
        # _, _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        macro  = generate_macro_f1_at_x_results(train_embeddings, train_labels, test_embeddings, test_labels)
    except Exception as e:
        print(f"Error processing {city_}: {e}")
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/mrr/trained_supcon_mrr_{city_}.pickle', 'wb') as handle:
        pickle.dump(mrr, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/rprecision/trained_supcon_rprecision_{city_}.pickle', 'wb') as handle:
        pickle.dump(rprecision, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/macro-f1/trained_supcon_macro_{city_}.pickle', 'wb') as handle:
        pickle.dump(macro, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()

--------------------------------------------------
City: south


Calculating MRR@10: 100%|██████████| 1463/1463 [00:00<00:00, 5513.89it/s]
Calculating R-precision: 100%|██████████| 1463/1463 [00:00<00:00, 3802.41it/s]
Calculating Macro-F1@X: 100%|██████████| 1463/1463 [00:00<00:00, 3755.45it/s]


--------------------------------------------------
City: midwest


Calculating MRR@10: 100%|██████████| 1033/1033 [00:00<00:00, 7218.78it/s]
Calculating R-precision: 100%|██████████| 1033/1033 [00:00<00:00, 5031.86it/s]
Calculating Macro-F1@X: 100%|██████████| 1033/1033 [00:00<00:00, 5003.49it/s]


--------------------------------------------------
City: west


Calculating MRR@10: 100%|██████████| 520/520 [00:00<00:00, 9912.41it/s]
Calculating R-precision: 100%|██████████| 520/520 [00:00<00:00, 6660.17it/s]
Calculating Macro-F1@X: 100%|██████████| 520/520 [00:00<00:00, 6812.25it/s]


--------------------------------------------------
City: northeast


Calculating MRR@10: 100%|██████████| 591/591 [00:00<00:00, 10978.64it/s]
Calculating R-precision: 100%|██████████| 591/591 [00:00<00:00, 8231.53it/s]
Calculating Macro-F1@X: 100%|██████████| 591/591 [00:00<00:00, 8793.25it/s]


In [89]:
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr_ce+supcon", city=city_)
    
    try:
        mrr = calculate_mrr_at_k(train_embeddings, train_labels, test_embeddings, test_labels, 10)
        rprecision = generate_rprecision_results(train_embeddings, train_labels, test_embeddings, test_labels)
        # _, _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        macro  = generate_macro_f1_at_x_results(train_embeddings, train_labels, test_embeddings, test_labels)
    except Exception as e:
        print(f"Error processing {city_}: {e}")
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/mrr/trained_ce+supcon_mrr_{city_}.pickle', 'wb') as handle:
        pickle.dump(mrr, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/rprecision/trained_ce+supcon_rprecision_{city_}.pickle', 'wb') as handle:
        pickle.dump(rprecision, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    with open(f'/workspace/persistent/HTClipper/results/retrieval/text/macro-f1/trained_ce+supcon_macro_{city_}.pickle', 'wb') as handle:
        pickle.dump(macro, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()

--------------------------------------------------
City: south


Calculating MRR@10: 100%|██████████| 1463/1463 [00:00<00:00, 5560.75it/s]
Calculating R-precision: 100%|██████████| 1463/1463 [00:00<00:00, 3799.29it/s]
Calculating Macro-F1@X: 100%|██████████| 1463/1463 [00:00<00:00, 3740.44it/s]


--------------------------------------------------
City: midwest


Calculating MRR@10: 100%|██████████| 1033/1033 [00:00<00:00, 7035.34it/s]
Calculating R-precision: 100%|██████████| 1033/1033 [00:00<00:00, 4926.93it/s]
Calculating Macro-F1@X: 100%|██████████| 1033/1033 [00:00<00:00, 4996.88it/s]


--------------------------------------------------
City: west


Calculating MRR@10: 100%|██████████| 520/520 [00:00<00:00, 9394.91it/s]
Calculating R-precision: 100%|██████████| 520/520 [00:00<00:00, 7003.23it/s]
Calculating Macro-F1@X: 100%|██████████| 520/520 [00:00<00:00, 6772.70it/s]


--------------------------------------------------
City: northeast


Calculating MRR@10: 100%|██████████| 591/591 [00:00<00:00, 10885.30it/s]
Calculating R-precision: 100%|██████████| 591/591 [00:00<00:00, 8173.07it/s]
Calculating Macro-F1@X: 100%|██████████| 591/591 [00:00<00:00, 8767.37it/s]


In [77]:
with open('/workspace/persistent/HTClipper/results/retrieval/text/mrr/zs_mrr_northeast.pickle', 'rb') as handle:
    mrr_zs = pickle.load(handle)
    
with open('/workspace/persistent/HTClipper/results/retrieval/text/rprecision/zs_rprecision_northeast.pickle', 'rb') as handle:
    rprecision_zs = pickle.load(handle)
    
with open('/workspace/persistent/HTClipper/results/retrieval/text/macro-f1/zs_macro_northeast.pickle', 'rb') as handle:
    macro_zs = pickle.load(handle)

# Error Analysis

In [14]:
def generate_mrr_at_1_results(test_embeddings, test_labels, k):
    unique_labels = torch.unique(test_labels)
    mrr_score_dict = {}

    for label in tqdm(unique_labels, total=len(unique_labels), desc=f"Calculating MRR@{k}"):
        label_id = int(label)
        test_idx = (test_labels == label_id).nonzero(as_tuple=True)[0]

        if len(test_idx) == 0:
            continue

        test_embeddings_np = test_embeddings[test_idx].numpy()

        D, I = index.search(test_embeddings_np, k)

        predicted_label_list = [train_labels_tensor[I[index]] for index in range(len(test_idx))]
        true_label_list = [train_labels_tensor[torch.where(train_labels_tensor == label_id)[0]] for _ in range(len(test_idx))]

        mrr_score = mrr_at_k(true_label_list, predicted_label_list, k_=k)
        mrr_score_dict[label_id] = round(mrr_score, 4)

    return mrr_score_dict


def generate_rprecision_results(test_embeddings, test_labels):
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels_tensor.numpy()
    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    r_precision_score_dict = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = index.search(test_vendor_embeddings_np, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels_tensor == vendor_id)[0] for _ in range(len(test_adsidx))]

        r_precision_score = np.mean(recall_at_k(true_label_list, predicted_label_list, k))
        r_precision_score_dict[vendor_id] = round(r_precision_score, 4)

    return r_precision_score_dict

def generate_macro_f1_at_x_results(test_embeddings, test_labels):
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels_tensor.numpy()
    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    f1_score_dict = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating Macro-F1@X"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = index.search(test_vendor_embeddings_np, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels_tensor == vendor_id)[0] for _ in range(len(test_adsidx))]

        f1_score = np.mean(f1_at_k(true_label_list, predicted_label_list, k))
        f1_score_dict[vendor_id] = round(f1_score, 4)

    return f1_score_dict

In [15]:
import pickle

In [17]:
def convert_to_serializable(data):
    """
    Recursively convert NumPy objects in dictionaries to Python-native types.
    """
    if isinstance(data, dict):
        return {k: convert_to_serializable(v) for k, v in data.items()}
    elif isinstance(data, (np.ndarray, np.generic)):
        return data.tolist()
    elif isinstance(data, torch.Tensor):
        return data.detach().cpu().numpy().tolist()
    return data

In [18]:
# All
mean_total, std_total = 0, 0
for city_ in ["south", "midwest", "west", "northeast"]:
    print("-"*50)
    print("City:", city_)
    train_embeddings, train_labels, test_embeddings, test_labels = load_embeddings(model_name="declutr_ce+SupCon", city=city_)
    
    train_embeddings = torch.tensor(train_embeddings)
    train_labels = torch.tensor(train_labels)
    test_embeddings = torch.tensor(test_embeddings)
    test_labels = torch.tensor(test_labels)
    
    train_embeddings, train_labels, test_embeddings, test_labels = remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels)
    train_embeddings, train_labels, test_embeddings, test_labels = find_members(train_embeddings, train_labels, test_embeddings, test_labels)
    
    initialize_globals(train_embeddings, train_labels)
    try:
        mrr_dict = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        rprecision_dict = generate_rprecision_results(test_embeddings, test_labels)
        macro_f1_dict  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        
    except Exception as e:
        print(f"Error processing {city}: {e}")
        
    # Convert to serializable formats before saving
    mrr_dict = convert_to_serializable(mrr_dict)
    rprecision_dict = convert_to_serializable(rprecision_dict)
    macro_f1_dict = convert_to_serializable(macro_f1_dict)
        
    with open(f'../error_analysis/declutr_{city_}_mrr.pickle', 'wb') as handle:
        pickle.dump(mrr_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    with open(f'../error_analysis/declutr_{city_}_rprecision.pickle', 'wb') as handle:
        pickle.dump(rprecision_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
    with open(f'../error_analysis/declutr_{city_}_macro.pickle', 'wb') as handle:
        pickle.dump(macro_f1_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
    # Clear cache and free memory
    del mrr_dict, rprecision_dict, macro_f1_dict
    gc.collect()

--------------------------------------------------
City: south
Added batch 1/2 to index.
Added batch 2/2 to index.
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 964/964 [00:09<00:00, 98.66it/s] 
Calculating R-precision: 100%|██████████| 964/964 [00:10<00:00, 93.74it/s] 
Calculating Macro-F1@X: 100%|██████████| 964/964 [00:10<00:00, 94.90it/s] 


--------------------------------------------------
City: midwest
Added batch 1/1 to index.
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 624/624 [00:04<00:00, 153.76it/s]
Calculating R-precision: 100%|██████████| 624/624 [00:04<00:00, 150.55it/s]
Calculating Macro-F1@X: 100%|██████████| 624/624 [00:04<00:00, 148.55it/s]


--------------------------------------------------
City: west
Added batch 1/1 to index.
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 259/259 [00:00<00:00, 349.05it/s]
Calculating R-precision: 100%|██████████| 259/259 [00:00<00:00, 317.84it/s]
Calculating Macro-F1@X: 100%|██████████| 259/259 [00:00<00:00, 339.38it/s]


--------------------------------------------------
City: northeast
Added batch 1/1 to index.
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 293/293 [00:00<00:00, 468.54it/s]
Calculating R-precision: 100%|██████████| 293/293 [00:00<00:00, 459.96it/s]
Calculating Macro-F1@X: 100%|██████████| 293/293 [00:00<00:00, 459.23it/s]
