# Importing libraries

In [2]:
import os
import time
import gc

import numpy as np
from itertools import product

from collections import Counter
from tqdm import tqdm
import torch
import faiss

import warnings
warnings.filterwarnings('ignore')

# Loading the Pre-trained embeddings

In [3]:
def load_embeddings(model_name, city, mode="text"):
    assert mode in ["text", "image", "multimodal"]
    
    # Define directory mapping for models
    model_dirs = {
        "CE": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CE",
        "CE-concat": "/workspace/persistent/HTClipper/models/pickled/embeddings/multimodal_embeddings/declutr-vit/mean/CE-concat/",
        "CE+SupCon": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CE+SupCon",
        "CE+SupCon-noAug": "/workspace/persistent/HTClipper/models/pickled/embeddings/multimodal_embeddings/declutr-vit/mean/CE+SupCon:noAug/",
        "CE+SupCon:2": "/workspace/persistent/HTClipper/models/pickled/embeddings/multimodal_embeddings/declutr-vit/mean/CE+SupCon:2",
        "CE+SupCon+ITM": "/workspace/persistent/HTClipper/models/pickled/embeddings/multimodal_embeddings/declutr-vit/mean/CE+SupCon+ITM",
        "CE+SupCon-bs:28-neg:20-gradsteps:1": "/workspace/persistent/HTClipper/models/pickled/embeddings/multimodal_embeddings/declutr-vit/mean/CE+SupCon-bs:28-neg:20-gradsteps:1/",
        "CE+NTXENT": "/workspace/persistent/HTClipper/models/pickled/embeddings/multimodal_embeddings/declutr-vit/mean/CE-NTXent"
    }
    
    # Check if model_name is valid
    if model_name not in model_dirs:
        raise ValueError(f"Model '{model_name}' not implemented")
        
    emb_dir = model_dirs[model_name]

    filenames = {
        "train_emb": f"{city}_{mode}data_train.pt",
        "train_labels": f"{city}_labels_train.pt",
        "test_emb": f"{city}_{mode}data_test.pt",
        "test_labels": f"{city}_labels_test.pt"
    }
    
    # Load embeddings and labels
    train_emb = torch.load(os.path.join(emb_dir, filenames["train_emb"]), map_location=torch.device('cpu'))
    train_labels = torch.load(os.path.join(emb_dir, filenames["train_labels"]), map_location=torch.device('cpu'))
    test_emb = torch.load(os.path.join(emb_dir, filenames["test_emb"]), map_location=torch.device('cpu'))
    test_labels = torch.load(os.path.join(emb_dir, filenames["test_labels"]), map_location=torch.device('cpu'))
    
    return train_emb, train_labels, test_emb, test_labels

In [4]:
def load_embeddings_for_clipstylemodels(model_name, city, mode="text"):
    assert mode in ["text", "image", "multimodal"]
    
    # Define directory mapping for models
    model_dirs = {
        "CLIP": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CLIP",
        "CLIPITM": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CLIPITM/",
        "BLIP2": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/BLIP2",
        "BLIP2conditional": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/BLIP2Conditional",
    }
    
    # Check if model_name is valid
    if model_name not in model_dirs:
        raise ValueError(f"Model '{model_name}' not implemented")
        
    emb_dir = model_dirs[model_name]

    if mode == "multimodal":
        filenames = {
            "train_emb": f"train_{mode}_embeddings_{city}_vendor.npy",
            "train_labels": f"train_labels_{city}_vendor.npy",
            "test_emb": f"test_{mode}_embeddings_{city}_vendor.npy",
            "test_labels": f"test_labels_{city}_vendor.npy",
        }
        
    else:
        filenames = {
            "train_emb": f"train_{mode}_embeddings_{city}_vendors.npy",
            "train_labels": f"train_{mode}_labels_{city}_vendors.npy",
            "test_emb": f"test_{mode}_embeddings_{city}_vendors.npy",
            "test_labels": f"test_{mode}_labels_{city}_vendors.npy",
        }
        
    # Load embeddings and labels
    train_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_emb"])))
    train_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_labels"])))
    test_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_emb"])))
    test_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_labels"])))
    
    return train_emb, train_labels, test_emb, test_labels

In [5]:
def load_embeddings_for_newclipstylemodels(model_name, city, mode="text"):
    assert mode in ["text", "image", "multimodal"]
    
    # Define directory mapping for models
    model_dirs = {
        "BLIP2conditional": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/BLIP2Conditional",
        "BigCLIP": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/BigCLIP",    
        "CLIP_CLS": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CLIP-CLS"
    }
    
    # Check if model_name is valid
    if model_name not in model_dirs:
        raise ValueError(f"Model '{model_name}' not implemented")
        
    emb_dir = model_dirs[model_name]
    
    filenames = {
        "train_emb": f"train_{mode}_embeddings_{city}_vendors.npy",
        "train_labels": f"train_labels_{city}_vendors.npy",
        "test_emb": f"test_{mode}_embeddings_{city}_vendors.npy",
        "test_labels": f"test_labels_{city}_vendors.npy",
    }
    
    # Load embeddings and labels
    train_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_emb"])))
    train_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_labels"])))
    test_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_emb"])))
    test_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_labels"])))
    
    return train_emb, train_labels, test_emb, test_labels

In [6]:
def load_embeddings_for_finetunedclipstylemodels(model_name, city, mode="text"):
    assert mode in ["text", "image", "multimodal"]
    
    # Define directory mapping for models
    model_dirs = {
        "CLIP": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/finetuned_declutr_vit/CLIP/",
        "CLIPITM": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/finetuned_declutr_vit/CLIPITM/",    
        "BLIP2": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/finetuned_declutr_vit/BLIP2/",
        "CLIP-EOS": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/finetuned_declutr_vit/CLIP-EOS/",
        "BLIP2-CESupCon": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/finetuned_declutr_vit/BLIP2-CESupCon",
        "unCLIP": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/un-normalized/finetuned_declutr_vit/CLIP/",
        "CLIP-EOS-CESupCon": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/finetuned_declutr_vit/CLIP-EOS-CESupCon/"
    }
    
    # Check if model_name is valid
    if model_name not in model_dirs:
        raise ValueError(f"Model '{model_name}' not implemented")
        
    emb_dir = model_dirs[model_name]
    
    filenames = {
        "train_emb": f"train_{mode}_embeddings_{city}_vendor.npy",
        "train_labels": f"train_labels_{city}_vendor.npy",
        "test_emb": f"test_{mode}_embeddings_{city}_vendor.npy",
        "test_labels": f"test_labels_{city}_vendor.npy",
    }
    
    # Load embeddings and labels
    train_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_emb"])))
    train_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_labels"])))
    test_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_emb"])))
    test_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_labels"])))
    
    return train_emb, train_labels, test_emb, test_labels

In [7]:
def load_embeddings_for_clipstylemodels_ids(model_name, city, mode="text"):
    assert mode in ["text", "image", "multimodal"]
    
    # Define directory mapping for models
    model_dirs = {
        "CLIP": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CLIP",
        "CLIPITM": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CLIPITM/",
        "BLIP2": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/BLIP2"
    }
    
    # Check if model_name is valid
    if model_name not in model_dirs:
        raise ValueError(f"Model '{model_name}' not implemented")
        
    emb_dir = model_dirs[model_name]

    filenames = {
        "train_emb": f"train_{mode}_embeddings_{city}_ids.npy",
        "train_labels": f"train_{mode}_labels_{city}_ids.npy",
        "test_emb": f"test_{mode}_embeddings_{city}_ids.npy",
        "test_labels": f"test_{mode}_labels_{city}_ids.npy",
    }
    
    # Load embeddings and labels
    train_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_emb"])))
    train_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_labels"])))
    test_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_emb"])))
    test_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_labels"])))
    
    return train_emb, train_labels, test_emb, test_labels

In [8]:
def load_embeddings_for_updatedclipstylemodels(model_name, city, mode="text"):
    assert mode in ["text", "image", "multimodal"]
    
    # Define directory mapping for models
    model_dirs = {
        "BLIPConditional": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/updatedBLIP2Conditional/",
        "CLIPITM": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/updatedCLIPITM/",
        "BLIP2": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/updatedBLIP2"
    }
    
    # Check if model_name is valid
    if model_name not in model_dirs:
        raise ValueError(f"Model '{model_name}' not implemented")
        
    emb_dir = model_dirs[model_name]

    filenames = {
        "train_emb": f"train_{mode}_embeddings_{city}_vendor.npy",
        "train_labels": f"train_labels_{city}_vendor.npy",
        "test_emb": f"test_{mode}_embeddings_{city}_vendor.npy",
        "test_labels": f"test_labels_{city}_vendor.npy",
    }
    
    # Load embeddings and labels
    train_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_emb"])))
    train_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_labels"])))
    test_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_emb"])))
    test_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_labels"])))
    
    return train_emb, train_labels, test_emb, test_labels

In [9]:
def load_embedddings_for_e2e(model_name, city, mode="text"):
    assert mode in ["text", "image", "multimodal"]
    
    # Define directory mapping for models
    model_dirs = {
        "CE": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CE",
        "CE+SupCon": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CE+SupCon",
    }
    
    # Check if model_name is valid
    if model_name not in model_dirs:
        raise ValueError(f"Model '{model_name}' not implemented")
        
    emb_dir = model_dirs[model_name]
    
    if model_name == "CE+SupCon":
        filenames = {
            "train_emb": f"{city}_{mode}data_train.pt",
            "train_labels": f"{city}_labels_{mode}_train.pt",
            "test_emb": f"{city}_{mode}data_test.pt",
            "test_labels": f"{city}_labels_{mode}_test.pt",
        }
    else:
        filenames = {
            "train_emb": f"{city}_{mode}data_train.pt",
            "train_labels": f"{city}_labels_train.pt",
            "test_emb": f"{city}_{mode}data_test.pt",
            "test_labels": f"{city}_labels_test.pt",
        }
    
    # Load embeddings and labels
    train_emb = torch.load(os.path.join(emb_dir, filenames["train_emb"]), map_location=torch.device('cpu'))
    train_labels = torch.load(os.path.join(emb_dir, filenames["train_labels"]), map_location=torch.device('cpu'))
    test_emb = torch.load(os.path.join(emb_dir, filenames["test_emb"]), map_location=torch.device('cpu'))
    test_labels = torch.load(os.path.join(emb_dir, filenames["test_labels"]), map_location=torch.device('cpu'))
    
    return train_emb, train_labels, test_emb, test_labels

# Loading new embeddings

In [11]:
def load_e2e_classifier_embeddings(model_name, city, mode="text"):
    assert mode in ["text", "image", "multimodal"]
    
    # Define directory mapping for models
    model_dirs = {
        "CE-SupCon-mean-0.1": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/E2E/CE-SupCon-mean-0.1",
        "CE-SupCon-mean-0.5": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/E2E/CE-SupCon-mean-0.5",
        "CE-attention": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/E2E/CE-attention",
        "CE-concat": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/E2E/CE-concat",
        "CE-mean": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/E2E/CE-mean",
        "CE-learned_fusion": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/E2E/CE-learned_fusion",
    }
    
    # Check if model_name is valid
    if model_name not in model_dirs:
        raise ValueError(f"Model '{model_name}' not implemented")
        
    emb_dir = model_dirs[model_name]    
    
    filenames = {
        "train_emb": f"{city}_{mode}data_train.pt",
        "train_labels": f"{city}_labels_train.pt",
        "test_emb": f"{city}_{mode}data_test.pt",
        "test_labels": f"{city}_labels_test.pt"
    }
    
    # Load embeddings and labels
    train_emb = torch.load(os.path.join(emb_dir, filenames["train_emb"]), map_location=torch.device('cpu'))
    train_labels = torch.load(os.path.join(emb_dir, filenames["train_labels"]), map_location=torch.device('cpu'))
    test_emb = torch.load(os.path.join(emb_dir, filenames["test_emb"]), map_location=torch.device('cpu'))
    test_labels = torch.load(os.path.join(emb_dir, filenames["test_labels"]), map_location=torch.device('cpu'))
    
    return train_emb, train_labels, test_emb, test_labels

In [12]:
def load_embeddings_for_finetunedclipstylemodels(model_name, city, mode="text"):
    assert mode in ["text", "image", "multimodal"]
    
    # Define directory mapping for models
    model_dirs = {
        "CLIP": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/finetuned/CLIP-CE/",
        "CLIPITM": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/finetuned/CLIPITM-CE/",    
        "BLIP2": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/finetuned/BLIP2-CE/",
        "BLIP2-SupCon": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/finetuned/BLIP2-CE-SupCon",
    }
    
    # Check if model_name is valid
    if model_name not in model_dirs:
        raise ValueError(f"Model '{model_name}' not implemented")
        
    emb_dir = model_dirs[model_name]
    
    
    filenames = {
        "train_emb": f"train_{mode}_embeddings_{city}_vendor.npy",
        "train_labels": f"train_labels_{city}_vendor.npy",
        "test_emb": f"test_{mode}_embeddings_{city}_vendor.npy",
        "test_labels": f"test_labels_{city}_vendor.npy",
    }
    
    # Load embeddings and labels
    train_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_emb"])))
    train_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["train_labels"])))
    test_emb = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_emb"])))
    test_labels = torch.from_numpy(np.load(os.path.join(emb_dir, filenames["test_labels"])))
    
    return train_emb, train_labels, test_emb, test_labels

# Helper functions

In [13]:
# Global variables
dim = None
index = None
train_embeddings_np = None
train_labels_np = None

# @profile
def initialize_globals(train_embeddings, train_labels):
    global dim, index, train_embeddings_np, train_labels_tensor
    start_time = time.time()
    dim = train_embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    
    train_embeddings_np = train_embeddings.numpy()
    batch_size = 10000
    num_batches = (train_embeddings_np.shape[0] + batch_size - 1) // batch_size

    for i in range(num_batches):
        start_idx = i * batch_size
        end_idx = min((i + 1) * batch_size, train_embeddings_np.shape[0])
        batch_embeddings = train_embeddings_np[start_idx:end_idx]
        index.add(batch_embeddings)
        # print(f"Added batch {i+1}/{num_batches} to index.")
    
    train_labels_tensor = train_labels
    elapsed_time = time.time() - start_time
    print(f"Global variables initialized in {elapsed_time:.2f} seconds.")

def recall_at_k(actual, predicted, k):
    recall_list = []
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())
        pred_set = set(pred[:k].numpy())
        recall_list.append(len(act_set & pred_set) / float(len(act_set)))
    return recall_list

def generate_rprecision_results(test_embeddings, test_labels):
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels_tensor.numpy()
    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    r_precision_score = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = index.search(test_vendor_embeddings_np, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels_tensor == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        r_precision_score[vendor_id] = np.mean(recall_at_k(true_label_list, predicted_label_list, k))
    
    r_precision_mean = np.mean(list(r_precision_score.values()))
    r_precision_std = np.std(list(r_precision_score.values()))

    print(f"R precision mean: {round(r_precision_mean, 4)} ± {round(r_precision_std, 2)}")
    # print(f"R precision std: {r_precision_std}")
    
    return r_precision_mean, r_precision_std

def mrr_at_k(actual, predicted, k_):
    mrr = 0.0
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())
        for i, p in enumerate(pred[:k_].numpy(), 1):
            if p in act_set:
                mrr += 1 / i
                break
    return mrr / len(actual) if actual else 0

def generate_mrr_at_1_results(test_embeddings, test_labels, k):
    unique_labels = torch.unique(test_labels)
    mrr_score = []

    for label in tqdm(unique_labels, total=len(unique_labels), desc=f"Calculating MRR@{k}"):
        label_id = int(label)
        test_idx = (test_labels == label_id).nonzero(as_tuple=True)[0]

        if len(test_idx) == 0:
            continue

        test_embeddings_np = test_embeddings[test_idx].numpy()

        D, I = index.search(test_embeddings_np, 1)

        predicted_label_list = [train_labels_tensor[I[index]] for index in range(len(test_idx))]
        true_label_list = [train_labels_tensor[torch.where(train_labels_tensor == label_id)[0]] for _ in range(len(test_idx))]
        
        mrr_score.append(mrr_at_k(true_label_list, predicted_label_list, k_=k))

    mrr_mean = np.mean(mrr_score)
    mrr_std = np.std(mrr_score)
    print(f"MRR@{k} mean: {round(mrr_mean, 4)} ± {round(mrr_std, 2)}")
    # print(f"MRR@{k} std: {mrr_std}")
    
    return mrr_mean, mrr_std

def generate_macro_rprecision_results(test_embeddings, test_labels):
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels_tensor.numpy()
    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    r_precision_score = {}
    total_samples = 0

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = index.search(test_vendor_embeddings_np, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels_tensor == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        r_precision_score[vendor_id] = np.mean(recall_at_k(true_label_list, predicted_label_list, k))
        total_samples += len(test_adsidx)

    weighted_r_precision_sum = sum(r_precision_score[vendor_id] * len((test_labels == vendor_id).nonzero(as_tuple=True)[0]) for vendor_id in r_precision_score.keys())
    macro_r_precision = weighted_r_precision_sum / total_samples

    print(f"Macro R-precision: {macro_r_precision}")
    
    return macro_r_precision

In [14]:
def precision_at_k(actual, predicted, k):
    precision_list = []
    for act, pred in zip(actual, predicted):
        act_set = set(act.numpy())
        pred_set = set(pred[:k].numpy())
        precision_list.append(len(act_set & pred_set) / float(k))
    return precision_list

def f1_at_k(actual, predicted, k):
    f1_list = []
    precision_list = precision_at_k(actual, predicted, k)
    recall_list = recall_at_k(actual, predicted, k)
    for precision, recall in zip(precision_list, recall_list):
        if precision + recall > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0
        f1_list.append(f1)
    return f1_list

def generate_macro_f1_at_x_results(test_embeddings, test_labels):
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels_tensor.numpy()
    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    f1_score_list = []

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating Macro-F1@X"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = index.search(test_vendor_embeddings_np, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels_tensor == vendor_id)[0] for _ in range(len(test_adsidx))]
        
        f1_score_list.extend(f1_at_k(true_label_list, predicted_label_list, k))

    macro_f1_mean = np.mean(f1_score_list)
    macro_f1_std = np.std(f1_score_list)

    print(f"Macro F1@X: {round(macro_f1_mean, 4)} ± {round(macro_f1_std, 2)}")
    
    return macro_f1_mean, macro_f1_std

In [15]:
import torch
import hashlib

def remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels):
    """
    Removes duplicate embeddings between train and test sets using hashing.

    Args:
        train_embeddings (torch.Tensor): Embeddings from the training set.
        train_labels (torch.Tensor): Labels corresponding to the training embeddings.
        test_embeddings (torch.Tensor): Embeddings from the test set.
        test_labels (torch.Tensor): Labels corresponding to the test embeddings.

    Returns:
        unique_train_embeddings (torch.Tensor): Unique embeddings from the training set.
        unique_train_labels (torch.Tensor): Labels corresponding to the unique training embeddings.
        unique_test_embeddings (torch.Tensor): Unique embeddings from the test set (excluding duplicates with train set).
        unique_test_labels (torch.Tensor): Labels corresponding to the unique test embeddings.
    """

    def hash_embedding(embedding):
        # Ensure the embedding is contiguous in memory and of type float32
        embedding = embedding.contiguous().view(-1).float()
        # Round to reduce the impact of floating-point precision errors
        embedding = torch.round(embedding * 1e6) / 1e6  # Adjust precision as needed
        # Convert the embedding to bytes
        emb_bytes = embedding.numpy().tobytes()
        # Compute MD5 hash
        return hashlib.md5(emb_bytes).hexdigest()

    # Create dictionaries mapping hashes to embeddings and labels for the train set
    train_hash_embedding_label_dict = {}
    for emb, label in zip(train_embeddings, train_labels):
        emb_hash = hash_embedding(emb)
        # Store the embedding and label only if the hash is not already in the dictionary
        if emb_hash not in train_hash_embedding_label_dict:
            train_hash_embedding_label_dict[emb_hash] = (emb, label.item())

    # Create dictionaries mapping hashes to embeddings and labels for the test set
    test_hash_embedding_label_dict = {}
    for emb, label in zip(test_embeddings, test_labels):
        emb_hash = hash_embedding(emb)
        # Store the embedding and label only if the hash is not already in the dictionary
        if emb_hash not in test_hash_embedding_label_dict:
            test_hash_embedding_label_dict[emb_hash] = (emb, label.item())

    # Identify common hashes between train and test sets
    common_hashes = set(train_hash_embedding_label_dict.keys()).intersection(set(test_hash_embedding_label_dict.keys()))

    # Remove duplicates from the test set
    unique_test_hashes = set(test_hash_embedding_label_dict.keys()) - common_hashes

    # Reconstruct unique embeddings and labels for the train set
    unique_train_embeddings_list = [emb_label[0] for emb_label in train_hash_embedding_label_dict.values()]
    unique_train_labels_list = [emb_label[1] for emb_label in train_hash_embedding_label_dict.values()]

    # Reconstruct unique embeddings and labels for the test set
    unique_test_embeddings_list = [test_hash_embedding_label_dict[h][0] for h in unique_test_hashes]
    unique_test_labels_list = [test_hash_embedding_label_dict[h][1] for h in unique_test_hashes]

    # Convert lists to tensors
    unique_train_embeddings = torch.stack(unique_train_embeddings_list)
    unique_train_labels = torch.tensor(unique_train_labels_list)

    unique_test_embeddings = torch.stack(unique_test_embeddings_list)
    unique_test_labels = torch.tensor(unique_test_labels_list)

    return unique_train_embeddings, unique_train_labels, unique_test_embeddings, unique_test_labels

In [16]:
def find_members(train_embeddings, train_labels, test_embeddings, test_labels):
    """
    Filters test_embeddings and test_labels by removing entries whose labels do not exist in train_labels.

    Parameters:
    - train_embeddings (torch.Tensor): Embeddings for the training data.
    - train_labels (torch.Tensor): Labels for the training data.
    - test_embeddings (torch.Tensor): Embeddings for the test data.
    - test_labels (torch.Tensor): Labels for the test data.

    Returns:
    - train_embeddings (torch.Tensor): Original training embeddings (unchanged).
    - train_labels (torch.Tensor): Original training labels (unchanged).
    - filtered_test_embeddings (torch.Tensor): Filtered test embeddings.
    - filtered_test_labels (torch.Tensor): Filtered test labels.
    """

    # Ensure labels are on the same device
    if train_labels.device != test_labels.device:
        train_labels = train_labels.to(test_labels.device)

    # Use torch.isin to create a mask of test labels that exist in train labels
    if hasattr(torch, 'isin'):
        # torch.isin is available in PyTorch 1.10 and later
        mask = torch.isin(test_labels, train_labels)
    else:
        # For older versions of PyTorch, convert to NumPy arrays
        train_labels_np = train_labels.cpu().numpy()
        test_labels_np = test_labels.cpu().numpy()
        mask_np = np.isin(test_labels_np, train_labels_np)
        mask = torch.from_numpy(mask_np).to(test_labels.device)

    # Apply the mask to filter test embeddings and labels
    filtered_test_embeddings = test_embeddings[mask]
    filtered_test_labels = test_labels[mask]

    return train_embeddings, train_labels, filtered_test_embeddings, filtered_test_labels

# Evaluation of E2E classifiers: CE-SupCon-mean-0.1', 'CE-SupCon-mean-0.5', 'CE-attention', 'CE-concat', 'CE-mean', 'CE-learned_fusion'

In [32]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_e2e_classifier_embeddings("CE-mean", city, mode="text")
    # text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    # test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    # train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    # train_labels = torch.cat((image_train_labels, image_test_labels), 0)
    
    # Performing mean pooling
    # train_embeddings = torch.mean(train_embeddings, dim=1)
    
    # Normalize training embeddings
    faiss.normalize_L2(text_train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    faiss.normalize_L2(text_test_embeddings.detach().cpu().numpy())

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.21 seconds.


Calculating MRR@10: 100%|██████████| 1098/1098 [01:14<00:00, 14.66it/s]


MRR@10 mean: 0.7675 ± 0.39


Calculating R-precision: 100%|██████████| 1098/1098 [01:15<00:00, 14.50it/s]


R precision mean: 0.6362 ± 0.38


Calculating Macro-F1@X:  53%|█████▎    | 579/1098 [00:47<00:42, 12.17it/s]


KeyboardInterrupt: 

In [27]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_finetunedclipstylemodels("BLIP2", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    # test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    # train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    # train_labels = torch.cat((image_train_labels, image_test_labels), 0)
    
    # Performing mean pooling
    # train_embeddings = torch.mean(train_embeddings, dim=1)
    
    # Normalize training embeddings
    # faiss.normalize_L2(text_train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    # faiss.normalize_L2(text_test_embeddings.detach().cpu().numpy())

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.09 seconds.


Calculating MRR@10: 100%|██████████| 732/732 [00:17<00:00, 42.93it/s]


MRR@10 mean: 0.2691 ± 0.36


Calculating R-precision: 100%|██████████| 732/732 [00:17<00:00, 42.88it/s]


R precision mean: 0.0855 ± 0.16


Calculating Macro-F1@X: 100%|██████████| 732/732 [00:17<00:00, 42.70it/s]


Macro F1@X: 0.1109 ± 0.22
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.04 seconds.


Calculating MRR@10: 100%|██████████| 400/400 [00:05<00:00, 71.62it/s]


MRR@10 mean: 0.2609 ± 0.35


Calculating R-precision: 100%|██████████| 400/400 [00:05<00:00, 70.87it/s]


R precision mean: 0.0675 ± 0.11


Calculating Macro-F1@X: 100%|██████████| 400/400 [00:05<00:00, 71.03it/s]


Macro F1@X: 0.073 ± 0.13
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 165/165 [00:01<00:00, 150.70it/s]


MRR@10 mean: 0.2602 ± 0.34


Calculating R-precision: 100%|██████████| 165/165 [00:01<00:00, 154.48it/s]


R precision mean: 0.0956 ± 0.14


Calculating Macro-F1@X: 100%|██████████| 165/165 [00:01<00:00, 144.47it/s]


Macro F1@X: 0.1092 ± 0.19
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 262/262 [00:02<00:00, 101.09it/s]


MRR@10 mean: 0.2309 ± 0.34


Calculating R-precision: 100%|██████████| 262/262 [00:02<00:00, 98.84it/s] 


R precision mean: 0.1013 ± 0.17


Calculating Macro-F1@X: 100%|██████████| 262/262 [00:02<00:00, 100.80it/s]


Macro F1@X: 0.0932 ± 0.18


In [30]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_finetunedclipstylemodels("BLIP2", city, mode="text")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    # test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    # train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    # train_labels = torch.cat((image_train_labels, image_test_labels), 0)
    
    # Performing mean pooling
    # train_embeddings = torch.mean(train_embeddings, dim=1)
    
    # Normalize training embeddings
    faiss.normalize_L2(text_train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    faiss.normalize_L2(text_test_embeddings.detach().cpu().numpy())

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 1097/1097 [00:10<00:00, 99.88it/s] 


MRR@10 mean: 0.453 ± 0.44


Calculating R-precision:  31%|███       | 337/1097 [00:03<00:08, 89.21it/s] 


KeyboardInterrupt: 

In [17]:
mean, std = 0, 0


for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embedddings_for_e2e("CE+SupCon", city, mode="text")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    # test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    # train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    # train_labels = torch.cat((image_train_labels, image_test_labels), 0)
    
    # Performing mean pooling
    # train_embeddings = torch.mean(train_embeddings, dim=1)
    
    # Normalize training embeddings
    faiss.normalize_L2(text_train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    faiss.normalize_L2(text_test_embeddings.detach().cpu().numpy())

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 1411/1411 [00:20<00:00, 67.37it/s]


MRR@10 mean: 0.988 ± 0.09


Calculating R-precision: 100%|██████████| 1411/1411 [00:21<00:00, 66.76it/s]


R precision mean: 0.9206 ± 0.16


Calculating Macro-F1@X: 100%|██████████| 1411/1411 [00:21<00:00, 67.11it/s]


Macro F1@X: 0.9445 ± 0.15
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 975/975 [00:08<00:00, 108.49it/s]


MRR@10 mean: 0.9694 ± 0.14


Calculating R-precision: 100%|██████████| 975/975 [00:09<00:00, 106.48it/s]


R precision mean: 0.6492 ± 0.3


Calculating Macro-F1@X: 100%|██████████| 975/975 [00:09<00:00, 104.09it/s]


Macro F1@X: 0.5656 ± 0.33
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 490/490 [00:02<00:00, 230.54it/s]


MRR@10 mean: 0.9921 ± 0.06


Calculating R-precision: 100%|██████████| 490/490 [00:02<00:00, 217.30it/s]


R precision mean: 0.7955 ± 0.24


Calculating Macro-F1@X: 100%|██████████| 490/490 [00:02<00:00, 224.96it/s]


Macro F1@X: 0.7363 ± 0.26
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 566/566 [00:02<00:00, 235.64it/s]


MRR@10 mean: 0.9746 ± 0.13


Calculating R-precision: 100%|██████████| 566/566 [00:02<00:00, 242.78it/s]


R precision mean: 0.7233 ± 0.29


Calculating Macro-F1@X: 100%|██████████| 566/566 [00:02<00:00, 242.11it/s]


Macro F1@X: 0.7235 ± 0.31


In [15]:
a = torch.rand(4)
b = torch.rand(4)

c = torch.stack((a, b), 0)

print(c)

tensor([[0.4824, 0.6623, 0.8904, 0.1944],
        [0.1663, 0.1387, 0.6894, 0.5556]])


# Text Alignment task

In [22]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels_ids("CLIP", city, mode="text")
    vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels = load_embeddings_for_clipstylemodels_ids("CLIP", city, mode="image")
    
    # text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)

    
    # Performing mean pooling
    # train_embeddings = torch.mean(train_embeddings, dim=1)
    test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    train_embeddings = torch.cat((vision_train_embeddings, vision_test_embeddings), 0)
    train_labels = torch.cat((vision_train_labels, vision_test_labels), 0)
    
    # vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels = remove_duplicate_embeddings(vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels)
    # vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels = find_members(vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels)
    
    # Normalize training embeddings
    # faiss.normalize_L2(train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    # faiss.normalize_L2(test_embeddings.detach().cpu().numpy())

    initialize_globals(train_embeddings, train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.09 seconds.


Calculating MRR@10: 100%|██████████| 13677/13677 [04:15<00:00, 53.63it/s]


MRR@10 mean: 0.001 ± 0.03


Calculating R-precision: 100%|██████████| 13677/13677 [02:25<00:00, 94.13it/s] 


R precision mean: 0.0017 ± 0.02


Calculating Macro-F1@X: 100%|██████████| 13677/13677 [02:25<00:00, 94.18it/s] 


Macro F1@X: 0.0017 ± 0.02
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.06 seconds.


Calculating MRR@10: 100%|██████████| 8285/8285 [01:23<00:00, 99.76it/s] 


MRR@10 mean: 0.0027 ± 0.05


Calculating R-precision: 100%|██████████| 8285/8285 [00:43<00:00, 192.15it/s]


R precision mean: 0.0049 ± 0.04


Calculating Macro-F1@X: 100%|██████████| 8285/8285 [00:43<00:00, 192.47it/s]


Macro F1@X: 0.0049 ± 0.04
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 3158/3158 [00:10<00:00, 287.67it/s]


MRR@10 mean: 0.0063 ± 0.08


Calculating R-precision: 100%|██████████| 3158/3158 [00:05<00:00, 598.42it/s] 


R precision mean: 0.0103 ± 0.06


Calculating Macro-F1@X: 100%|██████████| 3158/3158 [00:05<00:00, 601.66it/s] 


Macro F1@X: 0.0103 ± 0.06
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 2558/2558 [00:13<00:00, 195.37it/s]


MRR@10 mean: 0.0098 ± 0.1


Calculating R-precision: 100%|██████████| 2558/2558 [00:08<00:00, 284.52it/s]


R precision mean: 0.0104 ± 0.06


Calculating Macro-F1@X: 100%|██████████| 2558/2558 [00:08<00:00, 285.66it/s]


Macro F1@X: 0.0104 ± 0.06


In [28]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels_ids("CLIPITM", city, mode="text")
    vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels = load_embeddings_for_clipstylemodels_ids("CLIPITM", city, mode="image")
    
    # text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)

    
    # Performing mean pooling
    # train_embeddings = torch.mean(train_embeddings, dim=1)
    test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    train_embeddings = torch.cat((vision_train_embeddings, vision_test_embeddings), 0)
    train_labels = torch.cat((vision_train_labels, vision_test_labels), 0)
    
    train_embeddings = torch.mean(train_embeddings, dim=1)
    # test_embeddings = torch.mean(test_embeddings, dim=1)
    
    # vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels = remove_duplicate_embeddings(vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels)
    # vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels = find_members(vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels)
    
    # Normalize training embeddings
    # faiss.normalize_L2(train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    # faiss.normalize_L2(test_embeddings.detach().cpu().numpy())

    initialize_globals(train_embeddings, train_labels)

    mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
    mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
    # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
    mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
    # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
    mean += mean_temp
    std += std_temp

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.10 seconds.


Calculating MRR@10: 100%|██████████| 13677/13677 [04:14<00:00, 53.67it/s]


MRR@10 mean: 0.0001 ± 0.01


Calculating R-precision: 100%|██████████| 13677/13677 [02:24<00:00, 94.34it/s] 


R precision mean: 0.0002 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 13677/13677 [02:24<00:00, 94.64it/s] 


Macro F1@X: 0.0002 ± 0.01
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.05 seconds.


Calculating MRR@10: 100%|██████████| 8285/8285 [01:23<00:00, 99.82it/s] 


MRR@10 mean: 0.0001 ± 0.01


Calculating R-precision: 100%|██████████| 8285/8285 [00:42<00:00, 192.73it/s]


R precision mean: 0.0002 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 8285/8285 [00:42<00:00, 193.22it/s]


Macro F1@X: 0.0002 ± 0.01
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 3158/3158 [00:10<00:00, 288.87it/s]


MRR@10 mean: 0.0003 ± 0.02


Calculating R-precision: 100%|██████████| 3158/3158 [00:05<00:00, 599.93it/s] 


R precision mean: 0.0006 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 3158/3158 [00:05<00:00, 603.97it/s] 


Macro F1@X: 0.0006 ± 0.01
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 2558/2558 [00:13<00:00, 196.63it/s]


MRR@10 mean: 0.0008 ± 0.03


Calculating R-precision: 100%|██████████| 2558/2558 [00:08<00:00, 286.59it/s]


R precision mean: 0.0007 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 2558/2558 [00:08<00:00, 288.27it/s]


Macro F1@X: 0.0007 ± 0.01


In [29]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels_ids("BLIP2", city, mode="text")
    vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels = load_embeddings_for_clipstylemodels_ids("BLIP2", city, mode="image")
    
    # text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)

    
    # Performing mean pooling
    # train_embeddings = torch.mean(train_embeddings, dim=1)
    test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    train_embeddings = torch.cat((vision_train_embeddings, vision_test_embeddings), 0)
    train_labels = torch.cat((vision_train_labels, vision_test_labels), 0)
    
    # vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels = remove_duplicate_embeddings(vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels)
    # vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels = find_members(vision_train_embeddings, vision_train_labels, vision_test_embeddings, vision_test_labels)
    
    # Normalize training embeddings
    # faiss.normalize_L2(train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    # faiss.normalize_L2(test_embeddings.detach().cpu().numpy())
    train_embeddings = torch.mean(train_embeddings, dim=1)
    # test_embeddings = torch.mean(test_embeddings, dim=1)

    initialize_globals(train_embeddings, train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.08 seconds.


Calculating MRR@10: 100%|██████████| 13677/13677 [04:14<00:00, 53.84it/s]


MRR@10 mean: 0.0001 ± 0.01


Calculating R-precision: 100%|██████████| 13677/13677 [02:24<00:00, 94.39it/s] 


R precision mean: 0.0 ± 0.0


Calculating Macro-F1@X: 100%|██████████| 13677/13677 [02:24<00:00, 94.62it/s] 


Macro F1@X: 0.0 ± 0.0
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.04 seconds.


Calculating MRR@10: 100%|██████████| 8285/8285 [01:23<00:00, 99.77it/s] 


MRR@10 mean: 0.0001 ± 0.01


Calculating R-precision: 100%|██████████| 8285/8285 [00:42<00:00, 192.70it/s]


R precision mean: 0.0002 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 8285/8285 [00:42<00:00, 193.38it/s]


Macro F1@X: 0.0002 ± 0.01
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 3158/3158 [00:10<00:00, 289.10it/s]


MRR@10 mean: 0.0003 ± 0.02


Calculating R-precision: 100%|██████████| 3158/3158 [00:05<00:00, 597.63it/s] 


R precision mean: 0.0013 ± 0.03


Calculating Macro-F1@X: 100%|██████████| 3158/3158 [00:05<00:00, 601.95it/s] 


Macro F1@X: 0.0013 ± 0.03
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 2558/2558 [00:13<00:00, 195.93it/s]


MRR@10 mean: 0.0004 ± 0.02


Calculating R-precision: 100%|██████████| 2558/2558 [00:08<00:00, 285.88it/s]


R precision mean: 0.0005 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 2558/2558 [00:08<00:00, 287.51it/s]

Macro F1@X: 0.0005 ± 0.01





# Text Retrieval

In [138]:
mean, std = 0, 0


for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings("CE", city, mode="text")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)

    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embedddings_for_e2e("CE+SupCon", city, mode="image")

    # test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    # test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    # train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    # train_labels = torch.cat((image_train_labels, image_test_labels), 0)
    
    # Performing mean pooling
    # train_embeddings = torch.mean(train_embeddings, dim=1)

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 1404/1404 [00:19<00:00, 70.84it/s]


MRR@10 mean: 0.9836 ± 0.1


Calculating R-precision: 100%|██████████| 1404/1404 [00:20<00:00, 69.27it/s]


R precision mean: 0.8687 ± 0.19


Calculating Macro-F1@X: 100%|██████████| 1404/1404 [00:20<00:00, 69.38it/s]


Macro F1@X: 0.8726 ± 0.2
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 976/976 [00:09<00:00, 108.29it/s]


MRR@10 mean: 0.9694 ± 0.15


Calculating R-precision: 100%|██████████| 976/976 [00:09<00:00, 107.83it/s]


R precision mean: 0.6612 ± 0.31


Calculating Macro-F1@X: 100%|██████████| 976/976 [00:09<00:00, 105.15it/s]


Macro F1@X: 0.5819 ± 0.34
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 490/490 [00:02<00:00, 229.72it/s]


MRR@10 mean: 0.9932 ± 0.07


Calculating R-precision: 100%|██████████| 490/490 [00:02<00:00, 228.05it/s]


R precision mean: 0.8008 ± 0.25


Calculating Macro-F1@X: 100%|██████████| 490/490 [00:02<00:00, 228.45it/s]


Macro F1@X: 0.7466 ± 0.26
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 561/561 [00:02<00:00, 243.59it/s]


MRR@10 mean: 0.9821 ± 0.12


Calculating R-precision: 100%|██████████| 561/561 [00:02<00:00, 235.66it/s]


R precision mean: 0.7365 ± 0.28


Calculating Macro-F1@X: 100%|██████████| 561/561 [00:02<00:00, 245.63it/s]


Macro F1@X: 0.7242 ± 0.31


In [84]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_updatedclipstylemodels("CLIPITM", city, mode="image")
    
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # print(text_train_embeddings.shape)
    # print(text_test_embeddings.shape)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.10 seconds.


Calculating MRR@10: 100%|██████████| 1017/1017 [00:21<00:00, 47.96it/s]


MRR@10 mean: 0.3281 ± 0.37


Calculating R-precision: 100%|██████████| 1017/1017 [00:21<00:00, 47.36it/s]


R precision mean: 0.065 ± 0.1


Calculating Macro-F1@X: 100%|██████████| 1017/1017 [00:21<00:00, 47.41it/s]


Macro F1@X: 0.0614 ± 0.1
Error processing south: name 'file' is not defined

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.04 seconds.


Calculating MRR@10: 100%|██████████| 605/605 [00:07<00:00, 80.96it/s]


MRR@10 mean: 0.3434 ± 0.39


Calculating R-precision: 100%|██████████| 605/605 [00:07<00:00, 80.65it/s]


R precision mean: 0.0826 ± 0.14


Calculating Macro-F1@X: 100%|██████████| 605/605 [00:07<00:00, 80.87it/s]


Macro F1@X: 0.0675 ± 0.11
Error processing midwest: name 'file' is not defined

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 312/312 [00:01<00:00, 209.48it/s]


MRR@10 mean: 0.3683 ± 0.43


Calculating R-precision: 100%|██████████| 312/312 [00:01<00:00, 203.87it/s]


R precision mean: 0.1218 ± 0.17


Calculating Macro-F1@X: 100%|██████████| 312/312 [00:01<00:00, 201.14it/s]


Macro F1@X: 0.107 ± 0.15
Error processing west: name 'file' is not defined

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 357/357 [00:02<00:00, 134.10it/s]


MRR@10 mean: 0.3442 ± 0.4


Calculating R-precision: 100%|██████████| 357/357 [00:02<00:00, 137.33it/s]


R precision mean: 0.1003 ± 0.14


Calculating Macro-F1@X: 100%|██████████| 357/357 [00:02<00:00, 129.09it/s]

Macro F1@X: 0.0933 ± 0.13
Error processing northeast: name 'file' is not defined

Average score: 0.0 ± 0.0






In [27]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    # BLIP2-CESupCon
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_finetunedclipstylemodels("BLIP2-CESupCon", city, mode="multimodal")
    
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # print(text_train_embeddings.shape)
    # print(text_test_embeddings.shape)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.18 seconds.


Calculating MRR@10: 100%|██████████| 1338/1338 [01:23<00:00, 16.04it/s]


MRR@10 mean: 0.9814 ± 0.1


Calculating R-precision: 100%|██████████| 1338/1338 [01:23<00:00, 16.02it/s]


R precision mean: 0.795 ± 0.19


Calculating Macro-F1@X: 100%|██████████| 1338/1338 [01:20<00:00, 16.60it/s]


Macro F1@X: 0.8487 ± 0.2
Error processing south: name 'file' is not defined

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.12 seconds.


Calculating MRR@10: 100%|██████████| 914/914 [00:32<00:00, 28.46it/s]


MRR@10 mean: 0.9378 ± 0.2


Calculating R-precision: 100%|██████████| 914/914 [00:29<00:00, 30.94it/s]


R precision mean: 0.5524 ± 0.25


Calculating Macro-F1@X: 100%|██████████| 914/914 [00:29<00:00, 30.70it/s]


Macro F1@X: 0.5325 ± 0.28
Error processing midwest: name 'file' is not defined

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.04 seconds.


Calculating MRR@10: 100%|██████████| 452/452 [00:05<00:00, 76.04it/s]


MRR@10 mean: 0.9559 ± 0.18


Calculating R-precision: 100%|██████████| 452/452 [00:06<00:00, 73.23it/s]


R precision mean: 0.6759 ± 0.23


Calculating Macro-F1@X: 100%|██████████| 452/452 [00:06<00:00, 71.97it/s]


Macro F1@X: 0.705 ± 0.24
Error processing west: name 'file' is not defined

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 515/515 [00:06<00:00, 76.59it/s]


MRR@10 mean: 0.969 ± 0.14


Calculating R-precision: 100%|██████████| 515/515 [00:06<00:00, 76.07it/s]


R precision mean: 0.6691 ± 0.27


Calculating Macro-F1@X: 100%|██████████| 515/515 [00:06<00:00, 74.86it/s]

Macro F1@X: 0.6944 ± 0.29
Error processing northeast: name 'file' is not defined

Average score: 0.0 ± 0.0






In [16]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    # BLIP2-CESupCon
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_finetunedclipstylemodels("BLIP2-CESupCon", city, mode="text")
    
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # print(text_train_embeddings.shape)
    # print(text_test_embeddings.shape)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    # Normalize training embeddings
    faiss.normalize_L2(text_train_embeddings.detach().cpu().numpy())
    # Normalize test embeddings
    faiss.normalize_L2(text_test_embeddings.detach().cpu().numpy())
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.06 seconds.


Calculating MRR@10: 100%|██████████| 213/213 [00:02<00:00, 94.53it/s] 


MRR@10 mean: 0.8886 ± 0.31


Calculating R-precision: 100%|██████████| 213/213 [00:02<00:00, 93.98it/s] 


R precision mean: 0.7632 ± 0.32


Calculating Macro-F1@X: 100%|██████████| 213/213 [00:02<00:00, 94.05it/s] 


Macro F1@X: 0.7879 ± 0.29
Error processing south: name 'file' is not defined

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 146/146 [00:00<00:00, 159.09it/s]


MRR@10 mean: 0.7397 ± 0.43


Calculating R-precision: 100%|██████████| 146/146 [00:00<00:00, 160.13it/s]


R precision mean: 0.5666 ± 0.4


Calculating Macro-F1@X: 100%|██████████| 146/146 [00:00<00:00, 159.53it/s]


Macro F1@X: 0.5762 ± 0.39
Error processing midwest: name 'file' is not defined

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 50/50 [00:00<00:00, 337.96it/s]


MRR@10 mean: 0.86 ± 0.35


Calculating R-precision: 100%|██████████| 50/50 [00:00<00:00, 314.32it/s]


R precision mean: 0.7652 ± 0.31


Calculating Macro-F1@X: 100%|██████████| 50/50 [00:00<00:00, 325.26it/s]


Macro F1@X: 0.7482 ± 0.29
Error processing west: name 'file' is not defined

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 32/32 [00:00<00:00, 491.16it/s]


MRR@10 mean: 0.7604 ± 0.42


Calculating R-precision: 100%|██████████| 32/32 [00:00<00:00, 522.41it/s]


R precision mean: 0.5869 ± 0.4


Calculating Macro-F1@X: 100%|██████████| 32/32 [00:00<00:00, 552.40it/s]

Macro F1@X: 0.5912 ± 0.38
Error processing northeast: name 'file' is not defined

Average score: 0.0 ± 0.0






In [32]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_finetunedclipstylemodels("CLIPITM", city, mode="multimodal")
    
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # print(text_train_embeddings.shape)
    # print(text_test_embeddings.shape)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.16 seconds.


Calculating MRR@10: 100%|██████████| 1338/1338 [01:22<00:00, 16.27it/s]


MRR@10 mean: 0.9739 ± 0.12


Calculating R-precision: 100%|██████████| 1338/1338 [01:23<00:00, 16.09it/s]


R precision mean: 0.7282 ± 0.22


Calculating Macro-F1@X: 100%|██████████| 1338/1338 [01:23<00:00, 16.11it/s]


Macro F1@X: 0.7313 ± 0.25
Error processing south: name 'file' is not defined

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.08 seconds.


Calculating MRR@10: 100%|██████████| 914/914 [00:30<00:00, 30.22it/s]


MRR@10 mean: 0.9285 ± 0.2


Calculating R-precision: 100%|██████████| 914/914 [00:30<00:00, 29.97it/s]


R precision mean: 0.4968 ± 0.23


Calculating Macro-F1@X: 100%|██████████| 914/914 [00:30<00:00, 29.81it/s]


Macro F1@X: 0.4538 ± 0.26
Error processing midwest: name 'file' is not defined

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 452/452 [00:06<00:00, 72.47it/s]


MRR@10 mean: 0.9498 ± 0.19


Calculating R-precision: 100%|██████████| 452/452 [00:06<00:00, 70.74it/s]


R precision mean: 0.6109 ± 0.23


Calculating Macro-F1@X: 100%|██████████| 452/452 [00:06<00:00, 69.17it/s]


Macro F1@X: 0.6275 ± 0.24
Error processing west: name 'file' is not defined

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 515/515 [00:07<00:00, 70.76it/s]


MRR@10 mean: 0.9655 ± 0.15


Calculating R-precision: 100%|██████████| 515/515 [00:07<00:00, 68.31it/s]


R precision mean: 0.6419 ± 0.27


Calculating Macro-F1@X: 100%|██████████| 515/515 [00:07<00:00, 68.15it/s]


Macro F1@X: 0.6591 ± 0.28
Error processing northeast: name 'file' is not defined

Average score: 0.0 ± 0.0



In [33]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_finetunedclipstylemodels("BLIP2", city, mode="multimodal")
    
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # print(text_train_embeddings.shape)
    # print(text_test_embeddings.shape)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.12 seconds.


Calculating MRR@10: 100%|██████████| 1338/1338 [01:22<00:00, 16.21it/s]


MRR@10 mean: 0.9774 ± 0.11


Calculating R-precision: 100%|██████████| 1338/1338 [01:23<00:00, 15.97it/s]


R precision mean: 0.7723 ± 0.2


Calculating Macro-F1@X: 100%|██████████| 1338/1338 [01:23<00:00, 15.96it/s]


Macro F1@X: 0.7973 ± 0.22
Error processing south: name 'file' is not defined

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.08 seconds.


Calculating MRR@10: 100%|██████████| 914/914 [00:29<00:00, 30.75it/s]


MRR@10 mean: 0.9426 ± 0.19


Calculating R-precision: 100%|██████████| 914/914 [00:30<00:00, 30.43it/s]


R precision mean: 0.5564 ± 0.25


Calculating Macro-F1@X: 100%|██████████| 914/914 [00:30<00:00, 30.19it/s]


Macro F1@X: 0.5446 ± 0.29
Error processing midwest: name 'file' is not defined

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 452/452 [00:06<00:00, 72.66it/s]


MRR@10 mean: 0.9648 ± 0.15


Calculating R-precision: 100%|██████████| 452/452 [00:06<00:00, 70.82it/s]


R precision mean: 0.6943 ± 0.23


Calculating Macro-F1@X: 100%|██████████| 452/452 [00:06<00:00, 68.79it/s]


Macro F1@X: 0.725 ± 0.24
Error processing west: name 'file' is not defined

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 515/515 [00:07<00:00, 70.03it/s]


MRR@10 mean: 0.9759 ± 0.12


Calculating R-precision: 100%|██████████| 515/515 [00:07<00:00, 71.32it/s]


R precision mean: 0.6809 ± 0.26


Calculating Macro-F1@X: 100%|██████████| 515/515 [00:07<00:00, 69.45it/s]


Macro F1@X: 0.7077 ± 0.29
Error processing northeast: name 'file' is not defined

Average score: 0.0 ± 0.0



In [87]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_updatedclipstylemodels("BLIPConditional", city, mode="image")
    
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # print(text_train_embeddings.shape)
    # print(text_test_embeddings.shape)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.08 seconds.


Calculating MRR@10: 100%|██████████| 1017/1017 [00:21<00:00, 47.19it/s]


MRR@10 mean: 0.2049 ± 0.32


Calculating R-precision: 100%|██████████| 1017/1017 [00:21<00:00, 47.88it/s]


R precision mean: 0.0563 ± 0.13


Calculating Macro-F1@X: 100%|██████████| 1017/1017 [00:21<00:00, 46.94it/s]


Macro F1@X: 0.0805 ± 0.18
Error processing south: name 'file' is not defined

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.04 seconds.


Calculating MRR@10: 100%|██████████| 605/605 [00:07<00:00, 80.57it/s]


MRR@10 mean: 0.1855 ± 0.31


Calculating R-precision: 100%|██████████| 605/605 [00:07<00:00, 80.12it/s]


R precision mean: 0.0569 ± 0.13


Calculating Macro-F1@X: 100%|██████████| 605/605 [00:07<00:00, 80.14it/s] 


Macro F1@X: 0.0776 ± 0.16
Error processing midwest: name 'file' is not defined

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 312/312 [00:01<00:00, 205.63it/s]


MRR@10 mean: 0.2488 ± 0.39


Calculating R-precision: 100%|██████████| 312/312 [00:01<00:00, 206.48it/s]


R precision mean: 0.1001 ± 0.18


Calculating Macro-F1@X: 100%|██████████| 312/312 [00:01<00:00, 205.85it/s]


Macro F1@X: 0.1074 ± 0.2
Error processing west: name 'file' is not defined

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 357/357 [00:02<00:00, 136.68it/s]


MRR@10 mean: 0.245 ± 0.36


Calculating R-precision: 100%|██████████| 357/357 [00:02<00:00, 132.61it/s]


R precision mean: 0.1115 ± 0.2


Calculating Macro-F1@X: 100%|██████████| 357/357 [00:02<00:00, 129.72it/s]


Macro F1@X: 0.1088 ± 0.19
Error processing northeast: name 'file' is not defined

Average score: 0.0 ± 0.0



In [None]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_updatedclipstylemodels("BLIPConditional", city, mode="image")
    
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    # print(text_train_embeddings.shape)
    # print(text_test_embeddings.shape)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

In [59]:
from sklearn.preprocessing import normalize

mean, std = 0, 0
model_type = "BLIP2"
    
for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels(model_type, city, mode="image")
    if model_type == "CLIPITM" or model_type == "BLIP2":
        text_train_embeddings = text_train_embeddings.mean(axis=1)
        text_train_embeddings = normalize(text_train_embeddings, norm='l2', axis=1)
        text_train_embeddings = torch.tensor(text_train_embeddings.astype('float32'))
        
        text_test_embeddings = text_test_embeddings.mean(axis=1)
        text_test_embeddings = normalize(text_test_embeddings, norm='l2', axis=1)
        text_test_embeddings = torch.tensor(text_test_embeddings.astype('float32'))

        
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.06 seconds.


Calculating MRR@10: 100%|██████████| 1226/1226 [00:26<00:00, 47.01it/s]


MRR@10 mean: 0.3425 ± 0.36


Calculating R-precision: 100%|██████████| 1226/1226 [00:26<00:00, 46.61it/s]


R precision mean: 0.108 ± 0.18


Calculating Macro-F1@X: 100%|██████████| 1226/1226 [00:26<00:00, 46.40it/s]


Macro F1@X: 0.135 ± 0.22
Error processing south: name 'file' is not defined

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 783/783 [00:09<00:00, 81.85it/s]


MRR@10 mean: 0.3353 ± 0.37


Calculating R-precision: 100%|██████████| 783/783 [00:09<00:00, 80.50it/s]


R precision mean: 0.1194 ± 0.18


Calculating Macro-F1@X: 100%|██████████| 783/783 [00:09<00:00, 79.48it/s]


Macro F1@X: 0.1346 ± 0.2
Error processing midwest: name 'file' is not defined

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 397/397 [00:01<00:00, 205.96it/s]


MRR@10 mean: 0.3967 ± 0.41


Calculating R-precision: 100%|██████████| 397/397 [00:01<00:00, 201.95it/s]


R precision mean: 0.1699 ± 0.22


Calculating Macro-F1@X: 100%|██████████| 397/397 [00:01<00:00, 200.53it/s]


Macro F1@X: 0.1973 ± 0.26
Error processing west: name 'file' is not defined

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 472/472 [00:03<00:00, 137.66it/s]


MRR@10 mean: 0.4182 ± 0.42


Calculating R-precision: 100%|██████████| 472/472 [00:03<00:00, 131.89it/s]


R precision mean: 0.2232 ± 0.29


Calculating Macro-F1@X: 100%|██████████| 472/472 [00:03<00:00, 131.84it/s]


Macro F1@X: 0.2086 ± 0.27
Error processing northeast: name 'file' is not defined

Average score: 0.0 ± 0.0



In [123]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels("CLIPITM", city, mode="text")
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.04 seconds.


Calculating MRR@10: 100%|██████████| 947/947 [00:09<00:00, 99.21it/s] 


MRR@10 mean: 0.7615 ± 0.38


Calculating R-precision: 100%|██████████| 947/947 [00:09<00:00, 98.25it/s] 


R precision mean: 0.4956 ± 0.34


Calculating Macro-F1@X: 100%|██████████| 947/947 [00:09<00:00, 100.16it/s]


Macro F1@X: 0.5154 ± 0.34
Error processing south: I/O operation on closed file.

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 618/618 [00:03<00:00, 161.44it/s]


MRR@10 mean: 0.6809 ± 0.43


Calculating R-precision: 100%|██████████| 618/618 [00:03<00:00, 163.04it/s]


R precision mean: 0.5681 ± 0.38


Calculating Macro-F1@X: 100%|██████████| 618/618 [00:03<00:00, 169.75it/s]


Macro F1@X: 0.5671 ± 0.37
Error processing midwest: I/O operation on closed file.

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 271/271 [00:00<00:00, 385.81it/s]


MRR@10 mean: 0.8077 ± 0.38


Calculating R-precision: 100%|██████████| 271/271 [00:00<00:00, 395.25it/s]


R precision mean: 0.7315 ± 0.36


Calculating Macro-F1@X: 100%|██████████| 271/271 [00:00<00:00, 386.51it/s]


Macro F1@X: 0.7267 ± 0.3
Error processing west: I/O operation on closed file.

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 310/310 [00:00<00:00, 482.58it/s]


MRR@10 mean: 0.7557 ± 0.41


Calculating R-precision: 100%|██████████| 310/310 [00:00<00:00, 483.13it/s]


R precision mean: 0.679 ± 0.38


Calculating Macro-F1@X: 100%|██████████| 310/310 [00:00<00:00, 497.60it/s]

Macro F1@X: 0.7029 ± 0.37
Error processing northeast: I/O operation on closed file.

Average score: 0.0 ± 0.0






In [17]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels("BLIP2", city, mode="text")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 947/947 [00:09<00:00, 99.69it/s] 


MRR@10 mean: 0.6795 ± 0.42


Calculating R-precision: 100%|██████████| 947/947 [00:09<00:00, 99.87it/s] 


R precision mean: 0.4264 ± 0.34


Calculating Macro-F1@X: 100%|██████████| 947/947 [00:09<00:00, 101.63it/s]


Macro F1@X: 0.444 ± 0.34
Error processing south: name 'file' is not defined

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 618/618 [00:03<00:00, 160.45it/s]


MRR@10 mean: 0.6189 ± 0.46


Calculating R-precision: 100%|██████████| 618/618 [00:03<00:00, 163.63it/s]


R precision mean: 0.4999 ± 0.39


Calculating Macro-F1@X: 100%|██████████| 618/618 [00:03<00:00, 163.73it/s]


Macro F1@X: 0.5059 ± 0.38
Error processing midwest: name 'file' is not defined

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 271/271 [00:00<00:00, 359.64it/s]


MRR@10 mean: 0.7534 ± 0.42


Calculating R-precision: 100%|██████████| 271/271 [00:00<00:00, 379.81it/s]


R precision mean: 0.6516 ± 0.38


Calculating Macro-F1@X: 100%|██████████| 271/271 [00:00<00:00, 376.49it/s]


Macro F1@X: 0.6498 ± 0.33
Error processing west: name 'file' is not defined

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 310/310 [00:00<00:00, 472.19it/s]


MRR@10 mean: 0.7113 ± 0.44


Calculating R-precision: 100%|██████████| 310/310 [00:00<00:00, 487.49it/s]


R precision mean: 0.5998 ± 0.4


Calculating Macro-F1@X: 100%|██████████| 310/310 [00:00<00:00, 486.74it/s]

Macro F1@X: 0.6306 ± 0.39
Error processing northeast: name 'file' is not defined

Average score: 0.0 ± 0.0






In [23]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_newclipstylemodels("BigCLIP", city, mode="text")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.04 seconds.


Calculating MRR@10: 100%|██████████| 215/215 [00:02<00:00, 98.58it/s] 


MRR@10 mean: 0.0047 ± 0.07


Calculating R-precision: 100%|██████████| 215/215 [00:02<00:00, 97.66it/s] 


R precision mean: 0.0022 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 215/215 [00:02<00:00, 97.83it/s] 


Macro F1@X: 0.003 ± 0.01
Error processing south: name 'file' is not defined

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 147/147 [00:00<00:00, 163.68it/s]


MRR@10 mean: 0.0 ± 0.0


Calculating R-precision: 100%|██████████| 147/147 [00:00<00:00, 164.65it/s]


R precision mean: 0.003 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 147/147 [00:00<00:00, 164.33it/s]


Macro F1@X: 0.0044 ± 0.01
Error processing midwest: name 'file' is not defined

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 51/51 [00:00<00:00, 343.43it/s]


MRR@10 mean: 0.0051 ± 0.03


Calculating R-precision: 100%|██████████| 51/51 [00:00<00:00, 333.04it/s]


R precision mean: 0.0112 ± 0.03


Calculating Macro-F1@X: 100%|██████████| 51/51 [00:00<00:00, 345.11it/s]


Macro F1@X: 0.0261 ± 0.04
Error processing west: name 'file' is not defined

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 32/32 [00:00<00:00, 562.35it/s]


MRR@10 mean: 0.0 ± 0.0


Calculating R-precision: 100%|██████████| 32/32 [00:00<00:00, 567.78it/s]


R precision mean: 0.002 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 32/32 [00:00<00:00, 571.31it/s]

Macro F1@X: 0.0016 ± 0.01
Error processing northeast: name 'file' is not defined

Average score: 0.0 ± 0.0






# Image Retrieval 

In [137]:
mean, std = 0, 0


for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings("CE", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)

    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embedddings_for_e2e("CE+SupCon", city, mode="image")

    # test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    # test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    # train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    # train_labels = torch.cat((image_train_labels, image_test_labels), 0)
    
    # Performing mean pooling
    # train_embeddings = torch.mean(train_embeddings, dim=1)

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.05 seconds.


Calculating MRR@10: 100%|██████████| 1078/1078 [00:22<00:00, 47.71it/s]


MRR@10 mean: 0.4038 ± 0.38


Calculating R-precision: 100%|██████████| 1078/1078 [00:22<00:00, 48.74it/s]


R precision mean: 0.1115 ± 0.15


Calculating Macro-F1@X: 100%|██████████| 1078/1078 [00:22<00:00, 48.05it/s]


Macro F1@X: 0.1152 ± 0.17
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.05 seconds.


Calculating MRR@10: 100%|██████████| 636/636 [00:07<00:00, 84.75it/s] 


MRR@10 mean: 0.3905 ± 0.4


Calculating R-precision: 100%|██████████| 636/636 [00:07<00:00, 82.90it/s] 


R precision mean: 0.1141 ± 0.16


Calculating Macro-F1@X: 100%|██████████| 636/636 [00:07<00:00, 83.58it/s] 


Macro F1@X: 0.1049 ± 0.14
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 332/332 [00:01<00:00, 235.08it/s]


MRR@10 mean: 0.4603 ± 0.45


Calculating R-precision: 100%|██████████| 332/332 [00:01<00:00, 240.15it/s]


R precision mean: 0.1768 ± 0.21


Calculating Macro-F1@X: 100%|██████████| 332/332 [00:01<00:00, 238.22it/s]


Macro F1@X: 0.1739 ± 0.21
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 383/383 [00:02<00:00, 146.57it/s]


MRR@10 mean: 0.4509 ± 0.42


Calculating R-precision: 100%|██████████| 383/383 [00:02<00:00, 146.33it/s]


R precision mean: 0.1646 ± 0.19


Calculating Macro-F1@X: 100%|██████████| 383/383 [00:02<00:00, 140.35it/s]


Macro F1@X: 0.1493 ± 0.18


In [72]:
mean, std = 0, 0


for city in ["south" ,"midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embedddings_for_e2e("CE+SupCon", city, mode="multimodal")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = find_members(image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels)
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embedddings_for_e2e("CE+SupCon", city, mode="image")

    # test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    # test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    # train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    # train_labels = torch.cat((image_train_labels, image_test_labels), 0)
    
    # Performing mean pooling
    # train_embeddings = torch.mean(train_embeddings, dim=1)

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.27 seconds.


Calculating MRR@10: 100%|██████████| 1413/1413 [01:20<00:00, 17.65it/s]


MRR@10 mean: 0.9859 ± 0.1


Calculating R-precision: 100%|██████████| 1413/1413 [01:20<00:00, 17.56it/s]


R precision mean: 0.9248 ± 0.14


Calculating Macro-F1@X: 100%|██████████| 1413/1413 [01:20<00:00, 17.50it/s]


Macro F1@X: 0.9595 ± 0.12
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.12 seconds.


Calculating MRR@10: 100%|██████████| 976/976 [00:29<00:00, 33.12it/s]


MRR@10 mean: 0.9658 ± 0.15


Calculating R-precision: 100%|██████████| 976/976 [00:29<00:00, 33.01it/s]


R precision mean: 0.6567 ± 0.3


Calculating Macro-F1@X: 100%|██████████| 976/976 [00:29<00:00, 33.04it/s]


Macro F1@X: 0.5671 ± 0.33
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 490/490 [00:06<00:00, 80.56it/s] 


MRR@10 mean: 0.9834 ± 0.11


Calculating R-precision: 100%|██████████| 490/490 [00:06<00:00, 78.32it/s] 


R precision mean: 0.7861 ± 0.25


Calculating Macro-F1@X: 100%|██████████| 490/490 [00:06<00:00, 77.47it/s] 


Macro F1@X: 0.756 ± 0.26
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 567/567 [00:07<00:00, 80.02it/s] 


MRR@10 mean: 0.9735 ± 0.13


Calculating R-precision: 100%|██████████| 567/567 [00:07<00:00, 77.09it/s] 


R precision mean: 0.7178 ± 0.3


Calculating Macro-F1@X: 100%|██████████| 567/567 [00:07<00:00, 77.13it/s] 


Macro F1@X: 0.7333 ± 0.3


In [129]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels("CLIP", city, mode="image")
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")

    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.15 seconds.


Calculating MRR@10: 100%|██████████| 1231/1231 [00:26<00:00, 46.19it/s]


MRR@10 mean: 0.232 ± 0.3


Calculating R-precision: 100%|██████████| 1231/1231 [00:26<00:00, 46.61it/s]


R precision mean: 0.0537 ± 0.09


Calculating Macro-F1@X: 100%|██████████| 1231/1231 [00:26<00:00, 45.83it/s]


Macro F1@X: 0.0689 ± 0.11
Error processing south: I/O operation on closed file.

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.05 seconds.


Calculating MRR@10: 100%|██████████| 791/791 [00:09<00:00, 82.19it/s] 


MRR@10 mean: 0.2312 ± 0.32


Calculating R-precision: 100%|██████████| 791/791 [00:09<00:00, 82.89it/s] 


R precision mean: 0.0752 ± 0.13


Calculating Macro-F1@X: 100%|██████████| 791/791 [00:09<00:00, 82.56it/s] 


Macro F1@X: 0.0892 ± 0.14
Error processing midwest: I/O operation on closed file.

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 399/399 [00:01<00:00, 201.84it/s]


MRR@10 mean: 0.2969 ± 0.39


Calculating R-precision: 100%|██████████| 399/399 [00:02<00:00, 197.36it/s]


R precision mean: 0.1275 ± 0.17


Calculating Macro-F1@X: 100%|██████████| 399/399 [00:02<00:00, 195.50it/s]


Macro F1@X: 0.1415 ± 0.19
Error processing west: I/O operation on closed file.

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 475/475 [00:03<00:00, 130.83it/s]


MRR@10 mean: 0.2945 ± 0.37


Calculating R-precision: 100%|██████████| 475/475 [00:03<00:00, 124.41it/s]


R precision mean: 0.1143 ± 0.16


Calculating Macro-F1@X: 100%|██████████| 475/475 [00:03<00:00, 124.38it/s]

Macro F1@X: 0.1072 ± 0.15
Error processing northeast: I/O operation on closed file.

Average score: 0.0 ± 0.0






In [130]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels("CLIPITM", city, mode="image")
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings = torch.mean(text_train_embeddings, dim=1)
    text_test_embeddings = torch.mean(text_test_embeddings, dim=1)
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.08 seconds.


Calculating MRR@10: 100%|██████████| 1231/1231 [00:25<00:00, 47.80it/s]


MRR@10 mean: 0.0069 ± 0.06


Calculating R-precision: 100%|██████████| 1231/1231 [00:25<00:00, 47.51it/s]


R precision mean: 0.0038 ± 0.02


Calculating Macro-F1@X: 100%|██████████| 1231/1231 [00:26<00:00, 46.93it/s]


Macro F1@X: 0.0049 ± 0.02
Error processing south: I/O operation on closed file.

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.04 seconds.


Calculating MRR@10: 100%|██████████| 791/791 [00:09<00:00, 83.06it/s] 


MRR@10 mean: 0.0076 ± 0.07


Calculating R-precision: 100%|██████████| 791/791 [00:09<00:00, 82.00it/s] 


R precision mean: 0.004 ± 0.02


Calculating Macro-F1@X: 100%|██████████| 791/791 [00:09<00:00, 82.21it/s] 


Macro F1@X: 0.0072 ± 0.02
Error processing midwest: I/O operation on closed file.

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 399/399 [00:01<00:00, 200.05it/s]


MRR@10 mean: 0.0168 ± 0.09


Calculating R-precision: 100%|██████████| 399/399 [00:02<00:00, 198.11it/s]


R precision mean: 0.0078 ± 0.03


Calculating Macro-F1@X: 100%|██████████| 399/399 [00:02<00:00, 192.32it/s]


Macro F1@X: 0.0125 ± 0.03
Error processing west: I/O operation on closed file.

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 475/475 [00:03<00:00, 128.05it/s]


MRR@10 mean: 0.015 ± 0.09


Calculating R-precision: 100%|██████████| 475/475 [00:03<00:00, 123.35it/s]


R precision mean: 0.0064 ± 0.03


Calculating Macro-F1@X: 100%|██████████| 475/475 [00:03<00:00, 126.65it/s]

Macro F1@X: 0.0094 ± 0.03
Error processing northeast: I/O operation on closed file.

Average score: 0.0 ± 0.0






In [131]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels("BLIP2", city, mode="image")
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings = torch.mean(text_train_embeddings, dim=1)
    text_test_embeddings = torch.mean(text_test_embeddings, dim=1)
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.08 seconds.


Calculating MRR@10: 100%|██████████| 1231/1231 [00:26<00:00, 46.91it/s]


MRR@10 mean: 0.151 ± 0.27


Calculating R-precision: 100%|██████████| 1231/1231 [00:26<00:00, 47.31it/s]


R precision mean: 0.0557 ± 0.14


Calculating Macro-F1@X: 100%|██████████| 1231/1231 [00:25<00:00, 47.49it/s]


Macro F1@X: 0.0856 ± 0.2
Error processing south: I/O operation on closed file.

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.04 seconds.


Calculating MRR@10: 100%|██████████| 791/791 [00:09<00:00, 84.19it/s] 


MRR@10 mean: 0.1494 ± 0.29


Calculating R-precision: 100%|██████████| 791/791 [00:09<00:00, 82.86it/s] 


R precision mean: 0.0558 ± 0.14


Calculating Macro-F1@X: 100%|██████████| 791/791 [00:09<00:00, 82.97it/s] 


Macro F1@X: 0.0762 ± 0.16
Error processing midwest: I/O operation on closed file.

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 399/399 [00:01<00:00, 201.64it/s]


MRR@10 mean: 0.1867 ± 0.34


Calculating R-precision: 100%|██████████| 399/399 [00:02<00:00, 192.62it/s]


R precision mean: 0.0802 ± 0.16


Calculating Macro-F1@X: 100%|██████████| 399/399 [00:02<00:00, 188.31it/s]


Macro F1@X: 0.1047 ± 0.21
Error processing west: I/O operation on closed file.

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 475/475 [00:03<00:00, 128.66it/s]


MRR@10 mean: 0.1894 ± 0.34


Calculating R-precision: 100%|██████████| 475/475 [00:03<00:00, 127.59it/s]


R precision mean: 0.1056 ± 0.2


Calculating Macro-F1@X: 100%|██████████| 475/475 [00:03<00:00, 124.99it/s]


Macro F1@X: 0.1063 ± 0.2
Error processing northeast: I/O operation on closed file.

Average score: 0.0 ± 0.0



# Image-Text Retrieval

In [101]:
mean, std = 0, 0


for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels_ids("CLIP", city, mode="text")
    image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings_for_clipstylemodels_ids("CLIP", city, mode="image")

    test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    train_labels = torch.cat((image_train_labels, image_test_labels), 0)

    initialize_globals(train_embeddings, train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.09 seconds.


Calculating MRR@10: 100%|██████████| 13677/13677 [04:11<00:00, 54.41it/s]


MRR@10 mean: 0.001 ± 0.03


Calculating R-precision: 100%|██████████| 13677/13677 [02:23<00:00, 95.54it/s] 


R precision mean: 0.0017 ± 0.02


Calculating Macro-F1@X: 100%|██████████| 13677/13677 [02:22<00:00, 95.70it/s] 


Macro F1@X: 0.0017 ± 0.02
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.05 seconds.


Calculating MRR@10: 100%|██████████| 8285/8285 [01:22<00:00, 100.83it/s]


MRR@10 mean: 0.0027 ± 0.05


Calculating R-precision: 100%|██████████| 8285/8285 [00:42<00:00, 194.83it/s]


R precision mean: 0.0049 ± 0.04


Calculating Macro-F1@X: 100%|██████████| 8285/8285 [00:42<00:00, 195.25it/s]


Macro F1@X: 0.0049 ± 0.04
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 3158/3158 [00:10<00:00, 291.36it/s]


MRR@10 mean: 0.0063 ± 0.08


Calculating R-precision: 100%|██████████| 3158/3158 [00:05<00:00, 607.20it/s] 


R precision mean: 0.0103 ± 0.06


Calculating Macro-F1@X: 100%|██████████| 3158/3158 [00:05<00:00, 609.87it/s] 


Macro F1@X: 0.0103 ± 0.06
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 2558/2558 [00:13<00:00, 196.37it/s]


MRR@10 mean: 0.0098 ± 0.1


Calculating R-precision: 100%|██████████| 2558/2558 [00:08<00:00, 287.17it/s]


R precision mean: 0.0104 ± 0.06


Calculating Macro-F1@X: 100%|██████████| 2558/2558 [00:08<00:00, 289.59it/s]


Macro F1@X: 0.0104 ± 0.06


In [102]:
mean, std = 0, 0


for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels_ids("CLIPITM", city, mode="text")
    image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings_for_clipstylemodels_ids("CLIPITM", city, mode="image")    
    
    test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    train_labels = torch.cat((image_train_labels, image_test_labels), 0)
    
    # Performing mean pooling
    train_embeddings = torch.mean(train_embeddings, dim=1)
    
    initialize_globals(train_embeddings, train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.11 seconds.


Calculating MRR@10: 100%|██████████| 13677/13677 [04:11<00:00, 54.42it/s]


MRR@10 mean: 0.0001 ± 0.01


Calculating R-precision: 100%|██████████| 13677/13677 [02:23<00:00, 95.51it/s] 


R precision mean: 0.0001 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 13677/13677 [02:22<00:00, 95.70it/s] 


Macro F1@X: 0.0001 ± 0.01
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.05 seconds.


Calculating MRR@10: 100%|██████████| 8285/8285 [01:21<00:00, 101.05it/s]


MRR@10 mean: 0.0 ± 0.0


Calculating R-precision: 100%|██████████| 8285/8285 [00:42<00:00, 194.65it/s]


R precision mean: 0.0003 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 8285/8285 [00:42<00:00, 195.08it/s]


Macro F1@X: 0.0003 ± 0.01
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 3158/3158 [00:10<00:00, 292.35it/s]


MRR@10 mean: 0.0003 ± 0.02


Calculating R-precision: 100%|██████████| 3158/3158 [00:05<00:00, 606.52it/s] 


R precision mean: 0.001 ± 0.02


Calculating Macro-F1@X: 100%|██████████| 3158/3158 [00:05<00:00, 610.05it/s] 


Macro F1@X: 0.001 ± 0.02
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 2558/2558 [00:12<00:00, 197.49it/s]


MRR@10 mean: 0.0008 ± 0.03


Calculating R-precision: 100%|██████████| 2558/2558 [00:08<00:00, 288.99it/s]


R precision mean: 0.0005 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 2558/2558 [00:08<00:00, 289.75it/s]


Macro F1@X: 0.0005 ± 0.01


In [103]:
mean, std = 0, 0


for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels_ids("BLIP2", city, mode="text")
    image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings_for_clipstylemodels_ids("BLIP2", city, mode="image")

    test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    test_labels = torch.cat((text_train_labels, text_test_labels), 0)

    train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    train_labels = torch.cat((image_train_labels, image_test_labels), 0)
    
    # Performing mean pooling
    train_embeddings = torch.mean(train_embeddings, dim=1)

    initialize_globals(train_embeddings, train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.10 seconds.


Calculating MRR@10: 100%|██████████| 13677/13677 [04:11<00:00, 54.42it/s]


MRR@10 mean: 0.0001 ± 0.01


Calculating R-precision: 100%|██████████| 13677/13677 [02:23<00:00, 95.64it/s] 


R precision mean: 0.0001 ± 0.0


Calculating Macro-F1@X: 100%|██████████| 13677/13677 [02:22<00:00, 95.74it/s] 


Macro F1@X: 0.0001 ± 0.0
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.05 seconds.


Calculating MRR@10: 100%|██████████| 8285/8285 [01:21<00:00, 101.06it/s]


MRR@10 mean: 0.0001 ± 0.01


Calculating R-precision: 100%|██████████| 8285/8285 [00:42<00:00, 194.71it/s]


R precision mean: 0.0002 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 8285/8285 [00:42<00:00, 195.04it/s]


Macro F1@X: 0.0002 ± 0.01
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 3158/3158 [00:10<00:00, 292.22it/s]


MRR@10 mean: 0.0003 ± 0.02


Calculating R-precision: 100%|██████████| 3158/3158 [00:05<00:00, 607.33it/s] 


R precision mean: 0.0013 ± 0.03


Calculating Macro-F1@X: 100%|██████████| 3158/3158 [00:05<00:00, 610.68it/s] 


Macro F1@X: 0.0013 ± 0.03
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 2558/2558 [00:12<00:00, 197.42it/s]


MRR@10 mean: 0.0004 ± 0.02


Calculating R-precision: 100%|██████████| 2558/2558 [00:08<00:00, 289.18it/s]


R precision mean: 0.0006 ± 0.01


Calculating Macro-F1@X: 100%|██████████| 2558/2558 [00:08<00:00, 290.06it/s]

Macro F1@X: 0.0006 ± 0.01





In [59]:
text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels_ids("CLIP", "south", mode="text")
image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings_for_clipstylemodels_ids("CLIP", "south", mode="image")

test_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
test_labels = torch.cat((text_train_labels, text_test_labels), 0)

train_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
train_labels = torch.cat((image_train_labels, image_test_labels), 0)

In [67]:
np.unique(test_embeddings, axis=1).shape

(65544, 768)

In [63]:
train_embeddings.shape

torch.Size([65544, 768])

In [19]:
mean, std = 0, 0


for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels("CLIPITM", city, mode="text")
    image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings_for_clipstylemodels("CLIPITM", city, mode="image")

    train_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    train_labels = torch.cat((text_train_labels, text_test_labels), 0)

    test_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    test_labels = torch.cat((image_train_labels, image_test_labels), 0)

    initialize_globals(train_embeddings, train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.25 seconds.


Calculating MRR@10: 100%|██████████| 1434/1434 [05:01<00:00,  4.76it/s]


MRR@10 mean: 0.0007 ± 0.03


Calculating R-precision: 100%|██████████| 1434/1434 [05:07<00:00,  4.67it/s]


R precision mean: 0.0008 ± 0.02


Calculating Macro-F1@X: 100%|██████████| 1434/1434 [05:03<00:00,  4.72it/s]


Macro F1@X: 0.0011 ± 0.02
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.16 seconds.


Calculating MRR@10: 100%|██████████| 992/992 [01:39<00:00,  9.95it/s]


MRR@10 mean: 0.001 ± 0.03


Calculating R-precision: 100%|██████████| 992/992 [01:41<00:00,  9.80it/s]


R precision mean: 0.0012 ± 0.02


Calculating Macro-F1@X: 100%|██████████| 992/992 [01:42<00:00,  9.67it/s]


Macro F1@X: 0.0026 ± 0.02
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.05 seconds.


Calculating MRR@10: 100%|██████████| 499/499 [00:16<00:00, 30.01it/s]


MRR@10 mean: 0.002 ± 0.04


Calculating R-precision: 100%|██████████| 499/499 [00:17<00:00, 28.88it/s]


R precision mean: 0.002 ± 0.02


Calculating Macro-F1@X: 100%|██████████| 499/499 [00:17<00:00, 28.28it/s]


Macro F1@X: 0.0034 ± 0.03
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.07 seconds.


Calculating MRR@10: 100%|██████████| 574/574 [00:19<00:00, 29.23it/s]


MRR@10 mean: 0.002 ± 0.03


Calculating R-precision: 100%|██████████| 574/574 [00:19<00:00, 28.90it/s]


R precision mean: 0.002 ± 0.03


Calculating Macro-F1@X: 100%|██████████| 574/574 [00:19<00:00, 28.74it/s]


Macro F1@X: 0.0035 ± 0.03


In [20]:
mean, std = 0, 0


for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels("BLIP2", city, mode="text")
    image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings_for_clipstylemodels("BLIP2", city, mode="image")

    train_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    train_labels = torch.cat((text_train_labels, text_test_labels), 0)

    test_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    test_labels = torch.cat((image_train_labels, image_test_labels), 0)

    initialize_globals(train_embeddings, train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.22 seconds.


Calculating MRR@10: 100%|██████████| 1434/1434 [05:02<00:00,  4.74it/s]


MRR@10 mean: 0.0007 ± 0.03


Calculating R-precision: 100%|██████████| 1434/1434 [05:12<00:00,  4.59it/s]


R precision mean: 0.0008 ± 0.02


Calculating Macro-F1@X: 100%|██████████| 1434/1434 [05:25<00:00,  4.41it/s]


Macro F1@X: 0.0033 ± 0.03
--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.17 seconds.


Calculating MRR@10: 100%|██████████| 992/992 [01:40<00:00,  9.91it/s]


MRR@10 mean: 0.001 ± 0.03


Calculating R-precision: 100%|██████████| 992/992 [01:41<00:00,  9.75it/s]


R precision mean: 0.001 ± 0.02


Calculating Macro-F1@X: 100%|██████████| 992/992 [01:42<00:00,  9.70it/s]


Macro F1@X: 0.0056 ± 0.03
--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.05 seconds.


Calculating MRR@10: 100%|██████████| 499/499 [00:16<00:00, 30.45it/s]


MRR@10 mean: 0.002 ± 0.04


Calculating R-precision: 100%|██████████| 499/499 [00:17<00:00, 29.15it/s]


R precision mean: 0.0024 ± 0.04


Calculating Macro-F1@X: 100%|██████████| 499/499 [00:18<00:00, 27.70it/s]


Macro F1@X: 0.012 ± 0.09
--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.06 seconds.


Calculating MRR@10: 100%|██████████| 574/574 [00:19<00:00, 29.32it/s]


MRR@10 mean: 0.0035 ± 0.06


Calculating R-precision: 100%|██████████| 574/574 [00:19<00:00, 29.11it/s]


R precision mean: 0.002 ± 0.03


Calculating Macro-F1@X: 100%|██████████| 574/574 [00:19<00:00, 28.90it/s]


Macro F1@X: 0.0063 ± 0.06


In [106]:
mean, std = 0, 0


for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)

    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings("CE+SupCon", city, mode="text")
    image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon", city, mode="image")

    train_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
    train_labels = torch.cat((text_train_labels, text_test_labels), 0)

    test_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
    test_labels = torch.cat((image_train_labels, image_test_labels), 0)

    initialize_globals(train_embeddings, train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
        # print(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        file.write(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del train_embeddings, train_labels, test_embeddings, test_labels
    gc.collect()

--------------------------------------------------south--------------------------------------------------


FileNotFoundError: [Errno 2] No such file or directory: '/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CE+SupCon/south_labels_train.pt'

In [None]:
mean, std = 0, 0

# Open the file in write mode
with open("CE+SupCon-noAug.txt", "w") as file:
    for city in ["chicago", "dallas", "houston", "detroit", "atlanta", "sf", "ny"]:
        file.write("-" * 50 + "\n")
        
        text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings("CE+SupCon-noAug", city, mode="text")
        image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon-noAug", city, mode="image")
        
        train_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
        train_labels = torch.cat((text_train_labels, text_test_labels), 0)
        
        test_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
        test_labels = torch.cat((image_train_labels, image_test_labels), 0)
        
        initialize_globals(train_embeddings, train_labels)
    
        try:
            # mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
            # mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
            # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
            mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
            file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
            mean += mean_temp
            std += std_temp
        except Exception as e:
            file.write(f"Error processing {city}: {e}\n")
    
        # Clear cache and free memory
        del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
        del image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels
        del train_embeddings, train_labels, test_embeddings, test_labels
        gc.collect()
            
    file.write(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

In [None]:
mean, std = 0, 0

# Open the file in write mode
with open("CE+SupCon-bs:28-neg:20-gradsteps:1.txt", "w") as file:
    for city in ["chicago", "dallas", "houston", "detroit", "atlanta", "sf", "ny"]:
        file.write("-" * 50 + "\n")
        
        text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings("CE+SupCon-bs:28-neg:20-gradsteps:1", city, mode="text")
        image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon-bs:28-neg:20-gradsteps:1", city, mode="image")
        
        train_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
        train_labels = torch.cat((text_train_labels, text_test_labels), 0)
        
        test_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
        test_labels = torch.cat((image_train_labels, image_test_labels), 0)
        
        initialize_globals(train_embeddings, train_labels)
    
        try:
            # mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
            # mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
            # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
            mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
            file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
            mean += mean_temp
            std += std_temp
        except Exception as e:
            file.write(f"Error processing {city}: {e}\n")
    
        # Clear cache and free memory
        del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
        del image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels
        del train_embeddings, train_labels, test_embeddings, test_labels
        gc.collect()
            
    file.write(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

In [None]:
mean, std = 0, 0

# Open the file in write mode
with open("CE+SupCon:2.txt", "w") as file:
    for city in ["chicago", "dallas", "houston", "detroit", "atlanta", "sf", "ny"]:
        file.write("-" * 50 + "\n")
        
        text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings("CE+SupCon:2", city, mode="text")
        image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon:2", city, mode="image")
        
        train_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
        train_labels = torch.cat((text_train_labels, text_test_labels), 0)
        
        test_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
        test_labels = torch.cat((image_train_labels, image_test_labels), 0)
        
        initialize_globals(train_embeddings, train_labels)
    
        try:
            # mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
            # mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
            # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
            mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
            file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
            mean += mean_temp
            std += std_temp
        except Exception as e:
            file.write(f"Error processing {city}: {e}\n")
    
        # Clear cache and free memory
        del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
        del image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels
        del train_embeddings, train_labels, test_embeddings, test_labels
        gc.collect()
            
    file.write(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

In [None]:
mean, std = 0, 0

# Open the file in write mode
with open("CE-concat.txt", "w") as file:
    for city in ["chicago", "dallas", "houston", "detroit", "atlanta", "sf", "ny"]:
        file.write("-" * 50 + "\n")
        
        text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings("CE-concat", city, mode="text")
        image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE-concat", city, mode="image")
        
        train_embeddings = torch.cat((text_train_embeddings, text_test_embeddings), 0)
        train_labels = torch.cat((text_train_labels, text_test_labels), 0)
        
        test_embeddings = torch.cat((image_train_embeddings, image_test_embeddings), 0)
        test_labels = torch.cat((image_train_labels, image_test_labels), 0)
        
        initialize_globals(train_embeddings, train_labels)
    
        try:
            # mean_temp, std_temp = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
            # mean_temp, std_temp = generate_rprecision_results(test_embeddings, test_labels)
            # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
            mean_temp, std_temp  = generate_macro_f1_at_x_results(test_embeddings, test_labels)
            file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
            mean += mean_temp
            std += std_temp
        except Exception as e:
            file.write(f"Error processing {city}: {e}\n")
    
        # Clear cache and free memory
        del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
        del image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels
        del train_embeddings, train_labels, test_embeddings, test_labels
        gc.collect()
            
    file.write(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

In [70]:
mean, std = 0, 0

for city in ["south", "midwest", "west", "northeast"]:
    print("-" * 50 + city + "-" * 50)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = load_embeddings_for_clipstylemodels("CLIP", city, mode="multimodal")
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings_for_clipstylemodels("CLIP", city, mode="image")
    
    # image_train_embeddings, image_train_labels, image_test_embeddings, image_test_labels = load_embeddings("CE+SupCon+ITM", city, mode="image")
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = remove_duplicate_embeddings(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels = find_members(text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels)
    
    initialize_globals(text_train_embeddings, text_train_labels)

    try:
        mean_temp, std_temp = generate_mrr_at_1_results(text_test_embeddings, text_test_labels, 10)
        mean_temp, std_temp = generate_rprecision_results(text_test_embeddings, text_test_labels)
        # _ = generate_macro_rprecision_results(test_embeddings, test_labels)
        mean_temp, std_temp  = generate_macro_f1_at_x_results(text_test_embeddings, text_test_labels)
        file.write(f"{city}-mean:{round(mean_temp, 4)} ± {round(std_temp, 2)}\n")
        mean += mean_temp
        std += std_temp
    except Exception as e:
        print(f"Error processing {city}: {e}\n")

    # Clear cache and free memory
    del text_train_embeddings, text_train_labels, text_test_embeddings, text_test_labels
    gc.collect()
            
print(f"Average score: {round(mean/7, 4)} ± {round(std/7, 2)}\n")

--------------------------------------------------south--------------------------------------------------
Global variables initialized in 0.19 seconds.


Calculating MRR@10: 100%|██████████| 1338/1338 [01:20<00:00, 16.61it/s]


MRR@10 mean: 0.6574 ± 0.35


Calculating R-precision: 100%|██████████| 1338/1338 [01:21<00:00, 16.41it/s]


R precision mean: 0.1797 ± 0.16


Calculating Macro-F1@X: 100%|██████████| 1338/1338 [01:22<00:00, 16.23it/s]


Macro F1@X: 0.2519 ± 0.23
Error processing south: name 'file' is not defined

--------------------------------------------------midwest--------------------------------------------------
Global variables initialized in 0.10 seconds.


Calculating MRR@10: 100%|██████████| 914/914 [00:30<00:00, 30.42it/s]


MRR@10 mean: 0.6822 ± 0.36


Calculating R-precision: 100%|██████████| 914/914 [00:30<00:00, 30.44it/s]


R precision mean: 0.2373 ± 0.2


Calculating Macro-F1@X: 100%|██████████| 914/914 [00:30<00:00, 30.36it/s]


Macro F1@X: 0.3254 ± 0.26
Error processing midwest: name 'file' is not defined

--------------------------------------------------west--------------------------------------------------
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 452/452 [00:06<00:00, 73.08it/s]


MRR@10 mean: 0.7396 ± 0.36


Calculating R-precision: 100%|██████████| 452/452 [00:06<00:00, 72.72it/s]


R precision mean: 0.333 ± 0.23


Calculating Macro-F1@X: 100%|██████████| 452/452 [00:06<00:00, 71.67it/s]


Macro F1@X: 0.4687 ± 0.27
Error processing west: name 'file' is not defined

--------------------------------------------------northeast--------------------------------------------------
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 514/514 [00:07<00:00, 72.21it/s]


MRR@10 mean: 0.675 ± 0.38


Calculating R-precision: 100%|██████████| 514/514 [00:07<00:00, 72.45it/s]


R precision mean: 0.3076 ± 0.24


Calculating Macro-F1@X: 100%|██████████| 514/514 [00:07<00:00, 71.52it/s]


Macro F1@X: 0.3493 ± 0.26
Error processing northeast: name 'file' is not defined

Average score: 0.0 ± 0.0



# Error Analysis

In [25]:
def load_embedddings_for_e2e(model_name, city, mode="text"):
    assert mode in ["text", "image", "multimodal"]
    
    # Define directory mapping for models
    model_dirs = {
        "declutr-vit": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/trained_declutr_vit/CE+SupCon",
        "CE-SupCon-mean": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/multimodal_baselines/E2E/CE-SupCon-mean-0.5",
        "declutr-vit-face": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/error_analysis/multimodal_baseline/trained_declutr-vit/face",
        "declutr-vit-noface": "/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/error_analysis/multimodal_baseline/trained_declutr-vit/noface",
    }
    
    # Check if model_name is valid
    if model_name not in model_dirs:
        raise ValueError(f"Model '{model_name}' not implemented")
        
    emb_dir = model_dirs[model_name]
    
    if model_name == "declutr-vit":
        filenames = {
            "train_emb": f"{city}_{mode}data_train.pt",
            "train_labels": f"{city}_labels_{mode}_train.pt",
            "test_emb": f"{city}_{mode}data_test.pt",
            "test_labels": f"{city}_labels_{mode}_test.pt",
        }
    else:
        filenames = {
            "train_emb": f"{city}_{mode}data_train.pt",
            "train_labels": f"{city}_labels_train.pt",
            "test_emb": f"{city}_{mode}data_test.pt",
            "test_labels": f"{city}_labels_test.pt",
        }
    
    # Load embeddings and labels
    train_emb = torch.load(os.path.join(emb_dir, filenames["train_emb"]), map_location=torch.device('cpu'))
    train_labels = torch.load(os.path.join(emb_dir, filenames["train_labels"]), map_location=torch.device('cpu'))
    test_emb = torch.load(os.path.join(emb_dir, filenames["test_emb"]), map_location=torch.device('cpu'))
    test_labels = torch.load(os.path.join(emb_dir, filenames["test_labels"]), map_location=torch.device('cpu'))
    
    return train_emb, train_labels, test_emb, test_labels

In [26]:
def generate_rprecision_results(test_embeddings, test_labels):
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels_tensor.numpy()
    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    r_precision_score = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating R-precision"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = index.search(test_vendor_embeddings_np, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels_tensor == vendor_id)[0] for _ in range(len(test_adsidx))]

        r_precision_score[vendor_id] = round(np.mean(recall_at_k(true_label_list, predicted_label_list, k)), 4)
    
    return r_precision_score

def generate_mrr_at_1_results(test_embeddings, test_labels, k):
    unique_labels = torch.unique(test_labels)
    mrr_score_dict = {}

    for label in tqdm(unique_labels, total=len(unique_labels), desc=f"Calculating MRR@{k}"):
        label_id = int(label)
        test_idx = (test_labels == label_id).nonzero(as_tuple=True)[0]

        if len(test_idx) == 0:
            continue

        test_embeddings_np = test_embeddings[test_idx].numpy()

        D, I = index.search(test_embeddings_np, k)

        predicted_label_list = [train_labels_tensor[I[index]] for index in range(len(test_idx))]
        true_label_list = [train_labels_tensor[torch.where(train_labels_tensor == label_id)[0]] for _ in range(len(test_idx))]

        mrr_score = mrr_at_k(true_label_list, predicted_label_list, k_=k)
        mrr_score_dict[label_id] = round(mrr_score, 4)

    return mrr_score_dict


def generate_macro_f1_at_x_results(test_embeddings, test_labels):
    unique_vendors = torch.unique(test_labels)
    train_labels_np = train_labels_tensor.numpy()
    vendor_dict = {int(vendor_id): np.sum(train_labels_np == int(vendor_id)) for vendor_id in unique_vendors}
    f1_score_dict = {}

    for vendor_id in tqdm(unique_vendors, total=len(unique_vendors), desc="Calculating Macro-F1@X"):
        vendor_id = int(vendor_id)
        test_adsidx = (test_labels == vendor_id).nonzero(as_tuple=True)[0]

        if len(test_adsidx) == 0 or vendor_dict[vendor_id] == 0:
            continue

        test_vendor_embeddings = test_embeddings[test_adsidx]
        if test_vendor_embeddings.ndim == 1:
            test_vendor_embeddings = test_vendor_embeddings.unsqueeze(0)
        test_vendor_embeddings_np = test_vendor_embeddings.numpy()

        k = vendor_dict[vendor_id]
        if k > train_embeddings_np.shape[0]:
            k = train_embeddings_np.shape[0]

        _, I = index.search(test_vendor_embeddings_np, int(k))

        predicted_label_list = [torch.tensor(I[index]) for index in range(len(test_adsidx))]
        true_label_list = [torch.where(train_labels_tensor == vendor_id)[0] for _ in range(len(test_adsidx))]

        f1_score = np.mean(f1_at_k(true_label_list, predicted_label_list, k))
        f1_score_dict[vendor_id] = round(f1_score, 4)

    return f1_score_dict

In [27]:
# Function to convert data to JSON-serializable format
def convert_to_json_serializable(data):
    """
    Recursively convert NumPy and PyTorch objects in dictionaries to Python-native types.
    """
    if isinstance(data, dict):
        return {k: convert_to_json_serializable(v) for k, v in data.items()}
    elif isinstance(data, (np.ndarray, np.generic)):
        return data.tolist()
    elif isinstance(data, torch.Tensor):
        return data.detach().cpu().numpy().tolist()
    elif isinstance(data, set):
        return list(data)
    return data

In [28]:
import json

def process_and_save_metrics(model_name, city, mode_):
    print("-" * 50 + f" Model: {model_name}, City: {city}, Mode: {mode_} " + "-" * 50)

    try:
        # Load embeddings
        train_embeddings, train_labels, test_embeddings, test_labels = load_embedddings_for_e2e(model_name, city, mode=mode_)
    except Exception as e:
        print(f"Error loading embeddings for {city}, {mode_}: {e}")
        return

    # Preprocess embeddings
    train_embeddings, train_labels, test_embeddings, test_labels = remove_duplicate_embeddings(train_embeddings, train_labels, test_embeddings, test_labels)
    train_embeddings, train_labels, test_embeddings, test_labels = find_members(train_embeddings, train_labels, test_embeddings, test_labels)

    # Initialize FAISS globals
    initialize_globals(train_embeddings, train_labels)

    try:
        # Generate metrics
        mrr_dict = generate_mrr_at_1_results(test_embeddings, test_labels, 10)
        rprecision_dict = generate_rprecision_results(test_embeddings, test_labels)
        macro_f1_dict = generate_macro_f1_at_x_results(test_embeddings, test_labels)

        # Print the size of the generated dictionaries
        print(f"Generated MRR: {len(mrr_dict)} keys")
        print(f"Generated R-Precision: {len(rprecision_dict)} keys")
        print(f"Generated Macro-F1: {len(macro_f1_dict)} keys")
    except Exception as e:
        print(f"Error generating metrics for {city}, {mode_}: {e}")
        return

    # Check if dictionaries are valid before saving
    if not mrr_dict:
        print(f"Skipping saving MRR for {model_name}_{city}_{mode_}: Empty or None.")
    else:
        mrr_dict = convert_to_json_serializable(mrr_dict)
        with open(f'../error_analysis/{model_name}_{city}_{mode_}_mrr.json', 'w') as json_file:
            json.dump(mrr_dict, json_file, indent=4)

    if not rprecision_dict:
        print(f"Skipping saving R-Precision for {model_name}_{city}_{mode_}: Empty or None.")
    else:
        rprecision_dict = convert_to_json_serializable(rprecision_dict)
        with open(f'../error_analysis/{model_name}_{city}_{mode_}_rprecision.json', 'w') as json_file:
            json.dump(rprecision_dict, json_file, indent=4)

    if not macro_f1_dict:
        print(f"Skipping saving Macro-F1 for {model_name}_{city}_{mode_}: Empty or None.")
    else:
        macro_f1_dict = convert_to_json_serializable(macro_f1_dict)
        with open(f'../error_analysis/{model_name}_{city}_{mode_}_macro.json', 'w') as json_file:
            json.dump(macro_f1_dict, json_file, indent=4)

    # Free memory
    del mrr_dict, rprecision_dict, macro_f1_dict
    gc.collect()

In [29]:
for city in ["south", "midwest", "west", "northeast"]:
    for mode_ in ["text", "image", "multimodal"]:
        process_and_save_metrics("CE-SupCon-mean", city, mode_)

-------------------------------------------------- Model: CE-SupCon-mean, City: south, Mode: text --------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 1098/1098 [00:10<00:00, 106.92it/s]
Calculating R-precision: 100%|██████████| 1098/1098 [00:10<00:00, 108.09it/s]
Calculating Macro-F1@X: 100%|██████████| 1098/1098 [00:10<00:00, 106.59it/s]


Generated MRR: 1098 keys
Generated R-Precision: 1098 keys
Generated Macro-F1: 1098 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: south, Mode: image --------------------------------------------------
Global variables initialized in 0.06 seconds.


Calculating MRR@10: 100%|██████████| 731/731 [00:16<00:00, 44.03it/s]
Calculating R-precision: 100%|██████████| 731/731 [00:16<00:00, 43.08it/s]
Calculating Macro-F1@X: 100%|██████████| 731/731 [00:16<00:00, 43.32it/s]


Generated MRR: 731 keys
Generated R-Precision: 731 keys
Generated Macro-F1: 731 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: south, Mode: multimodal --------------------------------------------------
Global variables initialized in 0.20 seconds.


Calculating MRR@10: 100%|██████████| 1098/1098 [01:08<00:00, 15.95it/s]
Calculating R-precision: 100%|██████████| 1098/1098 [01:08<00:00, 15.94it/s]
Calculating Macro-F1@X: 100%|██████████| 1098/1098 [01:09<00:00, 15.77it/s]


Generated MRR: 1098 keys
Generated R-Precision: 1098 keys
Generated Macro-F1: 1098 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: midwest, Mode: text --------------------------------------------------
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 672/672 [00:03<00:00, 168.46it/s]
Calculating R-precision: 100%|██████████| 672/672 [00:04<00:00, 164.66it/s]
Calculating Macro-F1@X: 100%|██████████| 672/672 [00:04<00:00, 163.37it/s]


Generated MRR: 672 keys
Generated R-Precision: 672 keys
Generated Macro-F1: 672 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: midwest, Mode: image --------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 399/399 [00:05<00:00, 72.90it/s]
Calculating R-precision: 100%|██████████| 399/399 [00:05<00:00, 70.47it/s]
Calculating Macro-F1@X: 100%|██████████| 399/399 [00:05<00:00, 72.16it/s]


Generated MRR: 399 keys
Generated R-Precision: 399 keys
Generated Macro-F1: 399 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: midwest, Mode: multimodal --------------------------------------------------
Global variables initialized in 0.09 seconds.


Calculating MRR@10: 100%|██████████| 672/672 [00:23<00:00, 28.12it/s]
Calculating R-precision: 100%|██████████| 672/672 [00:24<00:00, 27.90it/s]
Calculating Macro-F1@X: 100%|██████████| 672/672 [00:24<00:00, 27.59it/s]


Generated MRR: 672 keys
Generated R-Precision: 672 keys
Generated Macro-F1: 672 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: west, Mode: text --------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 299/299 [00:00<00:00, 328.23it/s]
Calculating R-precision: 100%|██████████| 299/299 [00:00<00:00, 354.41it/s]
Calculating Macro-F1@X: 100%|██████████| 299/299 [00:00<00:00, 365.21it/s]


Generated MRR: 299 keys
Generated R-Precision: 299 keys
Generated Macro-F1: 299 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: west, Mode: image --------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 165/165 [00:01<00:00, 146.23it/s]
Calculating R-precision: 100%|██████████| 165/165 [00:01<00:00, 153.95it/s]
Calculating Macro-F1@X: 100%|██████████| 165/165 [00:01<00:00, 148.87it/s]


Generated MRR: 165 keys
Generated R-Precision: 165 keys
Generated Macro-F1: 165 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: west, Mode: multimodal --------------------------------------------------
Global variables initialized in 0.02 seconds.


Calculating MRR@10: 100%|██████████| 299/299 [00:05<00:00, 58.35it/s]
Calculating R-precision: 100%|██████████| 299/299 [00:05<00:00, 56.77it/s]
Calculating Macro-F1@X: 100%|██████████| 299/299 [00:05<00:00, 57.22it/s]


Generated MRR: 299 keys
Generated R-Precision: 299 keys
Generated Macro-F1: 299 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: northeast, Mode: text --------------------------------------------------
Global variables initialized in 0.00 seconds.


Calculating MRR@10: 100%|██████████| 408/408 [00:00<00:00, 583.32it/s]
Calculating R-precision: 100%|██████████| 408/408 [00:00<00:00, 584.24it/s]
Calculating Macro-F1@X: 100%|██████████| 408/408 [00:00<00:00, 566.62it/s]


Generated MRR: 408 keys
Generated R-Precision: 408 keys
Generated Macro-F1: 408 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: northeast, Mode: image --------------------------------------------------
Global variables initialized in 0.01 seconds.


Calculating MRR@10: 100%|██████████| 261/261 [00:02<00:00, 103.41it/s]
Calculating R-precision: 100%|██████████| 261/261 [00:02<00:00, 99.63it/s] 
Calculating Macro-F1@X: 100%|██████████| 261/261 [00:02<00:00, 101.67it/s]


Generated MRR: 261 keys
Generated R-Precision: 261 keys
Generated Macro-F1: 261 keys
-------------------------------------------------- Model: CE-SupCon-mean, City: northeast, Mode: multimodal --------------------------------------------------
Global variables initialized in 0.03 seconds.


Calculating MRR@10: 100%|██████████| 408/408 [00:06<00:00, 64.75it/s]
Calculating R-precision: 100%|██████████| 408/408 [00:06<00:00, 63.09it/s]
Calculating Macro-F1@X: 100%|██████████| 408/408 [00:06<00:00, 61.07it/s]


Generated MRR: 408 keys
Generated R-Precision: 408 keys
Generated Macro-F1: 408 keys
