In [2]:
import torch
import scipy
from torchmetrics.retrieval import RetrievalNormalizedDCG, RetrievalMAP
from src.dataset import TestDataset, OnlineCoverSongDataset
from src.evaluation import RetrievalEvaluation
from src.baselines.blocking import Blocker
from rapidfuzz import fuzz


mAP = RetrievalMAP(empty_target_action="skip")


fuzzy_valset = TestDataset(
        "shs100k2_val",
        "/data/csi_datasets/",
        "/data/yt_metadata.parquet",
        tokenizer="roberta-base"
        )



  from .autonotebook import tqdm as notebook_tqdm


# Fuzzy Matching

In [2]:
# get audio preds
audio_preds = fuzzy_valset.get_csi_pred_matrix("coverhunter").to("cuda")


# get text preds
blocker = Blocker(blocking_func=fuzz.token_ratio, threshold=0.5)
left_df, right_df = fuzzy_valset.get_dfs_by_task("svShort")
text_preds = blocker.predict(left_df, right_df).to("cuda")
text_preds = text_preds.fill_diagonal_(-float('inf')) / 100
text_preds

# get ground truth
Y = fuzzy_valset.get_target_matrix().to(float).to("cuda")

m, n = Y.shape
indexes = torch.arange(m).view(-1, 1).expand(-1, n).to("cuda")


## Grid Search

In [6]:
import numpy as np

def predict(x1,x2, w1):
    return (w1 * x1 + (1 - w1) * x2)

for i in np.arange(0.1, 1, 0.1):
    preds = predict(text_preds, audio_preds, i)
    map_result = mAP(preds, Y, indexes)
    print(f"MAP: {map_result} for text weight of {i}")


RuntimeError: The size of tensor a (8619) must match the size of tensor b (8819) at non-singleton dimension 1

# S-BERT

In [8]:
sbert_valset = OnlineCoverSongDataset(
        "shs100k2_test",
        "/data/csi_datasets/",
        "/data/yt_metadata.parquet",
        "tvShort"
)  


# get audio preds
audio_preds = sbert_valset.get_csi_pred_matrix("coverhunter").to("cuda")


# get text preds
text_preds = torch.load("preds/sentence-transformers/shs100k2_test/preds.pt")
text_preds = text_preds.fill_diagonal_(-float('inf')) / 100
text_preds

# get ground truth
Y = sbert_valset.get_target_matrix().to(float).to("cuda")

m, n = Y.shape
indexes = torch.arange(m).view(-1, 1).expand(-1, n).to("cuda")



In [10]:

for i in np.arange(0.1, 1, 0.05):
    preds = predict(text_preds, audio_preds, i)
    map_result = mAP(preds, Y, indexes)
    print(f"MAP: {map_result} for text weight of {i}")
    

MAP: 0.9231217503547668 for text weight of 0.1
MAP: 0.9243493676185608 for text weight of 0.2
MAP: 0.9255906343460083 for text weight of 0.30000000000000004
MAP: 0.9273486733436584 for text weight of 0.4
MAP: 0.9295580387115479 for text weight of 0.5
MAP: 0.9328274130821228 for text weight of 0.6
MAP: 0.9377639293670654 for text weight of 0.7000000000000001
MAP: 0.9464327096939087 for text weight of 0.8
MAP: 0.9627877473831177 for text weight of 0.9
