In [1]:
import datatable as dt

# Variables that contains the file location
from files import *
from functions import *

In [2]:
# if we modify the file we need to reload it with this
import importlib
import functions  #import the module here, so that it can be reloaded.

importlib.reload(functions)

<module 'functions' from '/DATA/myprograms_Python/MMSR/project/functions.py'>

In [3]:
DATA_TYPE = np.float16

# Data
Load the Data

In [4]:
genres = dt.fread(file_genres_2).to_pandas()
genres.set_index('id', inplace=True)

In [5]:
# Our special child
blf_logfluc = dt.fread(file_blf_logfluc)
blf_logfluc[dt.float64] = dt.float32
new_cols = ['id']
new_cols.extend(list(blf_logfluc.names[2:]))
new_cols = tuple(new_cols)
del blf_logfluc[:, -1]
blf_logfluc.names = new_cols
blf_logfluc = blf_logfluc.to_pandas()
blf_logfluc.set_index('id', inplace=True)

In [6]:
files = [
    # Lyrics
    file_tfidf_2,
    file_word2vec_2,
    file_bert_2,
    # Audio
    # file_essentia, # zeros
    file_blf_correlation,
    file_blf_deltaspectral,
    file_blf_spectral,
    file_blf_spectralcontrast,
    file_blf_vardeltaspectral,
    # file_blf_logfluc, # zeros
    blf_logfluc,
    file_mfcc_bow,
    file_mfcc_stats,
    # Video
    file_incp,
    file_resnet,
    file_vgg19,
]

In [7]:
functions = [get_cosine_similarity]

In [8]:
common_index = blf_logfluc.index.values

In [9]:
import csv


def load_data(file):
    if type(file) == str:
        with open(file, "r") as f:
            column_names = next(csv.reader(f, delimiter="\t"))
        dtypes = {
            x: np.float32
            for x in column_names
            if x != "id"
        }
        file = pd.read_csv(file, dtype=dtypes, index_col=0, delimiter="\t")
    file = file.loc[common_index]
    assert np.mean(file.index.values == blf_logfluc.index.values) == 1.0
    return file

In [10]:
# Extract readable names
names = []
for file in files:
    if type(file) == str:
        names.append(file.replace("./../task2/id_", "").replace(".tsv", ""))
    else:
        names.append("blf_logfluc")

# Late Fusion

In [11]:
def compute_in_batches_topIds(results: np.array, idx_values: np.array, top: int = 100, batches: int = 1):
    splits_b = np.array_split(results, batches, axis=0)
    return np.concatenate([idx_values[np.argsort(b * -1, axis=1)][:, :top] for b in tqdm(splits_b)], axis=0)

# Find optimal feature similarity weights

In [12]:
subset = np.random.choice(len(blf_logfluc), 1024 * 4, replace=False)
subset.shape

(4096,)

In [13]:
subset_ids = blf_logfluc.index.values[subset]

similarities = []

# for every data
i = 0
for d in tqdm(files, desc="Processing"):
    file = load_data(d).to_numpy()[subset, :].astype(np.float32)
    for f in functions:
        result = np.nan_to_num(f(file, file), copy=False)
        similarities.append(result)
    i += 1

print(f"{len(similarities)} similarity matrices")

Processing: 100%|██████████| 14/14 [02:57<00:00, 12.71s/it]

14 similarity matrices





In [14]:
def evaluate(similarities, weights):
    if len(similarities) == 1:
        accumulated_similarity = similarities[0]
    else:
        accumulated_similarity = np.zeros((len(subset), len(subset)))
        for sim, weight in zip(similarities, weights):
            accumulated_similarity += (sim * weight)

    top_ids = pd.DataFrame(compute_in_batches_topIds(accumulated_similarity, subset_ids, 100, 10), subset_ids)

    return {
        "MAP@10": meanAveragePrecision(top_ids, 10, genres),
        "MRR@10": meanReciprocalRank(top_ids, 10, genres),
        "NDCG@10": ndcgMean(top_ids, 10, genres)[1],
    }

In [15]:
print("Baseline", evaluate(similarities, np.zeros((len(similarities),))))

100%|██████████| 10/10 [00:00<00:00, 38.44it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1670.96it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1833.02it/s]
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  nd

Baseline {'MAP@10': 0.5228031456203177, 'MRR@10': 0.6751056973896329, 'NDCG@10': nan}





In [16]:
print("Fair", evaluate(similarities, np.ones((len(similarities),))))

100%|██████████| 10/10 [00:00<00:00, 11.59it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1822.90it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1947.76it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1810.08it/s]

Fair {'MAP@10': 0.8581923691107302, 'MRR@10': 1.0, 'NDCG@10': 0.9060482474110523}





In [17]:
weights = np.zeros((len(similarities),))
for i, sim in enumerate(similarities):
    result = np.nan_to_num(np.asarray(list(evaluate([sim], [1]).values())), copy=False)
    weights[i] = np.mean(result, where=result > 0)
print(weights)

100%|██████████| 10/10 [00:00<00:00, 11.81it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1791.64it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1913.49it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1779.93it/s]
100%|██████████| 10/10 [00:00<00:00, 11.65it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1799.75it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1914.23it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1766.82it/s]
100%|██████████| 10/10 [00:00<00:00, 11.78it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1803.17it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1923.61it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1792.48it/s]
100%|██████████| 10/10 [00:00<00:00, 11.76it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1792.09it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1913.25it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1770.24it/s]
100%|██████████| 10/10 [00:00<00:00, 11.77it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1797.87it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1903.75it/s]
100%

[0.89834306 0.8990285  0.90240972 0.90317382 0.90478601 0.91075214
 0.9158896  0.90650437 0.91416988 0.90965728 0.90672442 0.90500545
 0.90552266 0.90272872]





In [18]:
print("Performance", evaluate(similarities, weights))

100%|██████████| 10/10 [00:00<00:00, 11.48it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1828.35it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1930.65it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1809.65it/s]

Performance {'MAP@10': 0.858436658363794, 'MRR@10': 1.0, 'NDCG@10': 0.9062485966343592}





In [19]:
norm_weights = (weights - weights.min()) / (weights.max() - weights.min())
print(norm_weights)
print("Normalized Performance", evaluate(similarities, norm_weights))

[0.         0.03906398 0.23176407 0.27531116 0.36719205 0.70720943
 1.         0.4651235  0.90199083 0.64481174 0.47766435 0.37969793
 0.40917449 0.24994444]


100%|██████████| 10/10 [00:00<00:00, 11.56it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1801.05it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1921.70it/s]
100%|██████████| 4096/4096 [00:02<00:00, 1796.48it/s]

Normalized Performance {'MAP@10': 0.8599203391254143, 'MRR@10': 1.0, 'NDCG@10': 0.9064573547900158}





In [20]:
del similarities

# Final Processing
Normalized Performance seems to be the best approach

In [21]:
final_ids = blf_logfluc.index.values
final_accumulated_similarity = np.zeros((len(final_ids), len(final_ids)), dtype=DATA_TYPE)

In [22]:
# for every data
batches = 100
i = 0
for d in tqdm(files, desc="Processing"):
    file = load_data(d).to_numpy()
    for f in functions:
        splits = np.array_split(file, batches, axis=0)
        y = 0
        for b in tqdm(splits):
            result = f(file, b).astype(np.float32)

            result = np.nan_to_num(result, copy=False)

            # normalize
            np.subtract(result, result.mean(), out=result)

            # normalize std and apply weight
            result = np.multiply(result, norm_weights[i] / result[::64, ::64].std(), out=result)

            final_accumulated_similarity[:, y:y + b.shape[0]] += result
            y += b.shape[0]
        print("Sanity Check", final_accumulated_similarity[:1024, :1024].mean())
        i += 1

Processing:   0%|          | 0/14 [00:00<?, ?it/s]
  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<01:23,  1.19it/s][A
  2%|▏         | 2/100 [00:01<01:19,  1.23it/s][A
  3%|▎         | 3/100 [00:02<01:18,  1.24it/s][A
  4%|▍         | 4/100 [00:03<01:16,  1.25it/s][A
  5%|▌         | 5/100 [00:04<01:16,  1.24it/s][A
  6%|▌         | 6/100 [00:04<01:15,  1.24it/s][A
  7%|▋         | 7/100 [00:05<01:15,  1.24it/s][A
  8%|▊         | 8/100 [00:06<01:13,  1.25it/s][A
  9%|▉         | 9/100 [00:07<01:12,  1.25it/s][A
 10%|█         | 10/100 [00:08<01:11,  1.25it/s][A
 11%|█         | 11/100 [00:08<01:11,  1.25it/s][A
 12%|█▏        | 12/100 [00:09<01:09,  1.26it/s][A
 13%|█▎        | 13/100 [00:10<01:09,  1.25it/s][A
 14%|█▍        | 14/100 [00:11<01:08,  1.26it/s][A
 15%|█▌        | 15/100 [00:12<01:08,  1.25it/s][A
 16%|█▌        | 16/100 [00:12<01:06,  1.26it/s][A
 17%|█▋        | 17/100 [00:13<01:06,  1.25it/s][A
 18%|█▊        | 18/100 [00:14<

Sanity Check 0.0



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<00:59,  1.66it/s][A
  2%|▏         | 2/100 [00:01<00:58,  1.66it/s][A
  3%|▎         | 3/100 [00:01<00:57,  1.69it/s][A
  4%|▍         | 4/100 [00:02<00:56,  1.70it/s][A
  5%|▌         | 5/100 [00:02<00:55,  1.70it/s][A
  6%|▌         | 6/100 [00:03<00:55,  1.70it/s][A
  7%|▋         | 7/100 [00:04<00:54,  1.71it/s][A
  8%|▊         | 8/100 [00:04<00:53,  1.71it/s][A
  9%|▉         | 9/100 [00:05<00:53,  1.72it/s][A
 10%|█         | 10/100 [00:05<00:53,  1.69it/s][A
 11%|█         | 11/100 [00:06<00:52,  1.68it/s][A
 12%|█▏        | 12/100 [00:07<00:51,  1.69it/s][A
 13%|█▎        | 13/100 [00:07<00:51,  1.68it/s][A
 14%|█▍        | 14/100 [00:08<00:50,  1.69it/s][A
 15%|█▌        | 15/100 [00:08<00:50,  1.70it/s][A
 16%|█▌        | 16/100 [00:09<00:49,  1.71it/s][A
 17%|█▋        | 17/100 [00:10<00:48,  1.71it/s][A
 18%|█▊        | 18/100 [00:10<00:47,  1.72it/s][A
 19%|█▉        | 19/100 [00:1

Sanity Check 0.000456



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<01:07,  1.46it/s][A
  2%|▏         | 2/100 [00:01<01:07,  1.45it/s][A
  3%|▎         | 3/100 [00:02<01:06,  1.46it/s][A
  4%|▍         | 4/100 [00:02<01:07,  1.43it/s][A
  5%|▌         | 5/100 [00:03<01:08,  1.38it/s][A
  6%|▌         | 6/100 [00:04<01:07,  1.40it/s][A
  7%|▋         | 7/100 [00:04<01:05,  1.42it/s][A
  8%|▊         | 8/100 [00:05<01:04,  1.43it/s][A
  9%|▉         | 9/100 [00:06<01:03,  1.43it/s][A
 10%|█         | 10/100 [00:07<01:03,  1.42it/s][A
 11%|█         | 11/100 [00:07<01:02,  1.43it/s][A
 12%|█▏        | 12/100 [00:08<01:01,  1.43it/s][A
 13%|█▎        | 13/100 [00:09<01:00,  1.44it/s][A
 14%|█▍        | 14/100 [00:09<00:59,  1.44it/s][A
 15%|█▌        | 15/100 [00:10<00:58,  1.45it/s][A
 16%|█▌        | 16/100 [00:11<00:57,  1.45it/s][A
 17%|█▋        | 17/100 [00:11<00:57,  1.45it/s][A
 18%|█▊        | 18/100 [00:12<00:56,  1.46it/s][A
 19%|█▉        | 19/100 [00:1

Sanity Check 0.00344



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<01:23,  1.19it/s][A
  2%|▏         | 2/100 [00:01<01:24,  1.16it/s][A
  3%|▎         | 3/100 [00:02<01:21,  1.18it/s][A
  4%|▍         | 4/100 [00:03<01:20,  1.19it/s][A
  5%|▌         | 5/100 [00:04<01:20,  1.17it/s][A
  6%|▌         | 6/100 [00:05<01:19,  1.18it/s][A
  7%|▋         | 7/100 [00:05<01:18,  1.19it/s][A
  8%|▊         | 8/100 [00:06<01:18,  1.17it/s][A
  9%|▉         | 9/100 [00:07<01:16,  1.19it/s][A
 10%|█         | 10/100 [00:08<01:16,  1.18it/s][A
 11%|█         | 11/100 [00:09<01:15,  1.17it/s][A
 12%|█▏        | 12/100 [00:10<01:14,  1.18it/s][A
 13%|█▎        | 13/100 [00:10<01:13,  1.18it/s][A
 14%|█▍        | 14/100 [00:11<01:14,  1.16it/s][A
 15%|█▌        | 15/100 [00:12<01:12,  1.17it/s][A
 16%|█▌        | 16/100 [00:13<01:11,  1.18it/s][A
 17%|█▋        | 17/100 [00:14<01:11,  1.16it/s][A
 18%|█▊        | 18/100 [00:15<01:09,  1.18it/s][A
 19%|█▉        | 19/100 [00:1

Sanity Check 0.01214



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<01:23,  1.18it/s][A
  2%|▏         | 2/100 [00:01<01:22,  1.18it/s][A
  3%|▎         | 3/100 [00:02<01:23,  1.16it/s][A
  4%|▍         | 4/100 [00:03<01:21,  1.18it/s][A
  5%|▌         | 5/100 [00:04<01:20,  1.18it/s][A
  6%|▌         | 6/100 [00:05<01:20,  1.17it/s][A
  7%|▋         | 7/100 [00:05<01:18,  1.18it/s][A
  8%|▊         | 8/100 [00:06<01:17,  1.18it/s][A
  9%|▉         | 9/100 [00:07<01:17,  1.17it/s][A
 10%|█         | 10/100 [00:08<01:16,  1.18it/s][A
 11%|█         | 11/100 [00:09<01:15,  1.18it/s][A
 12%|█▏        | 12/100 [00:10<01:15,  1.16it/s][A
 13%|█▎        | 13/100 [00:11<01:14,  1.17it/s][A
 14%|█▍        | 14/100 [00:11<01:12,  1.18it/s][A
 15%|█▌        | 15/100 [00:12<01:12,  1.17it/s][A
 16%|█▌        | 16/100 [00:13<01:11,  1.17it/s][A
 17%|█▋        | 17/100 [00:14<01:10,  1.18it/s][A
 18%|█▊        | 18/100 [00:15<01:10,  1.16it/s][A
 19%|█▉        | 19/100 [00:1

Sanity Check 0.00544



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<01:13,  1.35it/s][A
  2%|▏         | 2/100 [00:01<01:12,  1.35it/s][A
  3%|▎         | 3/100 [00:02<01:12,  1.34it/s][A
  4%|▍         | 4/100 [00:02<01:11,  1.35it/s][A
  5%|▌         | 5/100 [00:03<01:10,  1.35it/s][A
  6%|▌         | 6/100 [00:04<01:09,  1.35it/s][A
  7%|▋         | 7/100 [00:05<01:08,  1.35it/s][A
  8%|▊         | 8/100 [00:05<01:08,  1.35it/s][A
  9%|▉         | 9/100 [00:06<01:07,  1.34it/s][A
 10%|█         | 10/100 [00:07<01:07,  1.34it/s][A
 11%|█         | 11/100 [00:08<01:06,  1.35it/s][A
 12%|█▏        | 12/100 [00:08<01:05,  1.34it/s][A
 13%|█▎        | 13/100 [00:09<01:04,  1.35it/s][A
 14%|█▍        | 14/100 [00:10<01:03,  1.35it/s][A
 15%|█▌        | 15/100 [00:11<01:03,  1.35it/s][A
 16%|█▌        | 16/100 [00:11<01:02,  1.35it/s][A
 17%|█▋        | 17/100 [00:12<01:01,  1.35it/s][A
 18%|█▊        | 18/100 [00:13<01:01,  1.34it/s][A
 19%|█▉        | 19/100 [00:1

Sanity Check 0.007996



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<01:08,  1.45it/s][A
  2%|▏         | 2/100 [00:01<01:07,  1.46it/s][A
  3%|▎         | 3/100 [00:02<01:06,  1.45it/s][A
  4%|▍         | 4/100 [00:02<01:06,  1.45it/s][A
  5%|▌         | 5/100 [00:03<01:05,  1.46it/s][A
  6%|▌         | 6/100 [00:04<01:04,  1.46it/s][A
  7%|▋         | 7/100 [00:04<01:03,  1.47it/s][A
  8%|▊         | 8/100 [00:05<01:02,  1.47it/s][A
  9%|▉         | 9/100 [00:06<01:01,  1.47it/s][A
 10%|█         | 10/100 [00:06<01:01,  1.46it/s][A
 11%|█         | 11/100 [00:07<01:00,  1.46it/s][A
 12%|█▏        | 12/100 [00:08<01:00,  1.47it/s][A
 13%|█▎        | 13/100 [00:08<00:59,  1.46it/s][A
 14%|█▍        | 14/100 [00:09<00:58,  1.46it/s][A
 15%|█▌        | 15/100 [00:10<00:58,  1.46it/s][A
 16%|█▌        | 16/100 [00:10<00:57,  1.46it/s][A
 17%|█▋        | 17/100 [00:11<00:56,  1.46it/s][A
 18%|█▊        | 18/100 [00:12<00:56,  1.46it/s][A
 19%|█▉        | 19/100 [00:1

Sanity Check -0.01791



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<01:23,  1.19it/s][A
  2%|▏         | 2/100 [00:01<01:22,  1.19it/s][A
  3%|▎         | 3/100 [00:02<01:20,  1.20it/s][A
  4%|▍         | 4/100 [00:03<01:20,  1.19it/s][A
  5%|▌         | 5/100 [00:04<01:19,  1.20it/s][A
  6%|▌         | 6/100 [00:05<01:18,  1.19it/s][A
  7%|▋         | 7/100 [00:05<01:17,  1.20it/s][A
  8%|▊         | 8/100 [00:06<01:16,  1.20it/s][A
  9%|▉         | 9/100 [00:07<01:15,  1.20it/s][A
 10%|█         | 10/100 [00:08<01:15,  1.19it/s][A
 11%|█         | 11/100 [00:09<01:13,  1.20it/s][A
 12%|█▏        | 12/100 [00:10<01:13,  1.19it/s][A
 13%|█▎        | 13/100 [00:10<01:12,  1.20it/s][A
 14%|█▍        | 14/100 [00:11<01:12,  1.19it/s][A
 15%|█▌        | 15/100 [00:12<01:10,  1.20it/s][A
 16%|█▌        | 16/100 [00:13<01:10,  1.20it/s][A
 17%|█▋        | 17/100 [00:14<01:08,  1.21it/s][A
 18%|█▊        | 18/100 [00:15<01:08,  1.20it/s][A
 19%|█▉        | 19/100 [00:1

Sanity Check -0.01153



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:02<03:41,  2.24s/it][A
  2%|▏         | 2/100 [00:04<03:40,  2.25s/it][A
  3%|▎         | 3/100 [00:06<03:41,  2.28s/it][A
  4%|▍         | 4/100 [00:09<03:41,  2.31s/it][A
  5%|▌         | 5/100 [00:11<03:40,  2.32s/it][A
  6%|▌         | 6/100 [00:13<03:43,  2.38s/it][A
  7%|▋         | 7/100 [00:16<03:40,  2.37s/it][A
  8%|▊         | 8/100 [00:18<03:37,  2.37s/it][A
  9%|▉         | 9/100 [00:20<03:32,  2.34s/it][A
 10%|█         | 10/100 [00:23<03:33,  2.37s/it][A
 11%|█         | 11/100 [00:25<03:31,  2.37s/it][A
 12%|█▏        | 12/100 [00:28<03:28,  2.37s/it][A
 13%|█▎        | 13/100 [00:30<03:24,  2.35s/it][A
 14%|█▍        | 14/100 [00:32<03:24,  2.38s/it][A
 15%|█▌        | 15/100 [00:35<03:21,  2.37s/it][A
 16%|█▌        | 16/100 [00:37<03:19,  2.37s/it][A
 17%|█▋        | 17/100 [00:40<03:17,  2.39s/it][A
 18%|█▊        | 18/100 [00:42<03:14,  2.38s/it][A
 19%|█▉        | 19/100 [00:4

Sanity Check -0.0699



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<01:32,  1.07it/s][A
  2%|▏         | 2/100 [00:01<01:16,  1.29it/s][A
  3%|▎         | 3/100 [00:02<01:13,  1.32it/s][A
  4%|▍         | 4/100 [00:02<01:08,  1.39it/s][A
  5%|▌         | 5/100 [00:03<01:09,  1.36it/s][A
  6%|▌         | 6/100 [00:04<01:09,  1.35it/s][A
  7%|▋         | 7/100 [00:05<01:06,  1.40it/s][A
  8%|▊         | 8/100 [00:05<01:03,  1.46it/s][A
  9%|▉         | 9/100 [00:06<01:01,  1.49it/s][A
 10%|█         | 10/100 [00:07<00:59,  1.51it/s][A
 11%|█         | 11/100 [00:07<01:00,  1.46it/s][A
 12%|█▏        | 12/100 [00:08<00:59,  1.49it/s][A
 13%|█▎        | 13/100 [00:09<00:58,  1.50it/s][A
 14%|█▍        | 14/100 [00:09<01:01,  1.41it/s][A
 15%|█▌        | 15/100 [00:10<00:59,  1.42it/s][A
 16%|█▌        | 16/100 [00:11<00:57,  1.46it/s][A
 17%|█▋        | 17/100 [00:11<00:56,  1.48it/s][A
 18%|█▊        | 18/100 [00:12<00:54,  1.51it/s][A
 19%|█▉        | 19/100 [00:1

Sanity Check -0.0746



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<00:50,  1.95it/s][A
  2%|▏         | 2/100 [00:01<00:51,  1.90it/s][A
  3%|▎         | 3/100 [00:01<00:52,  1.86it/s][A
  4%|▍         | 4/100 [00:02<00:50,  1.90it/s][A
  5%|▌         | 5/100 [00:02<00:49,  1.90it/s][A
  6%|▌         | 6/100 [00:03<00:50,  1.87it/s][A
  7%|▋         | 7/100 [00:03<00:48,  1.90it/s][A
  8%|▊         | 8/100 [00:04<00:48,  1.91it/s][A
  9%|▉         | 9/100 [00:04<00:48,  1.87it/s][A
 10%|█         | 10/100 [00:05<00:47,  1.89it/s][A
 11%|█         | 11/100 [00:05<00:47,  1.88it/s][A
 12%|█▏        | 12/100 [00:06<00:47,  1.84it/s][A
 13%|█▎        | 13/100 [00:06<00:46,  1.87it/s][A
 14%|█▍        | 14/100 [00:07<00:45,  1.88it/s][A
 15%|█▌        | 15/100 [00:07<00:45,  1.86it/s][A
 16%|█▌        | 16/100 [00:08<00:44,  1.89it/s][A
 17%|█▋        | 17/100 [00:09<00:43,  1.89it/s][A
 18%|█▊        | 18/100 [00:09<00:43,  1.86it/s][A
 19%|█▉        | 19/100 [00:1

Sanity Check -0.08374



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:03<05:10,  3.14s/it][A
  2%|▏         | 2/100 [00:06<04:58,  3.04s/it][A
  3%|▎         | 3/100 [00:09<04:49,  2.98s/it][A
  4%|▍         | 4/100 [00:12<04:47,  3.00s/it][A
  5%|▌         | 5/100 [00:15<04:49,  3.05s/it][A
  6%|▌         | 6/100 [00:18<04:41,  2.99s/it][A
  7%|▋         | 7/100 [00:21<04:42,  3.04s/it][A
  8%|▊         | 8/100 [00:24<04:46,  3.11s/it][A
  9%|▉         | 9/100 [00:27<04:47,  3.16s/it][A
 10%|█         | 10/100 [00:30<04:42,  3.14s/it][A
 11%|█         | 11/100 [00:34<04:43,  3.18s/it][A
 12%|█▏        | 12/100 [00:37<04:43,  3.22s/it][A
 13%|█▎        | 13/100 [00:40<04:37,  3.19s/it][A
 14%|█▍        | 14/100 [00:43<04:36,  3.21s/it][A
 15%|█▌        | 15/100 [00:47<04:39,  3.29s/it][A
 16%|█▌        | 16/100 [00:51<04:58,  3.56s/it][A
 17%|█▋        | 17/100 [00:54<04:54,  3.54s/it][A
 18%|█▊        | 18/100 [00:58<04:51,  3.56s/it][A
 19%|█▉        | 19/100 [01:0

Sanity Check -0.1012



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:04<06:39,  4.04s/it][A
  2%|▏         | 2/100 [00:07<06:15,  3.83s/it][A
  3%|▎         | 3/100 [00:11<06:15,  3.88s/it][A
  4%|▍         | 4/100 [00:15<06:08,  3.83s/it][A
  5%|▌         | 5/100 [00:19<06:04,  3.84s/it][A
  6%|▌         | 6/100 [00:23<06:00,  3.83s/it][A
  7%|▋         | 7/100 [00:27<05:59,  3.86s/it][A
  8%|▊         | 8/100 [00:31<06:00,  3.92s/it][A
  9%|▉         | 9/100 [00:35<06:22,  4.20s/it][A
 10%|█         | 10/100 [00:39<06:11,  4.12s/it][A
 11%|█         | 11/100 [00:43<06:08,  4.14s/it][A
 12%|█▏        | 12/100 [00:48<06:08,  4.19s/it][A
 13%|█▎        | 13/100 [00:52<06:02,  4.17s/it][A
 14%|█▍        | 14/100 [00:56<05:55,  4.14s/it][A
 15%|█▌        | 15/100 [01:00<05:49,  4.12s/it][A
 16%|█▌        | 16/100 [01:04<05:51,  4.19s/it][A
 17%|█▋        | 17/100 [01:09<05:49,  4.21s/it][A
 18%|█▊        | 18/100 [01:13<05:44,  4.20s/it][A
 19%|█▉        | 19/100 [01:1

Sanity Check -0.1123



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:05<08:57,  5.42s/it][A
  2%|▏         | 2/100 [00:10<08:45,  5.36s/it][A
  3%|▎         | 3/100 [00:15<08:35,  5.31s/it][A
  4%|▍         | 4/100 [00:21<08:29,  5.31s/it][A
  5%|▌         | 5/100 [00:26<08:19,  5.26s/it][A
  6%|▌         | 6/100 [00:31<08:18,  5.31s/it][A
  7%|▋         | 7/100 [00:37<08:14,  5.32s/it][A
  8%|▊         | 8/100 [00:42<08:05,  5.28s/it][A
  9%|▉         | 9/100 [00:47<08:01,  5.29s/it][A
 10%|█         | 10/100 [00:53<07:56,  5.29s/it][A
 11%|█         | 11/100 [00:58<07:45,  5.23s/it][A
 12%|█▏        | 12/100 [01:03<07:48,  5.32s/it][A
 13%|█▎        | 13/100 [01:09<07:57,  5.48s/it][A
 14%|█▍        | 14/100 [01:15<08:01,  5.59s/it][A
 15%|█▌        | 15/100 [01:20<07:53,  5.57s/it][A
 16%|█▌        | 16/100 [01:26<07:45,  5.54s/it][A
 17%|█▋        | 17/100 [01:33<08:12,  5.93s/it][A
 18%|█▊        | 18/100 [01:39<08:09,  5.97s/it][A
 19%|█▉        | 19/100 [01:4

Sanity Check -0.121





In [23]:
# replace diagonals by -inf to prevent them being picked
np.fill_diagonal(final_accumulated_similarity, -np.inf)

In [24]:
np.save("final_similarity.p", final_accumulated_similarity)

In [26]:
top_ids = pd.DataFrame(compute_in_batches_topIds(final_accumulated_similarity, final_ids, 100, 100), final_ids)

100%|██████████| 100/100 [07:59<00:00,  4.79s/it]


In [27]:
dt.Frame(pd.DataFrame(top_ids, index=final_ids).reset_index()).to_csv('./top_ids_late_fusion.csv')

In [29]:
top_ids

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0009fFIM1eYThaPg,xP8c9HS5rqvmriRg,aLdUurk83dJm8w7W,0XqU6v83ZMOeh4wc,fyQyjwU2vsgFEl1r,9cF6XHF80kGFAZc1,aHSX00Tar4620YO6,BlyUBjyyZ8wEC4Lg,dMcy1JLT4iEPweUI,ZjM2cKNxK9U3i1xY,cWXpvnMaoOLOcPZx,...,5B1lk8SkefODDlF9,07AjGMk31TkPW3O9,mMNCmXVgaCTVDSuc,7EjNBvMzGWHNeUyN,H06n5EiDyQyxwII6,0WniuVWwli1ez0mR,MHjzLyvCm9q7I5JN,wLCTKkzIN7y6BLoZ,wVRYBhesJywYtx5A,W0RexNIZuGKa3Nl9
0010xmHR6UICBOYT,m6YIrcf0iG25NYtS,e9hFdGfKFyxPuQvq,UD0DWvRMwWe8Swiw,VndT0hzLfaNINtjf,6tIlBkAS3hP8ohzs,rt8pmZEOoGWbt03f,9EDGcoex4PWXVhRs,9w9gcgpKBBcZMXHL,FADoCNlS4nOrax2n,EuVg4Ou0zOYx3Zzd,...,LKyKOXUr3NkTOYNe,xdyzNr7Gisl5eeWV,TcClCCLPQHTLN5BX,phrRAxDxoOmOm2VH,KDBrwVMA0iltXJit,0FBYAHnVYyF7w6Vm,vhVXY7BApvrxuCTs,5HsgZD8nSKlQxrVd,UMnH6qMfqkSmhACq,2WwpcPbpW7WUN85S
002Jyd0vN4HyCpqL,YHupaKUf3ndWCU0v,VllNteG3w18TMKYP,XaWFvftldmxTyE23,a2dMh46H35G46DRb,SB1NTRVKoQiFQUR0,dG79PeWA0lQCVaHu,SxGfnDda6gKj1dxH,ZlcuTUFRxPYCuBwJ,TKTKbwbDl845STJF,b93f0GYGzdKlq4I7,...,2QonsnOn5Tc7Rt5h,QMqyB2OWGCjPOs41,LxaXC6gP3Z9RsD2d,axIWadanvvzn3Pvx,ceRyPWY7XBnvF92A,IyOjhsVVei0LXUw6,XL9ttv6KtHJ4vyhO,lwjCqRX8vexafTOd,W5i36XhNBJv4RiIU,EezQt3u48rZ7scpC
006TYKNjNxWjfKjy,0cYTIIElIl2tFyoW,uMRJJ3QnZ1mGkCQX,6GneFoGYxFqPeH4S,zI3j3T5qkiylIBaD,9eZiNeqg0CQXDRIk,QTxf0P1qd3Dsrc5D,HOC0zk7yRxRPkCTJ,7857AeIHifquoNmK,UceDke4Q33GCS6nY,iA9KEeDlCsgMyQRo,...,Ly4mxnhHOl4QPHBx,gcbSCgxVlxiHvwic,XYivSoFfW0AiMqr3,wR4F3A90kVplf4bJ,7TTPpNjvr4oqjCM4,Y8qPSzqRMxSHEjRR,qqtMvrE3derdpeCk,BUuoaQWXJ1QxJl5o,0sxXq0PI2wB9TryG,0RRt9hQD1qGd8pfz
007LIJOPQ4Sb98qV,qsmfGwfCd4FFaDet,ljHcf8k4kcRQ525P,cyRMqYRBDcRleNsa,9epP1yOXfLKlkR3S,S1rroi8TiRqPNWDX,a2bC8PID2eQzeNFn,0Hhuy13DlIS8loEY,0Sc2Xuj448Am1PkD,wOhO1MgW1CcEKaAx,2z29YJphtXmG8xqV,...,jNQVI254qWEotBEw,H60atNftm3VPVItO,HMvRdVsvXUPwROWw,2qwvTDCDqz7BkpDO,rHxMh2TRcYgoOYrR,bSg4cOjecHcp2Zmt,0VWbBD7x9WKDXitt,9h29AurKytmnDTUS,ldQsK4wsh2IaeGe9,Xzpc3XmT0qJsl3Hf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zzx8CWdM7qkxKQpC,pF2UjM0abOPwWfBm,23z7bAcQF1DACqpE,CGz0z5iv0jZmIa7X,9833DTWDgiQRwUqO,UEH28vtjfDBIbDRd,wZWxThEcPCOUnikc,r38azxVlSiKyvYp1,6dV3AoxUu9q2Uwof,1WmILK0emIsdTSl2,9HylO3KnucN7ifb4,...,U5SwPqTnpXjk9bC6,Ha9EdV4sQhxFJgo1,7dqVUMxVHgaOVns4,RmdOD9pdZW2rxWA8,2f72N3aUrcGbVsm6,UDb8MYNDkrMToclA,IBvdSGHX9PGVNKOw,lTuFCTvvBnrDPW5z,Ix3E8QzXaW2l60z0,QyCGEzZqTCrx7IdL
zzyb5LvKJTWLVnrk,D5z0dWO50sLyTDWE,3bejCrhNQlm97r9A,0Pn0EYWzup45hJB5,qaeZAwbRSlV5IB4o,IbdeoyURhiHVZuIX,pF5dqrJwRreBWon6,R860DzF4ox5cXCoz,avSFhKA5GZMDAuLM,ffw6DfI0KN2VIIck,QO41p5UUGWGmiCDr,...,fW8ddcjGyQXRLBp0,qAURhbnXZ0tXwDPx,8Wn5e3q1UnNucOtP,JRrK2n6spoDttKOj,pkzbnslE6Gk7KYlF,zVBVi5hr9JqO5QYg,UCHsDKSTmeSEouRV,RTsAYYQvkPu0YjUU,2549kZXl7oTGSd8j,V4IMQpm3iIKay5RE
zzz0n04uuTUA7fNh,uRNge3sN74NdIaYi,lNdmadPCTL6vlaaQ,HwFgTWD3DyWJnDOm,9EdjT3tfTfcfq2NM,0YxiZtygt6zXSnTr,e42BDZS0sZlTx5mu,Es6aw5ozgNnKbPrF,uHNKT1Meu35bah3W,U42mMUpgGJnphDDh,Pk2dvqU8cnutBoI5,...,ZOyppcq7tVe4WoiS,DPGhN6RAvGYvEVYY,aRG8XY1YZQMi9UCL,7wubDavxrYllvCMx,0SwtGR8MlkcIxCMm,0m1MyuwLx1EjuQqD,iqmbCdX5wUMYhLwI,05ogun9U0TYSNQDO,6eMUgKOa9q4mlow1,4S8s0vwC5BSH2Npt
zzznMjZAKnJJXQSj,m3bU7wEiG8i3QgLU,dfMDtC99XxwJBTho,TTZ6nOkz6a3yO45h,2mzlldXgCYcvYcg7,igPQmB6xcoLu5kvm,GmU3UHKt1NXz19w4,7PCTOXUwR2lO6jQy,okMdFbMjVmEb4JZK,arJV2x5YxfzhWhHT,IOnrB7htO4atg5dt,...,SQ1Lgea7H5jpASWg,OuH9eQhzvWEg0T78,ydLADICszvB61OtR,Pqx3xJee7SI6aSkP,0Lgc4YsFJTKIOTZQ,Tu4wqjgfjYm3DCmM,TIDAYplHydmOIiMl,jwNo0P17Ji0EVyIC,CY2PCIr7j680lCzs,fL7PtttkjSX8s6tP


In [30]:
results = {
    "MAP@10": meanAveragePrecision(top_ids, 10, genres),
    "MAP@100": meanAveragePrecision(top_ids, 100, genres),
    "MRR@10": meanReciprocalRank(top_ids, 10, genres),
    "MRR@100": meanReciprocalRank(top_ids, 100, genres),
    "NDCG@10": ndcgMean(top_ids, 10, genres)[1],
    "NDCG@100": ndcgMean(top_ids, 100, genres)[1],
}

100%|██████████| 68641/68641 [00:38<00:00, 1790.31it/s]
100%|██████████| 68641/68641 [01:23<00:00, 820.39it/s]
100%|██████████| 68641/68641 [00:35<00:00, 1920.69it/s]
100%|██████████| 68641/68641 [01:20<00:00, 847.51it/s]
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(dcg) / np.sum(idcg))
  ndcg.append(np.sum(d

In [31]:
results

{'MAP@10': 0.7448601544809241,
 'MAP@100': 0.6557212476718379,
 'MRR@10': 0.8058978991916305,
 'MRR@100': 0.8079706530244403,
 'NDCG@10': nan,
 'NDCG@100': nan}