# Retrieval (Tag-to-Music Similarity)

In [17]:
import os
import torch
import pandas as pd
import pandas as pd
from IPython.display import display, HTML
from datasets import load_dataset

In [2]:
# Embedding tables; the cells below read the 'track' split, whose rows carry
# "token", "content" and "vector" fields.
mwe_dataset = load_dataset("seungheondoh/musical-word-embedding")

# Track metadata; rows carry "msd_track_id" and "youtube_url" fields.
msd_dataset = load_dataset("seungheondoh/multimodal_msd", split="train")

In [15]:
# Lookup table: MSD track id -> YouTube URL, built from the metadata rows.
id2url = {row["msd_track_id"]: row["youtube_url"] for row in msd_dataset}

In [16]:
# "token" of every track row, in dataset order; music_retrieval uses these as
# keys into id2url (presumably MSD track ids — TODO confirm).
track_ids = [row["token"] for row in mwe_dataset["track"]]

In [4]:
# Display text ("content") of every track row, in dataset order.
track_list = [row["content"] for row in mwe_dataset["track"]]

# One embedding per track row, stacked in the same order as track_list.
track_vector = torch.tensor([row["vector"] for row in mwe_dataset["track"]])

# Display text -> embedding row. Built before track_vector is re-bound to its
# normalised version, so these values are the raw (un-normalised) rows.
track2vector = dict(zip(track_list, track_vector))

In [5]:
# L2-normalise every row so that a dot product with another unit vector is a
# cosine similarity. p=2.0 and dim=1 are the defaults, spelled out for clarity.
track_vector = torch.nn.functional.normalize(track_vector, p=2.0, dim=1)

In [32]:
def music_retrieval(user_query, topk=10):
    """Rank tracks against a whitespace-separated tag query and display the best hits.

    Every token of ``user_query`` is looked up in ``tag2vector``; the token
    vectors are L2-normalised and averaged into one query vector, which is
    scored against every row of ``track_vector`` with a dot product. Tracks
    are then walked in descending score order and the first ``topk`` that
    have a YouTube URL in ``id2url`` are rendered as an HTML table with an
    embedded player, the track description and its score.

    Args:
        user_query: Query string, e.g. ``"piano classical"``. Each
            whitespace-separated token must be a key of ``tag2vector``.
        topk: Maximum number of rows to display. Values <= 0 display an
            empty table.

    Returns:
        None. The table is shown through ``IPython.display.display``.

    Raises:
        ValueError: If ``user_query`` contains no tokens.
        KeyError: If a token has no entry in ``tag2vector``.
    """
    # Local stdlib import so the cell stays self-contained.
    import html

    # str.split() without arguments already discards surrounding whitespace.
    token_list = user_query.split()
    if not token_list:
        raise ValueError("user_query must contain at least one tag token")

    # NOTE(review): `tag2vector` is not defined in any visible cell; it has to
    # exist in the kernel (presumably token -> 1-D embedding tensor — confirm)
    # before this function is called.
    try:
        token_vectors = [tag2vector[token] for token in token_list]
    except KeyError as err:
        raise KeyError(
            f"query token {err.args[0]!r} has no embedding in tag2vector"
        ) from err

    query_vector = torch.nn.functional.normalize(torch.stack(token_vectors))
    # Averaging a single row is a no-op, so no special case is needed.
    query_vector = query_vector.mean(0, True)

    scores = (query_vector @ track_vector.T).squeeze(0)
    ranked_indices = torch.argsort(scores, descending=True).tolist()

    results = []
    for idx in ranked_indices:
        # Stop once enough playable tracks were collected. Filtering happens
        # while walking the ranking, so tracks without a URL no longer shrink
        # the result below `topk`.
        if len(results) >= topk:
            break
        url = id2url.get(track_ids[idx])
        if not url:
            # Track unknown to the metadata table, or stored without a URL.
            continue
        # The frame is rendered with escape=False so the iframe markup
        # survives; dataset-provided text therefore has to be escaped here.
        vid = html.escape(url[-11:])
        results.append({
            "youtube": f'<iframe width="360" height="115" src="https://www.youtube.com/embed/{vid}" frameborder="0" allowfullscreen></iframe>',
            "entity": html.escape(track_list[idx]),
            "score": float(scores[idx]),
        })
    df = pd.DataFrame(results)
    display(HTML(df.to_html(escape=False)))

In [83]:
# Example query made of two tag tokens; shows at most five tracks.
music_retrieval("piano classical", topk=5)

Unnamed: 0,youtube,entity,score
0,,"Piano Trio No. 7 In B Flat Major_ Op. 97_ ""Archduke"": I. Allegro moderato by Pablo Casals from Beethoven: Piano Trios_ Op. 70/2 & 97 / Schumann: Piano Trios_ Op. 80 & 110",0.726394
1,,Fantaisie-Impromptu_ Op. 66 by Frederic Chopin from Classical Favorites,0.723254
2,,Mazurka in A Minor_ Op. 67-V by Byron Janis from Absolutely Classical_ Volume 162,0.716819
3,,Mazurka Op. 68 No. 3 in C Sharp Minor by Sergey Rachmaninov from The Chopin Recordings,0.715834
4,,Waltz No. 14 Op. Posth. in E Minor by Sergey Rachmaninov from The Chopin Recordings,0.714101
