In [8]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import ast

In [9]:
def get_max_scorers(sample, genre_list):
    print(sample, genre_list)
    max_score = max(sample.values())

    # Extract keys with the highest score
    highest_scoring_genres = [genre for genre, score in sample.items() if score == max_score and genre in genre_list]
    return highest_scoring_genres


def create_interaction_matrix(df, genres, inter, num_rows):
    # Cache genre lists for each ID to avoid repeating lookups
    id_to_genres = genres.set_index('id')['genre'].to_dict()

    # Loop through each pair (i, j) to populate the interaction matrix
    for i in tqdm(range(num_rows)):
        # Read the first row (i)
        id_i = df.iloc[i]['id']
        sample_i = ast.literal_eval(df.iloc[i]['(tag, weight)'])
        genre_list_i = ast.literal_eval(id_to_genres.get(id_i, '[]'))  # Safely fetch genres for id_i
        top_i = np.array(get_max_scorers(sample_i, genre_list_i))

        for j in range(i + 1, num_rows):  # Only need to process pairs (i, j) with i < j
            # Read the second row (j)
            id_j = df.iloc[j]['id']
            sample_j = ast.literal_eval(df.iloc[j]['(tag, weight)'])
            genre_list_j = ast.literal_eval(id_to_genres.get(id_j, '[]'))  # Safely fetch genres for id_j
            top_j = np.array(get_max_scorers(sample_j, genre_list_j))

            # Calculate intersection of top genres
            intersection = np.intersect1d(top_i, top_j)
            is_interaction = 1 if len(intersection) > 0 else 0

            print(is_interaction)

            # Update the interaction matrix symmetrically
            inter[i][j] = inter[j][i] = is_interaction

In [10]:

df = pd.read_csv("./dataset/id_tags_dict.tsv", sep="\t")
genres = pd.read_csv("./dataset/id_genres_mmsr.tsv", sep="\t")
inter = np.zeros((len(df), len(df)))
num_rows = len(df)
create_interaction_matrix(df, genres, inter, num_rows)

inter

  0%|          | 0/5148 [00:00<?, ?it/s]

{'rock': 100, 'alternative': 100, 'pop punk': 100} ['rock', 'pop punk']
{'italian': 100, 'laura pausini': 91, 'pop': 60, 'female vocalists': 50, 'italian pop': 46, 'romantic': 23, 'female vocalist': 19, 'baladas': 19, 'laura': 19, 'sexy': 14, 'beautiful': 14, 'sea': 14, 'latin': 10, 'favorite artists': 10, 'favourite': 10, 'italy': 10, 'loneliness after dusk': 10, 'europop': 10, '2000': 10, 'angelic voices': 10, 'breathtaking': 10, 'tra te e il mare': 10, 'pausini': 10, 'italianissima': 10, 'spanish': 5, 'female': 5, 'ambient': 5, 'pop rock': 5, 'love': 5, 'easy listening': 5, 'world': 5, 'vocals': 5, 'peaceful': 5, 'water': 5, 'top': 5, 'ethereal': 5, 'nice': 5, 'world music': 5, 'suave': 5, '00s': 5, 'ballad': 5, 'heartbreaking': 5, 'latin pop': 5, 'relax': 5, 'female voices': 5, 'female singers': 5, 'perfection': 5, 'sentimento': 5, 'inspiring': 5, 'vocalist': 5, 'pop folk': 5, 'heavenly voices': 5, 'ladies': 5, 'italia': 5, 'italo': 5, 'italiano': 5, 'note': 5, 'italiana': 5, 'favo

  0%|          | 0/5148 [00:01<?, ?it/s]

0
{'punk rock': 100, '2010s': 67, '2016': 67, 'punk': 34, 'male vocalist': 34, 'english lyrics': 34, 'descendents': 34, 'k1r7m': 34, 'hypercaffium spazzinate': 34} ['punk']
0
{'indie rock': 100, 'indie': 60, 'guitar': 40, 'upbeat': 40, 'rockin': 40, 'seen live': 20, 'rock': 20, 'power pop': 20, 'american': 20, 'sex': 20, '00s': 20, 'relax': 20, 'remember': 20, '2000s': 20, 'new jersey': 20, 'drug': 20, 'somafm': 20, 'coffee break': 20, 'pflicht': 20, 'bagel': 20, 'the wrens': 20, 'dfa 79': 20, 'a very good find': 20} ['indie rock', 'rock', 'power pop']
0
{'avant garde': 100, 'rock': 67, 'alternative rock': 67, 'loud': 67, 'art punk': 67, 'indie rock': 34, '2008': 34, 'american': 34, 'male vocalist': 34, 'post hardcore': 34, 'nerdcore': 34, 'us': 34, 'progressive punk': 34, 'avant rock': 34, 'better than you': 34, 'arena punk': 34, 'foxy shazam': 34, 'too fucking good': 34, 'checkback': 34, 'post nerdcore': 34, 'jaguar lovetastic': 34, 'white stripestastic': 34} ['avant garde', 'rock', 




KeyboardInterrupt: 