In [13]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import ast
from experimental_metrics import *

In [14]:
# --- Popularity signals -------------------------------------------------------
# Two per-row popularity vectors are extracted as NumPy arrays: the `popularity`
# column of the metadata table and the `total_listens` column of the listens table.
popularity_df = pd.read_csv("dataset/id_metadata_mmsr.tsv", sep="\t")
popularity = popularity_df["popularity"].to_numpy()

listens_df = pd.read_csv("dataset/id_total_listens.tsv", sep="\t")
pop = listens_df["total_listens"].to_numpy()

# --- Side information and ground truth ----------------------------------------
infos = pd.read_csv("dataset/id_information_mmsr.tsv", sep="\t")
# NOTE: despite the .csv extension this matrix is read as tab-separated.
inter_true = np.loadtxt(
    "./predictions/binary_relevancy_matrix_0000.csv",
    delimiter="\t",
)
tags = pd.read_csv("./dataset/id_tags_dict.tsv", sep="\t")
genres = pd.read_csv("./dataset/id_genres_mmsr.tsv", sep="\t")

In [11]:
# Models evaluated in this run; comment entries in or out to change the selection.
models = [
    # Baseline
    #'random',

    # TEXT
    #"lyrics_tf-idf", 
    #"lyrics_bert",
    #"lyrics_word2vec",

    # AUDIO
    #"ivec256", 
    #"mfcc_bow",
    #"mfcc_stats",
    #"musicnn",

    # VIDEO
    #"resnet",
    #"vgg19", 
    #"incp",

    # Hybrid
    'early_fusion',
    'graph_early_fusion',
    #'late_fusion',
    #'quick_late_fusion',

    # Creative
    'gm_nn',
    #'gm_comm',
    #'mgm_nn',
    #'mgm_comm',

    # Diversity Aware
    #'shifted',
    #'p_shifted',
    #'exp',
    #'power',
    #'mmr'
]
evaluation = []  # one record (dict) per evaluated model
topK = 100       # cutoff used both in the prediction file name and in the metrics

# First dimension of the ground-truth matrix; it does not change between models,
# so it is read once instead of on every iteration.
num_true_rows = inter_true.shape[0]

for model in tqdm(models):
    pred_path = f"predictions/rets_{model}_{topK}_matrix.csv"

    # Not every prediction matrix is comma-separated (some models are read with
    # tabs elsewhere in this notebook), so pick the separator from the file's
    # first line instead of assuming ",". Comma-separated files behave as before.
    with open(pred_path) as pred_file:
        delimiter = "\t" if "\t" in pred_file.readline() else ","
    inter_pred = np.loadtxt(pred_path, delimiter=delimiter)

    # Beyond-accuracy metrics at the single cutoff `topK`.
    pop_at_k = avg_popularity_at_k(inter_pred, popularity, topK)  # uses `popularity` column
    pop_at_k_2 = avg_popularity_at_k(inter_pred, pop, topK)       # uses `total_listens` column
    cov_at_k = avg_coverage_at_k(inter_pred, num_true_rows, topK)
    div_at_k = diversity_at_k(inter_pred, tags, genres, k=topK)

    evaluation.append({
        "model": model,
        f"pop-sp@{topK}":    pop_at_k,
        f"pop-lfm@{topK}":   pop_at_k_2,
        f"cov@{topK}":       cov_at_k,
        f"div@{topK}":       div_at_k
    })

  0%|          | 0/6 [00:08<?, ?it/s]


AttributeError: 'list' object has no attribute 'shape'

In [6]:
# Turn the list of per-run metric records into a table: one row per appended
# record, one column per dictionary key.
evaluation_df = pd.DataFrame(evaluation)

In [7]:
# Name the output file after the cutoff the metrics were computed at, so that
# results for a different `topK` are not stored under a hard-coded "at_10" name.
# With topK == 10 this resolves to the same path as before.
evaluation_df.to_csv(f"./results/base_beyond_at_{topK}_th_0.csv", index=False)

In [8]:
# Bare last expression: renders the evaluation table as this cell's output.
evaluation_df

Unnamed: 0,model,pop-sp@10,pop-lfm@10,cov@10,div@10
0,random,1.00592,1.012576,0.871989,0.948478
1,lyrics_tf-idf,1.030722,0.990218,0.976496,0.928037
2,lyrics_bert,1.064769,1.206656,0.833139,0.904043
3,ivec256,1.015168,1.003331,0.998834,0.917285
4,mfcc_bow,1.048302,0.988205,0.908897,0.896752
5,mfcc_stats,1.033329,0.987518,0.865579,0.902588
6,musicnn,1.0136,0.997526,0.991453,0.889304
7,vgg19,1.031843,1.087203,0.876457,0.918906
8,resnet,1.027815,1.128008,0.891414,0.912393
9,early_fusion,1.039066,1.13286,0.911616,0.899672


In [17]:
# Models evaluated in the cutoff sweep; comment entries in or out to change the
# selection. Every active entry ends with a comma so that uncommenting the next
# line can never trigger implicit string concatenation ('MKGCN' 'shifted').
models = [
    # Baseline
    #'random',
    
    # TEXT
    #"lyrics_tf-idf", 
    "lyrics_bert",
    #"lyrics_word2vec",

    # AUDIO
    #"ivec256", 
    #"mfcc_bow",
    #"mfcc_stats",
    "musicnn",

    # VIDEO
    #"vgg19", 
    #"resnet",
    #"incp",

    # Hybrid
    'early_fusion',
    #'graph_early_fusion',
    #'simple_late_norm',
    'late_fusion',

    # Creative
    #'gm_nn',
    #'gm_comm',
    #'mgm_nn',
    #'mgm_comm',
    'auto_enc',
    'MKGCN',

    # Diversity Aware
    #'shifted',
    #'p_shifted',
    #'exp',
    #'power',
    #'mmr'
]

# Models whose prediction matrix is read as comma-separated; every other model
# is read as tab-separated.
comma_models = [
    "lyrics_tf-idf", 
    "lyrics_bert",
    "ivec256", 
    "mfcc_bow",
    "mfcc_stats",
    "musicnn",
    "vgg19", 
    "resnet",

    'early_fusion',
    'graph_early_fusion',
    'simple_late_norm',
    'late_fusion',

    'auto_enc'
]

evaluation = []  # one record (dict) per (model, cutoff) pair
topK = 10        # NOTE: not read by the sweep below, which iterates over `x`

# Cutoffs 5, 10, ..., 100 (20 evenly spaced integer values).
x = np.linspace(5, 100, 20, dtype=int)

# First dimension of the ground-truth matrix; constant across models and
# cutoffs, so it is read once instead of inside the inner loop.
num_true_rows = inter_true.shape[0]

for model in tqdm(models):
    # Only the separator differs between the two file flavours, so choose it
    # up front and keep a single load call.
    delimiter = "," if model in comma_models else "\t"
    inter_pred = np.loadtxt(f"predictions/rets_{model}_100_matrix.csv", delimiter=delimiter)

    # The top-100 prediction matrix is loaded once per model and re-scored at
    # every cutoff k.
    for k in x:
        pop_at_k = avg_popularity_at_k(inter_pred, popularity, k)  # uses `popularity` column
        pop_at_k_2 = avg_popularity_at_k(inter_pred, pop, k)       # uses `total_listens` column
        cov_at_k = avg_coverage_at_k(inter_pred, num_true_rows, k)
        div_at_k = diversity_at_k(inter_pred, tags, genres, k=k)

        evaluation.append({
            "model": model,
            "threshold": k,
            "pop-sp":    pop_at_k,
            "pop-lfm":   pop_at_k_2,
            "cov":       cov_at_k,
            "div":       div_at_k
        })

100%|██████████| 6/6 [07:44<00:00, 77.39s/it]


In [18]:
# Tabulate the collected records (one row per appended record), write them to
# disk without the index column, then display the table as the cell's output.
evaluation_df = pd.DataFrame(evaluation)
evaluation_df.to_csv("./results/base_beyond_all_k_th_0.csv", index=False)
evaluation_df

Unnamed: 0,model,threshold,pop-sp,pop-lfm,cov,div
0,lyrics_bert,5,1.065208,1.215238,0.722416,0.928141
1,lyrics_bert,10,1.064769,1.206656,0.833139,0.904043
2,lyrics_bert,15,1.063592,1.201342,0.878982,0.887472
3,lyrics_bert,20,1.061741,1.197193,0.906371,0.875337
4,lyrics_bert,25,1.059476,1.188137,0.922688,0.865457
...,...,...,...,...,...,...
115,MKGCN,80,1.001794,1.028022,1.000000,0.806113
116,MKGCN,85,1.001575,1.028100,1.000000,0.803143
117,MKGCN,90,1.001705,1.029439,1.000000,0.800246
118,MKGCN,95,1.001830,1.030334,1.000000,0.797481
