# Grape Launcher

In [None]:
!pip install -r requirements.txt

In [1]:
# Enable the autoreload extension so that edits to the imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from utils.jupyter_utils import log_cell
from utils.jupyter_utils import clear_logs

In [None]:
# Reset the logs before starting a fresh run.
# NOTE(review): presumably removes what the %%log_cell magic has recorded —
# confirm against utils.jupyter_utils.
clear_logs()

In [2]:
from grape_recommender import GrapeRecommender
from evaluate import ArenaEvaluator
from lib.new_grape import MelonGraph

In [3]:
# --- Run configuration -------------------------------------------------------

# Passed to the recommender's predict calls.
# NOTE(review): presumably the number of parallel workers — confirm.
jobs = 12

# Static resources used when fitting.
song_meta_fname = './res/song_meta.json'
genre_fname = './res/genre_gn_all.json'

# Training playlists.
train_fname = './arena_data/orig/train.json'

# Validation split: questions to predict on and the answers to score against.
question_fname = './arena_data/questions/val.json'
answer_fname = './arena_data/answers/val.json'

# Where the predictions are written.
result_fname = './arena_data/results/results.json'

In [4]:
# Shared instances reused by the cells below: the recommender is fitted and
# then asked for predictions; the evaluator scores those predictions.
recommender = GrapeRecommender()
evaluator = ArenaEvaluator()

In [5]:
%%log_cell
%%time
recommender.read_and_fit(
    song_meta_fname,
    genre_fname,
    train_fname,
)

Loading song meta...
Loading genre...
Loading train file...


Parsing data:   1%|          | 5296/822738 [00:00<00:15, 51812.21it/s]

Fitting train data...


Parsing data: 100%|██████████| 822738/822738 [00:39<00:00, 20905.61it/s]
Creating graph: 100%|██████████| 47119216/47119216 [02:06<00:00, 372391.02it/s] 
Caching graph: 100%|██████████| 34405782/34405782 [00:47<00:00, 721088.36it/s] 


In [12]:
# Recommender hyper-parameters, including one weight per MelonGraph.Relation.
# NOTE: the next cell calls set_params again with the same values except
# SONG_TO_ARTIST_DETAILED_GENRE (0.015 here, 0.0005 there).
recommender.set_params(
    max_songs=100,
    max_tags=10,
    max_depth=8,
    relation_weight={
        MelonGraph.Relation.ALBUM_TO_SONG: 0.01,
        MelonGraph.Relation.ALBUM_TO_WORD: 0,
        MelonGraph.Relation.ARTIST_TO_SONG: 0.01,
        MelonGraph.Relation.ARTIST_TO_WORD: 0,
        MelonGraph.Relation.ARTIST_GENRE_TO_SONG: 0.01,
        MelonGraph.Relation.GENRE_TO_SONG: 0.01,
        MelonGraph.Relation.MONTH_TO_SONG: 0,
        MelonGraph.Relation.PLAYLIST_TO_SONG: 0.01,
        MelonGraph.Relation.PLAYLIST_TO_TAG: 0.025,
        MelonGraph.Relation.PLAYLIST_TO_WORD: 0.025,
        MelonGraph.Relation.SONG_TO_ALBUM: 0.015,
        MelonGraph.Relation.SONG_TO_ARTIST: 0.015,
        MelonGraph.Relation.SONG_TO_ARTIST_DETAILED_GENRE: 0.015,
        MelonGraph.Relation.SONG_TO_ARTIST_GENRE: 0,
        MelonGraph.Relation.SONG_TO_DETAILED_GENRE: 0,
        MelonGraph.Relation.SONG_TO_GENRE: 0,
        MelonGraph.Relation.SONG_TO_MONTH: 0,
        MelonGraph.Relation.SONG_TO_PLAYLIST: 0.0075,
        MelonGraph.Relation.SONG_TO_YEAR: 0,
        MelonGraph.Relation.TAG_TO_PLAYLIST: 0.01,
        MelonGraph.Relation.TAG_TO_WORD: 0.01,
        MelonGraph.Relation.WORD_TO_ALBUM: 0,
        MelonGraph.Relation.WORD_TO_ARTIST: 0,
        MelonGraph.Relation.WORD_TO_PLAYLIST: 0.01,
        MelonGraph.Relation.WORD_TO_TAG: 0.001,
        MelonGraph.Relation.YEAR_TO_SONG: 0,
    },
    song_relation_weight=None,
    tag_relation_weight=None,
)

In [25]:
# Recommender hyper-parameters, including one weight per MelonGraph.Relation.
# Identical to the previous cell except SONG_TO_ARTIST_DETAILED_GENRE, which is
# 0.0005 here instead of 0.015.
recommender.set_params(
    max_songs=100,
    max_tags=10,
    max_depth=8,
    relation_weight={
        MelonGraph.Relation.ALBUM_TO_SONG: 0.01,
        MelonGraph.Relation.ALBUM_TO_WORD: 0,
        MelonGraph.Relation.ARTIST_TO_SONG: 0.01,
        MelonGraph.Relation.ARTIST_TO_WORD: 0,
        MelonGraph.Relation.ARTIST_GENRE_TO_SONG: 0.01,
        MelonGraph.Relation.GENRE_TO_SONG: 0.01,
        MelonGraph.Relation.MONTH_TO_SONG: 0,
        MelonGraph.Relation.PLAYLIST_TO_SONG: 0.01,
        MelonGraph.Relation.PLAYLIST_TO_TAG: 0.025,
        MelonGraph.Relation.PLAYLIST_TO_WORD: 0.025,
        MelonGraph.Relation.SONG_TO_ALBUM: 0.015,
        MelonGraph.Relation.SONG_TO_ARTIST: 0.015,
        MelonGraph.Relation.SONG_TO_ARTIST_DETAILED_GENRE: 0.0005,
        MelonGraph.Relation.SONG_TO_ARTIST_GENRE: 0,
        MelonGraph.Relation.SONG_TO_DETAILED_GENRE: 0,
        MelonGraph.Relation.SONG_TO_GENRE: 0,
        MelonGraph.Relation.SONG_TO_MONTH: 0,
        MelonGraph.Relation.SONG_TO_PLAYLIST: 0.0075,
        MelonGraph.Relation.SONG_TO_YEAR: 0,
        MelonGraph.Relation.TAG_TO_PLAYLIST: 0.01,
        MelonGraph.Relation.TAG_TO_WORD: 0.01,
        MelonGraph.Relation.WORD_TO_ALBUM: 0,
        MelonGraph.Relation.WORD_TO_ARTIST: 0,
        MelonGraph.Relation.WORD_TO_PLAYLIST: 0.01,
        MelonGraph.Relation.WORD_TO_TAG: 0.001,
        MelonGraph.Relation.YEAR_TO_SONG: 0,
    },
    song_relation_weight=None,
    tag_relation_weight=None,
)

In [27]:
%%log_cell
%%time
recommender.read_and_predict(
    question_fname,
    result_fname,
    jobs,
)
evaluator.evaluate(answer_fname, result_fname, question_fname)

Loading question file...
Predicting...


100%|██████████| 48/48 [00:37<00:00,  1.27it/s]
100%|██████████| 48/48 [00:38<00:00,  1.25it/s]
100%|██████████| 48/48 [00:38<00:00,  1.25it/s]
100%|██████████| 48/48 [00:39<00:00,  1.23it/s]
100%|██████████| 48/48 [00:39<00:00,  1.22it/s]
100%|██████████| 48/48 [00:39<00:00,  1.20it/s]
100%|██████████| 48/48 [00:40<00:00,  1.19it/s]
100%|██████████| 48/48 [00:40<00:00,  1.19it/s]
100%|██████████| 48/48 [00:40<00:00,  1.19it/s]
100%|██████████| 48/48 [00:40<00:00,  1.18it/s]
100%|██████████| 48/48 [00:40<00:00,  1.17it/s]
100%|██████████| 48/48 [00:44<00:00,  1.07it/s]


Writing results...
=== Total score ===
Music nDCG: 0.283733
Tag nDCG: 0.488398
Score: 0.314433
=== SONG_TAG score ===
Music nDCG: 0.329807
Tag nDCG: 0.472016
Score: 0.351138
=== TAG_TITLE score ===
Music nDCG: 0.10212
Tag nDCG: 0.538602
Score: 0.167593
=== SONG_ONLY score ===
Music nDCG: 0.331227
Tag nDCG: 0.516787
Score: 0.359061
=== TITLE_ONLY score ===
Music nDCG: 0.091708
Tag nDCG: 0.33445
Score: 0.128119
CPU times: user 438 ms, sys: 997 ms, total: 1.43 s
Wall time: 46.3 s


## Debug

In [34]:
from utils import read_json
from utils import write_json
from evaluate import print_scores

In [29]:
# Debug on a small sample: keep only the first 10 questions, but load the full
# answer set for scoring.
questions = read_json(question_fname)[:10]
answers = read_json(answer_fname)

In [36]:
%%log_cell
%%time
results = recommender.predict(questions, jobs)

(total_music_ndcgs, total_tag_ndcgs, total_scores,
                case_music_ndcgs, case_tag_ndcgs, case_scores) = evaluator._eval(results, questions, answers)
print_scores(
    total_music_ndcgs, total_tag_ndcgs, total_scores,
    case_music_ndcgs, case_tag_ndcgs, case_scores,
)

0it [00:00, ?it/s]/1 [00:00<?, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 1/1 [00:00<00:00,  4.17it/s]
100%|██████████| 1/1 [00:00<00:00,  3.68it/s]
100%|██████████| 1/1 [00:00<00:00,  2.65it/s]
100%|██████████| 1/1 [00:00<00:00,  2.40it/s]
100%|██████████| 1/1 [00:00<00:00,  2.17it/s]
100%|██████████| 1/1 [00:00<00:00,  2.21it/s]
100%|██████████| 1/1 [00:00<00:00,  2.14it/s]
100%|██████████| 1/1 [00:00<00:00,  2.01it/s]
100%|██████████| 1/1 [00:00<00:00,  1.57it/s]
100%|██████████| 1/1 [00:00<00:00,  1.60it/s]


=== Total score ===
Music nDCG: 0.2808
Tag nDCG: 0.38219
Score: 0.296008
=== SONG_TAG score ===
Music nDCG: 0.423083
Tag nDCG: 0.444444
Score: 0.426287
=== TAG_TITLE score ===
Music nDCG: 0.187567
Tag nDCG: 0.213986
Score: 0.19153
=== SONG_ONLY score ===
Music nDCG: 0.270236
Tag nDCG: 0.254917
Score: 0.267938
=== TITLE_ONLY score ===
Music nDCG: 0.0
Tag nDCG: 1.0
Score: 0.15
