# Grape Launcher

In [None]:
!pip install -r requirements.txt

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from utils.jupyter_utils import log_cell
from utils.jupyter_utils import clear_logs

In [3]:
# NOTE(review): presumably discards logs left behind by earlier %%log_cell runs
# so this session starts clean — confirm in utils.jupyter_utils.
clear_logs()

In [4]:
from grape_recommender import GrapeRecommender
from evaluate import ArenaEvaluator
from lib.grape import MelonGraph

In [5]:
# All paths are relative to the working directory of the kernel.

# Passed to the predict calls further down.
# NOTE(review): presumably the number of parallel workers — confirm in GrapeRecommender.
jobs = 12

# Inputs handed to recommender.read_and_fit().
train_fname = './arena_data/orig/train.json'
genre_fname = './res/genre_gn_all.json'
song_meta_fname = './res/song_meta.json'

# Validation split: questions to predict for, and the answers used by the evaluator.
answer_fname = './arena_data/answers/val.json'
question_fname = './arena_data/questions/val.json'

# Handed to read_and_predict() and afterwards to the evaluator.
result_fname = './arena_data/results/results.json'

In [6]:
# Build the model and the scorer used by every cell below
# (the recommender is constructed first, then the evaluator).
recommender, evaluator = GrapeRecommender(), ArenaEvaluator()

In [7]:
%%log_cell
%%time
recommender.read_and_fit(
    song_meta_fname,
    genre_fname,
    train_fname,
)

Loading song meta...
Loading genre...
Loading train file...


Parsing data:   1%|          | 5349/822738 [00:00<00:15, 53488.14it/s]

Fitting train data...


Parsing data: 100%|##########| 822738/822738 [00:39<00:00, 21027.37it/s]
Creating graph: 100%|##########| 47119216/47119216 [02:06<00:00, 371919.06it/s] 
Caching graph: 100%|##########| 34405782/34405782 [00:47<00:00, 724508.60it/s] 


CPU times: user 3min 41s, sys: 6.23 s, total: 3min 47s
Wall time: 3min 46s


In [8]:
# Prediction settings. relation_weight holds one weight per MelonGraph.Relation member.
# NOTE(review): a weight of 0 presumably switches that relation off — confirm in lib.grape.
recommender.set_params(
    max_songs=100,
    max_tags=10,
    max_depth=8,
    relation_weight={
        MelonGraph.Relation.ALBUM_TO_SONG: 0.01,
        MelonGraph.Relation.ALBUM_TO_WORD: 0,
        MelonGraph.Relation.ARTIST_TO_SONG: 0.01,
        MelonGraph.Relation.ARTIST_TO_WORD: 0,
        MelonGraph.Relation.ARTIST_GENRE_TO_SONG: 0.01,
        MelonGraph.Relation.GENRE_TO_SONG: 0.01,
        MelonGraph.Relation.MONTH_TO_SONG: 0,
        MelonGraph.Relation.PLAYLIST_TO_SONG: 0.01,
        MelonGraph.Relation.PLAYLIST_TO_TAG: 0.025,
        MelonGraph.Relation.PLAYLIST_TO_WORD: 0.025,
        MelonGraph.Relation.SONG_TO_ALBUM: 0.015,
        MelonGraph.Relation.SONG_TO_ARTIST: 0.015,
        MelonGraph.Relation.SONG_TO_ARTIST_DETAILED_GENRE: 0.0005,
        MelonGraph.Relation.SONG_TO_ARTIST_GENRE: 0,
        MelonGraph.Relation.SONG_TO_DETAILED_GENRE: 0,
        MelonGraph.Relation.SONG_TO_GENRE: 0,
        MelonGraph.Relation.SONG_TO_MONTH: 0,
        MelonGraph.Relation.SONG_TO_PLAYLIST: 0.0075,
        MelonGraph.Relation.SONG_TO_YEAR: 0,
        MelonGraph.Relation.TAG_TO_PLAYLIST: 0.01,
        MelonGraph.Relation.TAG_TO_WORD: 0.01,
        MelonGraph.Relation.WORD_TO_ALBUM: 0,
        MelonGraph.Relation.WORD_TO_ARTIST: 0,
        MelonGraph.Relation.WORD_TO_PLAYLIST: 0.01,
        MelonGraph.Relation.WORD_TO_TAG: 0.001,
        MelonGraph.Relation.YEAR_TO_SONG: 0,
    },
    # No separate song/tag overrides are supplied here.
    song_relation_weight=None,
    tag_relation_weight=None,
)

In [9]:
%%log_cell
%%time
recommender.read_and_predict(
    question_fname,
    result_fname,
    jobs,
)
evaluator.evaluate(answer_fname, result_fname, question_fname)

Loading question file...
Predicting...


100%|##########| 48/48 [00:27<00:00,  1.71it/s]
100%|##########| 48/48 [00:28<00:00,  1.69it/s]
100%|##########| 48/48 [00:28<00:00,  1.68it/s]
100%|##########| 48/48 [00:37<00:00,  1.26it/s]
100%|##########| 48/48 [00:37<00:00,  1.26it/s]
100%|##########| 48/48 [00:38<00:00,  1.26it/s]
100%|##########| 48/48 [00:38<00:00,  1.25it/s]
100%|##########| 48/48 [00:38<00:00,  1.24it/s]
100%|##########| 48/48 [00:38<00:00,  1.23it/s]
100%|##########| 48/48 [00:39<00:00,  1.23it/s]
100%|##########| 48/48 [00:39<00:00,  1.20it/s]
100%|##########| 48/48 [00:39<00:00,  1.20it/s]


Writing results...
=== Total score ===
Music nDCG: 0.283733
Tag nDCG: 0.488398
Score: 0.314433
=== SONG_TAG score ===
Music nDCG: 0.329807
Tag nDCG: 0.472016
Score: 0.351138
=== TAG_TITLE score ===
Music nDCG: 0.10212
Tag nDCG: 0.538602
Score: 0.167593
=== SONG_ONLY score ===
Music nDCG: 0.331227
Tag nDCG: 0.516787
Score: 0.359061
=== TITLE_ONLY score ===
Music nDCG: 0.091708
Tag nDCG: 0.33445
Score: 0.128119
CPU times: user 652 ms, sys: 954 ms, total: 1.61 s
Wall time: 41.5 s


## Debug

In [None]:
from utils import read_json
from utils import write_json
from evaluate import print_scores

In [None]:
# Debug on a small sample: only the first 10 questions are kept.
questions = read_json(question_fname)[:10]
# The answers are loaded in full; they are not trimmed to match the sample.
answers = read_json(answer_fname)

In [None]:
%%log_cell
%%time
results = recommender.predict(questions, jobs)

(total_music_ndcgs, total_tag_ndcgs, total_scores,
                case_music_ndcgs, case_tag_ndcgs, case_scores) = evaluator._eval(results, questions, answers)
print_scores(
    total_music_ndcgs, total_tag_ndcgs, total_scores,
    case_music_ndcgs, case_tag_ndcgs, case_scores,
)