# Grape Launcher

In [None]:
!pip install -r requirements.txt

In [1]:
# Reload imported modules automatically before each cell runs (mode 2 = all
# modules), so edits to the project's .py files take effect without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
from utils.jupyter_utils import log_cell
from utils.jupyter_utils import clear_logs

In [3]:
# Reset the logs used by the `%%log_cell` cells below.
# NOTE(review): presumably deletes previously captured cell output - confirm in utils.jupyter_utils.
clear_logs()

In [4]:
from grape_recommender import GrapeRecommender
from evaluate import ArenaEvaluator
from lib.grape import MelonGraph

In [5]:
# --- Inputs used to fit the recommender --------------------------------------
song_meta_fname = "./res/song_meta.json"
genre_fname = "./res/genre_gn_all.json"
train_fname = "./arena_data/orig/train.json"

# --- Validation split: questions to answer and their ground truth -------------
question_fname = "./arena_data/questions/val.json"
answer_fname = "./arena_data/answers/val.json"

# --- Where file-based predictions are written ---------------------------------
result_fname = "./arena_data/results/results.json"

# Job count passed to `read_and_predict` in the launcher cell.
jobs = 8

In [6]:
# A single recommender and a single evaluator are shared by every later cell
# (fitting, parameter changes, prediction, scoring and tuning all use these two objects).
recommender = GrapeRecommender()
evaluator = ArenaEvaluator()

In [7]:
%%log_cell
%%time
# Read the song metadata, genre file and training playlists, then fit the
# recommender on them. This is the slow step (minutes), so run it once per kernel.
recommender.read_and_fit(song_meta_fname, genre_fname, train_fname)

Loading song meta...
Loading genre...
Loading train file...


Parsing data:   1%|          | 5366/822738 [00:00<00:15, 53657.11it/s]

Fitting train data...


Parsing data: 100%|##########| 822738/822738 [00:38<00:00, 21593.61it/s]
Creating graph: 100%|##########| 47119216/47119216 [01:59<00:00, 393224.98it/s] 
Caching graph: 100%|##########| 34405782/34405782 [00:45<00:00, 758462.59it/s] 


CPU times: user 3min 31s, sys: 5.26 s, total: 3min 37s
Wall time: 3min 35s


In [64]:
# Configuration with one relation-weight table (the song- and tag-specific
# tables are left unset) and a search depth of 6.
# NOTE(review): presumably `relation_weight` is then used for both song and tag
# recommendation - confirm in GrapeRecommender.set_params.
recommender.set_params(
    max_songs=100,
    max_tags=10,
    max_depth=6,
    relation_weight={
        MelonGraph.Relation.ALBUM_TO_SONG: 0.01,
        MelonGraph.Relation.ALBUM_TO_WORD: 0,
        MelonGraph.Relation.ARTIST_TO_SONG: 0.01,
        MelonGraph.Relation.ARTIST_TO_WORD: 0,
        MelonGraph.Relation.ARTIST_GENRE_TO_SONG: 0.01,
        MelonGraph.Relation.GENRE_TO_SONG: 0.01,
        MelonGraph.Relation.MONTH_TO_SONG: 0,
        MelonGraph.Relation.PLAYLIST_TO_SONG: 0.01,
        MelonGraph.Relation.PLAYLIST_TO_TAG: 0.025,
        MelonGraph.Relation.PLAYLIST_TO_WORD: 0.025,
        MelonGraph.Relation.SONG_TO_ALBUM: 0.015,
        MelonGraph.Relation.SONG_TO_ARTIST: 0.015,
        MelonGraph.Relation.SONG_TO_ARTIST_DETAILED_GENRE: 0.0005,
        MelonGraph.Relation.SONG_TO_ARTIST_GENRE: 0,
        MelonGraph.Relation.SONG_TO_DETAILED_GENRE: 0,
        MelonGraph.Relation.SONG_TO_GENRE: 0,
        MelonGraph.Relation.SONG_TO_MONTH: 0,
        MelonGraph.Relation.SONG_TO_PLAYLIST: 0.0075,
        MelonGraph.Relation.SONG_TO_YEAR: 0,
        MelonGraph.Relation.TAG_TO_PLAYLIST: 0.01,
        MelonGraph.Relation.TAG_TO_WORD: 0.01,
        MelonGraph.Relation.WORD_TO_ALBUM: 0,
        MelonGraph.Relation.WORD_TO_ARTIST: 0,
        MelonGraph.Relation.WORD_TO_PLAYLIST: 0.01,
        MelonGraph.Relation.WORD_TO_TAG: 0.001,
        MelonGraph.Relation.YEAR_TO_SONG: 0,
    },
    song_relation_weight=None,
    tag_relation_weight=None,
)

In [10]:
# Alternative configuration: no shared table; songs and tags each get their own
# relation-weight table, with a search depth of 8.
# Running this cell replaces whatever the previous `set_params` cell configured.
recommender.set_params(
    max_songs=100,
    max_tags=10,
    max_depth=8,
    relation_weight=None,
    song_relation_weight={
        MelonGraph.Relation.ALBUM_TO_SONG: 0.07414642795032474,
        MelonGraph.Relation.ALBUM_TO_WORD: 0.0061584168335407094,
        MelonGraph.Relation.ARTIST_TO_SONG: 0.005007988938451341,
        MelonGraph.Relation.ARTIST_TO_WORD: 0.13369050243492486,
        MelonGraph.Relation.ARTIST_GENRE_TO_SONG: 0.039747880057376936,
        MelonGraph.Relation.GENRE_TO_SONG: 0.04041364503484934,
        MelonGraph.Relation.MONTH_TO_SONG: 0.10327547008450252,
        MelonGraph.Relation.PLAYLIST_TO_SONG: 0.023834754515476037,
        MelonGraph.Relation.PLAYLIST_TO_TAG: 0.003978224154889666,
        MelonGraph.Relation.PLAYLIST_TO_WORD: 0.15054393789224635,
        MelonGraph.Relation.SONG_TO_ALBUM: 0.032202483574836266,
        MelonGraph.Relation.SONG_TO_ARTIST: 0.05170572396717645,
        MelonGraph.Relation.SONG_TO_ARTIST_DETAILED_GENRE: 0.002382416247506785,
        MelonGraph.Relation.SONG_TO_ARTIST_GENRE: 0.06990916834467857,
        MelonGraph.Relation.SONG_TO_DETAILED_GENRE: 0.0,
        MelonGraph.Relation.SONG_TO_GENRE: 0.0,
        MelonGraph.Relation.SONG_TO_MONTH: 0.0,
        MelonGraph.Relation.SONG_TO_PLAYLIST: 0.0391536425494617,
        MelonGraph.Relation.SONG_TO_YEAR: 0.16006897848218085,
        MelonGraph.Relation.TAG_TO_PLAYLIST: 0.02846355124840641,
        MelonGraph.Relation.TAG_TO_WORD: 0.06997925941645167,
        MelonGraph.Relation.WORD_TO_ALBUM: 0.034048230407077586,
        MelonGraph.Relation.WORD_TO_ARTIST: 0.10831338685096968,
        MelonGraph.Relation.WORD_TO_PLAYLIST: 0.045391029624364594,
        MelonGraph.Relation.WORD_TO_TAG: 0.010800350699808562,
        MelonGraph.Relation.YEAR_TO_SONG: 0.0030303652258104746,
    },
    tag_relation_weight={
        MelonGraph.Relation.ALBUM_TO_SONG: 0.00020466860713289187,
        MelonGraph.Relation.ALBUM_TO_WORD: 0.08434854530504439,
        MelonGraph.Relation.ARTIST_TO_SONG: 0.003972435623688278,
        MelonGraph.Relation.ARTIST_TO_WORD: 0.04684892666384003,
        MelonGraph.Relation.ARTIST_GENRE_TO_SONG: 0.0015760062945061823,
        MelonGraph.Relation.GENRE_TO_SONG: 0.0014284375224107395,
        MelonGraph.Relation.MONTH_TO_SONG: 0.000867680601042818,
        MelonGraph.Relation.PLAYLIST_TO_SONG: 0.004263433370580513,
        MelonGraph.Relation.PLAYLIST_TO_TAG: 0.016805868120610315,
        MelonGraph.Relation.PLAYLIST_TO_WORD: 0.0337555699134987,
        MelonGraph.Relation.SONG_TO_ALBUM: 0.028811340591184224,
        MelonGraph.Relation.SONG_TO_ARTIST: 0.011916070154272334,
        MelonGraph.Relation.SONG_TO_ARTIST_DETAILED_GENRE: 0.01463042200734143,
        MelonGraph.Relation.SONG_TO_ARTIST_GENRE: 0.10788626854824333,
        MelonGraph.Relation.SONG_TO_DETAILED_GENRE: 0.0,
        MelonGraph.Relation.SONG_TO_GENRE: 0.0,
        MelonGraph.Relation.SONG_TO_MONTH: 0.0,
        MelonGraph.Relation.SONG_TO_PLAYLIST: 0.06694190614146299,
        MelonGraph.Relation.SONG_TO_YEAR: 0.018167639927004053,
        MelonGraph.Relation.TAG_TO_PLAYLIST: 0.07910617846425558,
        MelonGraph.Relation.TAG_TO_WORD: 0.08036327272815523,
        MelonGraph.Relation.WORD_TO_ALBUM: 0.003802920436354447,
        MelonGraph.Relation.WORD_TO_ARTIST: 0.006862580053298147,
        MelonGraph.Relation.WORD_TO_PLAYLIST: 0.03620983925425599,
        MelonGraph.Relation.WORD_TO_TAG: 0.001665218252183399,
        MelonGraph.Relation.YEAR_TO_SONG: 0.0042068917633064215,
    },
)

In [65]:
%%log_cell
%%time
# Answer the validation questions with the current parameters and write the
# predictions to `result_fname`, then score that file against the answers.
recommender.read_and_predict(question_fname, result_fname, jobs)
evaluator.evaluate(answer_fname, result_fname, question_fname)

Loading question file...
Predicting...


100%|##########| 72/72 [00:23<00:00,  3.06it/s]
100%|##########| 72/72 [00:25<00:00,  2.88it/s]
100%|##########| 72/72 [00:25<00:00,  2.85it/s]
100%|##########| 72/72 [00:31<00:00,  2.29it/s]
100%|##########| 72/72 [00:32<00:00,  2.24it/s]
100%|##########| 72/72 [00:32<00:00,  2.23it/s]
100%|##########| 72/72 [00:32<00:00,  2.22it/s]
100%|##########| 72/72 [00:32<00:00,  2.20it/s]


Writing results...
=== Total score ===
Music nDCG: 0.281356
Tag nDCG: 0.487142
Score: 0.312224
=== SONG_TAG score ===
Music nDCG: 0.328844
Tag nDCG: 0.472607
Score: 0.350409
=== TAG_TITLE score ===
Music nDCG: 0.0994872
Tag nDCG: 0.530705
Score: 0.16417
=== SONG_ONLY score ===
Music nDCG: 0.330862
Tag nDCG: 0.518553
Score: 0.359015
=== TITLE_ONLY score ===
Music nDCG: 0.0723551
Tag nDCG: 0.318497
Score: 0.109276
CPU times: user 371 ms, sys: 876 ms, total: 1.25 s
Wall time: 34 s


## Debug

In [8]:
from utils import read_json
from utils import write_json
from evaluate import print_scores

In [66]:
# Load the complete validation set (no subsampling) for in-memory experiments.
questions = read_json(question_fname)
answers = read_json(answer_fname)

In [12]:
# Subsampled variant: keep every third question, starting at index 1.
# The answers are still loaded in full.
questions = read_json(question_fname)[1::3]
answers = read_json(answer_fname)

In [10]:
from lib.grape import MelonGraph


def convert_params(
        max_depth,
        c,
        ALBUM,
        ARTIST,
        ARTIST_GENRE,
        ARTIST_DETAILED_GENRE,
        DETAILED_GENRE,
        GENRE,
        MONTH,
        PLAYLIST,
        SONG,
        TAG,
        WORD,
        YEAR,
        ALBUM_TO_SONG,
        ALBUM_TO_WORD,
        ARTIST_TO_SONG,
        ARTIST_TO_WORD,
        ARTIST_GENRE_TO_SONG,
        GENRE_TO_SONG,
        MONTH_TO_SONG,
        PLAYLIST_TO_SONG,
        PLAYLIST_TO_TAG,
        PLAYLIST_TO_WORD,
        SONG_TO_ALBUM,
        SONG_TO_ARTIST,
        SONG_TO_ARTIST_DETAILED_GENRE,
        SONG_TO_ARTIST_GENRE,
        SONG_TO_DETAILED_GENRE,
        SONG_TO_GENRE,
        SONG_TO_MONTH,
        SONG_TO_PLAYLIST,
        SONG_TO_YEAR,
        TAG_TO_PLAYLIST,
        TAG_TO_WORD,
        WORD_TO_ALBUM,
        WORD_TO_ARTIST,
        WORD_TO_PLAYLIST,
        WORD_TO_TAG,
        YEAR_TO_SONG,
    ):
    """Turn the flat hyper-parameter set into keyword arguments for ``set_params``.

    Every relation weight is the product of three factors: the relation's own
    value (e.g. ``SONG_TO_ALBUM``), the global factor ``c``, and the value of
    the node type the relation points to (e.g. ``ALBUM``).

    Args:
        max_depth: Search depth; truncated to an integer with ``int()``.
        c: Global factor applied to every relation weight.
        ALBUM ... YEAR: One factor per target node type.
        ALBUM_TO_SONG ... YEAR_TO_SONG: One factor per relation.

    Returns:
        A dict with ``max_songs`` (100), ``max_tags`` (10), ``max_depth``,
        ``relation_weight`` (keyed by ``MelonGraph.Relation`` members) and
        ``song_relation_weight`` / ``tag_relation_weight`` set to ``None``.
    """
    depth = int(max_depth)
    rel = MelonGraph.Relation

    def scaled(relation_factor, target_factor):
        # Same multiplication order as a plain `relation * c * target` expression.
        return relation_factor * c * target_factor

    relation_weight = {
        rel.ALBUM_TO_SONG: scaled(ALBUM_TO_SONG, SONG),
        rel.ALBUM_TO_WORD: scaled(ALBUM_TO_WORD, WORD),
        rel.ARTIST_TO_SONG: scaled(ARTIST_TO_SONG, SONG),
        rel.ARTIST_TO_WORD: scaled(ARTIST_TO_WORD, WORD),
        rel.ARTIST_GENRE_TO_SONG: scaled(ARTIST_GENRE_TO_SONG, SONG),
        rel.GENRE_TO_SONG: scaled(GENRE_TO_SONG, SONG),
        rel.MONTH_TO_SONG: scaled(MONTH_TO_SONG, SONG),
        rel.PLAYLIST_TO_SONG: scaled(PLAYLIST_TO_SONG, SONG),
        rel.PLAYLIST_TO_TAG: scaled(PLAYLIST_TO_TAG, TAG),
        rel.PLAYLIST_TO_WORD: scaled(PLAYLIST_TO_WORD, WORD),
        rel.SONG_TO_ALBUM: scaled(SONG_TO_ALBUM, ALBUM),
        rel.SONG_TO_ARTIST: scaled(SONG_TO_ARTIST, ARTIST),
        rel.SONG_TO_ARTIST_DETAILED_GENRE: scaled(SONG_TO_ARTIST_DETAILED_GENRE, ARTIST_DETAILED_GENRE),
        rel.SONG_TO_ARTIST_GENRE: scaled(SONG_TO_ARTIST_GENRE, ARTIST_GENRE),
        rel.SONG_TO_DETAILED_GENRE: scaled(SONG_TO_DETAILED_GENRE, DETAILED_GENRE),
        rel.SONG_TO_GENRE: scaled(SONG_TO_GENRE, GENRE),
        rel.SONG_TO_MONTH: scaled(SONG_TO_MONTH, MONTH),
        rel.SONG_TO_PLAYLIST: scaled(SONG_TO_PLAYLIST, PLAYLIST),
        rel.SONG_TO_YEAR: scaled(SONG_TO_YEAR, YEAR),
        rel.TAG_TO_PLAYLIST: scaled(TAG_TO_PLAYLIST, PLAYLIST),
        rel.TAG_TO_WORD: scaled(TAG_TO_WORD, WORD),
        rel.WORD_TO_ALBUM: scaled(WORD_TO_ALBUM, ALBUM),
        rel.WORD_TO_ARTIST: scaled(WORD_TO_ARTIST, ARTIST),
        rel.WORD_TO_PLAYLIST: scaled(WORD_TO_PLAYLIST, PLAYLIST),
        rel.WORD_TO_TAG: scaled(WORD_TO_TAG, TAG),
        rel.YEAR_TO_SONG: scaled(YEAR_TO_SONG, SONG),
    }

    return {
        'max_songs': 100,
        'max_tags': 10,
        'max_depth': depth,
        'relation_weight': relation_weight,
        'song_relation_weight': None,
        'tag_relation_weight': None,
    }

In [11]:
# Hand-picked hyper-parameters in the flat form accepted by `convert_params`.
params = dict(
    max_depth=4.0,
    c=0.2,
    # Factors per target node type.
    ALBUM=0.2,
    ARTIST=0.5,
    ARTIST_GENRE=0.5,
    ARTIST_DETAILED_GENRE=0.1,
    DETAILED_GENRE=0.2,
    GENRE=0.3,
    MONTH=0.0,
    PLAYLIST=0.3,
    SONG=0.5,
    TAG=0.2,
    WORD=0.8,
    YEAR=0.9,
    # Factors per relation.
    ALBUM_TO_SONG=0.7,
    ALBUM_TO_WORD=0.03,
    ARTIST_TO_SONG=0.04,
    ARTIST_TO_WORD=0.7,
    ARTIST_GENRE_TO_SONG=0.35,
    GENRE_TO_SONG=0.35,
    MONTH_TO_SONG=0,
    PLAYLIST_TO_SONG=0.2,
    PLAYLIST_TO_TAG=0.1,
    PLAYLIST_TO_WORD=0.8,
    SONG_TO_ALBUM=0.7,
    SONG_TO_ARTIST=0.45,
    SONG_TO_ARTIST_DETAILED_GENRE=0.08,
    SONG_TO_ARTIST_GENRE=0.55,
    SONG_TO_DETAILED_GENRE=0,
    SONG_TO_GENRE=0,
    SONG_TO_MONTH=0.65,
    SONG_TO_PLAYLIST=0.6,
    SONG_TO_YEAR=0.8,
    TAG_TO_PLAYLIST=0.4,
    TAG_TO_WORD=0.35,
    WORD_TO_ALBUM=0.7,
    WORD_TO_ARTIST=0.9,
    WORD_TO_PLAYLIST=0.6,
    WORD_TO_TAG=0.3,
    YEAR_TO_SONG=0.03,
)

# Expand the flat values into `set_params` keyword arguments and apply them.
recommender.set_params(**convert_params(**params))

In [67]:
# Debug configuration: one relation-weight table, search depth 6.
# Running this cell replaces the parameters set by any earlier `set_params` call.
recommender.set_params(
    max_songs=100,
    max_tags=10,
    max_depth=6,
    relation_weight={
        MelonGraph.Relation.ALBUM_TO_SONG: 0.01,
        MelonGraph.Relation.ALBUM_TO_WORD: 0,
        MelonGraph.Relation.ARTIST_TO_SONG: 0.009,
        MelonGraph.Relation.ARTIST_TO_WORD: 0.01,
        MelonGraph.Relation.ARTIST_GENRE_TO_SONG: 0.01,
        MelonGraph.Relation.GENRE_TO_SONG: 0.01,
        MelonGraph.Relation.MONTH_TO_SONG: 0,
        MelonGraph.Relation.PLAYLIST_TO_SONG: 0.01,
        MelonGraph.Relation.PLAYLIST_TO_TAG: 0.025,
        MelonGraph.Relation.PLAYLIST_TO_WORD: 0.025,
        MelonGraph.Relation.SONG_TO_ALBUM: 0.015,
        MelonGraph.Relation.SONG_TO_ARTIST: 0.02,
        MelonGraph.Relation.SONG_TO_ARTIST_DETAILED_GENRE: 0.0005,
        MelonGraph.Relation.SONG_TO_ARTIST_GENRE: 0,
        MelonGraph.Relation.SONG_TO_DETAILED_GENRE: 0,
        MelonGraph.Relation.SONG_TO_GENRE: 0,
        MelonGraph.Relation.SONG_TO_MONTH: 0,
        MelonGraph.Relation.SONG_TO_PLAYLIST: 0.0075,
        MelonGraph.Relation.SONG_TO_YEAR: 0,
        MelonGraph.Relation.TAG_TO_PLAYLIST: 0.01,
        MelonGraph.Relation.TAG_TO_WORD: 0.01,
        MelonGraph.Relation.WORD_TO_ALBUM: 0,
        MelonGraph.Relation.WORD_TO_ARTIST: 0.01,
        MelonGraph.Relation.WORD_TO_PLAYLIST: 0.01,
        MelonGraph.Relation.WORD_TO_TAG: 0.001,
        MelonGraph.Relation.YEAR_TO_SONG: 0,
    },
    song_relation_weight=None,
    tag_relation_weight=None,
)

In [69]:
%%log_cell
%%time
# Predict for the in-memory questions (12 parallel jobs) without touching the result file.
results = recommender.predict(questions, jobs=12)

# `_eval` yields three per-question lists (music nDCG, tag nDCG, combined score)
# followed by the same three quantities as dicts of lists keyed by question case.
(
    total_music_ndcgs,
    total_tag_ndcgs,
    total_scores,
    case_music_ndcgs,
    case_tag_ndcgs,
    case_scores,
) = evaluator._eval(results, questions, answers)

print_scores(total_music_ndcgs, total_tag_ndcgs, total_scores,
             case_music_ndcgs, case_tag_ndcgs, case_scores)

100%|##########| 48/48 [00:22<00:00,  2.14it/s]
100%|##########| 48/48 [00:23<00:00,  2.04it/s]
100%|##########| 48/48 [00:24<00:00,  2.00it/s]
100%|##########| 48/48 [00:24<00:00,  1.97it/s]
100%|##########| 48/48 [00:24<00:00,  1.94it/s]
100%|##########| 48/48 [00:24<00:00,  1.92it/s]
100%|##########| 48/48 [00:25<00:00,  1.92it/s]
100%|##########| 48/48 [00:25<00:00,  1.91it/s]
100%|##########| 48/48 [00:25<00:00,  1.89it/s]
100%|##########| 48/48 [00:25<00:00,  1.89it/s]
100%|##########| 48/48 [00:25<00:00,  1.88it/s]
100%|##########| 48/48 [00:25<00:00,  1.86it/s]


=== Total score ===
Music nDCG: 0.284201
Tag nDCG: 0.480523
Score: 0.31365
=== SONG_TAG score ===
Music nDCG: 0.331252
Tag nDCG: 0.462117
Score: 0.350882
=== TAG_TITLE score ===
Music nDCG: 0.0996514
Tag nDCG: 0.530705
Score: 0.164309
=== SONG_ONLY score ===
Music nDCG: 0.33242
Tag nDCG: 0.513886
Score: 0.35964
=== TITLE_ONLY score ===
Music nDCG: 0.0882363
Tag nDCG: 0.30912
Score: 0.121369
CPU times: user 420 ms, sys: 748 ms, total: 1.17 s
Wall time: 27.2 s


In [125]:
# List the 50 YEAR nodes with the highest in-degree, most connected first.
nodes = sorted(
    recommender.grape._graph.get_all_nodes(MelonGraph.NodeType.YEAR),
    key=lambda node: node.data['indegree'],
    reverse=True,
)
for n in nodes[:50]:
    print(n.id, n.data['indegree'])
# Alternative print that also shows the node's 'name' entry:
#     print(n.id, n.data['name'], n.data['indegree'])

2015 65314
2017 63008
2018 59489
2016 58016
2014 54305
2019 52119
2013 43404
2012 33315
2011 28744
2010 25514
2009 23436
2008 23280
2007 21019
2006 18171
2005 16040
2004 13564
2003 13304
2020 12426
2002 10655
2001 9576
2000 7887
1999 6719
1998 5404
1997 5335
1996 4105
1995 3594
1994 3191
1993 2532
1992 2496
1991 1941
1990 1836
1989 1587
1988 1441
1987 1193
1986 804
1985 670
1984 660
1983 556
1978 528
1979 486
1973 479
1982 476
1980 466
1981 453
1976 444
1972 444
1977 428
1974 404
1970 382
1971 336


In [91]:
from evaluate import get_scores

def get_error(s1, s2):
    """Return the sum of squared differences between two nested score dicts.

    Both arguments map a group name to a dict of metric values. Only the
    groups and metrics present in ``s1`` are visited, so each of them must
    also exist in ``s2``; extra entries in ``s2`` are ignored.
    """
    total = 0
    for group, metrics in s1.items():
        for metric, value in metrics.items():
            total += (value - s2[group][metric]) ** 2
    return total

# Reference scores computed from all evaluated questions.
s1 = get_scores(total_music_ndcgs, total_tag_ndcgs, total_scores, case_music_ndcgs, case_tag_ndcgs, case_scores)

# For every stride (`skip`) from 1 to 15 and every possible start (`offset`),
# score the strided subsample and record how far it is from the reference.
# NOTE(review): the per-case lists are strided independently of the total
# lists, so the case subsample is not necessarily the same set of questions
# as the total subsample - confirm this is intended.
errors = []
for skip in range(1, 16):
    for offset in range(skip):
        sub_total_music_ndcgs = total_music_ndcgs[offset::skip]
        sub_total_tag_ndcgs = total_tag_ndcgs[offset::skip]
        sub_total_scores = total_scores[offset::skip]
        sub_case_music_ndcgs = {k: v[offset::skip] for k, v in case_music_ndcgs.items()}
        sub_case_tag_ndcgs = {k: v[offset::skip] for k, v in case_tag_ndcgs.items()}
        sub_case_scores = {k: v[offset::skip] for k, v in case_scores.items()}

        s2 = get_scores(sub_total_music_ndcgs, sub_total_tag_ndcgs, sub_total_scores, sub_case_music_ndcgs, sub_case_tag_ndcgs, sub_case_scores)
        errors.append(
            {
                'skip': skip,
                'offset': offset,
                'error': get_error(s1, s2)
            }
        )

# Smallest squared error first: the subsamples that best mimic the full set.
errors = sorted(errors, key=lambda x: x['error'])
for error in errors:
    print(error)

{'skip': 1, 'offset': 0, 'error': 0.0}
{'skip': 2, 'offset': 1, 'error': 0.3101298821676581}
{'skip': 2, 'offset': 0, 'error': 0.31012988216765824}
{'skip': 3, 'offset': 1, 'error': 0.3269414083787091}
{'skip': 4, 'offset': 1, 'error': 0.36726286616567283}
{'skip': 4, 'offset': 2, 'error': 0.3856912951138309}
{'skip': 3, 'offset': 2, 'error': 0.4312515931270432}
{'skip': 3, 'offset': 0, 'error': 0.45703186777387794}
{'skip': 12, 'offset': 3, 'error': 0.4579794290553558}
{'skip': 4, 'offset': 3, 'error': 0.46487362968569634}
{'skip': 6, 'offset': 0, 'error': 0.49701107512450987}
{'skip': 7, 'offset': 2, 'error': 0.5043860873979316}
{'skip': 8, 'offset': 2, 'error': 0.5102255862425455}
{'skip': 6, 'offset': 2, 'error': 0.5115587459869972}
{'skip': 5, 'offset': 4, 'error': 0.5437370865258001}
{'skip': 8, 'offset': 5, 'error': 0.5507290780237842}
{'skip': 5, 'offset': 3, 'error': 0.551258481508675}
{'skip': 7, 'offset': 4, 'error': 0.565307559797619}
{'skip': 7, 'offset': 5, 'error': 0.579

In [82]:
# Print the overall and per-case scores for the evaluation results currently in memory.
print_scores(total_music_ndcgs, total_tag_ndcgs, total_scores, case_music_ndcgs, case_tag_ndcgs, case_scores)

=== Total score ===
Music nDCG: 0.287772
Tag nDCG: 0.441057
Score: 0.310765
=== SONG_TAG score ===
Music nDCG: 0.338858
Tag nDCG: 0.435439
Score: 0.353345
=== TAG_TITLE score ===
Music nDCG: 0.10012
Tag nDCG: 0.358257
Score: 0.13884
=== SONG_ONLY score ===
Music nDCG: 0.328356
Tag nDCG: 0.501433
Score: 0.354318
=== TITLE_ONLY score ===
Music nDCG: 0.119093
Tag nDCG: 0.267445
Score: 0.141346


In [93]:
# Score one fixed strided subsample: every `skip`-th entry, starting at `offset`.
skip = 3
offset = 1
stride = slice(offset, None, skip)  # equivalent to indexing with [offset::skip]

sub_total_music_ndcgs = total_music_ndcgs[stride]
sub_total_tag_ndcgs = total_tag_ndcgs[stride]
sub_total_scores = total_scores[stride]
sub_case_music_ndcgs = {case: values[stride] for case, values in case_music_ndcgs.items()}
sub_case_tag_ndcgs = {case: values[stride] for case, values in case_tag_ndcgs.items()}
sub_case_scores = {case: values[stride] for case, values in case_scores.items()}

print_scores(
    sub_total_music_ndcgs, sub_total_tag_ndcgs, sub_total_scores,
    sub_case_music_ndcgs, sub_case_tag_ndcgs, sub_case_scores,
)

=== Total score ===
Music nDCG: 0.284386
Tag nDCG: 0.447137
Score: 0.308799
=== SONG_TAG score ===
Music nDCG: 0.289699
Tag nDCG: 0.426068
Score: 0.310154
=== TAG_TITLE score ===
Music nDCG: 0.0718048
Tag nDCG: 0.360906
Score: 0.11517
=== SONG_ONLY score ===
Music nDCG: 0.369304
Tag nDCG: 0.518822
Score: 0.391732
=== TITLE_ONLY score ===
Music nDCG: 0.137737
Tag nDCG: 0.292585
Score: 0.160964


In [70]:
# Show the reference scores (`s1` is built with `get_scores` from the full result lists).
s1

{'total': {'music_ndcg': 0.28777245622769804,
  'tag_ndcg': 0.4410570428158009,
  'score': 0.31076514421591345},
 'SONG_TAG': {'music_ndcg': 0.33885801875183746,
  'tag_ndcg': 0.4354390131962043,
  'score': 0.35334516791849246},
 'TAG_TITLE': {'music_ndcg': 0.10011950367547963,
  'tag_ndcg': 0.35825722840377444,
  'score': 0.13884016238472385},
 'SONG_ONLY': {'music_ndcg': 0.32835633711365186,
  'tag_ndcg': 0.5014327505454684,
  'score': 0.3543177991284243},
 'TITLE_ONLY': {'music_ndcg': 0.11909343370207164,
  'tag_ndcg': 0.26744473349490455,
  'score': 0.14134612867099658}}

In [67]:
# Show `s2`, the scores of the most recently evaluated subsample in the sweep cell.
s2

{'total': {'music_ndcg': 0.311221844548289,
  'tag_ndcg': 0.39524961742084913,
  'score': 0.32382601047917303},
 'SONG_TAG': {'music_ndcg': 0.41240626367524985,
  'tag_ndcg': 0.46184365851841164,
  'score': 0.41982187290172407},
 'TAG_TITLE': {'music_ndcg': 0.09951321983088891,
  'tag_ndcg': 0.22451540879311743,
  'score': 0.1182635481752232},
 'SONG_ONLY': {'music_ndcg': 0.31084811253655065,
  'tag_ndcg': 0.5251601753935106,
  'score': 0.3429949219650946},
 'TITLE_ONLY': {'music_ndcg': 0.0,
  'tag_ndcg': 0.05017166594399686,
  'score': 0.0075257498915995286}}

In [63]:
# Show the reference scores `s1` again for side-by-side comparison with `s2`.
s1

{'total': {'music_ndcg': 0.28777245622769804,
  'tag_ndcg': 0.4410570428158009,
  'score': 0.31076514421591345},
 'SONG_TAG': {'music_ndcg': 0.33885801875183746,
  'tag_ndcg': 0.4354390131962043,
  'score': 0.35334516791849246},
 'TAG_TITLE': {'music_ndcg': 0.10011950367547963,
  'tag_ndcg': 0.35825722840377444,
  'score': 0.13884016238472385},
 'SONG_ONLY': {'music_ndcg': 0.32835633711365186,
  'tag_ndcg': 0.5014327505454684,
  'score': 0.3543177991284243},
 'TITLE_ONLY': {'music_ndcg': 0.11909343370207164,
  'tag_ndcg': 0.26744473349490455,
  'score': 0.14134612867099658}}

## Tuning

In [None]:
# Reload the complete validation questions and answers for tuning; this replaces
# any subsample that was assigned to `questions` in the Debug section.
questions = read_json(question_fname)
answers = read_json(answer_fname)

In [73]:
import numpy as np
from functools import partial
from random_search_optimizer import RandomSearchOptimizer
from statistics import mean


def opt_func(
        questions, answers,
        max_depth,
        c,
        ALBUM,
        ARTIST,
        ARTIST_GENRE,
        ARTIST_DETAILED_GENRE,
        DETAILED_GENRE,
        GENRE,
        MONTH,
        PLAYLIST,
        SONG,
        TAG,
        WORD,
        YEAR,
        ALBUM_TO_SONG,
        ALBUM_TO_WORD,
        ARTIST_TO_SONG,
        ARTIST_TO_WORD,
        ARTIST_GENRE_TO_SONG,
        GENRE_TO_SONG,
        MONTH_TO_SONG,
        PLAYLIST_TO_SONG,
        PLAYLIST_TO_TAG,
        PLAYLIST_TO_WORD,
        SONG_TO_ALBUM,
        SONG_TO_ARTIST,
        SONG_TO_ARTIST_DETAILED_GENRE,
        SONG_TO_ARTIST_GENRE,
        SONG_TO_DETAILED_GENRE,
        SONG_TO_GENRE,
        SONG_TO_MONTH,
        SONG_TO_PLAYLIST,
        SONG_TO_YEAR,
        TAG_TO_PLAYLIST,
        TAG_TO_WORD,
        WORD_TO_ALBUM,
        WORD_TO_ARTIST,
        WORD_TO_PLAYLIST,
        WORD_TO_TAG,
        YEAR_TO_SONG,
        score='total',
        jobs=1,
    ):
    """Objective function for hyper-parameter search.

    Applies the given hyper-parameters to the notebook-level ``recommender``,
    predicts for ``questions``, evaluates against ``answers`` with the
    notebook-level ``evaluator`` and returns the mean of the selected
    per-question metric.

    Each relation weight is ``<RELATION> * c * <TARGET NODE TYPE>``, e.g.
    ``SONG_TO_ALBUM * c * ALBUM`` (the same derivation as ``convert_params``).

    Args:
        questions: Questions passed to ``recommender.predict`` and the evaluator.
        answers: Ground truth passed to the evaluator.
        max_depth: Search depth; truncated to an integer with ``int()``.
        c: Global factor applied to every relation weight.
        ALBUM ... YEAR: One factor per target node type.
        ALBUM_TO_SONG ... YEAR_TO_SONG: One factor per relation.
        score: Metric to return: ``'songs'`` (music nDCG), ``'tags'`` (tag nDCG)
            or ``'total'`` (combined score).
        jobs: Job count forwarded to ``recommender.predict``.

    Returns:
        The arithmetic mean of the selected per-question list.

    Raises:
        ValueError: If ``score`` is not one of the three supported names.
            Raised before any prediction work is started.
    """
    # Fail fast: an invalid `score` must not cost a full predict/evaluate run.
    if score not in ('songs', 'tags', 'total'):
        raise ValueError(f"Wrong parameter - score: {score}")

    rel = MelonGraph.Relation
    params = {
        'max_songs': 100,
        'max_tags': 10,
        'max_depth': int(max_depth),
        'relation_weight': {
            rel.ALBUM_TO_SONG: ALBUM_TO_SONG * c * SONG,
            rel.ALBUM_TO_WORD: ALBUM_TO_WORD * c * WORD,
            rel.ARTIST_TO_SONG: ARTIST_TO_SONG * c * SONG,
            rel.ARTIST_TO_WORD: ARTIST_TO_WORD * c * WORD,
            rel.ARTIST_GENRE_TO_SONG: ARTIST_GENRE_TO_SONG * c * SONG,
            rel.GENRE_TO_SONG: GENRE_TO_SONG * c * SONG,
            rel.MONTH_TO_SONG: MONTH_TO_SONG * c * SONG,
            rel.PLAYLIST_TO_SONG: PLAYLIST_TO_SONG * c * SONG,
            rel.PLAYLIST_TO_TAG: PLAYLIST_TO_TAG * c * TAG,
            rel.PLAYLIST_TO_WORD: PLAYLIST_TO_WORD * c * WORD,
            rel.SONG_TO_ALBUM: SONG_TO_ALBUM * c * ALBUM,
            rel.SONG_TO_ARTIST: SONG_TO_ARTIST * c * ARTIST,
            rel.SONG_TO_ARTIST_DETAILED_GENRE: SONG_TO_ARTIST_DETAILED_GENRE * c * ARTIST_DETAILED_GENRE,
            rel.SONG_TO_ARTIST_GENRE: SONG_TO_ARTIST_GENRE * c * ARTIST_GENRE,
            rel.SONG_TO_DETAILED_GENRE: SONG_TO_DETAILED_GENRE * c * DETAILED_GENRE,
            rel.SONG_TO_GENRE: SONG_TO_GENRE * c * GENRE,
            rel.SONG_TO_MONTH: SONG_TO_MONTH * c * MONTH,
            rel.SONG_TO_PLAYLIST: SONG_TO_PLAYLIST * c * PLAYLIST,
            rel.SONG_TO_YEAR: SONG_TO_YEAR * c * YEAR,
            rel.TAG_TO_PLAYLIST: TAG_TO_PLAYLIST * c * PLAYLIST,
            rel.TAG_TO_WORD: TAG_TO_WORD * c * WORD,
            rel.WORD_TO_ALBUM: WORD_TO_ALBUM * c * ALBUM,
            rel.WORD_TO_ARTIST: WORD_TO_ARTIST * c * ARTIST,
            rel.WORD_TO_PLAYLIST: WORD_TO_PLAYLIST * c * PLAYLIST,
            rel.WORD_TO_TAG: WORD_TO_TAG * c * TAG,
            rel.YEAR_TO_SONG: YEAR_TO_SONG * c * SONG,
        },
        'song_relation_weight': None,
        'tag_relation_weight': None,
    }

    # Note: this reconfigures the shared recommender as a side effect.
    recommender.set_params(**params)
    results = recommender.predict(questions, jobs, verbose=False)

    # Only the three overall per-question lists are needed; the per-case
    # breakdowns returned alongside them are ignored here.
    (total_music_ndcgs, total_tag_ndcgs, total_scores,
     _case_music_ndcgs, _case_tag_ndcgs, _case_scores) = evaluator._eval(results, questions, answers)

    if score == 'songs':
        return mean(total_music_ndcgs)
    if score == 'tags':
        return mean(total_tag_ndcgs)
    return mean(total_scores)


def generate_optimizer(questions, answers, score='total', jobs=1, seed=1211):
    """Build a ``RandomSearchOptimizer`` over the ``opt_func`` hyper-parameters.

    Args:
        questions: Questions bound to ``opt_func``.
        answers: Ground truth bound to ``opt_func``.
        score: Metric that ``opt_func`` returns (``'songs'``, ``'tags'`` or ``'total'``).
        jobs: Job count that ``opt_func`` forwards to the recommender.
        seed: Seed handed to the optimizer. Defaults to 1211, the value that
            was previously hard-coded, so existing calls behave as before.

    Returns:
        The configured ``RandomSearchOptimizer`` instance.
    """
    objective = partial(opt_func, questions=questions, answers=answers, score=score, jobs=jobs)

    # Each entry is a pair of bounds for one hyper-parameter.
    # NOTE(review): presumably (lower, upper); equal bounds pin the value - confirm
    # in random_search_optimizer.
    free = (0, 1)
    zero = (0, 0)

    # Key order is kept exactly as before; it may influence the optimizer's
    # sampling order and therefore the reproducibility of a given seed.
    bounds = {
        'max_depth': (4, 4),
        'c': free,
        'ALBUM': free,
        'ARTIST': free,
        'ARTIST_GENRE': free,
        'ARTIST_DETAILED_GENRE': free,
        'DETAILED_GENRE': free,
        'GENRE': free,
        'MONTH': zero,
        'PLAYLIST': free,
        'SONG': free,
        'TAG': free,
        'WORD': free,
        'YEAR': free,
        'ALBUM_TO_SONG': free,
        'ALBUM_TO_WORD': free,
        'ARTIST_TO_SONG': free,
        'ARTIST_TO_WORD': free,
        'ARTIST_GENRE_TO_SONG': free,
        'GENRE_TO_SONG': free,
        'MONTH_TO_SONG': free,
        'PLAYLIST_TO_SONG': free,
        'PLAYLIST_TO_TAG': free,
        'PLAYLIST_TO_WORD': free,
        'SONG_TO_ALBUM': free,
        'SONG_TO_ARTIST': free,
        'SONG_TO_ARTIST_DETAILED_GENRE': free,
        'SONG_TO_ARTIST_GENRE': free,
        'SONG_TO_DETAILED_GENRE': zero,
        'SONG_TO_GENRE': zero,
        'SONG_TO_MONTH': free,
        'SONG_TO_PLAYLIST': free,
        'SONG_TO_YEAR': free,
        'TAG_TO_PLAYLIST': free,
        'TAG_TO_WORD': free,
        'WORD_TO_ALBUM': free,
        'WORD_TO_ARTIST': free,
        'WORD_TO_PLAYLIST': free,
        'WORD_TO_TAG': free,
        'YEAR_TO_SONG': free,
    }

    return RandomSearchOptimizer(objective, bounds, seed=seed)

In [74]:
# Optimise the mean music nDCG (score='songs'), predicting with 12 jobs per trial.
opt = generate_optimizer(questions, answers, score='songs', jobs=12)

In [None]:
# Run the random search.
# NOTE(review): the argument is presumably the number of trials - confirm in random_search_optimizer.
opt.run(2)

|index     |target    |max_depth |c         |ALBUM     |ARTIST    |ARTIST_G..|ARTIST_D..|DETAILED..|GENRE     |MONTH     |PLAYLIST  |SONG      |TAG       |WORD      |YEAR      |ALBUM_TO..|ALBUM_TO..|ARTIST_T..|ARTIST_T..|ARTIST_G..|GENRE_TO..|MONTH_TO..|PLAYLIST..|PLAYLIST..|PLAYLIST..|SONG_TO_..|SONG_TO_..|SONG_TO_..|SONG_TO_..|SONG_TO_..|SONG_TO_..|SONG_TO_..|SONG_TO_..|SONG_TO_..|TAG_TO_P..|TAG_TO_W..|WORD_TO_..|WORD_TO_..|WORD_TO_..|WORD_TO_..|YEAR_TO_..|
|         2|0.19810765171773342|    4.0000|    0.9053|    0.2688|    0.2116|    0.9949|    0.9094|    0.5632|    0.5817|    0.0000|    0.8295|    0.5363|    0.0381|    0.9187|    0.6168|    0.1367|    0.2793|    0.1917|    0.6113|    0.4142|    0.6157|    0.6333|    0.7524|    0.8454|    0.9082|    0.3429|    0.5366|    0.4484|    0.3464|    0.0000|    0.0000|    0.9463|    0.0951|    0.2455|    0.9279|    0.4201|    0.5947|    0.9580|    0.9868|    0.3531|    0.7502|
