# Development Environment

In [None]:
!pip install -r requirements.txt

In [1]:
from utils.jupyter_utils import log_cell
from utils.jupyter_utils import clear_logs

In [2]:
# Start the session with clean logs.
# NOTE(review): clear_logs comes from utils.jupyter_utils; presumably it removes
# what the %%log_cell magic recorded — confirm what exactly is deleted.
clear_logs()

## Execution

In [None]:
# Split res/train.json with ratio 0.2; the "02" suffix matches prepare_data("02") below.
# NOTE(review): split_data.py is not shown here — presumably it writes the
# suffixed train/val/questions/answers files under arena_data/; confirm.
!python split_data.py run res/train.json --ratio=0.2 --suffix="02"

In [None]:
# Split res/train.json with ratio 0.002; the "0002" suffix matches prepare_data("0002") below.
# NOTE(review): split_data.py is not shown here — presumably it writes the
# suffixed train/val/questions/answers files under arena_data/; confirm.
!python split_data.py run res/train.json --ratio=0.002 --suffix="0002"

In [None]:
# Split res/train.json with ratio 0.001; the "0001" suffix matches prepare_data("0001") below.
# NOTE(review): split_data.py is not shown here — presumably it writes the
# suffixed train/val/questions/answers files under arena_data/; confirm.
!python split_data.py run res/train.json --ratio=0.001 --suffix="0001"

In [None]:
%%log_cell

# For local self-testing: run the recommender on the locally split training
# data (arena_data/orig/train.json) against the local validation questions.
!python grape_recommender.py run \
    --song_meta_fname=res/song_meta.json \
    --genre_fname=res/genre_gn_all.json \
    --train_fname=arena_data/orig/train.json \
    --question_fname=arena_data/questions/val.json

In [None]:
%%log_cell

# Scoring for the local self-test: evaluate results.json against the local
# validation answers.
!python evaluate.py evaluate \
    --gt_fname=arena_data/answers/val.json \
    --rec_fname=arena_data/results/results.json

In [None]:
%%log_cell

# For leaderboard submission: train on the full res/train.json and answer
# the questions in res/val.json.
!python grape_recommender.py run \
    --song_meta_fname=res/song_meta.json \
    --genre_fname=res/genre_gn_all.json \
    --train_fname=res/train.json \
    --question_fname=res/val.json

In [None]:
%%log_cell

# For final submission: train on the full res/train.json and answer the
# questions in res/test.json.
!python grape_recommender.py run \
    --song_meta_fname=res/song_meta.json \
    --genre_fname=res/genre_gn_all.json \
    --train_fname=res/train.json \
    --question_fname=res/test.json

## Development

In [3]:
# Enable IPython's autoreload extension in mode 2: modules are reloaded before
# each cell runs (except those excluded with `%aimport -module`), so edits to
# project code take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
def prepare_data(suffix=''):
    """Activate one pre-split dataset by copying it over the default file names.

    For each of ``orig/train``, ``orig/val``, ``questions/val`` and
    ``answers/val`` under ``./arena_data``, the file ``<name><suffix>.json``
    is copied to ``<name>.json``, replacing any existing file.

    Args:
        suffix: Suffix identifying the split to activate (e.g. ``"0002"``).
            An empty/falsy value makes the call a no-op.
    """
    if suffix:
        import shutil

        # Copy order: train, val, questions, answers.
        for stem in ("orig/train", "orig/val", "questions/val", "answers/val"):
            shutil.copyfile(
                f"./arena_data/{stem}{suffix}.json",
                f"./arena_data/{stem}.json",
            )

In [5]:
# Choose which pre-split dataset becomes the active arena_data/*.json files;
# leave exactly one call uncommented. The suffixes match the --suffix values
# passed to split_data.py in the Execution section.
# prepare_data("02")
prepare_data("0002")
# prepare_data("0001")

In [6]:
from utils import read_json
from utils import write_json

In [7]:
# Load the locally split training data plus the genre and song metadata.
train, genre_gn_all, song_meta = (
    read_json(path)
    for path in (
        './arena_data/orig/train.json',
        './res/genre_gn_all.json',
        './res/song_meta.json',
    )
)

In [None]:
# For submission: load the full res/train.json instead of the local split.
# This rebinds `train`, `genre_gn_all` and `song_meta` from the previous cell.
train, genre_gn_all, song_meta = (
    read_json(path)
    for path in (
        './res/train.json',
        './res/genre_gn_all.json',
        './res/song_meta.json',
    )
)

In [8]:
%%log_cell
%%time
# Build the graph from the song metadata, genre metadata and training data.
import lib.graph
# The leading "-" excludes these modules from autoreload.
%aimport -lib.graph.core
%aimport -lib.graph.nodes

graph = lib.graph.GraphBuilder().build(song_meta, genre_gn_all, train)

Building nodes: 100%|#########9| 822830/823084 [00:08<00:00, 93170.10it/s] 
Building edges: 100%|##########| 823084/823084 [00:50<00:00, 16178.78it/s]


CPU times: user 1min 2s, sys: 1.63 s, total: 1min 3s
Wall time: 1min 3s


In [9]:
%%log_cell
%%time
# Construct the recommender on top of the graph built earlier, passing the
# song/tag recommendation-count constants from the `constants` module.
import lib.grape
from constants import NUM_OF_RECOMMENDED_SONGS, NUM_OF_RECOMMENDED_TAGS

grape = lib.grape.Grape(graph, NUM_OF_RECOMMENDED_SONGS, NUM_OF_RECOMMENDED_TAGS)

Caching nodes: 100%|##########| 1355036/1355036 [00:28<00:00, 47939.36it/s]
Caching edges: 100%|##########| 20143464/20143464 [01:16<00:00, 263710.34it/s]
Caching union nodes: 3923031it [00:52, 74849.99it/s]                             

CPU times: user 2min 34s, sys: 3.2 s, total: 2min 38s
Wall time: 2min 37s





In [10]:
%%log_cell
%%time
# Fit the recommender on the loaded training data.
# fit() returns the Grape instance itself (see the repr in the recorded output).
grape.fit(train)

Fitting MostPopular model: 100%|##########| 114841/114841 [00:10<00:00, 11167.00it/s]


CPU times: user 10.9 s, sys: 90.4 ms, total: 11 s
Wall time: 11 s


<lib.grape.grape.Grape at 0x7f89ca99d090>

In [28]:
# Load the local validation questions together with their ground-truth answers.
questions, answers = (
    read_json(path)
    for path in (
        './arena_data/questions/val.json',
        './arena_data/answers/val.json',
    )
)

In [None]:
# For submission: replace the local validation questions with res/val.json.
# `answers` is not reassigned here, so it keeps whatever was loaded before.
questions = read_json('./res/val.json')

In [13]:
%%log_cell
# Predict for the first n_questions validation questions, check the output with
# validate_answers, write it to results.json and score it with evaluate.py.
from utils import validate_answers

# Upper bound on how many questions are used; the slice returns fewer when the
# question list is shorter.
n_questions = 2000
# n_questions = 200
# n_questions = 100
results = grape.predict_all(questions[:n_questions])
validate_answers(results, questions[:n_questions])
write_json(results, './arena_data/results/results.json')

!python evaluate.py evaluate \
    --gt_fname=arena_data/answers/val.json \
    --rec_fname=arena_data/results/results.json \
    --question_fname=arena_data/questions/val.json

100%|##########| 230/230 [01:34<00:00,  2.44it/s]


=== Total score ===
Music nDCG: 0.261882
Tag nDCG: 0.477977
Score: 0.294297
=== SONG_TAG score ===
Music nDCG: 0.326615
Tag nDCG: 0.515792
Score: 0.354991
=== TAG_TITLE score ===
Music nDCG: 0.0589877
Tag nDCG: 0.485158
Score: 0.122913
=== SONG_ONLY score ===
Music nDCG: 0.291291
Tag nDCG: 0.436181
Score: 0.313025
=== TITLE_ONLY score ===
Music nDCG: 0.0815868
Tag nDCG: 0.51645
Score: 0.146816


In [39]:
%%log_cell
# Predict for the first n_questions validation questions, check the output with
# validate_answers, write it to results.json and score it with evaluate.py.
# NOTE(review): this cell's code is identical to the preceding evaluation cell;
# only the recorded output differs. Consider keeping a single copy.
from utils import validate_answers

# Upper bound on how many questions are used; the slice returns fewer when the
# question list is shorter.
n_questions = 2000
# n_questions = 200
# n_questions = 100
results = grape.predict_all(questions[:n_questions])
validate_answers(results, questions[:n_questions])
write_json(results, './arena_data/results/results.json')

!python evaluate.py evaluate \
    --gt_fname=arena_data/answers/val.json \
    --rec_fname=arena_data/results/results.json \
    --question_fname=arena_data/questions/val.json

100%|##########| 230/230 [00:27<00:00,  8.31it/s]


=== Total score ===
Music nDCG: 0.261959
Tag nDCG: 0.476947
Score: 0.294207
=== SONG_TAG score ===
Music nDCG: 0.327088
Tag nDCG: 0.515792
Score: 0.355394
=== TAG_TITLE score ===
Music nDCG: 0.0591726
Tag nDCG: 0.485158
Score: 0.12307
=== SONG_ONLY score ===
Music nDCG: 0.291509
Tag nDCG: 0.433788
Score: 0.312851
=== TITLE_ONLY score ===
Music nDCG: 0.0786319
Tag nDCG: 0.51645
Score: 0.144305


In [31]:
%%log_cell
# Debug a single validation question: print the question and its ground-truth
# answer, then predict, validate, write and score just that one entry.
from utils import validate_answers

# Positional index into `questions` / `answers` (not the playlist "id" field).
question_id = 15

print("=== questions ===")
print(questions[question_id:question_id+1])
print()

print("=== answers ===")
print(answers[question_id:question_id+1])
print()

# One-element slices keep the list shape that predict_all / validate_answers receive elsewhere.
results = grape.predict_all(questions[question_id:question_id+1])
validate_answers(results, questions[question_id:question_id+1])
write_json(results, './arena_data/results/results.json')

!python evaluate.py evaluate \
    --gt_fname=arena_data/answers/val.json \
    --rec_fname=arena_data/results/results.json \
    --question_fname=arena_data/questions/val.json

  0%|          | 0/1 [00:00<?, ?it/s]

=== questions ===
[{'tags': ['카페', '봄'], 'id': 20353, 'plylst_title': '', 'songs': [376537, 32116, 241758, 239826, 394738, 553099, 122035, 47735, 491855, 10851, 303903, 368771, 45923, 489174], 'like_cnt': 10, 'updt_date': '2016-05-24 10:10:32.000'}]

=== answers ===
[{'tags': ['피아노', '뉴에이지', '재즈'], 'id': 20353, 'plylst_title': '따뜻한 봄날의 달달한 피아노 뮤직', 'songs': [238465, 129962, 18663, 409769, 184162, 138251, 573337, 288942, 434503, 175027, 1238, 670743, 279741, 87876, 86289], 'like_cnt': 10, 'updt_date': '2016-05-24 10:10:32.000'}]

=== current weights ===
SongNode(376537): 1
SongNode(32116): 1
SongNode(241758): 1
SongNode(239826): 1
SongNode(394738): 1
SongNode(553099): 1
SongNode(122035): 1
SongNode(47735): 1
SongNode(491855): 1
SongNode(10851): 1
SongNode(303903): 1
SongNode(368771): 1
SongNode(45923): 1
SongNode(489174): 1
TagNode(카페): 1
TagNode(봄): 1

=== moved weights ===
AlbumNode(2672081): 0.045
ArtistNode(906972): 0.045
PlaylistNode(90992): 0.0375
PlaylistNode(52127): 0.035
Playli

100%|##########| 1/1 [00:00<00:00,  1.81it/s]

=== current weights ===
PlaylistNode(106162): 1.6746472656250006e-07
PlaylistNode(92831): 1.6604770312500005e-07
PlaylistNode(109082): 1.6604770312500005e-07
PlaylistNode(138690): 1.6500371484375004e-07
PlaylistNode(20617): 1.5932804687500006e-07
PlaylistNode(50775): 1.5528081640625002e-07
PlaylistNode(94837): 1.5254002734375004e-07
PlaylistNode(55315): 1.5249667187500003e-07
PlaylistNode(139230): 1.5228190234375009e-07
PlaylistNode(153203): 1.5224924218750004e-07
PlaylistNode(50330): 1.5128334765625006e-07
PlaylistNode(74706): 1.4895956250000003e-07
PlaylistNode(27952): 1.4892423828125007e-07
PlaylistNode(82148): 1.4841678906250002e-07
PlaylistNode(83095): 1.4578659375000004e-07
PlaylistNode(30969): 1.4555839843750003e-07
PlaylistNode(67490): 1.4465062500000003e-07
PlaylistNode(2697): 1.4443088671875003e-07
PlaylistNode(81588): 1.4439438281250002e-07
PlaylistNode(21775): 1.4398077343750003e-07

=== moved weights ===
TagNode(카페): 7.637815380859377e-08
TagNode(힐링): 7.637815380859377e-08




=== Total score ===
Music nDCG: 0.397618
Tag nDCG: 0.391066
Score: 0.396635
=== SONG_TAG score ===
Music nDCG: 0.397618
Tag nDCG: 0.391066
Score: 0.396635


In [22]:
%%log_cell
# Debug a single validation question: predict, validate, write and score only
# the entry at position `question_id`.
from utils import validate_answers

# Positional index into `questions` (not the playlist "id" field).
question_id = 18
results = grape.predict_all(questions[question_id:question_id+1])
validate_answers(results, questions[question_id:question_id+1])
write_json(results, './arena_data/results/results.json')

!python evaluate.py evaluate \
    --gt_fname=arena_data/answers/val.json \
    --rec_fname=arena_data/results/results.json \
    --question_fname=arena_data/questions/val.json

  0%|          | 0/1 [00:00<?, ?it/s]

=== current weights ===
SongNode(319001): 1
SongNode(563404): 1
SongNode(700251): 1
SongNode(554179): 1
SongNode(680363): 1
SongNode(622548): 1
SongNode(425397): 1
SongNode(436693): 1
SongNode(344170): 1
SongNode(6237): 1
SongNode(6715): 1
SongNode(346915): 1
SongNode(504129): 1
SongNode(213777): 1
SongNode(634734): 1
SongNode(644291): 1
SongNode(449836): 1
SongNode(611194): 1
SongNode(694735): 1
SongNode(478146): 1
SongNode(493132): 1
TagNode(몽롱): 1
TagNode(Rock): 1
TagNode(명곡): 1
TagNode(해외락): 1
TagNode(부드러운): 1

=== moved weights ===
AlbumNode(20068): 0.015
ArtistNode(100774): 0.03
PlaylistNode(89037): 0.0175
PlaylistNode(66802): 0.0075
PlaylistNode(88623): 0.0075
PlaylistNode(139687): 0.0075
union: 0.015
union: 0.015
AlbumNode(20243): 0.015
ArtistNode(100329): 0.03
PlaylistNode(38699): 0.0175
union: 0.015
union: 0.015
union: 0.015
union: 0.015
union: 0.015
AlbumNode(20299): 0.015
ArtistNode(100781): 0.015
PlaylistNode(43559): 0.0075
PlaylistNode(68935): 0.0075
PlaylistNode(89199): 

100%|##########| 1/1 [00:00<00:00,  2.86it/s]

=== moved weights ===
PlaylistNode(57128): 2.8990737109375006e-08
PlaylistNode(41658): 1.7626610156250005e-08
PlaylistNode(23973): 4.405286953125002e-08
PlaylistNode(92939): 4.0393537500000014e-08
PlaylistNode(39442): 2.441213398437501e-08
PlaylistNode(9229): 3.268874257812501e-08
PlaylistNode(123516): 1.7626610156250005e-08
PlaylistNode(64414): 4.703216328125001e-08
PlaylistNode(37100): 5.545566992187502e-08
PlaylistNode(100485): 1.7626610156250005e-08
PlaylistNode(2499): 4.0393537500000014e-08
PlaylistNode(58359): 2.902941054687501e-08
PlaylistNode(16675): 4.0393537500000014e-08
PlaylistNode(101474): 2.8990737109375006e-08
PlaylistNode(119790): 1.7626610156250005e-08
PlaylistNode(66233): 2.8183397265625005e-08
PlaylistNode(64864): 2.8990737109375006e-08
PlaylistNode(10288): 4.0393537500000014e-08
PlaylistNode(6769): 1.7626610156250005e-08
PlaylistNode(92727): 3.657805664062501e-08
PlaylistNode(93028): 4.0393537500000014e-08
PlaylistNode(65429): 1.7626610156250005e-08
PlaylistNode(363




=== Total score ===
Music nDCG: 0.0
Tag nDCG: 0.0
Score: 0.0
=== SONG_TAG score ===
Music nDCG: 0.0
Tag nDCG: 0.0
Score: 0.0


In [None]:
%%log_cell
def predict_multi(questions, jobs=2):
    """Run ``grape.predict_all`` over ``questions`` in parallel worker processes.

    The questions are cut into ``jobs`` contiguous, near-equal chunks, each
    chunk is handed to ``predict_job`` in a ``multiprocessing.Pool`` worker,
    and the per-chunk results are concatenated in the original question order.

    Args:
        questions: Sequence of question records accepted by
            ``grape.predict_all``.
        jobs: Number of chunks and worker processes. Defaults to 2; ``None``
            means one per CPU core. Capped at ``len(questions)``.

    Returns:
        A flat list with the predictions of all chunks. Returns ``[]`` for
        empty input without starting any worker process.
    """
    import itertools
    import multiprocessing as mp

    questions = list(questions)
    if not questions:
        return []

    if jobs is None:
        jobs = mp.cpu_count()
    # Never create more chunks than questions, and always at least one.
    jobs = max(1, min(int(jobs), len(questions)))

    # Near-equal contiguous chunks: the first `extra` chunks get one more item.
    # Chunks stay plain lists, the same type the single-process cells pass
    # to predict_all.
    size, extra = divmod(len(questions), jobs)
    chunks, start = [], 0
    for index in range(jobs):
        stop = start + size + (1 if index < extra else 0)
        chunks.append(questions[start:stop])
        start = stop

    # Size the pool to the number of chunks; extra workers would only sit idle.
    with mp.Pool(processes=jobs) as pool:
        results = pool.map(predict_job, chunks)
    return list(itertools.chain.from_iterable(results))


def predict_job(q):
    """Worker entry point: return ``grape.predict_all(q)`` for one chunk.

    Reads the notebook-global ``grape``.
    NOTE(review): this presumably relies on worker processes inheriting the
    notebook's globals (fork start method) — confirm on platforms that spawn.
    """
    return grape.predict_all(q)

In [None]:
%%log_cell
# Predict in parallel with predict_multi, validate the output and write it to
# results.json. Scoring is disabled (commented out) at the end of the cell.
from utils import validate_answers

# Very large bound so the slices below effectively select every question.
n_questions = 2000000000
# n_questions = 200
# n_questions = 100
# answers = grape.predict_all(questions[:n_questions])
results = predict_multi(questions[:n_questions])
validate_answers(results, questions[:n_questions])
write_json(results, './arena_data/results/results.json')

# !python evaluate.py evaluate \
#     --gt_fname=arena_data/answers/val.json \
#     --rec_fname=arena_data/results/results.json \
#     --question_fname=arena_data/questions/val.json

## Test

In [None]:
%%time
# Smoke test: build a small graph from only the first 100 songs and an empty
# training list. This rebinds the notebook-global `graph`.
import lib.graph
# "-" excludes a module from autoreload; the entry without "-" marks
# lib.graph.core.graph for autoreload.
# NOTE(review): how these three directives interact under `%autoreload 2` is not
# evident from this notebook — confirm against the autoreload documentation.
%aimport -lib.graph.core
%aimport  lib.graph.core.graph
%aimport -lib.graph.nodes

graph = lib.graph.GraphBuilder().build(song_meta[:100], genre_gn_all, [])

In [None]:
%%time
# Construct the recommender on top of the current `graph`, passing the
# song/tag recommendation-count constants from the `constants` module.
# This rebinds the notebook-global `grape`.
import lib.grape
from constants import NUM_OF_RECOMMENDED_SONGS, NUM_OF_RECOMMENDED_TAGS

grape = lib.grape.Grape(graph, NUM_OF_RECOMMENDED_SONGS, NUM_OF_RECOMMENDED_TAGS)

In [None]:
# Fit on whatever `train` currently holds. Note that the test graph in this
# section was built with an empty training list, not with `train`.
grape.fit(train)

In [None]:
# Load the local validation questions used by the smoke test.
questions = read_json('./arena_data/questions/val.json')

In [None]:
from utils import validate_answers

n_questions = 20
answers = grape.predict_all(questions[:n_questions])
validate_answers(answers, questions[:n_questions])
write_json(answers, './arena_data/results/results.json')

!python evaluate.py evaluate \
    --gt_fname=arena_data/answers/val.json \
    --rec_fname=arena_data/results/results.json

## Playground