# Development Environment

## Execution

In [1]:
# Split res/train.json with --ratio=0.2; generated files are tagged with suffix "02"
# (presumably the train02.json / val02.json files that prepare_data("02") copies -- confirm in split_data.py).
!python split_data.py run res/train.json --ratio=0.2 --suffix="02"

Reading data...

Total playlists: 115071
Splitting data...
Generating questions...
ALL: 0, SONG_TAG: 8687, SONG_TITLE: 0, TAG_TITLE: 2762, SONG_ONLY: 9954, TAG_ONLY: 0, TITLE_ONLY: 1611, NOTHING: 0


In [2]:
# Same split with --ratio=0.002 and suffix "0002": a much smaller question set for quick iterations.
!python split_data.py run res/train.json --ratio=0.002 --suffix="0002"

Reading data...

Total playlists: 115071
Splitting data...
Generating questions...
ALL: 0, SONG_TAG: 87, SONG_TITLE: 0, TAG_TITLE: 27, SONG_ONLY: 99, TAG_ONLY: 0, TITLE_ONLY: 17, NOTHING: 0


In [3]:
# Same split with --ratio=0.001 and suffix "0001": the smallest question set of the three.
!python split_data.py run res/train.json --ratio=0.001 --suffix="0001"

Reading data...

Total playlists: 115071
Splitting data...
Generating questions...
ALL: 0, SONG_TAG: 43, SONG_TITLE: 0, TAG_TITLE: 13, SONG_ONLY: 50, TAG_ONLY: 0, TITLE_ONLY: 9, NOTHING: 0


In [None]:
# For self-testing: run the recommender on the locally split data
# (arena_data/orig/train.json as training input, arena_data/questions/val.json as questions).
!python grape_recommender.py run \
    --song_meta_fname=res/song_meta.json \
    --genre_fname=res/genre_gn_all.json \
    --train_fname=arena_data/orig/train.json \
    --question_fname=arena_data/questions/val.json

In [None]:
# Scoring for self-testing: evaluate arena_data/results/results.json
# against the held-out answers in arena_data/answers/val.json.
!python evaluate.py evaluate \
    --gt_fname=arena_data/answers/val.json \
    --rec_fname=arena_data/results/results.json

In [None]:
# For leaderboard submission: use the full res/train.json and the res/val.json questions.
!python grape_recommender.py run \
    --song_meta_fname=res/song_meta.json \
    --genre_fname=res/genre_gn_all.json \
    --train_fname=res/train.json \
    --question_fname=res/val.json

In [None]:
# For final submission: use the full res/train.json and the res/test.json questions.
!python grape_recommender.py run \
    --song_meta_fname=res/song_meta.json \
    --genre_fname=res/genre_gn_all.json \
    --train_fname=res/train.json \
    --question_fname=res/test.json

## Development

In [4]:
# Enable IPython's autoreload extension. Mode 2 reloads every imported module
# (except those excluded with `%aimport -module`) before a cell is executed,
# so edits to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [5]:
def prepare_data(suffix=''):
    """Make the split identified by ``suffix`` the active dataset.

    Copies the four suffixed files of a split onto their un-suffixed
    counterparts under ``./arena_data`` (paths are relative to the current
    working directory):

    * ``orig/train{suffix}.json``     -> ``orig/train.json``
    * ``orig/val{suffix}.json``       -> ``orig/val.json``
    * ``questions/val{suffix}.json``  -> ``questions/val.json``
    * ``answers/val{suffix}.json``    -> ``answers/val.json``

    All source files are checked before anything is copied, so a missing
    file can no longer leave the active dataset as a mix of two splits.

    Args:
        suffix: Split identifier appended to the file stems (e.g. ``"0002"``).
            An empty/falsy value is a no-op, leaving the active files as-is.

    Returns:
        None.

    Raises:
        FileNotFoundError: If any of the four suffixed source files is
            missing; no destination file is modified in that case.
    """
    if not suffix:
        return

    # Local imports keep the cell self-contained, as in the original notebook.
    import os
    import shutil

    stems = (
        "./arena_data/orig/train",
        "./arena_data/orig/val",
        "./arena_data/questions/val",
        "./arena_data/answers/val",
    )
    copies = [(f"{stem}{suffix}.json", f"{stem}.json") for stem in stems]

    # Validate up front: copying is not transactional, so failing on the
    # third file would otherwise leave train/val from the new split paired
    # with questions/answers from the previous one.
    missing = [src for src, _ in copies if not os.path.exists(src)]
    if missing:
        raise FileNotFoundError(
            f"Missing split file(s) for suffix {suffix!r}: {', '.join(missing)}"
        )

    for src, dst in copies:
        shutil.copyfile(src, dst)

In [6]:
# Choose the active split by suffix; keep exactly one call uncommented.
# prepare_data("02")
prepare_data("0002")
# prepare_data("0001")

In [7]:
from utils import read_json
from utils import write_json

In [8]:
# Load the three JSON inputs in one pass: the active training split, the
# genre file and the song metadata (read in that order).
train, genre_gn_all, song_meta = (
    read_json(json_path)
    for json_path in (
        './arena_data/orig/train.json',
        './res/genre_gn_all.json',
        './res/song_meta.json',
    )
)

In [9]:
%%time
# Build the graph from song_meta, genre_gn_all and train; %%time reports the cost.
import lib.graph
# A leading "-" tells autoreload NOT to reload these modules
# (presumably to keep already-built graph objects consistent -- confirm).
%aimport -lib.graph.core
%aimport -lib.graph.nodes

graph = lib.graph.GraphBuilder().build(song_meta, genre_gn_all, train)

Building nodes: 100%|█████████▉| 822830/823084 [00:07<00:00, 114260.55it/s]
Building edges: 100%|██████████| 823084/823084 [00:49<00:00, 16486.97it/s]
CPU times: user 59.7 s, sys: 1.51 s, total: 1min 1s
Wall time: 1min


In [10]:
%%time
# Construct the Grape recommender on top of the graph, passing the song/tag
# recommendation-count constants defined in constants.py.
import lib.grape
from constants import NUM_OF_RECOMMENDED_SONGS
from constants import NUM_OF_RECOMMENDED_TAGS

grape = lib.grape.Grape(
    graph,
    NUM_OF_RECOMMENDED_SONGS,
    NUM_OF_RECOMMENDED_TAGS,
)

Caching nodes: 100%|██████████| 1254916/1254916 [00:21<00:00, 57957.74it/s]
Caching edges: 100%|██████████| 19106852/19106852 [01:24<00:00, 226939.01it/s]
Caching union nodes: 3822911it [00:54, 69738.03it/s]
CPU times: user 2min 38s, sys: 3.05 s, total: 2min 41s
Wall time: 2min 40s


In [11]:
%%time
# Fit on the same train data that was used to build the graph.
grape.fit(train)

Fitting MostPopular model: 100%|██████████| 114841/114841 [00:10<00:00, 10518.98it/s]
CPU times: user 11.8 s, sys: 70.6 ms, total: 11.9 s
Wall time: 11.8 s


<lib.grape.grape.Grape at 0x7f4277eabc70>

In [12]:
# Load the questions of the active split (the file prepare_data() copies into place).
questions = read_json('./arena_data/questions/val.json')

In [13]:
from utils import validate_answers

# Upper bound on the number of questions to answer; the slice below simply
# yields every question when fewer are available (assuming `questions` is a list).
n_questions = 2000
# n_questions = 200
# n_questions = 100
answers = grape.predict_all(questions[:n_questions])
# Check the answers against the same question slice before persisting them.
validate_answers(answers, questions[:n_questions])
write_json(answers, './arena_data/results/results.json')

# Score the results file just written against the held-out answers.
!python evaluate.py evaluate \
    --gt_fname=arena_data/answers/val.json \
    --rec_fname=arena_data/results/results.json

100%|██████████| 230/230 [01:24<00:00,  2.73it/s]
Music nDCG: 0.23854
Tag nDCG: 0.40505
Score: 0.263517


## Test

In [None]:
%%time
# Small test build: only the first 100 song_meta entries and an empty train list.
import lib.graph
%aimport -lib.graph.core
# Explicitly mark this one submodule for autoreload
# (presumably so edits to it are still picked up while iterating -- confirm).
%aimport  lib.graph.core.graph
%aimport -lib.graph.nodes

graph = lib.graph.GraphBuilder().build(song_meta[:100], genre_gn_all, [])

In [None]:
%%time
# Construct a Grape recommender on whatever `graph` currently refers to,
# passing the song/tag recommendation-count constants defined in constants.py.
import lib.grape
from constants import NUM_OF_RECOMMENDED_SONGS
from constants import NUM_OF_RECOMMENDED_TAGS

grape = lib.grape.Grape(
    graph,
    NUM_OF_RECOMMENDED_SONGS,
    NUM_OF_RECOMMENDED_TAGS,
)

In [None]:
# NOTE(review): this fits on the full `train`, although the test graph in this
# section is built with an empty train list -- confirm that this is intended.
grape.fit(train)

In [None]:
# Load the questions of the active split (the file prepare_data() copies into place).
questions = read_json('./arena_data/questions/val.json')

In [None]:
from utils import validate_answers

# Only answer the first 20 questions to keep the test run short.
n_questions = 20
answers = grape.predict_all(questions[:n_questions])
# Check the answers against the same question slice before persisting them.
validate_answers(answers, questions[:n_questions])
write_json(answers, './arena_data/results/results.json')

# Score the results file just written against the held-out answers.
!python evaluate.py evaluate \
    --gt_fname=arena_data/answers/val.json \
    --rec_fname=arena_data/results/results.json

## Playground