In [1]:
import pickle
import pandas as pd
import os
import io
import json
import distutils.dir_util
import numpy as np

# rerank

> Data Load

In [2]:
# Helper functions for writing and loading JSON files
def write_json(data, fname):
    """Serialize ``data`` as UTF-8 JSON into ``fname``, creating parent folders.

    Args:
        data: Object to serialize. NumPy integer/float/bool scalars and
            NumPy arrays nested inside it are converted to native Python
            values; everything else must be natively JSON-serializable.
        fname: Destination file path. Missing parent directories are created.

    Raises:
        TypeError: If ``data`` contains an object that cannot be converted.
    """
    def _conv(o):
        # json.dumps only calls this hook for objects it cannot encode itself.
        if isinstance(o, np.integer):
            return int(o)
        if isinstance(o, np.floating):
            return float(o)
        if isinstance(o, np.bool_):
            return bool(o)
        if isinstance(o, np.ndarray):
            return o.tolist()
        raise TypeError(
            f"Object of type {type(o).__name__} is not JSON serializable"
        )

    # Serialize before touching the file system so that a serialization error
    # cannot leave behind an empty/truncated output file.
    json_str = json.dumps(data, ensure_ascii=False, default=_conv)

    parent = os.path.dirname(fname)
    # dirname is "" for a bare file name, and os.makedirs("") would raise.
    # os.makedirs is used instead of distutils.dir_util.mkpath because
    # distutils is deprecated and no longer ships with Python 3.12+.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(fname, "w", encoding="utf-8") as f:
        f.write(json_str)
        
def load_json(fname):
    """Parse the UTF-8 encoded JSON file at ``fname`` and return its content."""
    with open(fname, encoding='utf-8') as json_file:
        return json.load(json_file)

In [7]:
# Load the pickled `count_song` object; it is indexed by song id in the
# song re-ranking step.
# NOTE(review): pickle.load can run arbitrary code - only open trusted files.
with open('../0_data/count_song.pkl', 'rb') as pkl_file:
    count_song = pickle.load(pkl_file)

# Number of entries, shown as the cell output.
len(count_song)

44674

In [8]:
# Load the pickled `count_tag` object; it is indexed by tag in the
# tag re-ranking step.
# NOTE(review): pickle.load can run arbitrary code - only open trusted files.
with open('../0_data/count_tag.pkl', 'rb') as pkl_file:
    count_tag = pickle.load(pkl_file)

# Number of entries, shown as the cell output.
len(count_tag)

3400

In [3]:
# Load the pickled `mfl_col` object.
# NOTE(review): `mfl_col` is not referenced by the cells visible here -
# confirm whether this load is still needed.
# NOTE(review): pickle.load can run arbitrary code - only open trusted files.
with open('../0_data/mfl_col.pkl', 'rb') as pkl_file:
    mfl_col = pickle.load(pkl_file)

# Number of entries, shown as the cell output.
len(mfl_col)

24666

In [4]:
# Read the model predictions into a DataFrame and preview the first rows.
results = pd.read_json(
    './results/result_model_7.json',
    typ='frame',
    encoding='utf-8',
)
results.head(3)

Unnamed: 0,id,songs,tags
0,99313,"[69533, 28661, 13057, 549823, 300742, 409667, ...","[감성, 커피, 여유, 카페, Jazz, 주말, 봄, 힐링, 휴식, 재즈]"
1,91258,"[92933, 484278, 12397, 2748, 523071, 166761, 4...","[Pop, 팝송, 일렉, 명곡, 그루브, soul, RNBSOUL, RnB, 소울,..."
2,41460,"[259795, 550374, 310375, 81862, 21125, 534049,...","[사랑, 잔잔한, 스트레스, 가을, 밤, 감성, 명곡, 회상, 기분전환, 추억]"


In [5]:
# Split the prediction frame into its three columns.
pl_ids = results['id']
p_songs = results['songs']
p_tags = results['tags']

# Sanity check: every column must have one entry per playlist.
# (The third element previously repeated len(p_songs), so p_tags was never
# actually checked.)
len(pl_ids), len(p_songs), len(p_tags)

(11456, 11456, 11456)

In [9]:
# Re-order every playlist's recommended songs by their `count_song` value.
# NOTE(review): argsort is ascending, so songs with the smallest value come
# first - confirm this is the intended ranking direction.
p_songs_reranked = []
for candidate_songs in p_songs:
    song_scores = np.array([count_song[song_id] for song_id in candidate_songs])
    order = song_scores.argsort()
    p_songs_reranked.append(np.array(candidate_songs)[order].tolist())
len(p_songs_reranked)

11456

In [10]:
# Re-order every playlist's recommended tags by their `count_tag` value.
# NOTE(review): argsort is ascending, so tags with the smallest value come
# first - confirm this is the intended ranking direction.
p_tags_reranked = []
for candidate_tags in p_tags:
    tag_scores = np.array([count_tag[tag_name] for tag_name in candidate_tags])
    order = tag_scores.argsort()
    p_tags_reranked.append(np.array(candidate_tags)[order].tolist())
len(p_tags_reranked)

11456

In [11]:
# Assemble one {'id', 'songs', 'tags'} record per playlist from the
# re-ranked lists, in the same order as `pl_ids`.
result = [
    {
        'id': pl_ids[idx],
        'songs': p_songs_reranked[idx],
        'tags': p_tags_reranked[idx],
    }
    for idx in range(len(pl_ids))
]

In [20]:
# Persist the re-ranked recommendations as JSON.
write_json(data=result, fname='./results/result_model_7_rrk.json')

In [21]:
# Read the saved file back; `result` is rebound to the JSON-decoded content.
result = load_json(fname='./results/result_model_7_rrk.json')

# 평가

In [18]:
class ArenaEvaluator:
    """Scores playlist recommendations against ground truth using nDCG.

    Song nDCG and tag nDCG are each averaged over all recommended playlists
    and combined into one score as ``0.85 * song_nDCG + 0.15 * tag_nDCG``.
    """

    # Exact number of recommendations every playlist must contain.
    _N_SONGS = 100
    _N_TAGS = 10
    # Weights of the two nDCG values in the combined score.
    _SONG_WEIGHT = 0.85
    _TAG_WEIGHT = 0.15
    # Ground-truth lists longer than this are treated as having exactly this
    # many items when the ideal DCG is looked up.
    _MAX_GT = 100

    def __init__(self):
        # Pre-compute the ideal DCG for every ground-truth size 0.._MAX_GT.
        self._idcgs = [self._idcg(i) for i in range(self._MAX_GT + 1)]

    def _idcg(self, l):
        """Return the DCG obtained when the first ``l`` positions are all hits."""
        return sum((1.0 / np.log(i + 2) for i in range(l)))

    def _ndcg(self, gt, rec):
        """Return the nDCG of the ranked list ``rec`` against ``gt``.

        Args:
            gt: Ground-truth items (hashable, e.g. song ids or tag strings).
            rec: Recommended items, best first.

        Returns:
            DCG of ``rec`` divided by the ideal DCG for
            ``min(len(gt), 100)`` relevant items. An empty ``gt`` yields 0.0
            (it previously raised ZeroDivisionError).
        """
        if len(gt) == 0:
            # Nothing can be hit and the ideal DCG is 0; avoid dividing by it.
            return 0.0

        # Set lookup instead of scanning the ground-truth list for every
        # recommended item.
        relevant = set(gt)
        dcg = 0.0
        for i, r in enumerate(rec):
            if r in relevant:
                dcg += 1.0 / np.log(i + 2)
        return dcg / self._idcgs[min(len(gt), self._MAX_GT)]

    def _eval(self, gt_fname, rec_fname):
        """Validate the recommendation file and compute the scores.

        Args:
            gt_fname: Path of the ground-truth JSON file (list of playlists
                with ``id``, ``songs`` and ``tags``).
            rec_fname: Path of the recommendation JSON file (same layout).

        Returns:
            Tuple ``(music_ndcg, tag_ndcg, score)``.

        Raises:
            Exception: If the playlist ids differ from the ground truth, or
                a playlist does not contain exactly 100 unique songs and
                10 unique tags.
        """
        gt_playlists = load_json(gt_fname)
        gt_dict = {g["id"]: g for g in gt_playlists}
        rec_playlists = load_json(rec_fname)

        rec_ids = {r["id"] for r in rec_playlists}
        if set(gt_dict) != rec_ids:
            raise Exception("결과의 플레이리스트 수가 올바르지 않습니다.")

        rec_song_counts = {len(p["songs"]) for p in rec_playlists}
        rec_tag_counts = {len(p["tags"]) for p in rec_playlists}
        if rec_song_counts != {self._N_SONGS}:
            raise Exception("추천 곡 결과의 개수가 맞지 않습니다.")

        if rec_tag_counts != {self._N_TAGS}:
            raise Exception("추천 태그 결과의 개수가 맞지 않습니다.")

        rec_unique_song_counts = {len(set(p["songs"])) for p in rec_playlists}
        rec_unique_tag_counts = {len(set(p["tags"])) for p in rec_playlists}

        if rec_unique_song_counts != {self._N_SONGS}:
            raise Exception("한 플레이리스트에 중복된 곡 추천은 허용되지 않습니다.")

        if rec_unique_tag_counts != {self._N_TAGS}:
            raise Exception("한 플레이리스트에 중복된 태그 추천은 허용되지 않습니다.")

        music_ndcg = 0.0
        tag_ndcg = 0.0

        for rec in rec_playlists:
            gt = gt_dict[rec["id"]]
            music_ndcg += self._ndcg(gt["songs"], rec["songs"][:self._N_SONGS])
            tag_ndcg += self._ndcg(gt["tags"], rec["tags"][:self._N_TAGS])

        music_ndcg = music_ndcg / len(rec_playlists)
        tag_ndcg = tag_ndcg / len(rec_playlists)
        score = music_ndcg * self._SONG_WEIGHT + tag_ndcg * self._TAG_WEIGHT

        return music_ndcg, tag_ndcg, score

    @staticmethod
    def _report_lines(music_ndcg, tag_ndcg, score):
        """Return the three report lines shared by both evaluate methods."""
        return [
            f"Music nDCG: {music_ndcg:.6}",
            f"Tag nDCG: {tag_ndcg:.6}",
            f"Score: {score:.6}",
        ]

    def evaluate_with_save(self, gt_fname, rec_fname, model_file_path, default_file_path):
        """Evaluate, append the report to a log file, and print it.

        Args:
            gt_fname: Path of the ground-truth JSON file.
            rec_fname: Path of the recommendation JSON file.
            model_file_path: Text written as the first line of the log entry.
            default_file_path: Directory containing ``results.txt``; the
                report is appended to that file.

        Validation errors raised by ``_eval`` propagate to the caller.
        """
        music_ndcg, tag_ndcg, score = self._eval(gt_fname, rec_fname)
        lines = self._report_lines(music_ndcg, tag_ndcg, score)
        with open(f'{default_file_path}/results.txt','a') as f:
            f.write(model_file_path)
            # One report line per row, followed by a blank separator line.
            f.write("\n" + "\n".join(lines) + "\n\n")
        for line in lines:
            print(line)

    def evaluate(self, gt_fname, rec_fname):
        """Evaluate and print the song nDCG, tag nDCG and combined score.

        Validation errors raised by ``_eval`` propagate to the caller.
        """
        music_ndcg, tag_ndcg, score = self._eval(gt_fname, rec_fname)
        for line in self._report_lines(music_ndcg, tag_ndcg, score):
            print(line)

In [22]:
# Score the re-ranked recommendations against the ground-truth playlists.
arena_evaluator = ArenaEvaluator()
gt_fname = '../0_data/test.json'
rec_fname = 'results/result_model_7_rrk.json'
arena_evaluator.evaluate(gt_fname=gt_fname, rec_fname=rec_fname)

Music nDCG: 0.163414
Tag nDCG: 0.415366
Score: 0.201207
