In [1]:
import pickle
import pandas as pd
import os
import io
import json
import distutils.dir_util
import numpy as np

# 평가

> Data Load

In [2]:
# json write & load 함수 정의
def write_json(data, fname):
    """Serialize ``data`` to ``fname`` as UTF-8 JSON, creating parent directories.

    Args:
        data: JSON-serializable object. NumPy integers, floats, booleans and
            arrays nested inside it are converted to plain Python values.
        fname: Destination file path. Missing parent directories are created.

    Raises:
        TypeError: If ``data`` contains an object JSON cannot represent.
    """
    def _conv(o):
        # json.dumps only calls this hook for objects it cannot serialize itself.
        if isinstance(o, np.integer):
            return int(o)
        if isinstance(o, np.floating):
            return float(o)
        if isinstance(o, np.bool_):
            return bool(o)
        if isinstance(o, np.ndarray):
            return o.tolist()
        raise TypeError(f"Object of type {type(o).__name__} is not JSON serializable")

    parent = os.path.dirname(fname)
    if parent:
        # os.makedirs replaces distutils.dir_util.mkpath: distutils is deprecated
        # and no longer ships with Python 3.12+. A bare file name has no parent
        # directory, so there is nothing to create in that case.
        os.makedirs(parent, exist_ok=True)
    with io.open(fname, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII text (e.g. Korean tags) readable.
        f.write(json.dumps(data, ensure_ascii=False, default=_conv))
        
def load_json(fname):
    """Read the UTF-8 encoded JSON file at ``fname`` and return the parsed object."""
    with open(fname, encoding='utf-8') as json_file:
        return json.load(json_file)

In [3]:
# Load the pickled feature-column labels; later cells slice this object into
# song labels and tag labels to decode the model output.
# NOTE: pickle.load can execute arbitrary code, so only load files from a trusted source.
with open('../0_data/mfl_col.pkl', 'rb') as f:
    mfl_col = pickle.load(f)

# Total number of feature columns (songs + tags)
len(mfl_col)

24666

In [4]:
# Number of leading entries of mfl_col that are songs; everything after that
# offset is treated as a tag (22798 songs + 1868 tags = 24666 columns).
song_len = 22798
# song = 22798, tag = 1868

In [5]:
q_test = pd.read_json('../0_data/q_test.json')
q_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11456 entries, 0 to 11455
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   tags          11456 non-null  object
 1   id            11456 non-null  int64 
 2   plylst_title  11456 non-null  object
 3   songs         11456 non-null  object
 4   like_cnt      11456 non-null  int64 
 5   updt_date     11456 non-null  object
dtypes: int64(2), object(4)
memory usage: 537.1+ KB


In [6]:
q_test.head()

Unnamed: 0,tags,id,plylst_title,songs,like_cnt,updt_date
0,[],110887,발라드 미디엄 위주의 경쾌 발라드,"[389159, 226331, 597375, 586653, 613020, 50379...",14,2016-07-07 21:20:09.000
1,"[Pop, 힐링, 기분전환, 퇴근길]",113079,지치고 고된 하루끝 퇴근시간에 듣기 좋은 POP,[],339,2020-03-23 20:03:51.000
2,[],80316,달빛 비추는 밤 나의 감성을 자극할 노래,"[413189, 47106, 317362, 63533, 422807, 342803,...",567,2019-12-02 18:15:25.000
3,[슬픔],32338,추운 겨울 아무 이유없이 땡기는 노래,[],22,2010-11-22 22:33:54.000
4,"[휴일, 취향저격DJ]",91698,홈캉스 필수템 우아한 트렌디 POP,[],67,2018-08-08 12:23:09.000


In [7]:
with open('../0_data/q_test_onehot.pkl', 'rb') as f:
    q_test_onehot = pickle.load(f)

q_test_onehot.shape

(11456, 24666)

> predict

In [8]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.optimizers import Adam

In [9]:
model_7 = tf.keras.models.load_model('results/model_7.h5')

In [10]:
predict_plist=model_7.predict(q_test_onehot)



In [65]:
# Playlist ids of q_test as a plain list
df_id = list(q_test['id'])
# Column labels that were used as model features
col = mfl_col

# The first song_len labels are songs, the remaining labels are tags
ori_song, ori_tag = col[:song_len], col[song_len:]

# Split the model output column-wise the same way: song outputs, then tag outputs
song_predict, tag_predict = predict_plist[:, :song_len], predict_plist[:, song_len:]

In [117]:
# Number of highest-scoring candidates kept per playlist at this stage.
N_SONG_CANDIDATES = 200
N_TAG_CANDIDATES = 100

result = []
for row, playlist_id in enumerate(df_id):  # row indexes the prediction matrix, playlist_id is the q_test id
    # argsort sorts ascending, so take the tail and reverse it to get best-first order.
    top_song_idx = song_predict[row].argsort()[-N_SONG_CANDIDATES:][::-1]
    top_tag_idx = tag_predict[row].argsort()[-N_TAG_CANDIDATES:][::-1]
    result.append({
        'id': playlist_id,
        # Map column positions back to the original song / tag labels.
        'songs': [ori_song[idx] for idx in top_song_idx],
        'tags': [ori_tag[idx] for idx in top_tag_idx],
    })

In [112]:
# tag_predict[0].argsort()[-10:] # argsort 는 뒤에서 부터 잘라야 함

array([ 88,   1,   5,  59,  52,  21,  94,  45, 119,  69], dtype=int64)

In [116]:
# plist_tag = tag_predict[0].argsort()[-10:]
# p_tag=[]
# for tag in plist_tag[::-1]:
#     print(tag)
#     p_tag.append(ori_tag[tag])
# p_tag

69
119
45
94
21
52
59
5
1
88


['기분전환', '발라드', '매장음악', '밤', '잔잔한', '아이돌', '드라이브', '힐링', '휴식', '가을']

In [119]:
# json 형태로 변환
questions = q_test[['id', 'tags', 'songs']].to_dict(orient='records')

In [92]:
def remove_seen(seen, l):
    """Return the items of ``l`` that do not occur in ``seen``, preserving order."""
    already_seen = set(seen)
    kept = []
    for item in l:
        if item not in already_seen:
            kept.append(item)
    return kept

In [120]:
# For every prediction, drop the songs/tags the question playlist already has,
# then keep the first 100 songs and 10 tags.
# questions and result are matched by position, not by id.
answers = [
    {
        "id": rec['id'],
        "songs": remove_seen(questions[idx]["songs"], rec['songs'])[:100],
        "tags": remove_seen(questions[idx]["tags"], rec['tags'])[:10],
    }
    for idx, rec in enumerate(result)
]

In [121]:
answers[0]['tags']

['기분전환', '발라드', '매장음악', '밤', '잔잔한', '아이돌', '드라이브', '힐링', '휴식', '가을']

In [122]:
write_json(answers,'results/result_model_7_rs.json')

---

# 평가

In [123]:
class ArenaEvaluator:
    """Scores a recommendation JSON file against a ground-truth JSON file.

    Songs and tags are scored separately with nDCG, averaged over playlists,
    and combined as ``0.85 * music_ndcg + 0.15 * tag_ndcg``.
    """

    def _idcg(self, l):
        """Return the ideal DCG when the first ``l`` ranks are all relevant."""
        return sum((1.0 / np.log(i + 2) for i in range(l)))

    def __init__(self):
        # Pre-compute the ideal DCG for 0..100 relevant items.
        self._idcgs = [self._idcg(i) for i in range(101)]

    def _ndcg(self, gt, rec):
        """Return the nDCG of the ranked list ``rec`` against ground truth ``gt``.

        Args:
            gt: Ground-truth items (hashable, e.g. song ids or tag strings).
            rec: Recommended items, best first.

        Returns:
            DCG of ``rec`` divided by the ideal DCG for ``min(len(gt), 100)``
            relevant items, or 0.0 when ``gt`` is empty.
        """
        if len(gt) == 0:
            # The ideal DCG of an empty ground truth is 0; return 0.0 instead of
            # dividing by zero.
            return 0.0

        relevant = set(gt)  # constant-time membership instead of scanning the list
        dcg = 0.0
        for i, r in enumerate(rec):
            if r in relevant:
                dcg += 1.0 / np.log(i + 2)
        # The ideal-DCG table only covers up to 100 relevant items.
        return dcg / self._idcgs[min(len(gt), 100)]

    def _eval(self, gt_fname, rec_fname):
        """Validate the recommendation file and compute its scores.

        Args:
            gt_fname: Path to the ground-truth JSON (list of playlists with
                ``id``, ``songs`` and ``tags``).
            rec_fname: Path to the recommendation JSON in the same layout.

        Returns:
            Tuple ``(music_ndcg, tag_ndcg, score)``.

        Raises:
            Exception: If the playlist ids differ between the two files, or any
                playlist does not contain exactly 100 unique songs and 10
                unique tags.
        """
        gt_playlists = load_json(gt_fname)
        gt_dict = {g["id"]: g for g in gt_playlists}
        rec_playlists = load_json(rec_fname)

        gt_ids = set(gt_dict)
        rec_ids = {r["id"] for r in rec_playlists}
        if gt_ids != rec_ids:
            raise Exception("결과의 플레이리스트 수가 올바르지 않습니다.")

        # Every playlist must recommend exactly 100 songs and 10 tags.
        rec_song_counts = {len(p["songs"]) for p in rec_playlists}
        rec_tag_counts = {len(p["tags"]) for p in rec_playlists}
        if rec_song_counts != {100}:
            raise Exception("추천 곡 결과의 개수가 맞지 않습니다.")

        if rec_tag_counts != {10}:
            raise Exception("추천 태그 결과의 개수가 맞지 않습니다.")

        # ...and none of them may be duplicated within a playlist.
        rec_unique_song_counts = {len(set(p["songs"])) for p in rec_playlists}
        rec_unique_tag_counts = {len(set(p["tags"])) for p in rec_playlists}

        if rec_unique_song_counts != {100}:
            raise Exception("한 플레이리스트에 중복된 곡 추천은 허용되지 않습니다.")

        if rec_unique_tag_counts != {10}:
            raise Exception("한 플레이리스트에 중복된 태그 추천은 허용되지 않습니다.")

        music_ndcg = 0.0
        tag_ndcg = 0.0

        for rec in rec_playlists:
            gt = gt_dict[rec["id"]]
            music_ndcg += self._ndcg(gt["songs"], rec["songs"][:100])
            tag_ndcg += self._ndcg(gt["tags"], rec["tags"][:10])

        music_ndcg = music_ndcg / len(rec_playlists)
        tag_ndcg = tag_ndcg / len(rec_playlists)
        score = music_ndcg * 0.85 + tag_ndcg * 0.15

        return music_ndcg, tag_ndcg, score

    def evaluate_with_save(self, gt_fname, rec_fname, model_file_path, default_file_path):
        """Evaluate, append the scores to ``<default_file_path>/results.txt`` and print them.

        Args:
            gt_fname: Path to the ground-truth JSON.
            rec_fname: Path to the recommendation JSON.
            model_file_path: Text written as the header line of the log entry.
            default_file_path: Directory containing ``results.txt``.
        """
        music_ndcg, tag_ndcg, score = self._eval(gt_fname, rec_fname)
        with open(f'{default_file_path}/results.txt','a') as f:
            f.write(model_file_path)
            f.write(f"\nMusic nDCG: {music_ndcg:.6}\n")
            f.write(f"Tag nDCG: {tag_ndcg:.6}\n")
            f.write(f"Score: {score:.6}\n\n")
        print(f"Music nDCG: {music_ndcg:.6}")
        print(f"Tag nDCG: {tag_ndcg:.6}")
        print(f"Score: {score:.6}")

    def evaluate(self, gt_fname, rec_fname):
        """Evaluate ``rec_fname`` against ``gt_fname`` and print the three scores."""
        music_ndcg, tag_ndcg, score = self._eval(gt_fname, rec_fname)
        print(f"Music nDCG: {music_ndcg:.6}")
        print(f"Tag nDCG: {tag_ndcg:.6}")
        print(f"Score: {score:.6}")

In [124]:
gt_fname = '../0_data/a_test.json'
rec_fname = 'results/result_model_7_rs.json'
arena_evaluator = ArenaEvaluator()
arena_evaluator.evaluate(gt_fname, rec_fname)

Music nDCG: 0.107596
Tag nDCG: 0.379045
Score: 0.148313


> rerank 후 평가

In [125]:
with open('../0_data/count_song.pkl', 'rb') as f:
    count_song = pickle.load(f)

len(count_song)

44674

In [126]:
with open('../0_data/count_tag.pkl', 'rb') as f:
    count_tag = pickle.load(f)

len(count_tag)

3400

In [135]:
results = pd.read_json('./results/result_model_7_rs.json', typ = 'frame', encoding='utf-8')
results.head()

Unnamed: 0,id,songs,tags
0,110887,"[144663, 675115, 8719, 116573, 64052, 125822, ...","[기분전환, 발라드, 매장음악, 밤, 잔잔한, 아이돌, 드라이브, 힐링, 휴식, 가을]"
1,113079,"[146989, 76888, 360825, 459256, 493762, 115808...","[휴식, 감성, 팝송, 신나는, 명곡, 잔잔한, 추억, 여행, 매장음악, 밤]"
2,80316,"[418935, 8719, 531820, 116573, 473514, 449244,...","[발라드, 밤, 기분전환, 여름, 새벽, 드라이브, 가을, 잔잔한, 댄스, 겨울]"
3,32338,"[253755, 366786, 497066, 620800, 116573, 14086...","[이별, 비오는날, 발라드, 사랑, 감성, 인디, 밤, 댄스, 드라이브, 추억]"
4,91698,"[38832, 620800, 205179, 642526, 144663, 634998...","[기분전환, 발라드, 팝, 댄스, 매장음악, 락, 비오는날, 일렉, 인디, 드라이브]"


In [147]:
pl_ids = results['id']
p_songs = results['songs']
p_tags = results['tags']

# Sanity check: all three columns should have one entry per playlist.
# (The third length previously repeated p_songs, so p_tags was never checked.)
len(pl_ids), len(p_songs), len(p_tags)

(11456, 11456, 11456)

In [138]:
p_tags[0]

['기분전환', '발라드', '매장음악', '밤', '잔잔한', '아이돌', '드라이브', '힐링', '휴식', '가을']

In [148]:
# Re-order each playlist's recommended songs by their count_song value
# (presumably an occurrence count -- confirm), largest first.
p_songs_reranked = []
for candidates in p_songs:
    counts = np.array([count_song[song_id] for song_id in candidates])
    # argsort is ascending, so the sorted list is reversed afterwards
    ascending = np.array(candidates)[counts.argsort()].tolist()
    p_songs_reranked.append(ascending[::-1])
len(p_songs_reranked)

11456

In [149]:
# Re-order each playlist's recommended tags by their count_tag value
# (presumably an occurrence count -- confirm), largest first.
p_tags_reranked = []
for candidates in p_tags:
    counts = np.array([count_tag[tag_name] for tag_name in candidates])
    # argsort is ascending, so the sorted list is reversed afterwards
    ascending = np.array(candidates)[counts.argsort()].tolist()
    p_tags_reranked.append(ascending[::-1])
len(p_tags_reranked)

11456

In [154]:
# Preview of the tag rerank on the first playlist only.
# The result is stored under its own name: reusing `p_tags_reranked` here would
# replace the full per-playlist list with a single entry and break any later
# cell that indexes it by playlist position.
p_tags_reranked_preview = []
for p_tag in p_tags[:1]:
    score = [count_tag[tag] for tag in p_tag]
    # argsort is ascending, so reverse to put the largest count first
    p_tag_reranked = np.array(p_tag)[np.array(score).argsort()].tolist()
    p_tags_reranked_preview.append(p_tag_reranked[::-1])
len(p_tags_reranked_preview)

1

In [155]:
# Show only the first playlist's reranked tags rather than the whole list
p_tags_reranked[:1]

[['기분전환', '드라이브', '휴식', '잔잔한', '힐링', '발라드', '밤', '매장음악', '가을', '아이돌']]

In [150]:
# Assemble one record per playlist from the reranked songs and tags
result = [
    {
        'id': pl_ids[i],
        'songs': p_songs_reranked[i],
        'tags': p_tags_reranked[i],
    }
    for i in range(len(pl_ids))
]

In [151]:
write_json(result,'./results/result_model_7_rs_rrk.json')

In [152]:
result = load_json('./results/result_model_7_rs_rrk.json')

In [156]:
gt_fname = '../0_data/a_test.json'
rec_fname = 'results/result_model_7_rs_rrk.json'
arena_evaluator = ArenaEvaluator()
arena_evaluator.evaluate(gt_fname, rec_fname)

Music nDCG: 0.0806131
Tag nDCG: 0.282053
Score: 0.110829


- rerank 전
    - ['기분전환', '발라드', '매장음악', '밤', '잔잔한', '아이돌', '드라이브', '힐링', '휴식', '가을']

- rerank 후
    - ['기분전환', '드라이브', '휴식', '잔잔한', '힐링', '발라드', '밤', '매장음악', '가을', '아이돌']

- 결론: 등장 빈도 기준으로 rerank한 결과보다 모델 출력 확률 순서를 그대로 사용한 결과가 더 정확함 (Score 0.148313 vs 0.110829)

In [None]:
# 5위코드 참고
        # if mode == 0:
        #     if epoch % check_every == 0:
        #         if os.path.exists(tmp_result_file_path):
        #             os.remove(tmp_result_file_path)
        #         elements = []
        #         for idx, (_id, _data) in enumerate(tqdm(q_data_loader, desc='testing...')):
        #             with torch.no_grad():
        #                 _data = _data.to(device)
        #                 output = model(_data)

        #                 songs_input, tags_input = torch.split(_data, num_songs, dim=1)
        #                 songs_output, tags_output = torch.split(output, num_songs, dim=1)

        #                 songs_ids = binary_songs2ids(songs_input, songs_output, id2prep_song_dict)
        #                 tag_ids = binary_tags2ids(tags_input, tags_output, id2tag_dict)

        #                 _id = list(map(int, _id))
        #                 for i in range(len(_id)):
        #                     element = {'id': _id[i], 'songs': list(songs_ids[i]), 'tags': tag_ids[i]}
        #                     elements.append(element)

        #         write_json(elements, tmp_result_file_path)
        #         evaluator.evaluate(answer_file_path, tmp_result_file_path)
        #         os.remove(tmp_result_file_path)

In [None]:
# binary_songs2ids()
# songs_idxes = output.argsort(axis=1)[:, ::-1][:, :100]

In [None]:
# 5위 코드는 기본 오토인코더 버전과 장르 정보가 포함된 버전, 두 개의 임베딩(또는 결과)이 나왔을 때 question과 비교해서 유사도가 높은 쪽을 사용함
# 여러 상황(song만 있는 경우 등)을 나눠서 처리함
# 결국 오토인코더만의 성능을 알고 싶으면 train.py만 돌려서 평가하면 됨