In [1]:
import os
import io
import json
import distutils.dir_util
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
from itertools import chain 


In [2]:
# json write & load 함수 정의
def write_json(data, fname):
    def _conv(o):
        if isinstance(o, (np.int64, np.int32)):
            return int(o)
        raise TypeError

    parent = os.path.dirname(fname)
    distutils.dir_util.mkpath(parent)
    with io.open(fname, "w", encoding="utf-8") as f:
        json_str = json.dumps(data, ensure_ascii=False, default=_conv)
        f.write(json_str)
        
def load_json(fname):
    with open(fname, encoding='utf-8') as f:
        json_obj = json.load(f)

    return json_obj


# 추천 결과 생성
def remove_seen(seen, l):
    seen = set(seen)
    return [x for x in l if not (x in seen)]

In [3]:
mfl_col_path = './data/mfl_col.pkl'
data_path = './data/cnt3_playlists.json'
model_path = './data/model_ad_e30.h5'

with open(mfl_col_path, 'rb') as f: # 사용 feature
    features = pickle.load(f)

data = pd.read_json(data_path) # 전체 플리 데이터

autoencoder = tf.keras.models.load_model(model_path)

In [4]:
select_ply_lst = ['37349']
similarity = 10
song_num = 10

In [5]:
# 1. ply_id 넣으면 해당 행을 원핫 벡터로 변경
def ply_to_onehot(select_ply_lst):
    # ply_id int형 변환
    select_ply_lst = list(map(int, select_ply_lst))
    # zero_mt 생성
    zero_matrix = np.zeros((1,len(features)))
    input_onehot = pd.DataFrame(zero_matrix,columns=features)

    # input_ply
    input_song = []
    input_tag = []
    for ply_id in select_ply_lst:
        input_song.append(data[data['id']==ply_id]['songs'].tolist()[0])
        input_tag.append(data[data['id']==ply_id]['tags'].tolist()[0])

    input_song = list(chain.from_iterable(input_song))
    input_tag = list(chain.from_iterable(input_tag))
    input_ply = input_song + input_tag

    # one-hot encoding
    for ft in input_ply :
        if ft in features:
            input_onehot.iloc[0,features.index(ft)]=1

    return input_song, input_tag, input_onehot

In [6]:
input_song, input_tag, input_onehot = ply_to_onehot(select_ply_lst)

In [12]:
 predict_plist = autoencoder.predict(input_onehot)



In [14]:
song_len = 22798
song_num = 10
tag_num = 5
ori_song = features[:song_len]
ori_tag = features[song_len:]
song_predict = predict_plist[:,:song_len] # song output(추천곡)
tag_predict = predict_plist[:,song_len:] # tag output(추천태그)

p_song = np.array(ori_song)[song_predict[0].argsort()[::-1][:(song_num*10)]]
p_tag = np.array(ori_tag)[tag_predict[0].argsort()[::-1][:(tag_num*10)]]

In [49]:
# features[:88178]
features[88146:]

['팝',
 '위로',
 '추억',
 '밤',
 '새벽',
 '회상',
 '달달한노래',
 '봄',
 '사랑노래',
 '시원한음악',
 '여름노래',
 '걸그룹댄스',
 '여름',
 '드라이브',
 '일렉',
 '재즈',
 '불면증',
 '잠들기전',
 '감성음악',
 '카페',
 '잔잔한',
 '가을',
 '힙합',
 '힙',
 '힐링',
 '산책',
 '휴식',
 '여행',
 '감성',
 '스트레스해소',
 '기분전환',
 '눈물',
 '사랑',
 '뉴에이지',
 '쓸쓸',
 'SM',
 '우울',
 '인디',
 '공감',
 '인디음악',
 '채널A',
 '별',
 '우주',
 '편안한',
 '히든트랙',
 '분위기',
 '매력',
 '설렘',
 '혼자',
 '고독',
 '혼자있고싶을때',
 'M에센셜',
 'indie',
 '랩',
 '섹시한',
 '인트로',
 '명곡',
 '메탈',
 '드럼',
 '사운드',
 '락',
 '리드미컬',
 '신나는',
 'soul',
 '알앤비',
 '발라드',
 '숨은명곡',
 '연말',
 '추천곡',
 '신곡',
 '띵곡',
 '연말결산',
 '2019',
 '대세',
 '노래방애창곡',
 '신나는음악',
 '회식',
 '갬성',
 '비오는날',
 '까페',
 '목욕',
 '샤워',
 '매장',
 '스타일리쉬',
 '소울',
 '운동',
 '데이브레이크',
 'DAY6',
 '밴드음악',
 '잔나비',
 '에센셜',
 'Pop',
 'Christmas',
 '경쾌한',
 '크리스마스',
 '스탠더드',
 '캐롤',
 '성탄절',
 'Carol',
 '이별',
 '슬픔',
 'CCM',
 'house',
 'elec',
 '한국영화',
 '명장면',
 'OST',
 '영화OST',
 '영화',
 '스트레스',
 '가창력',
 '모음',
 '그루브',
 '리듬',
 '센치',
 'sweetmind',
 '집',
 '방콕',
 '내방',
 '2000',
 '댄스',
 '감성곡',
 '겨울',
 '날씨',
 '침대',
 '하

In [16]:
np.array(ori_tag)[tag_predict[0].argsort()[::-1]]

array(['겨울', '사랑', '설렘', ..., '봄', '가을', '1864'], dtype='<U16')

In [17]:
ori_tag

[612081,
 611194,
 87811,
 188938,
 342160,
 271135,
 187679,
 240935,
 215720,
 321079,
 577749,
 432597,
 584454,
 529453,
 265280,
 362064,
 498524,
 448608,
 622433,
 642916,
 669805,
 633722,
 438662,
 302485,
 327581,
 87972,
 124118,
 293091,
 428950,
 54040,
 651693,
 207804,
 425788,
 679614,
 157249,
 697803,
 9701,
 66415,
 346049,
 5635,
 537228,
 194386,
 651550,
 244254,
 460069,
 620007,
 105260,
 357551,
 166959,
 549296,
 310714,
 372159,
 84736,
 84686,
 615826,
 482643,
 237718,
 592540,
 527265,
 542756,
 699879,
 384490,
 385004,
 548081,
 208447,
 275469,
 633614,
 386462,
 142758,
 620240,
 399962,
 254706,
 117759,
 701272,
 678948,
 329448,
 292203,
 548207,
 322947,
 648522,
 42547,
 668983,
 96186,
 268510,
 417567,
 183424,
 621451,
 79506,
 610707,
 701466,
 253612,
 115118,
 239922,
 146228,
 90802,
 354430,
 112826,
 320701,
 464317,
 529216,
 509010,
 375902,
 579307,
 54641,
 303472,
 495493,
 129290,
 58895,
 682558,
 25164,
 230100,
 693086,
 525790,


In [15]:
p_tag

array(['겨울', '사랑', '설렘', '254491', '410185', '447675', '265904', '266263',
       '310414', '169319', '377060', '353858', '656851', '621118',
       '225077', '173941', '146122', '226670', '370785', '397351',
       '545776', '229156', '706268', '17785', '크리스마스', '52934', '653711',
       '68013', '152002', '161874', '392503', '226993', '87206', '150118',
       '295966', '72705', '540735', '675075', '225143', '647164',
       '163890', '205602', '553467', '20392', '415376', '509301',
       '345721', '192516', '548106', '478437'], dtype='<U16')

In [50]:
def recommendation(input_song, input_tag, input_onehot, song_num, tag_num=5, song_len=88146):
    # predict
    predict_plist = autoencoder.predict(input_onehot)

    # result
    ori_song = features[:song_len]
    ori_tag = features[song_len:]
    song_predict = predict_plist[:,:song_len] # song output(추천곡)
    tag_predict = predict_plist[:,song_len:] # tag output(추천태그)

    p_song = np.array(ori_song)[song_predict[0].argsort()[::-1][:(song_num*10)]]
    p_tag = np.array(ori_tag)[tag_predict[0].argsort()[::-1][:(tag_num*10)]]

    rec_song = remove_seen(input_song, p_song)[:song_num] 
    rec_tag = remove_seen(input_tag, p_tag)[:tag_num]
    
    return rec_song, rec_tag

In [54]:
rec_song, rec_tag = recommendation(input_song, input_tag, input_onehot, song_num, tag_num=5, song_len=88146)



In [55]:
rec_song

[5049, 567990, 622775, 468494, 12851, 10445, 304532, 638130, 600574, 661264]

In [56]:
rec_tag

['크리스마스', '기분전환', '첫눈', '신나는', '겨울노래']

In [11]:
p_tag

NameError: name 'p_tag' is not defined