In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
test_df = pd.read_pickle('../data/test.pkl')
test_df = test_df.sort_values(['userId', 'rating'], ascending=False)
test_series = test_df.groupby('userId').movieId.apply(set)

In [3]:
def ndcg_calculator(rec, gt):
    icdg = sum((1.0 / np.log(i + 2) for i in range(20)))
    return sum([1.0 / np.log(idx + 2) for idx, item in enumerate(rec) if item in gt]) / icdg

def evaluater(file_name):
    '''주어진 파일이 userId:[추천 아이템 리스트] 형식이라고 가정'''
    
    submit = pd.read_pickle(f'../data/{file_name}.pkl')
    

    submit = pd.concat([test_series, submit], axis=1)
    submit.columns = ['gt', 'rec']
    
    ndcgs = []
    for idx, row in tqdm(submit.iterrows(), total=len(submit)):
        ndcgs.append(ndcg_calculator(row['gt'], row.rec))
    print(f"mean nDCG: {np.mean(ndcgs):.4f}")

### evaluate ALS

In [4]:
evaluater('submit_als')

100%|██████████| 138493/138493 [00:19<00:00, 7156.97it/s]

mean nDCG: 0.1628





### evaluate BPR

In [5]:
evaluater('submit_bpr')

100%|██████████| 138493/138493 [00:18<00:00, 7326.59it/s]

mean nDCG: 0.1073





### evaluate Hybrid
- ALS + age & genre feature

In [4]:
evaluater('submit_hybrid_rf')

100%|██████████| 138493/138493 [00:18<00:00, 7616.75it/s]

mean nDCG: 0.0215



