# 人気度順推薦
## 人気度の定義
* 今回は、平均評価値が高い映画を「人気が高い映画」とする
* 人気度の定義は「クリック数が多いもの」「購入が多いもの」「評価値が高いもの」など複数あり、自社サービスに最も適した定義を利用する


In [1]:
# Put the parent directory at the front of the module search path; the
# `util` package imported below is expected to be found there.
import sys

sys.path.insert(0, '..')

from util.data_loader import DataLoader
from util.metric_calculator import MetricCalculator

In [2]:
# Load the MovieLens data set through the project's DataLoader.
data_loader = DataLoader(
    num_users=1000,
    num_test_items=5,
    data_path='../data/ml-10M100K/',
)
movielens = data_loader.load()

In [3]:

import numpy as np

# Inspect which movies have the highest mean rating.
#
# The aggregations are given as the strings 'size' and 'mean' instead of the
# NumPy callables np.size / np.mean: pandas >= 2.1 emits a FutureWarning when
# NumPy reduction callables such as np.mean are passed to `agg`. The string
# names use pandas' built-in groupby aggregations and produce the same
# ('rating', 'size') and ('rating', 'mean') columns.
movie_stats = (
    movielens.train
    .groupby(['movie_id', 'title'])
    .agg({'rating': ['size', 'mean']})
)

# No minimum rating count is applied here, so movies with very few ratings
# can reach the top of this ranking.
movie_stats.sort_values(by=('rating', 'mean'), ascending=False).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,rating,rating
Unnamed: 0_level_1,Unnamed: 1_level_1,size,mean
movie_id,title,Unnamed: 2_level_2,Unnamed: 3_level_2
4095,Cry Freedom (1987),1,5.0
7227,"Trouble with Angels, The (1966)",1,5.0
27255,"Wind Will Carry Us, The (Bad ma ra khahad bord) (1999)",1,5.0
4453,Michael Jordan to the Max (2000),2,5.0
3415,"Mirror, The (Zerkalo) (1975)",1,5.0


In [4]:
# Introduce a threshold: rank only movies that have at least 100 ratings.
#
# The statistics are recomputed here so the cell does not depend on an
# earlier cell's `movie_stats`. Aggregations are named by string
# ('size', 'mean') rather than passed as np.size / np.mean, because
# pandas >= 2.1 warns (FutureWarning) about NumPy reduction callables in
# `agg`; the resulting column labels are unchanged.
movie_stats = (
    movielens.train
    .groupby(['movie_id', 'title'])
    .agg({'rating': ['size', 'mean']})
)

# Select the MultiIndex column with a single tuple key instead of the chained
# lookup movie_stats['rating']['size'].
atleast_flg = movie_stats[('rating', 'size')] >= 100

movies_sorted_by_rating = (
    movie_stats[atleast_flg]
    .sort_values(by=('rating', 'mean'), ascending=False)
)
movies_sorted_by_rating.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,rating,rating
Unnamed: 0_level_1,Unnamed: 1_level_1,size,mean
movie_id,title,Unnamed: 2_level_2,Unnamed: 3_level_2
318,"Shawshank Redemption, The (1994)",424,4.491745
50,"Usual Suspects, The (1995)",334,4.459581
912,Casablanca (1942),163,4.444785
904,Rear Window (1954),129,4.44186
2019,Seven Samurai (Shichinin no samurai) (1954),104,4.408654


In [5]:
# Popularity-based recommendation, run with minimum_num_rating=100.
from src.popularity import PopularityRecommender

recommender = PopularityRecommender()
recommend_result = recommender.recommend(
    movielens,
    minimum_num_rating=100,
)

In [6]:
# Evaluation: pass the test-set ratings and user->items mapping together with
# the recommender's ratings and user->items mapping to the metric calculator
# (presumably true values first, predictions second -- confirm against
# MetricCalculator.calc), using k=10.
metric_calculator = MetricCalculator()
metrics = metric_calculator.calc(
    movielens.test.rating.tolist(),
    recommend_result.rating.tolist(),
    movielens.test_user2items,
    recommend_result.user2items,
    k=10,
)
print(metrics)

rmse=1.082, Precision@K=0.008, Recall@K=0.027


In [7]:
# Behaviour when the threshold is changed: re-run the recommender and the
# evaluation for each minimum_num_rating value and print the metrics.
for minimum_num_rating in [1, 200]:
    recommend_result = recommender.recommend(
        movielens,
        minimum_num_rating=minimum_num_rating,
    )
    metrics = metric_calculator.calc(
        movielens.test.rating.tolist(),
        recommend_result.rating.tolist(),
        movielens.test_user2items,
        recommend_result.user2items,
        k=10,
    )
    print(metrics)

rmse=1.082, Precision@K=0.000, Recall@K=0.000
rmse=1.082, Precision@K=0.013, Recall@K=0.042


In [9]:
# Behaviour when the threshold is changed: re-run the recommender and the
# evaluation with minimum_num_rating=300 and print the metrics.
for minimum_num_rating in [300]:
    recommend_result = recommender.recommend(
        movielens,
        minimum_num_rating=minimum_num_rating,
    )
    metrics = metric_calculator.calc(
        movielens.test.rating.tolist(),
        recommend_result.rating.tolist(),
        movielens.test_user2items,
        recommend_result.user2items,
        k=10,
    )
    print(metrics)

rmse=1.082, Precision@K=0.017, Recall@K=0.056
