In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics.pairwise import pairwise_distances


# Data acquisition
# MovieLens 100k rating files (tab-separated, no header row): the complete
# rating log, plus the ua.base / ua.test files used below as the training and
# test sets.
u_data_org = pd.read_csv(
    'http://files.grouplens.org/datasets/movielens/ml-100k/u.data',
    sep="\t",
    names=["user_id", "item_id", "rating", "timestamp"],
)
u_data_org.head()

# Training split.
u_data_train = pd.read_csv(
    'http://files.grouplens.org/datasets/movielens/ml-100k/ua.base',
    sep="\t",
    names=["user_id", "item_id", "rating", "timestamp"],
)
u_data_train.head()

# Test split.
u_data_test = pd.read_csv(
    'http://files.grouplens.org/datasets/movielens/ml-100k/ua.test',
    sep="\t",
    names=["user_id", "item_id", "rating", "timestamp"],
)
u_data_test.head()


# Data preparation
# Build the item x user rating matrix from the training split. A rating is
# stored at [item_id - 1, user_id - 1], so ids are assumed to be contiguous
# and to start at 1; (item, user) pairs with no training rating stay 0.
item_list = u_data_org.sort_values('item_id').item_id.unique()
user_list = u_data_org.user_id.unique()
rating_matrix_item = np.zeros([len(item_list), len(user_list)])

# Walk the training ratings exactly once. Every item id from 1 through
# len(item_list) is covered (including the highest one), each cell receives a
# plain scalar rather than a one-element Series, and the DataFrame is not
# re-filtered for every (item, user) pair.
train_ratings = zip(
    u_data_train['item_id'].values,
    u_data_train['user_id'].values,
    u_data_train['rating'].values,
)
for item_id, user_id, user_rate in tqdm(train_ratings, total=len(u_data_train)):
    rating_matrix_item[item_id - 1, user_id - 1] = user_rate

# Score mask
# rating_matrix_calc is 1 where a training rating exists and 0 elsewhere;
# rating_matrix_train is its complement (1 = not rated in training).
rating_matrix_calc = (rating_matrix_item != 0).astype(rating_matrix_item.dtype)
rating_matrix_train = 1 - rating_matrix_calc

# Similarity
# Item-item cosine similarity, derived from the cosine distance between the
# rows of the rating matrix. The diagonal is cleared so that an item is never
# counted as similar to itself.
similarity_matrix = 1 - pairwise_distances(X=rating_matrix_item, metric='cosine')
similarity_matrix[np.diag_indices_from(similarity_matrix)] = 0

# Calculation
# Precision@top_k of the item-based recommendations for one user.
user_id = 100
top_k = 10

# Training ratings of this user, one entry per item row.
rating_matrix_user = rating_matrix_item[:, user_id - 1]

# Score every item as the similarity-weighted sum of the user's ratings.
# The matrix-vector product equals (similarity_matrix * ratings).sum(axis=1)
# without materialising the intermediate item x item array.
pred_rating_user = similarity_matrix.dot(rating_matrix_user)

# rating_matrix_train is 0 for items the user already rated in training, so
# their score is zeroed here.
pred_rating_user_item = pred_rating_user * rating_matrix_train[:, user_id - 1]

# Highest scores first; row index + 1 converts back to an item_id.
recommend_list = np.argsort(pred_rating_user_item)[::-1][:top_k] + 1

# Items this user rated in the test split.
purchase_list_user = u_data_test[u_data_test.user_id == user_id].loc[:, 'item_id'].unique()

# Count recommended items that appear in the test split. recommend_list holds
# distinct item ids, so each match is counted once.
hits = int(np.isin(recommend_list, purchase_list_user).sum())
pre = hits / float(top_k)

# 評価
# precision_list = []
# recall_list = []
# user_list_test = u_data_test.sort_values('user_id').user_id.unique()
#
# for user_id in tqdm(user_list_test):
#     hits = 0
#     rating_matrix_user = rating_matrix_item[:, user_id - 1]
#     pred_rating_user = similarity_matrix * rating_matrix_user
#     pred_rating_user_item = pred_rating_user * rating_matrix_train[:,user_id - 1]
#     pred_rating_user_item[np.isnan(pred_rating_user_item)] = 0
#     recommend_list = np.argsort(pred_rating_user_item)[::-1][:10] + 1
#     purchase_list_user = u_data_test[u_data_test.user_id == user_id].loc[:, 'item_id'].unique()
#     if len(purchase_list_user) == 0:
#         continue
#     for item_id in recommend_list:
#         if item_id in purchase_list_user:
#             hits += 1
#     pre = hits / 10.0
#     precision_list.append(pre)
#
# precision = sum(precision_list) / len(precision_list)
# print('Precision:', precision)


