- Leave-One-Outで学習データとテストデータに分割
- user × item の評価行列を作成
- ユーザー間のコサイン類似度を計算
- 各userに対する推薦リスト（Top-10）の作成
- 正解データの作成
- 評価指標の計算

In [2]:
import pandas as pd

# Load the tab-separated ratings file, supplying the column names explicitly.
cols = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('u.data', names=cols, sep='\t', engine='python')

# Leave-One-Out split: within each user, rank interactions by timestamp from
# newest (rank 1) to oldest; ties are broken by row order (method='first').
recency_rank = df.groupby('user_id')['timestamp'].rank(method='first', ascending=False)
df['rank'] = recency_rank

# The newest interaction of every user is held out for testing; everything
# older is used for training.
is_held_out = df['rank'] == 1
train_df = df.loc[df['rank'] > 1]
test_df = df.loc[is_held_out]

In [3]:
# Reshape the training ratings into a user x item table: one row per user,
# one column per item, each cell holding the rating. User/item pairs without
# a training rating are filled with 0.
rating_table = train_df.pivot(index='user_id', columns='item_id', values='rating')
user_item_matrix = rating_table.fillna(0)

In [5]:
from sklearn.metrics.pairwise import cosine_similarity

# User x user cosine similarity computed from the rows of the rating table,
# wrapped in a DataFrame labelled by user_id on both axes.
user_ids = user_item_matrix.index
user_similarity = pd.DataFrame(
    cosine_similarity(user_item_matrix),
    index=user_ids,
    columns=user_ids,
)
user_similarity.head()

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.167776,0.048242,0.059537,0.37863,0.427965,0.440714,0.321863,0.080583,0.377502,...,0.366719,0.121047,0.276543,0.178533,0.196115,0.11712,0.315938,0.135202,0.174834,0.394987
2,0.167776,1.0,0.112956,0.182128,0.07336,0.235602,0.104305,0.10475,0.166886,0.16104,...,0.152622,0.293381,0.354014,0.432945,0.316109,0.230823,0.211326,0.16584,0.174596,0.09807
3,0.048242,0.112956,1.0,0.355892,0.021598,0.053896,0.054582,0.085147,0.063972,0.050573,...,0.032422,0.044016,0.167498,0.071288,0.126326,0.02683,0.165495,0.105155,0.112973,0.015454
4,0.059537,0.182128,0.355892,1.0,0.032369,0.069417,0.086047,0.174467,0.106264,0.049004,...,0.053059,0.037912,0.136244,0.199994,0.148666,0.030813,0.201462,0.158088,0.174535,0.043927
5,0.37863,0.07336,0.021598,0.032369,1.0,0.237986,0.373954,0.251147,0.058635,0.201972,...,0.335383,0.081648,0.095515,0.081076,0.148705,0.071825,0.241418,0.142695,0.153843,0.311633


In [6]:
# 各userに対する推薦リストを作成(Top-10)

def recommend_items_userknn(user_id, top_k=10, neighbor_k=20,
                            similarity=None, ratings=None):
    """Return the top-k unseen items for a user, scored by its nearest neighbours.

    Each candidate item is scored as the similarity-weighted sum of the
    neighbours' ratings, normalised by the total similarity of the selected
    neighbours. Unrated cells are stored as 0, so a neighbour who has not
    rated an item contributes a zero rating to that item's score.

    Args:
        user_id: Label of the target user; must be present in both tables.
        top_k: Maximum number of items to return.
        neighbor_k: Number of most similar users to aggregate over.
        similarity: Optional user x user similarity DataFrame (labelled by
            user id on both axes). Defaults to the module-level
            ``user_similarity``.
        ratings: Optional user x item rating DataFrame in which 0 marks an
            unrated item. Defaults to the module-level ``user_item_matrix``.

    Returns:
        A list of at most ``top_k`` item labels, best score first.

    Raises:
        KeyError: If ``user_id`` is missing from either table.
    """
    # Fall back to the notebook-level tables so existing callers keep working.
    if similarity is None:
        similarity = user_similarity
    if ratings is None:
        ratings = user_item_matrix

    # Most similar users, excluding the target user itself.
    sim_users = similarity[user_id].drop(user_id).nlargest(neighbor_k)

    # Similarity-weighted sum of the neighbours' ratings, one score per item.
    neighbor_ratings = ratings.loc[sim_users.index]
    weighted_scores = neighbor_ratings.T.dot(sim_users)

    # Normalise by the total similarity. When the similarities sum to zero the
    # division would turn every score into NaN, so the raw sums are kept.
    total_similarity = sim_users.sum()
    if total_similarity != 0:
        weighted_scores = weighted_scores / total_similarity

    # Drop items the target user has already rated.
    seen_items = ratings.loc[user_id]
    weighted_scores = weighted_scores[seen_items == 0]

    return weighted_scores.sort_values(ascending=False).head(top_k).index.tolist()

# Build the Top-10 recommendation list for every user in the test split,
# keyed by user id.
test_users = test_df['user_id']
recommendations = {
    uid: recommend_items_userknn(uid, top_k=10) for uid in test_users
}


In [7]:
# Ground truth: map each test user id to the item id of its held-out row.
held_out_items = test_df.set_index('user_id')['item_id']
ground_truth = held_out_items.to_dict()

In [8]:
# 評価指標の計算

from Evaluation_index import recall_at_k, precision_at_k, ndcg_at_k, mrr_at_k, hit_at_k

# Cut-off used for every metric; the printed labels are derived from it so
# they cannot drift out of sync with the value actually evaluated.
k = 10

# (display label, metric function) pairs, in the order they are reported.
metrics = [
    ("Recall", recall_at_k),
    ("Precision", precision_at_k),
    ("NDCG", ndcg_at_k),
    ("MRR", mrr_at_k),
    ("Hit", hit_at_k),
]

print(f"=== UserKNN モデル評価結果（Top-{k}）===")
for label, metric_fn in metrics:
    score = metric_fn(recommendations, ground_truth, k)
    # Pad "<name>@<k>" to 12 characters so the colons line up.
    print(f"{label + '@' + str(k):<12} : {score:.4f}")

# RecBoleによるPopularityモデルの評価結果はない

=== UserKNN モデル評価結果（Top-10）===
Recall@10    : 0.1283
Precision@10 : 0.0128
NDCG@10      : 0.0649
MRR@10       : 0.0456
Hit@10       : 0.1283
