- 実装方法(numpy + pytorch)
- ①BPR loss(精霊と負例のスコア差を最大化)
- ②評価時はuserごとのスコアを計算してTop-10を推薦

In [6]:
import numpy as np, pandas as pd
from sklearn.preprocessing import LabelEncoder

# データ読み込み
cols = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('u.data', sep='\t', names=cols)

# Implicit化
df['rating'] = (df['rating'] >= 4).astype(int)

# Leave-One-Out
df['rank'] = df.groupby('user_id')['timestamp'].rank(method='first', ascending=False)
train_df = df[df['rank'] > 1]
test_df = df[df['rank'] == 1]

# ユーザー・アイテム ID を連番に変換
user_enc = LabelEncoder()
item_enc = LabelEncoder()

# 明示的にコピーして安全に代入
train_df = train_df.copy()
train_df.loc[:, 'user'] = user_enc.fit_transform(train_df['user_id'])
train_df.loc[:, 'item'] = item_enc.fit_transform(train_df['item_id'])

# test_df にも同様に処理（存在チェック後）
test_df = test_df[test_df['user_id'].isin(user_enc.classes_) & test_df['item_id'].isin(item_enc.classes_)]
test_df = test_df.copy()
test_df.loc[:, 'user'] = user_enc.transform(test_df['user_id'])
test_df.loc[:, 'item'] = item_enc.transform(test_df['item_id'])

num_users = train_df['user'].nunique()
num_items = train_df['item'].nunique()

In [7]:
train_df.head()

Unnamed: 0,user_id,item_id,rating,timestamp,rank,user,item
0,196,242,0,881250949,37.0,195,241
1,186,302,0,891717742,19.0,185,301
2,22,377,0,878887116,76.0,21,376
3,244,51,0,880606923,61.0,243,50
4,166,346,0,886397596,13.0,165,345


In [8]:
test_df.head()

Unnamed: 0,user_id,item_id,rating,timestamp,rank,user,item
52,260,322,1,890618898,1.0,259,321
53,25,181,1,885853415,1.0,24,180
70,189,512,1,893277702,1.0,188,511
109,265,118,1,875320714,1.0,264,117
167,155,323,0,879371261,1.0,154,322


In [11]:
# LightGCNモデルの定義

import torch
import torch.nn as nn

class LightGCN(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=64, num_layers=3):
        super().__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.embedding_dim = embedding_dim
        self.num_layers = num_layers

        # 初期エンベディング
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        # 初期化
        nn.init.xavier_uniform_(self.user_embedding.weight)
        nn.init.xavier_uniform_(self.item_embedding.weight)

        # 隣接行列を後で定義
        self.edge_index = None

    def propagate(self, edge_index):
        user_emb = self.user_embedding.weight
        item_emb = self.item_embedding.weight

        # 結合エンベディング
        all_embeddings = torch.cat([user_emb, item_emb], dim=0)

        # 隣接情報
        num_nodes = self.num_users + self.num_items
        adj = torch.zeros((num_nodes, num_nodes))
        for u, i in edge_index:
            adj[u, self.num_users + i] = 1
            adj[self.num_users + i, u] = 1
        deg = adj.sum(1)
        deg_inv_sqrt = torch.pow(deg, -0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        DAD = deg_inv_sqrt.unsqueeze(1) * adj * deg_inv_sqrt.unsqueeze(0)

        # K層伝播（LightGCNは線形のみ）
        embs = [all_embeddings]
        x = all_embeddings
        for _ in range(self.num_layers):
            x = torch.matmul(DAD, x)
            embs.append(x)

        # 平均を取る
        final_emb = torch.stack(embs, dim=0).mean(0)
        return final_emb[:self.num_users], final_emb[self.num_users:]


In [13]:
# エッジインデックスを用意
edge_index = train_df[['user', 'item']].values.tolist()

# モデル初期化・伝播
model = LightGCN(num_users, num_items)
model.eval()
user_emb, item_emb = model.propagate(edge_index)

# 評価指標用辞書を構築
recommendations = {}
with torch.no_grad():
    scores = torch.matmul(user_emb, item_emb.T)  # ユーザー×アイテムのスコア
    for u in test_df['user'].unique():
        top_items = torch.topk(scores[u], 10).indices.tolist()
        original_user = user_enc.inverse_transform([u])[0]
        original_items = item_enc.inverse_transform(top_items).tolist()
        recommendations[original_user] = original_items

# ground_truth の作成
ground_truth = test_df.set_index('user_id')['item_id'].to_dict()

# 評価指標（別ファイルでもOK）
from Evaluation_index import recall_at_k, precision_at_k, ndcg_at_k, mrr_at_k, hit_at_k

k = 10
print("=== LightGCN モデル評価結果（Top-10）===")
print(f"Recall@10    : {recall_at_k(recommendations, ground_truth, k):.4f}")
print(f"Precision@10 : {precision_at_k(recommendations, ground_truth, k):.4f}")
print(f"NDCG@10      : {ndcg_at_k(recommendations, ground_truth, k):.4f}")
print(f"MRR@10       : {mrr_at_k(recommendations, ground_truth, k):.4f}")
print(f"Hit@10       : {hit_at_k(recommendations, ground_truth, k):.4f}")

print("=== RecBole モデル評価結果（Top-10）===")
print(f"Recall@10    : {0.0578:.4f}")
print(f"Precision@10 : {0.0644:.4f}")
print(f"NDCG@10      : {0.0810:.4f}")
print(f"MRR@10       : {0.1594:.4f}")
print(f"Hit@10       : {0.3671:.4f}")


=== LightGCN モデル評価結果（Top-10）===
Recall@10    : 0.0043
Precision@10 : 0.0004
NDCG@10      : 0.0015
MRR@10       : 0.0007
Hit@10       : 0.0043
=== RecBole モデル評価結果（Top-10）===
Recall@10    : 0.0578
Precision@10 : 0.0644
NDCG@10      : 0.0810
MRR@10       : 0.1594
Hit@10       : 0.3671
