- NGCFは．user * itemのグラフ構造を活用する
- 1. user * itemIDの連番化(エンコード)
  2. train dataからuser-itemペアのエッジを構築する
  3. 評価指標関数

In [7]:
# pytorch baseで実装する

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# データ読み込みと分割
cols = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('u.data', sep='\t', names=cols)
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Leave-One-Out 分割
df['rank'] = df.groupby('user_id')['timestamp'].rank(method='first', ascending=False)
train_df = df[df['rank'] > 1].copy()
test_df = df[df['rank'] == 1].copy()

# ラベルエンコーディング (train のみで fit)
user_enc = LabelEncoder()
item_enc = LabelEncoder()
train_df['user'] = user_enc.fit_transform(train_df['user_id'])
train_df['item'] = item_enc.fit_transform(train_df['item_id'])

# test に存在する user_id/item_id が train に存在するものだけにフィルター
test_df = test_df[test_df['user_id'].isin(user_enc.classes_)]
test_df = test_df[test_df['item_id'].isin(item_enc.classes_)].copy()

# test に transform 適用（未出現値は既に除外されている）
test_df['user'] = user_enc.transform(test_df['user_id'])
test_df['item'] = item_enc.transform(test_df['item_id'])

# ユーザー・アイテム数
num_users = train_df['user'].nunique()
num_items = train_df['item'].nunique()


In [8]:
print("train_df shape: ", train_df.shape)
print(train_df.isnull().sum())
train_df.head()

train_df shape:  (99057, 7)
user_id      0
item_id      0
rating       0
timestamp    0
rank         0
user         0
item         0
dtype: int64


Unnamed: 0,user_id,item_id,rating,timestamp,rank,user,item
0,196,242,3,1970-01-01 00:00:00.881250949,37.0,195,241
1,186,302,3,1970-01-01 00:00:00.891717742,19.0,185,301
2,22,377,1,1970-01-01 00:00:00.878887116,76.0,21,376
3,244,51,2,1970-01-01 00:00:00.880606923,61.0,243,50
4,166,346,1,1970-01-01 00:00:00.886397596,13.0,165,345


In [9]:
print("test_df shape: ", test_df.shape)
print(test_df.isnull().sum())
test_df.head()

test_df shape:  (941, 7)
user_id      0
item_id      0
rating       0
timestamp    0
rank         0
user         0
item         0
dtype: int64


Unnamed: 0,user_id,item_id,rating,timestamp,rank,user,item
52,260,322,4,1970-01-01 00:00:00.890618898,1.0,259,321
53,25,181,5,1970-01-01 00:00:00.885853415,1.0,24,180
70,189,512,4,1970-01-01 00:00:00.893277702,1.0,188,511
109,265,118,4,1970-01-01 00:00:00.875320714,1.0,264,117
167,155,323,2,1970-01-01 00:00:00.879371261,1.0,154,322


In [10]:
# グラフデータ構築

import torch
from scipy.sparse import coo_matrix

edges = torch.tensor(train_df[['user', 'item']].values).T
values = torch.ones(edges.size(1))

adj = coo_matrix((values, (edges[0], edges[1] + num_users)), shape=(num_users + num_items, num_users + num_items))
adj = adj + adj.T  # 対称行列にする


In [11]:
type(adj)

scipy.sparse._csr.csr_matrix

In [13]:
adj

<Compressed Sparse Row sparse matrix of dtype 'float32'
	with 198114 stored elements and shape (2623, 2623)>

In [14]:
# NDCGモデル定義

import torch.nn as nn
import torch.nn.functional as F

class NGCF(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=64):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        self.W1 = nn.Linear(embedding_dim, embedding_dim)
        self.W2 = nn.Linear(embedding_dim, embedding_dim)

    def forward(self, adj_matrix):
        x = torch.cat([self.user_embedding.weight, self.item_embedding.weight], dim=0)
        out = torch.sparse.mm(adj_matrix, x)
        out = F.relu(self.W1(out))
        out = self.W2(out)
        return out


In [15]:
# 推薦と評価

# 埋め込みを取得
model = NGCF(num_users, num_items)
adj_tensor = torch.tensor(adj.todense(), dtype=torch.float32)
embeddings = model(adj_tensor)

user_emb = embeddings[:num_users]
item_emb = embeddings[num_users:]

# ユーザーごとにTop-10推薦
scores = torch.matmul(user_emb, item_emb.T)
top_k = torch.topk(scores, k=10, dim=1).indices

# 推薦辞書作成
recommendations = {user: top_k[user].tolist() for user in test_df['user'].unique()}
ground_truth = test_df.set_index('user')['item'].to_dict()


In [16]:
from Evaluation_index import recall_at_k, precision_at_k, ndcg_at_k, mrr_at_k, hit_at_k

k = 10
print("=== NGCF モデル評価結果（Top-10）===")
print(f"Recall@10    : {recall_at_k(recommendations, ground_truth, k):.4f}")
print(f"Precision@10 : {precision_at_k(recommendations, ground_truth, k):.4f}")
print(f"NDCG@10      : {ndcg_at_k(recommendations, ground_truth, k):.4f}")
print(f"MRR@10       : {mrr_at_k(recommendations, ground_truth, k):.4f}")
print(f"Hit@10       : {hit_at_k(recommendations, ground_truth, k):.4f}")

print("=== RecBole モデル評価結果（Top-10）===")
print(f"Recall@10    : {0.0581:.4f}")
print(f"Precision@10 : {0.0647:.4f}")
print(f"NDCG@10      : {0.0813:.4f}")
print(f"MRR@10       : {0.1616:.4f}")
print(f"Hit@10       : {0.3745:.4f}")


=== NGCF モデル評価結果（Top-10）===
Recall@10    : 0.0308
Precision@10 : 0.0031
NDCG@10      : 0.0157
MRR@10       : 0.0113
Hit@10       : 0.0308
=== RecBole モデル評価結果（Top-10）===
Recall@10    : 0.0581
Precision@10 : 0.0647
NDCG@10      : 0.0813
MRR@10       : 0.1616
Hit@10       : 0.3745
