- 行列分解と多層パーセプトロンを融合したニューラル推薦モデル

- Leave-One-Out分割のデータフレームを準備する
- userとitemのIDを連番にマッピングする
- torchのデータセット定義
- NeuMFモデル定義
- 学習
- 推薦リスト作成
- 評価指標計算

In [8]:
pip install torch

Collecting torch
  Downloading torch-2.7.1-cp310-none-macosx_11_0_arm64.whl (68.6 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.6/68.6 MB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
Collecting fsspec
  Downloading fsspec-2025.5.1-py3-none-any.whl (199 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sympy>=1.13.3
  Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
Collecting filelock
  Downloading filelock-3.18.0-py3-none-any.whl (16 kB)
Collecting mpmath<1.4,>=1.1.0
  Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.2/536.2 kB[0m [31m26.0 MB/s[0m eta [36m0:

In [14]:
import pandas as pd

# データ読み込み
cols = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv("u.data", sep="\t", names=cols, engine="python")

# Leave-One-Out（ユーザーごとの最新1件を test に、それ以外を train に）
df['rank'] = df.groupby('user_id')['timestamp'].rank(method='first', ascending=False)
train_df = df[df['rank'] > 1].copy()
test_df = df[df['rank'] == 1].copy()

# test_df に train_df に存在しないユーザー・アイテムがあれば除外
valid_users = set(train_df['user_id'])
valid_items = set(train_df['item_id'])
test_df = test_df[test_df['user_id'].isin(valid_users) & test_df['item_id'].isin(valid_items)].copy()

display(train_df.head())
display(test_df.head())

Unnamed: 0,user_id,item_id,rating,timestamp,rank
0,196,242,3,881250949,37.0
1,186,302,3,891717742,19.0
2,22,377,1,878887116,76.0
3,244,51,2,880606923,61.0
4,166,346,1,886397596,13.0


Unnamed: 0,user_id,item_id,rating,timestamp,rank
52,260,322,4,890618898,1.0
53,25,181,5,885853415,1.0
70,189,512,4,893277702,1.0
109,265,118,4,875320714,1.0
167,155,323,2,879371261,1.0


In [18]:
# IDの連番コードの作成

from sklearn.preprocessing import LabelEncoder

user_enc = LabelEncoder()
item_enc = LabelEncoder()

train_df['user'] = user_enc.fit_transform(train_df['user_id'])
train_df['item'] = item_enc.fit_transform(train_df['item_id'])

test_df['user'] = user_enc.transform(test_df['user_id'])
test_df['item'] = item_enc.transform(test_df['item_id'])

num_users = train_df['user'].nunique()
num_items = train_df['item'].nunique()

In [23]:
# データセット定義

import torch
from torch.utils.data import Dataset, DataLoader

class RecDataset(Dataset):
    def __init__(self, df):
        self.users = df['user'].values
        self.items = df['item'].values
        self.labels = (df['rating'] >= 4).astype(float).values  # Implicit化

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        # 修正後（dtype指定で float32 に変換）
        return (
            torch.tensor(self.users[idx], dtype=torch.long),
            torch.tensor(self.items[idx], dtype=torch.long),
            torch.tensor(self.labels[idx], dtype=torch.float32)
        )

In [20]:
# NeuMFモデル定義

import torch.nn as nn

class NeuMF(nn.Module):
    def __init__(self, num_users, num_items, emb_size=32, mlp_layers=[64, 32, 16]):
        super().__init__()
        # MF 部分
        self.user_mf = nn.Embedding(num_users, emb_size)
        self.item_mf = nn.Embedding(num_items, emb_size)

        # MLP 部分
        self.user_mlp = nn.Embedding(num_users, emb_size)
        self.item_mlp = nn.Embedding(num_items, emb_size)
        layers = []
        input_size = emb_size * 2
        for size in mlp_layers:
            layers.append(nn.Linear(input_size, size))
            layers.append(nn.ReLU())
            input_size = size
        self.mlp = nn.Sequential(*layers)

        # 出力層
        self.output = nn.Linear(emb_size + mlp_layers[-1], 1)

    def forward(self, user, item):
        mf = self.user_mf(user) * self.item_mf(item)
        mlp_input = torch.cat([self.user_mlp(user), self.item_mlp(item)], dim=-1)
        mlp = self.mlp(mlp_input)
        concat = torch.cat([mf, mlp], dim=-1)
        return torch.sigmoid(self.output(concat)).squeeze()


In [24]:
# 学習ループ

train_loader = DataLoader(RecDataset(train_df), batch_size=256, shuffle=True)

model = NeuMF(num_users, num_items)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.BCELoss()

for epoch in range(5):
    model.train()
    total_loss = 0
    for user, item, label in train_loader:
        pred = model(user, item)
        loss = loss_fn(pred, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 1, Loss: 259.1673
Epoch 2, Loss: 233.9880
Epoch 3, Loss: 222.8763
Epoch 4, Loss: 216.5177
Epoch 5, Loss: 212.0192


In [25]:
# 推薦リスト作成

model.eval()
recommendations = {}

with torch.no_grad():
    for user in test_df['user'].unique():
        user_tensor = torch.tensor([user] * num_items)
        item_tensor = torch.arange(num_items)
        scores = model(user_tensor, item_tensor)
        top_items = torch.topk(scores, 10).indices.tolist()

        original_user = user_enc.inverse_transform([user])[0]
        original_items = item_enc.inverse_transform(top_items).tolist()
        recommendations[original_user] = original_items


In [28]:
# 評価指標計算

from Evaluation_index import recall_at_k, precision_at_k, ndcg_at_k, mrr_at_k, hit_at_k

ground_truth = test_df.set_index('user_id')['item_id'].to_dict()
k = 10

print("=== NeuMF モデル評価結果（Top-10）===")
print(f"Recall@10    : {recall_at_k(recommendations, ground_truth, k):.4f}")
print(f"Precision@10 : {precision_at_k(recommendations, ground_truth, k):.4f}")
print(f"NDCG@10      : {ndcg_at_k(recommendations, ground_truth, k):.4f}")
print(f"MRR@10       : {mrr_at_k(recommendations, ground_truth, k):.4f}")
print(f"Hit@10       : {hit_at_k(recommendations, ground_truth, k):.4f}")

print("=== RecBole モデル評価結果（Top-10）===")
print(f"Recall@10    : {0.0550:.4f}")
print(f"Precision@10 : {0.0606:.4f}")
print(f"NDCG@10      : {0.0763:.4f}")
print(f"MRR@10       : {0.1520:.4f}")
print(f"Hit@10       : {0.3543:.4f}")

=== NeuMF モデル評価結果（Top-10）===
Recall@10    : 0.0202
Precision@10 : 0.0020
NDCG@10      : 0.0113
MRR@10       : 0.0086
Hit@10       : 0.0202
=== RecBole モデル評価結果（Top-10）===
Recall@10    : 0.0550
Precision@10 : 0.0606
NDCG@10      : 0.0763
MRR@10       : 0.1520
Hit@10       : 0.3543
