- SpectralCF: 協調フィルタリング(CF)の手法で，スペクトラルグラフ理論をベースとしたモデル．user-item間の二部グラフに対してグラフフーリエ変換を行い，スペクトル領域で畳み込みを行う．

- Numpy + 隣接グラフ

- 実装手順
- Leave-One-Out分割
- 隣接行列の作成
- スペクトラル畳み込み関数の定義
- 埋め込みの初期化と学習
- 推薦スコア計算とTop-N推薦
- 評価

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix, diags
from sklearn.metrics import precision_score, recall_score
from tqdm import tqdm


# Load the interaction log (tab-separated, no header row).
cols = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('u.data', sep='\t', names=cols)
# Without an explicit unit pandas interprets integers as nanoseconds, which
# collapses every value into 1970.
# NOTE(review): assumes 'u.data' stores Unix epoch *seconds* (MovieLens-100K
# layout) - confirm against the dataset README.
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

# Leave-one-out split: per user, the most recent interaction (rank 1) is held
# out for testing and everything else is used for training.
# method='first' breaks timestamp ties by order of appearance, so exactly one
# row per user receives rank 1.
df['rank'] = df.groupby('user_id')['timestamp'].rank(method='first', ascending=False)
train_df = df[df['rank'] > 1].copy()
test_df = df[df['rank'] == 1].copy()

# Label-encode the raw ids into contiguous 0-based indices. The encoders are
# fitted on the training split only.
user_enc = LabelEncoder()
item_enc = LabelEncoder()
train_df['user'] = user_enc.fit_transform(train_df['user_id'])
train_df['item'] = item_enc.fit_transform(train_df['item_id'])

# Drop test rows whose user or item never appears in training, since the
# encoders cannot transform unseen ids. The explicit copy keeps the column
# assignments below from writing into a slice of another frame.
known_user = test_df['user_id'].isin(user_enc.classes_)
known_item = test_df['item_id'].isin(item_enc.classes_)
test_df = test_df[known_user & known_item].copy()
test_df['user'] = user_enc.transform(test_df['user_id'])
test_df['item'] = item_enc.transform(test_df['item_id'])

num_users = train_df['user'].nunique()
num_items = train_df['item'].nunique()

In [4]:
# Build the adjacency matrix of the user-item bipartite graph.

from scipy.sparse import bmat

# R[u, i] holds the number of training interactions between user u and item i.
interaction_values = np.ones(len(train_df))
interaction_index = (train_df['user'], train_df['item'])
R = csr_matrix((interaction_values, interaction_index),
               shape=(num_users, num_items))

# Users occupy the first num_users rows/columns, items the remaining ones:
#     A = [[0,   R],
#          [R^T, 0]]
user_user_block = csr_matrix((num_users, num_users))
item_item_block = csr_matrix((num_items, num_items))
A = bmat(
    [
        [user_user_block, R],
        [R.T, item_item_block],
    ],
    format='csr',
)

In [5]:
# スペクトラル畳み込み関数の定義

def normalized_laplacian(A):
    """Return the symmetric normalized Laplacian ``I - D^{-1/2} A D^{-1/2}``.

    Parameters
    ----------
    A : scipy.sparse matrix
        Square adjacency matrix. Row sums are used as node degrees.

    Returns
    -------
    scipy.sparse matrix
        Sparse Laplacian with the same shape as ``A``.
    """
    n = A.shape[0]
    degree = np.asarray(A.sum(axis=1)).ravel()

    # Nodes without edges get a scaling factor of exactly zero instead of a
    # huge 1/sqrt(eps) value; their rows and columns of A are empty anyway.
    inv_sqrt_degree = np.zeros(n, dtype=float)
    connected = degree > 0
    inv_sqrt_degree[connected] = 1.0 / np.sqrt(degree[connected])
    D_inv_sqrt = diags(inv_sqrt_degree)

    # Build the identity sparsely: np.eye would allocate a dense n x n array
    # before it is converted, which is prohibitive for large graphs.
    identity = diags(np.ones(n), format='csr')
    return identity - D_inv_sqrt @ A @ D_inv_sqrt

def spectral_conv(L, X, K=3):
    """Propagate ``X`` through ``L`` and sum the results.

    Computes ``X + L X + L^2 X + ... + L^(K-1) X``.

    Parameters
    ----------
    L : matrix
        Square operator applied with ``@`` (e.g. a graph Laplacian).
    X : array
        Feature matrix with one row per node. It is never modified.
    K : int, default 3
        Number of summed terms, including the ``L^0 X = X`` term. For
        ``K <= 1`` a copy of ``X`` is returned.

    Returns
    -------
    array
        The accumulated features, with the same shape as ``X``.
    """
    term = X
    out = X.copy()
    for _ in range(1, K):
        term = L @ term
        # Not in-place: lets NumPy promote the dtype, so an integer X combined
        # with a float L no longer raises a casting error.
        out = out + term
    return out


In [6]:
# Embedding initialisation and spectral propagation.
# NOTE: the embeddings are drawn at random and only passed through
# spectral_conv once; nothing in this cell optimises them against the data.

embedding_dim = 64
L = normalized_laplacian(A)
X_init = np.random.normal(
    loc=0,
    scale=0.1,
    size=(num_users + num_items, embedding_dim),
)
Z = spectral_conv(L, X_init, K=3)

# The first num_users rows belong to users, the remaining rows to items.
user_emb, item_emb = Z[:num_users], Z[num_users:]


In [8]:
# Compute recommendation scores and build the Top-N list for every user.

scores = user_emb @ item_emb.T

# Items a user already interacted with during training are excluded.
X_train = R
top_k = 10
recommendations = {}

for user, row_scores in enumerate(scores):
    candidate_scores = row_scores.copy()
    # Column indices stored for this user's row of the CSR training matrix.
    row_start, row_end = X_train.indptr[user], X_train.indptr[user + 1]
    seen = X_train.indices[row_start:row_end]
    candidate_scores[seen] = -np.inf
    # Sort ascending, flip to descending, keep the best top_k items.
    ranked = np.argsort(candidate_scores)[::-1]
    recommendations[user] = ranked[:top_k].tolist()

# Held-out item for each test user, keyed by encoded user index.
ground_truth = test_df.set_index('user')['item'].to_dict()

In [9]:
# Evaluation

from Evaluation_index import recall_at_k, precision_at_k, ndcg_at_k, mrr_at_k, hit_at_k

# Metric label paired with the function that computes it, in print order.
metric_functions = [
    ("Recall@10", recall_at_k),
    ("Precision@10", precision_at_k),
    ("NDCG@10", ndcg_at_k),
    ("MRR@10", mrr_at_k),
    ("Hit@10", hit_at_k),
]

print("=== SpectralCF モデル評価結果（Top-10）===")
for label, metric_fn in metric_functions:
    value = metric_fn(recommendations, ground_truth, 10)
    # Labels are left-aligned to 12 characters so the colons line up.
    print(f"{label:<12} : {value:.4f}")

# Hard-coded reference numbers, printed for side-by-side comparison.
recbole_results = [
    ("Recall@10", "0.0527"),
    ("Precision@10", "0.0608"),
    ("NDCG@10", "0.0768"),
    ("MRR@10", "0.1575"),
    ("Hit@10", "0.3531"),
]

print("=== RecBole モデル評価結果（Top-10）===")
for label, reported in recbole_results:
    print(f"{label:<12} : {reported}")


=== SpectralCF モデル評価結果（Top-10）===
Recall@10    : 0.0032
Precision@10 : 0.0003
NDCG@10      : 0.0015
MRR@10       : 0.0010
Hit@10       : 0.0032
=== RecBole モデル評価結果（Top-10）===
Recall@10    : 0.0527
Precision@10 : 0.0608
NDCG@10      : 0.0768
MRR@10       : 0.1575
Hit@10       : 0.3531
