- SLIM(Sparse Linear Methods for Top-N Recommendation)
- 類似行列を学習する線形リコメンダー

- 実装手順
- Leave-One-Out分割
- 学習用のスパース行列の作成
- SLIM-ElasticNetモデルの学習
- 推薦の生成
- 評価用ground truthの準備
- 評価指標の計算


In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import ElasticNet
from scipy.sparse import csr_matrix
from tqdm import tqdm

import warnings
from scipy.sparse import SparseEfficiencyWarning

# 警告を無視
warnings.simplefilter("ignore", SparseEfficiencyWarning)

# 1. データ読み込みと前処理
cols = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('u.data', sep='\t', names=cols)
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Leave-One-Out分割
df['rank'] = df.groupby('user_id')['timestamp'].rank(method='first', ascending=False)
train_df = df[df['rank'] > 1].copy()
test_df = df[df['rank'] == 1].copy()

# ラベルエンコード
user_enc = LabelEncoder()
item_enc = LabelEncoder()
train_df['user'] = user_enc.fit_transform(train_df['user_id'])
train_df['item'] = item_enc.fit_transform(train_df['item_id'])

test_df = test_df[test_df['user_id'].isin(user_enc.classes_)]
test_df = test_df[test_df['item_id'].isin(item_enc.classes_)]
test_df['user'] = user_enc.transform(test_df['user_id'])
test_df['item'] = item_enc.transform(test_df['item_id'])

num_users = train_df['user'].nunique()
num_items = train_df['item'].nunique()

In [12]:
# 学習用のスパース行列の作成

# 2. 学習データをスパース行列に
train_df['interaction'] = 1
X = csr_matrix((train_df['interaction'], (train_df['user'], train_df['item'])),
               shape=(num_users, num_items))


In [13]:
# SLIM-ElasticNetモデルの学習

W = np.zeros((num_items, num_items))  # 類似度行列（アイテム×アイテム）

alpha = 1.0  # L1正則化
l1_ratio = 0.1  # ElasticNetのバランス (L1:L2)

model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, positive=True, fit_intercept=False, copy_X=False)

for i in tqdm(range(num_items)):
    y = X[:, i].toarray().ravel()
    X_other = X.copy()
    X_other[:, i] = 0  # 自己相関を排除
    model.fit(X_other, y)
    W[i, :] = model.coef_


100%|███████████████████████████████████████████████████████████████████████| 1680/1680 [00:09<00:00, 179.39it/s]


In [16]:
# 推薦の生成

# スコア計算：X(ユーザ×アイテム) × W(アイテム×アイテム) = 推薦スコア
score_matrix = X.dot(W)

# 各ユーザーに対して上位10件を推薦
top_k = 10
recommendations = {}
for user in range(num_users):
    scores = score_matrix[user]
    seen_items = X[user].indices
    scores[seen_items] = -np.inf  # 学習済みアイテムを除外
    top_items = np.argsort(scores)[-top_k:][::-1]
    recommendations[user] = top_items.tolist()

# 正解データ
ground_truth = test_df.set_index('user')['item'].to_dict()


In [17]:
from Evaluation_index import recall_at_k, precision_at_k, ndcg_at_k, mrr_at_k, hit_at_k

k = 10
print("=== SLIMElastic モデル評価結果（Top-10）===")
print(f"Recall@{k}    : {recall_at_k(recommendations, ground_truth, k):.4f}")
print(f"Precision@{k} : {precision_at_k(recommendations, ground_truth, k):.4f}")
print(f"NDCG@{k}      : {ndcg_at_k(recommendations, ground_truth, k):.4f}")
print(f"MRR@{k}       : {mrr_at_k(recommendations, ground_truth, k):.4f}")
print(f"Hit@{k}       : {hit_at_k(recommendations, ground_truth, k):.4f}")

print("=== RecBole モデル評価結果（Top-10）===")
print(f"Recall@10    : {0.2050:.4f}")
print(f"Precision@10 : {0.1071:.4f}")
print(f"NDCG@10      : {0.0399:.4f}")
print(f"MRR@10       : {0.0246:.4f}")
print(f"Hit@10       : {0.0332:.4f}")


=== SLIMElastic モデル評価結果（Top-10）===
Recall@10    : 0.0978
Precision@10 : 0.0098
NDCG@10      : 0.0555
MRR@10       : 0.0425
Hit@10       : 0.0978
=== RecBole モデル評価結果（Top-10）===
Recall@10    : 0.2050
Precision@10 : 0.1071
NDCG@10      : 0.0399
MRR@10       : 0.0246
Hit@10       : 0.0332
