# 特異値分解(SVD)

In [1]:
# 親のフォルダのパスを追加
import sys; sys.path.insert(0, '..')

from util.data_loader import DataLoader
from util.metric_calculator import MetricCalculator

In [2]:
# Load the MovieLens data using the loader settings below.
data_loader = DataLoader(
    num_users=1000,
    num_test_items=5,
    data_path='../data/ml-10M100K/',
)
movielens = data_loader.load()

In [3]:
# Reshape the training ratings into a user_id x movie_id table of ratings;
# user/movie pairs without a rating are missing values in the result.
user_movie_matrix = movielens.train.pivot(
    index='user_id',
    columns='movie_id',
    values='rating',
)
user_movie_matrix

movie_id,1,2,3,4,5,6,7,8,9,10,...,62000,62113,62293,62344,62394,62801,62803,63113,63992,64716
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,1.0,,,,,,3.0,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048,,,,,,,,,,,...,,,,,,,,,,
1050,,3.0,,,,3.0,,,,3.0,...,,,,,,,,,,
1051,5.0,,3.0,,3.0,,4.0,,,,...,,,,,,,,,,
1052,,,,,,,,,,,...,,,,,,,,,,


In [4]:
# Sparsity summary of the user x movie rating table
user_num, item_num = user_movie_matrix.shape
cell_num = user_num * item_num

# Count the rated (non-missing) cells directly
non_null_num = user_movie_matrix.notna().sum().sum()
non_null_ratio = non_null_num / cell_num

print(f'ユーザー数={user_num}, アイテム数={item_num}, 密度={non_null_ratio:.2f}')

ユーザー数=1000, アイテム数=6673, 密度=0.02


In [5]:
# Preview the rating table with missing entries replaced by 0. The result is
# not assigned to anything, so this cell only displays it.
user_movie_matrix.fillna(0)

movie_id,1,2,3,4,5,6,7,8,9,10,...,62000,62113,62293,62344,62394,62801,62803,63113,63992,64716
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1050,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1051,5.0,0.0,3.0,0.0,3.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1052,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# expose `scipy.sparse.linalg` as an attribute on every SciPy version.
import scipy.sparse.linalg
import numpy as np

# Convert the ratings into a user x movie matrix. Missing values are filled
# with the mean of all training ratings.
user_movie_matrix = movielens.train.pivot(index='user_id', columns='movie_id', values='rating')
# Map raw user / movie ids to their row / column position in the matrix
user_id2index = {user_id: i for i, user_id in enumerate(user_movie_matrix.index)}
movie_id2index = {movie_id: i for i, movie_id in enumerate(user_movie_matrix.columns)}
matrix = user_movie_matrix.fillna(movielens.train.rating.mean()).to_numpy()


# Truncated singular value decomposition with k factors
P, S, Qt = scipy.sparse.linalg.svds(matrix, k=5)

# Predicted rating matrix: P . diag(S) . Qt
pred_matrix = P @ np.diag(S) @ Qt

print(f"P: {P.shape}, S: {S.shape}, Qt: {Qt.shape}, pred_matrix: {pred_matrix.shape}")

P: (1000, 5), S: (5,), Qt: (5, 6673), pred_matrix: (1000, 6673)


In [7]:
# Recommend with SVD
# NOTE(review): this call uses the recommender's default options. A later cell
# passes fillna_with_zero=False explicitly, so the default presumably fills
# missing ratings with zero — confirm in src.svd.
from src.svd import SVDRecommender
recommender = SVDRecommender()
recommend_result = recommender.recommend(movielens)

In [8]:
# Evaluation: score the recommendation result against the test split
metric_calculator = MetricCalculator()
test_ratings = movielens.test.rating.tolist()
pred_ratings = recommend_result.rating.tolist()
metrics = metric_calculator.calc(
    test_ratings,
    pred_ratings,
    movielens.test_user2items,
    recommend_result.user2items,
    k=10,
)
print(metrics)

rmse=3.335, Precision@K=0.009, Recall@K=0.029


In [9]:
# Fill missing values with the mean rating (fillna_with_zero=False) and
# evaluate again
recommend_result = recommender.recommend(movielens, fillna_with_zero=False)
test_ratings = movielens.test.rating.tolist()
pred_ratings = recommend_result.rating.tolist()
metrics = metric_calculator.calc(
    test_ratings,
    pred_ratings,
    movielens.test_user2items,
    recommend_result.user2items,
    k=10,
)
print(metrics)

rmse=1.046, Precision@K=0.013, Recall@K=0.043


In [10]:
# Relationship between the number of factors k and accuracy
for factors in (5, 10, 30):
    recommend_result = recommender.recommend(
        movielens,
        factors=factors,
        fillna_with_zero=False,
    )
    metrics = metric_calculator.calc(
        movielens.test.rating.tolist(),
        recommend_result.rating.tolist(),
        movielens.test_user2items,
        recommend_result.user2items,
        k=10,
    )
    print(metrics)

rmse=1.046, Precision@K=0.013, Recall@K=0.043
rmse=1.042, Precision@K=0.011, Recall@K=0.039
rmse=1.038, Precision@K=0.011, Recall@K=0.036
