# 특잇값 분해(Singular Value Decomposition, SVD)

In [None]:
# 부모 폴더의 경로 추가
import sys; sys.path.insert(0, '..')

from util.data_loader import DataLoader
from util.metric_calculator import MetricCalculator

In [None]:
# Load the MovieLens dataset through the project's DataLoader.
# NOTE(review): the keyword names suggest a 1000-user subset with 5 held-out
# test items per user -- confirm against util/data_loader.py.
data_loader = DataLoader(
    num_users=1000,
    num_test_items=5,
    data_path='../data/ml-10M100K/',
)
movielens = data_loader.load()

In [None]:
# Reshape the long-format training ratings into a wide table:
# one row per user_id, one column per movie_id, cell value = rating.
# (user, movie) pairs absent from the training data become NaN.
user_movie_matrix = movielens.train.pivot(
    index='user_id',
    columns='movie_id',
    values='rating',
)
# Last expression of the cell: the notebook renders the table.
user_movie_matrix

In [None]:
# Sparsity information for the user x movie rating matrix.
user_num = len(user_movie_matrix.index)
item_num = len(user_movie_matrix.columns)
# Count the cells that actually hold a rating (everything that is not NaN).
non_null_num = user_movie_matrix.notna().sum().sum()
# Density = fraction of the user x movie grid that is filled in.
non_null_ratio = non_null_num / (user_num * item_num)

# The value printed is the matrix density, so label it "밀도" (density);
# the previous label "정밀도" means "precision", which is a different quantity.
print(f'사용자 수={user_num}, 아이템 수={item_num}, 밀도={non_null_ratio:.2f}')

In [None]:
# Preview of the matrix with missing ratings replaced by 0. The result is only
# displayed by the notebook; user_movie_matrix itself is not modified.
user_movie_matrix.fillna(0)

In [None]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.sparse.linalg` is loaded and can raise AttributeError on SciPy
# versions without lazy submodule loading.
import scipy.sparse.linalg
import numpy as np

# Convert the ratings into a user x movie matrix. Missing ratings are filled
# with the mean rating of the training data.
user_movie_matrix = movielens.train.pivot(index='user_id', columns='movie_id', values='rating')
# Lookup tables from the original ids to row / column positions in `matrix`.
user_id2index = {user_id: row for row, user_id in enumerate(user_movie_matrix.index)}
movie_id2index = {movie_id: col for col, movie_id in enumerate(user_movie_matrix.columns)}
matrix = user_movie_matrix.fillna(movielens.train.rating.mean()).to_numpy()


# Run a truncated singular value decomposition with k=5 factors.
P, S, Qt = scipy.sparse.linalg.svds(matrix, k=5)

# Predicted rating matrix: the rank-5 reconstruction P * diag(S) * Qt.
pred_matrix = P @ np.diag(S) @ Qt

print(f"P: {P.shape}, S: {S.shape}, Qt: {Qt.shape}, pred_matrix: {pred_matrix.shape}")

In [None]:
# SVD-based recommendation using the project's SVDRecommender
# with its default settings.
from src.svd import SVDRecommender

recommender = SVDRecommender()
recommend_result = recommender.recommend(movielens)

In [None]:
# Evaluation: score the recommender output against the held-out test data.
# NOTE(review): k=10 is presumably the ranking cutoff -- confirm against
# util/metric_calculator.py.
metric_calculator = MetricCalculator()
metrics = metric_calculator.calc(
    movielens.test.rating.tolist(),
    recommend_result.rating.tolist(),
    movielens.test_user2items,
    recommend_result.user2items,
    k=10,
)
print(metrics)

In [None]:
# Re-run with zero-filling disabled; per the original note, missing values
# are then filled with the mean rating instead.
recommend_result = recommender.recommend(movielens, fillna_with_zero=False)
metrics = metric_calculator.calc(
    movielens.test.rating.tolist(),
    recommend_result.rating.tolist(),
    movielens.test_user2items,
    recommend_result.user2items,
    k=10,
)
print(metrics)

In [None]:
# Relationship between the number of factors and the evaluation metrics:
# repeat the recommendation and evaluation for several factor counts.
for factors in (5, 10, 30):
    recommend_result = recommender.recommend(
        movielens,
        factors=factors,
        fillna_with_zero=False,
    )
    metrics = metric_calculator.calc(
        movielens.test.rating.tolist(),
        recommend_result.rating.tolist(),
        movielens.test_user2items,
        recommend_result.user2items,
        k=10,
    )
    print(metrics)