In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from surprise import Dataset, Reader

In [24]:
# 데이터 준비

from surprise import Reader

movies_small = pd.read_csv('data-files/ml-latest-small/movies.csv')
ratings_small = pd.read_csv('data-files/ml-latest-small/ratings.csv')
data = Dataset.load_from_df(ratings_small[["userId", "movieId", "rating"]],
                            Reader(rating_scale=(0.5, 5.0)))

In [9]:
trainset = data.build_full_trainset()
# trainset.all_ratings()
testset = trainset.build_testset()
len(testset), testset[0]

(100836, (1, 1, 4.0))

In [10]:
# 모델 만들기
import random
from surprise import SVD

svd = SVD(n_factors=100, n_epochs=20, random_state=42)

In [11]:
# 모델 훈련
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1dd2fa50eb0>

In [23]:
# 시청하지 않은 영화 평점 예측
print( (ratings_small["userId"] == 25).sum() )

uid_mask = ratings_small["userId"] == 25        # 25번 사용자의 평점 부여 목록 조회
ratings_small[uid_mask][['userId', 'movieId']]  # 25번 사용자의 평점 부여 영화 조회
print( (ratings_small['movieId'] == 250).sum() )

svd.predict(uid=25, iid=250)

26
6


Prediction(uid=25, iid=250, r_ui=None, est=3.975116005774669, details={'was_impossible': False})

In [36]:
# 시청하지 않은 영화 목록 조회 함수
def unrated_movies(ratings, user_id):
    all_movie_id = ratings["movieId"].unique()
    rated_movie_ids = ratings[ratings["userId"] == user_id]["movieId"].values
    not_rated_movies = [movie_id for movie_id in all_movie_id if movie_id not in rated_movie_ids]
    return not_rated_movies
    # return rated_movie_ids

# sorted(unrated_movies(ratings_small, movies_small, 25))[200:]
# unrated_movies(ratings_small, movies_small, 25)

In [45]:
movies_small[movies_small['movieId'] == 250]['title'].values[0]

'Heavyweights (Heavy Weights) (1995)'

In [46]:
# 추천 함수
def recommend_movies(ratings, movies, user_id, top_n=10):
    not_rated_movie_ids = unrated_movies(ratings, user_id)
    predictions = [svd.predict(user_id, movie_id) for movie_id in not_rated_movie_ids]

    predictions.sort(key=lambda prediction: prediction["est"], reverse=True) # Prediction 객체의 est 속성 값 기준 내림차순 정렬
    top_n_predictions = predictions[:top_n]

    recommendations = [ (p['iid'], movies[movies['movieId'] == p['iid']]['title'].values[0]) for p in top_n_predictions ]

    # return top_n_predictions
    return recommendations

recommend_movies(ratings_small, movies_small, 25)

TypeError: tuple indices must be integers or slices, not str

In [42]:
ar = [1, 3, 2, 5, 4]
ar.sort()
ar
ar2 = [{'idx': 1, 'name': 'john doe'}, {'idx': 2, 'name': 'john doe'}, {'idx': 3, 'name': 'john doe'}, {'idx': 4, 'name': 'john doe'}]
ar2.sort(key=lambda item: -item['idx'])
ar2

[{'idx': 4, 'name': 'john doe'},
 {'idx': 3, 'name': 'john doe'},
 {'idx': 2, 'name': 'john doe'},
 {'idx': 1, 'name': 'john doe'}]