In [1]:
import pandas as pd
ratings = pd.read_csv('data/ratings_small.csv')
ratings.head(3)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182


In [2]:
min = ratings['rating'].min()
max = ratings['rating'].max()
min, max

(0.5, 5.0)

In [3]:
from surprise import Reader, Dataset, SVD

In [4]:
reader = Reader(rating_scale=(min, max))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

In [5]:
svd = SVD(random_state=0)
trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x27b4711bd70>

In [6]:
ratings.head(2)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179


In [7]:
svd.predict(1, 31)

Prediction(uid=1, iid=31, r_ui=None, est=2.4162799702909346, details={'was_impossible': False})

In [11]:
uid = 9
mid = 42
seen_movies = ratings[ratings['userId'] == uid]['movieId']

seen_movies.values, len(seen_movies)
if seen_movies[seen_movies==mid].count() == 0:
    print(f'사용자:{uid}는 영화{mid} 평점없음')
    print(svd.predict(uid, mid))

사용자:9는 영화42 평점없음
user: 9          item: 42         r_ui = None   est = 2.94   {'was_impossible': False}


In [16]:
#평점매긴 영화목록
uid=9
filt = ratings['userId']  == uid
seen_movies = list(ratings[filt]['movieId'])
len(seen_movies)

45

In [18]:
#평점매긴 전체 영화목록
total_movies = ratings['movieId'].drop_duplicates().tolist()
len(total_movies)

9066

In [20]:
#추천 영화 목록
import numpy as np
unseen_movies = np.setdiff1d(total_movies, seen_movies)
len(unseen_movies)

9021

In [23]:
def get_unseen_movies(ratings, uid):
    filt = ratings['userId'] == uid
    seen_movies = list(ratings[filt]['movies'])
    total_movies = ratings['movieId'].drop_duplicates().tolist()
    unseen_movies = np.setdiff1d(total_movies, seen_movies)
    print(f'사용자 아이디:{uid} 평점매긴 영화수:{len(seen_movies)} 추천대상 영화쉬:{len(unseen_movies)}')
    return unseen_movies

In [24]:
unseen_movies = get_unseen_movies(ratings, 9)

KeyError: 'movies'

In [25]:
predict = [svd.predict(uid, mid) for mid in unseen_movies]
len(predict)

9021

In [28]:
predict.sort(key=lambda pre:pre.est, reverse=True)
top_predict = predict[:5]
top_predict

[Prediction(uid=9, iid=858, r_ui=None, est=4.542866877335705, details={'was_impossible': False}),
 Prediction(uid=9, iid=912, r_ui=None, est=4.484090707192216, details={'was_impossible': False}),
 Prediction(uid=9, iid=4993, r_ui=None, est=4.471004680156093, details={'was_impossible': False}),
 Prediction(uid=9, iid=926, r_ui=None, est=4.427937145395248, details={'was_impossible': False}),
 Prediction(uid=9, iid=745, r_ui=None, est=4.41983077978538, details={'was_impossible': False})]

In [31]:
top_movies = [(pred.iid, pred.est) for pred in top_predict]
top_movies

[(858, 4.542866877335705),
 (912, 4.484090707192216),
 (4993, 4.471004680156093),
 (926, 4.427937145395248),
 (745, 4.41983077978538)]