In [119]:
# Load the ratings table from a path relative to the notebook's working directory.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which looks like an
# index that was written into the CSV — confirm, and consider index_col=0 if so.
import pandas as pd
ratings = pd.read_csv('data/ratings_small.csv')
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
...,...,...,...,...
99999,671,6268,2.5,1065579370
100000,671,6269,4.0,1065149201
100001,671,6365,4.0,1070940363
100002,671,6385,2.5,1070979663


In [120]:
# Smallest and largest rating value present in the data.
# NOTE(review): these names shadow the built-in min()/max() for the rest of the
# kernel session; a later cell reads them when constructing the Reader.
min = ratings['rating'].min()
max = ratings['rating'].max()
min,max

(0.5, 5.0)

In [121]:
from surprise import Reader,Dataset,SVD

In [122]:
# Derive the rating scale directly from the rating column so this cell does not
# depend on the `min`/`max` globals (which shadow the built-ins) from another cell.
rating_col = ratings['rating']
reader = Reader(rating_scale=(rating_col.min(), rating_col.max()))
# Surprise expects exactly three columns, in the order: user, item, rating.
data = Dataset.load_from_df(ratings[['userId','movieId','rating']],reader)

In [123]:
# Train on every available rating (no hold-out split); the fixed seed keeps
# the learned factors reproducible between runs.
trainset = data.build_full_trainset()
svd = SVD(random_state=0)
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2a7dde592e0>

In [124]:
# Peek at the first two rows to pick a known (userId, movieId) pair.
ratings.head(2)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179


In [125]:
# Predict for user 1 / movie 31, the pair shown in the first row of `ratings`.
svd.predict(1,31)

Prediction(uid=1, iid=31, r_ui=None, est=2.4162799702909346, details={'was_impossible': False})

In [126]:
uid = 9
mid = 42
# Movie ids this user has rated (kept as a Series so it can be compared element-wise).
seen_movies = ratings.loc[ratings['userId'] == uid, 'movieId']

# Only predict when the user has no recorded rating for this movie.
if not (seen_movies == mid).any():
    print(f'사용자:{uid}는 영화 {mid} 평점 없음')
    print(svd.predict(uid,mid))

사용자:9는 영화 42 평점 없음
user: 9          item: 42         r_ui = None   est = 2.94   {'was_impossible': False}


In [127]:
# Movies this user has already rated
uid = 9
filt = ratings['userId'].eq(uid)
seen_movies = ratings.loc[filt, 'movieId'].tolist()
len(seen_movies)

45

In [128]:
# Every movie that has at least one rating, each id listed once
# in order of first appearance
total_movies = ratings['movieId'].unique().tolist()
len(total_movies)

9066

In [129]:
# Recommendation candidates: every rated movie minus the ones this user already rated
import numpy as np
unseen_movies = np.setdiff1d(total_movies,seen_movies)
len(unseen_movies)

9021

In [130]:
def get_unseen_movies(ratings,uid):
    """Return the movie ids that user `uid` has not rated.

    Args:
        ratings: DataFrame with at least 'userId' and 'movieId' columns.
        uid: user id to look up in the 'userId' column.

    Returns:
        The array produced by ``np.setdiff1d``: ids that occur in
        ``ratings['movieId']`` but not among the rows belonging to `uid`.

    Side effects:
        Prints a one-line summary with the number of rated and unrated movies.
    """
    movie_ids = ratings['movieId']
    rated = movie_ids[ratings['userId'] == uid].tolist()
    catalogue = movie_ids.unique().tolist()
    candidates = np.setdiff1d(catalogue, rated)
    print(f'사용자아이디:{uid} 평점 남긴 영화수:{len(rated)} || {uid}님에게 추천 하는 영화수:{len(candidates)}')
    return candidates

In [131]:
# Try the helper for user 9; the call also prints a one-line summary.
unseen_movies = get_unseen_movies(ratings,9)
unseen_movies

사용자아이디:9 평점 남긴 영화수:45 || 9님에게 추천 하는 영화수:9021


array([     2,      3,      4, ..., 162542, 162672, 163949])

In [132]:
# Predict a rating for every candidate movie.
# NOTE(review): `uid` is a notebook global set in an earlier cell (9 there), while the
# call that built `unseen_movies` hard-codes 9 — keep the two in sync.
predict = [svd.predict(uid,mid) for mid in unseen_movies]
len(predict)

9021

In [133]:
# Sort in place, highest estimated rating first, then keep the top five.
predict.sort(key=lambda pre:pre.est, reverse=True)
top_predict = predict[:5]
top_predict

[Prediction(uid=9, iid=858, r_ui=None, est=4.542866877335705, details={'was_impossible': False}),
 Prediction(uid=9, iid=912, r_ui=None, est=4.484090707192216, details={'was_impossible': False}),
 Prediction(uid=9, iid=4993, r_ui=None, est=4.471004680156093, details={'was_impossible': False}),
 Prediction(uid=9, iid=926, r_ui=None, est=4.427937145395248, details={'was_impossible': False}),
 Prediction(uid=9, iid=745, r_ui=None, est=4.41983077978538, details={'was_impossible': False})]

In [134]:
# Reduce each prediction to a (movie id, estimated rating) pair.
top_movies = [(pred.iid,pred.est) for pred in top_predict]
top_movies

[(858, 4.542866877335705),
 (912, 4.484090707192216),
 (4993, 4.471004680156093),
 (926, 4.427937145395248),
 (745, 4.41983077978538)]

In [135]:
def get_recommendations(svd,uid, unseen_movies,top_n):
    """Rank candidate movies for a user by estimated rating.

    Args:
        svd: object whose ``predict(uid, mid)`` returns a prediction exposing
            ``iid`` and ``est`` attributes.
        uid: user id passed through to ``svd.predict``.
        unseen_movies: iterable of candidate movie ids.
        top_n: number of leading entries to keep (used as a slice bound).

    Returns:
        List of ``(iid, est)`` tuples ordered by ``est`` descending; candidates
        with equal estimates keep their order from `unseen_movies`.
    """
    ranked = sorted(
        (svd.predict(uid, mid) for mid in unseen_movies),
        key=lambda p: p.est,
        reverse=True,
    )
    return [(p.iid, p.est) for p in ranked[:top_n]]

In [145]:
# Produce and print the top-N recommendations for one user.
uid=2
top_n=3
unseen_movies = get_unseen_movies(ratings,uid)
top_movies=get_recommendations(svd,uid,unseen_movies,top_n)
print('==============================추천영화리스트==============================')
for mid, est in top_movies:
    # NOTE(review): `movie` is not defined in any visible cell — presumably a
    # movie-metadata client created elsewhere; confirm and define it before this cell.
    try:
        details = movie.details(mid)
        print('영화제목:'+details['title'])
    except Exception as err:
        # Still best-effort, but show why the lookup failed: the previous bare
        # `except:` hid the NameError from the missing `movie` object, so every
        # title silently fell back to the "no title" message.
        print(f'영화제목없음 ({type(err).__name__}: {err})')
    print(f'영화아이디{mid}, 예상평점{est}')

사용자아이디:2 평점 남긴 영화수:76 || 2님에게 추천 하는 영화수:8990
영화제목없음
영화아이디318, 예상평점4.536088707483336
영화제목없음
영화아이디8132, 예상평점4.511074801960958
영화제목없음
영화아이디926, 예상평점4.474115364443329
