In [8]:
# install recommender system package : scikit-surprise

# scikit-surprise compatible numpy version == 1.26.4

# %conda install -c conda-forge scikit-surprise
# !pip install scikit-surprise

# !pip list | findstr scikit-surprise
# !pip list | findstr numpy
# !pip install numpy==1.26.4

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [9]:
# 추천시스템 전용 패키지 확인
import surprise

print(surprise.__version__)
print(np.__version__)

1.1.4
1.26.4


In [10]:
# 데이터 준비 1.
from surprise import Dataset

data = Dataset.load_builtin("ml-100k")

In [11]:
# 데이터 준비 2

from surprise import Reader # 데이터를 읽을 때 읽는 방법을 설정하는 클래스

ratings_small = pd.read_csv('data-files/ml-latest-small/ratings.csv')
data2 = Dataset.load_from_df(ratings_small[['userId', 'movieId', 'rating']],
                             Reader(rating_scale=(0.5, 5)))

In [12]:
ratings_small['rating'].agg(['min', 'max'])

min    0.5
max    5.0
Name: rating, dtype: float64

In [None]:
# 데이터 준비 3

from surprise import Reader # 데이터를 읽을 때 읽는 방법을 설정하는 클래스

data3 = Dataset.load_from_file('data-files/ml-latest-small/ratings.csv',
                             Reader(rating_scale=(0.5, 5), sep=',', skip_lines=1))

In [17]:
# from sklearn.model_selection import train_test_split
from surprise.model_selection import train_test_split

trainset, testset = train_test_split(data3, random_state=42)

In [18]:
from surprise import KNNBasic

knn_model = KNNBasic(sim_options={ 'name': "cosine", 'user_based':False })

In [19]:
knn_model.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x1383a689370>

In [None]:
# 개별 값 예측
print( testset[:3] )
print( knn_model.predict('140', '6765') )
print( knn_model.predict('603', '290') )

[('140', '6765', 3.5), ('603', '290', 4.0), ('438', '5055', 4.0)]
user: 140        item: 6765       r_ui = None   est = 3.42   {'actual_k': 40, 'was_impossible': False}
user: 603        item: 290        r_ui = None   est = 3.55   {'actual_k': 40, 'was_impossible': False}


In [26]:
# 다수 예측
predictions = knn_model.test(testset)
predictions[:3]

[Prediction(uid='140', iid='6765', r_ui=3.5, est=3.425, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid='603', iid='290', r_ui=4.0, est=3.55, details={'actual_k': 40, 'was_impossible': False}),
 Prediction(uid='438', iid='5055', r_ui=4.0, est=3.1625, details={'actual_k': 40, 'was_impossible': False})]

In [28]:
from surprise import accuracy

print( accuracy.mae(predictions=predictions) )
print( accuracy.rmse(predictions=predictions) )

MAE:  0.7615
0.7614697475628528
RMSE: 0.9800
0.97995665962038


In [29]:
movies = pd.read_csv('data-files/ml-latest-small/movies.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [48]:
all_movie_id = ratings_small['movieId'].unique()
# print( all_movie_id )
# print( ratings_small['userId'].unique() )
# print( (ratings_small['userId'] == 42).sum() )
rated_movies = ratings_small[ratings_small['userId'] == 42]['movieId'].values
# rated_movies

# 42번 사용자가 추천하지 않은 모든 영화에 대한 예상 평점 계산
predictions_of_user42 = []
for movie_id in all_movie_id:
    if movie_id not in rated_movies:
        prediction = knn_model.predict("42", str(movie_id))
        predictions_of_user42.append(prediction)

In [52]:
print( len( predictions_of_user42 ) )

# 평점 기준 정렬 (내림차순) : # 단일 값이 아닌 경우 sort 함수에 기준을 지정해야 합니다.
predictions_of_user42.sort(key=lambda v: v.est, reverse=True) 

9284


In [58]:
# predictions_of_user42[:10]
top_10_movies = []
for p in predictions_of_user42[:10]:
    movie = movies[movies['movieId'] == int(p.iid)]
    top_10_movies.append(movie["title"].values[0])

top_10_movies

['One I Love, The (2014)',
 'Laggies (2014)',
 'Annabelle (2014)',
 'Delirium (2014)',
 'Deathgasm (2015)',
 'A Street Cat Named Bob (2016)',
 'Alvarez Kelly (1966)',
 'Jungle Book 2, The (2003)',
 'Cinderella (1997)',
 'Young Victoria, The (2009)']