In [1]:
import pandas as pd
import numpy as np
from surprise import SVD, Dataset, Reader, accuracy
from surprise.dataset import DatasetAutoFolds


# CSV 파일 경로 설정
ratings_file_path = (
    r"D:\pythonProject\recommendation\ml-latest-small\ratings.csv"
)
movies_file_path = (
    r"D:\pythonProject\recommendation\ml-latest-small\movies.csv"
)

# ratings 데이터 로드
ratings = pd.read_csv(ratings_file_path)
print(ratings.head())
# DatasetAutoFolds 클래스를 ratings.csv 파일 기반으로 생성
reader = Reader(
    line_format="user item rating timestamp", sep=",", rating_scale=(0.5, 5.0)
)
data_folds = DatasetAutoFolds(
    ratings_file=r"D:\pythonProject\recommendation\ml-latest-small\ratings_noh.csv",
    reader=reader,
)

# 전체 데이터를 학습 데이터로 생성
trainset = data_folds.build_full_trainset()
algo = SVD(n_epochs=20, n_factors=50, random_state=0)
algo.fit(trainset)

# movies 데이터 로드
movies = pd.read_csv(movies_file_path)

# userId=9 의 movieId 데이터 추출하여 movieId=42 데이터가 있는지 확인
movieIds = ratings[ratings["userId"] == 9]["movieId"]

if movieIds[movieIds == 42].count() == 0:
    print("사용자 아이디 9는 영화 아이디 42의 평점 없음\n")

print(f'42번 영화 정보: \n{movies[movies["movieId"] == 42]}\n')

uid = str(9)
iid = str(42)

pred = algo.predict(uid, iid, verbose=False)
print(f'유저 아이디 9인 영화:\n{ratings[ratings["userId"] == 9]["movieId"].tolist()}')


# 사용자가 보지 않은 영화 목록 생성 함수
def get_unseen_surprise(ratings, movies, userId):
    seen_movies = ratings[ratings["userId"] == userId]["movieId"].tolist()
    total_movies = movies["movieId"].tolist()
    unseen_movies = [movie for movie in total_movies if movie not in seen_movies]
    print(
        f"평점 매긴 영화수: {len(seen_movies)}, 추천대상 영화수: {len(unseen_movies)}, 전체 영화수: {len(total_movies)}\n"
    )
    return unseen_movies


unseen_movies = get_unseen_surprise(ratings, movies, 9)


# 영화 추천 함수
def recomm_movie_by_surprise(algo, userId, unseen_movies, top_n=10):
    predictions = [algo.predict(str(userId), str(movieId)) for movieId in unseen_movies]

    def sortkey_est(pred):
        return pred.est

    predictions.sort(key=sortkey_est, reverse=True)
    top_predictions = predictions[:top_n]

    top_movie_ids = [int(pred.iid) for pred in top_predictions]
    top_movie_titles = movies[movies.movieId.isin(top_movie_ids)]["title"]
    top_movie_rating = [np.round(pred.est, 3) for pred in top_predictions]

    top_movie_preds = [
        (id, title, rating)
        for id, title, rating in zip(top_movie_ids, top_movie_titles, top_movie_rating)
    ]

    return top_movie_preds


# unseen_movies = get_unseen_surprise(ratings, movies, 9)
top_movie_preds = recomm_movie_by_surprise(algo, 9, unseen_movies, top_n=10)

print("##### Top-10 추천 영화 리스트 #####")
for top_movie in top_movie_preds:
    print(top_movie[1], ":", top_movie[2])

   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931
사용자 아이디 9는 영화 아이디 42의 평점 없음

42번 영화 정보: 
    movieId                   title              genres
38       42  Dead Presidents (1995)  Action|Crime|Drama

유저 아이디 9인 영화:
[41, 187, 223, 371, 627, 922, 923, 1037, 1095, 1198, 1270, 1674, 1987, 2011, 2012, 2023, 2300, 2877, 2901, 3173, 3328, 3735, 4131, 4558, 4993, 5218, 5378, 5445, 5447, 5451, 5481, 5507, 5841, 5843, 5872, 5890, 5891, 5893, 5902, 5952, 5956, 5962, 5965, 5988, 6001, 6044]
평점 매긴 영화수: 46, 추천대상 영화수: 9696, 전체 영화수: 9742

##### Top-10 추천 영화 리스트 #####
Usual Suspects, The (1995) : 4.306
Star Wars: Episode IV - A New Hope (1977) : 4.282
Pulp Fiction (1994) : 4.278
Silence of the Lambs, The (1991) : 4.226
Godfather, The (1972) : 4.192
Streetcar Named Desire, A (1951) : 4.155
Star Wars: Episode V - The Empire St