In [8]:
from surprise.model_selection import cross_validate 
import os
from surprise import Reader
import pandas as pd
from surprise import SVD
from surprise import Dataset 
from surprise import accuracy 
from surprise.model_selection import train_test_split

### 교차 검증(Cross Validation)과 하이퍼 파라미터 튜닝

#### cross_validate를 이용한 교차 검증

In [7]:
os.chdir(r"C:\Users\Gram\Desktop\파이썬 머신러닝 완벽 가이드\9장 추천 시스템\data")
ratings = pd.read_csv('ratings.csv')

# Reader사용해 DF불러오기
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

algo = SVD(random_state=0) 
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True) 

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8685  0.8638  0.8672  0.8808  0.8753  0.8711  0.0061  
MAE (testset)     0.6685  0.6674  0.6648  0.6793  0.6704  0.6701  0.0049  
Fit time          9.88    9.74    10.12   10.08   10.03   9.97    0.14    
Test time         0.30    0.26    0.28    0.26    0.24    0.27    0.02    


{'test_rmse': array([0.86854644, 0.86382258, 0.86717326, 0.88083516, 0.87530429]),
 'test_mae': array([0.6684885 , 0.66741223, 0.66479289, 0.67925401, 0.67038297]),
 'fit_time': (9.8827965259552,
  9.740938186645508,
  10.121911525726318,
  10.080025434494019,
  10.03177523612976),
 'test_time': (0.3029472827911377,
  0.26129984855651855,
  0.2782552242279053,
  0.26134371757507324,
  0.23740863800048828)}

#### GridSearchCV 이용한 하이퍼 파라미터 튜닝

In [9]:
from surprise.model_selection import GridSearchCV

# 최적화할 파라미터들을 딕셔너리 형태로 지정. 
param_grid = {'n_epochs': [20, 40, 60], 'n_factors': [50, 100, 200] }

# CV를 3개 폴드 세트로 지정, 성능 평가는 rmse, mse 로 수행 하도록 GridSearchCV 구성
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

# 최고 RMSE Evaluation 점수와 그때의 하이퍼 파라미터
print(gs.best_score['rmse'])
print(gs.best_params['rmse'])

0.8766991147364155
{'n_epochs': 20, 'n_factors': 50}
