In [1]:
import sys

# 사이킷런 ≥0.20 필수
import sklearn

# 공통 모듈 임포트
import numpy as np
import pandas as pd
import os

# 노트북 실행 결과를 동일하게 유지하기 위해
np.random.seed(42)

# 깔끔한 그래프 출력을 위해
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

import platform
from matplotlib import font_manager, rc

#매킨토시의 경우
if platform.system() == 'Darwin':
    rc('font', family='AppleGothic')
#윈도우의 경우
elif platform.system() == 'Windows':
    font_name = font_manager.FontProperties(fname="c:/Windows/Fonts/malgun.ttf").get_name()
    rc('font', family=font_name)

mpl.rcParams['axes.unicode_minus'] = False

# 그림을 저장할 위치
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "association_analysis"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("그림 저장:", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
    
        
import warnings
warnings.filterwarnings(action='ignore')

## surprise 패키지 활용

In [2]:
!pip install scikit_surprise

Collecting scikit_surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
     -------------------------------------- 772.0/772.0 kB 3.5 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: scikit_surprise
  Building wheel for scikit_surprise (setup.py): started
  Building wheel for scikit_surprise (setup.py): finished with status 'done'
  Created wheel for scikit_surprise: filename=scikit_surprise-1.1.3-cp310-cp310-win_amd64.whl size=1082377 sha256=c868911695762e827e7db15579836aaf369da2c79b00c6625dda76ca88699967
  Stored in directory: c:\users\user\appdata\local\pip\cache\wheels\df\e4\a6\7ad72453dd693f420b0c639bedeec34641738d11b55d8d9b84
Successfully built scikit_surprise
Installing collected packages: scikit_surprise
Successfully installed scikit_surprise-1.1.3


In [5]:
import surprise
from surprise import SVD
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split

data = Dataset.load_builtin(name='ml-100k', prompt=True)

Dataset ml-100k could not be found. Do you want to download it? [Y/n] Y
Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to C:\Users\user/.surprise_data/ml-100k


In [6]:
trainset, testset = train_test_split(data, test_size=.25, random_state=42) 

In [7]:
algo = SVD()
algo.fit(trainset) 

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x14c5cff4910>

In [9]:
predictions = algo.test( testset )
#print(predictions[:5])

print([(pred.uid, pred.iid, pred.est) for pred in predictions[:5]])

[('391', '591', 3.5608561717920146), ('181', '1291', 1.5179912523702612), ('637', '268', 3.0225936743828146), ('332', '451', 4.070696434930238), ('271', '204', 3.8608142964823338)]


## 로컬 데이터를 이용한 surprise 패캐지 활용