#### **Collaborative Filtering**

In [None]:
# Surprise 패키지를 이용하기 위해 설치
! pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 4.0 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1633717 sha256=1fc1ca1efe74f3ac73a9833d40c10a62d9fea04bd80870a17dc2564b307e7245
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1


In [None]:
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
import os
from surprise.model_selection import train_test_split

#### 1) Data 가져오기

In [None]:
# Remove the timestamp column in place; it is not used by the recommenders below.
# errors='ignore' makes the cell safe to re-run: without it, a second execution
# raises KeyError because the column has already been dropped.
new_df.drop(columns=['timestamp'], inplace=True, errors='ignore')

In [None]:
# Ratings are declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
# Surprise reads the frame positionally as (user id, item id, rating), so pick
# the three columns explicitly instead of relying on new_df's column order.
data = Dataset.load_from_df(new_df[['userId', 'productId', 'Rating']], reader)

#### 2) Trainset, Testset으로 나누기

In [None]:
# Hold out 30% of the ratings for evaluation; the fixed seed keeps the split
# identical between runs.
trainset, testset = train_test_split(
    data,
    test_size=0.3,
    random_state=10,
)

#### 3) KNN을 이용하여 trainset에 파라미터 맞추기

In [None]:
# Item-based neighbourhood model: 'user_based': False makes Surprise compute
# similarities between items, using the pearson_baseline measure.
item_sim_options = {'name': 'pearson_baseline', 'user_based': False}
algo = KNNWithMeans(k=5, sim_options=item_sim_options)
# fit() is the last expression, so the fitted estimator's repr is shown as the
# cell output.
algo.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7f4e5dfcba90>

#### 4) Test accuracy 측정

In [None]:
# Run the fitted model over the held-out testset and keep the predictions
# for scoring in the next cell.
test_pred = algo.test(testset)

In [None]:
# Label the result, then compute the RMSE of the test predictions.
# verbose=True makes accuracy.rmse print "RMSE: ..." itself; because the call
# is also the last expression, its return value is echoed a second time as the
# cell output.
print('item-based Model : Test set')
accuracy.rmse(test_pred, verbose=True)

item-based Model : Test set
RMSE: 1.3478


1.3477596209757159

In [None]:
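# Item-based model, test set
# RMSE: 1.3335
# NOTE(review): the executed cell above reports RMSE 1.3478, so this value is
# presumably left over from an earlier run. TODO: confirm or update.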

#### **Model-based collaborative filtering system**

In [None]:
# Can produce recommendations for a large number of products and many users;
# also usable with sparse matrices.

In [None]:
# Work on the first 10,000 rating rows only.
new_df1 = new_df.iloc[:10000]
# User x product table of ratings. Combinations with no rating are filled with
# 0; pivot_table aggregates duplicate (user, product) pairs with its default
# aggregation.
ratings_matrix = new_df1.pivot_table(
    index='userId',
    columns='productId',
    values='Rating',
    fill_value=0,
)
ratings_matrix.head()

productId,0972683275,1400501466,1400501520,1400501776,1400532620,1400532655,140053271X,1400532736,1400599997,1400698987,...,B00000JFMK,B00000JHWX,B00000JI4F,B00000JII6,B00000JMUG,B00000JPPI,B00000JSGF,B00000JYLO,B00000JYWQ,B00000K135
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A01852072Z7B68UHLI5UG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A0266076X6KPZ6CCHGVS,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A0293130VTX2ZXA70JQS,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A030530627MK66BD8V4LN,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A0571176384K8RBNKGF8O,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# (number of users, number of products) in the pivoted sample.
ratings_matrix.shape

(9832, 76)

In [None]:
# Flip the table so each row is a product and each column is a user; the
# decomposition below then yields one latent vector per product.
X = ratings_matrix.transpose()
X.head()

userId,A01852072Z7B68UHLI5UG,A0266076X6KPZ6CCHGVS,A0293130VTX2ZXA70JQS,A030530627MK66BD8V4LN,A0571176384K8RBNKGF8O,A0590501PZ7HOWJKBGQ4,A0641581307AKT5MAOU0Q,A076219533YHEV2LJO988,A0821988FXKFYX53V4QG,A099626739FNCRNHIKBCG,...,AZWOPBY75SGAM,AZX0ZDVAFMN78,AZX5LAN9JEAFF,AZX7I110AF0W2,AZXKUK895VGSM,AZXP46IB63PU8,AZYTSU42BZ7TP,AZZGJ2KMWB7R,AZZMV5VT9W7Y8,AZZST8OYL5P4Q
productId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
972683275,0,0,5,4,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1400501466,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1400501520,0,0,0,0,0,0,0,0,0,3,...,0,0,0,0,0,0,0,0,0,0
1400501776,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1400532620,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# (number of products, number of users) after the transpose.
X.shape

(76, 9832)

In [None]:
# Row labels of X, i.e. the product IDs.
X.index

Index(['0972683275', '1400501466', '1400501520', '1400501776', '1400532620',
       '1400532655', '140053271X', '1400532736', '1400599997', '1400698987',
       '3744295508', '6301977173', '7214047977', '8862935293', '9573212919',
       '9575871979', '9625993428', '9888002198', '9966694544', '9983891212',
       '9984984354', '9985511476', 'B000001OM4', 'B000001OM5', 'B000001OMI',
       'B000001ON6', 'B00000DM9W', 'B00000IGBF', 'B00000J05A', 'B00000J061',
       'B00000J08Q', 'B00000J0D2', 'B00000J0D5', 'B00000J0D8', 'B00000J1EJ',
       'B00000J1EP', 'B00000J1EQ', 'B00000J1F3', 'B00000J1GA', 'B00000J1QK',
       'B00000J1QR', 'B00000J1SC', 'B00000J1TX', 'B00000J1U8', 'B00000J1UQ',
       'B00000J1V3', 'B00000J1V5', 'B00000J3NF', 'B00000J3Q7', 'B00000J3UJ',
       'B00000J434', 'B00000J4EY', 'B00000J4FS', 'B00000J4GE', 'B00000J6WY',
       'B00000JBAT', 'B00000JBHP', 'B00000JBPB', 'B00000JCT8', 'B00000JCTO',
       'B00000JD34', 'B00000JDF5', 'B00000JDF6', 'B00000JDHV', 'B00000JFE3',

In [None]:
# Binds a second name to the same DataFrame object. This is an alias, not a
# copy, so in-place changes made through either name are visible through both.
# NOTE(review): X1 is not used in the cells visible here; use X.copy() if an
# independent backup was intended.
X1 = X

In [None]:
from sklearn.decomposition import TruncatedSVD

# Reduce each product's user-rating vector to 10 latent components.
# TruncatedSVD uses a randomized solver by default, so random_state is fixed
# (same seed value as the train/test split) to make the decomposition, and the
# recommendations derived from it, reproducible between runs.
SVD = TruncatedSVD(n_components=10, random_state=10)
decomposed_matrix = SVD.fit_transform(X)
decomposed_matrix.shape

(76, 10)

In [None]:
# Inspect the reduced representation: one row per product, 10 components each.
decomposed_matrix

array([[ 1.48104508e+02, -1.54620898e-01,  2.45347666e-02,
         8.22408202e-05, -1.34160927e-01,  1.90076292e-04,
         4.62859502e-02, -5.13531224e-04,  4.87179326e-03,
        -8.15744069e-02],
       [ 6.46718715e-04,  1.28823330e+00,  1.46467304e+00,
         1.89425331e-01,  2.22684975e-03, -4.35688628e-02,
        -6.40033352e-02,  1.61709993e-01,  6.09411853e+01,
         2.69579166e+00],
       [ 2.31474280e-05,  9.93509338e-02,  3.09154819e-01,
         9.07078028e-03,  3.87615185e-03, -3.93429734e-03,
        -1.12527408e-02, -9.70617376e-02,  1.73304588e+00,
         3.70409697e-01],
       [ 5.22947703e-04,  7.61564666e-01,  5.13322204e-01,
         2.29616618e-02,  3.27184975e-02, -6.24002805e-02,
        -2.49950778e-02, -2.71072739e-01,  4.79796612e+00,
        -9.62489382e-01],
       [ 3.26380761e-04,  8.02182984e-01,  1.01844823e+00,
        -1.17477358e-03,  2.35632727e-02, -1.26313226e-01,
        -1.70949647e-01,  1.51232148e-01, -1.03954563e+00,
        -1.

In [None]:
# np.corrcoef treats each row as one variable by default, so this is the
# product-by-product Pearson correlation matrix of the latent vectors.
correlation_matrix = np.corrcoef(decomposed_matrix)
correlation_matrix.shape

(76, 76)

In [None]:
# Product ID at position 75 of X.index (the last row when X has 76 products,
# as in the recorded run).
X.index[75]

'B00000K135'

In [None]:
# Materialise the product IDs as a plain list, pick the reference product at
# position 75, and look up its position in that list.
product_names = X.index.tolist()
i = product_names[75]

product_ID = product_names.index(i)  # position of the reference product
product_ID

75

In [None]:
# Row of the correlation matrix belonging to the reference product: its
# correlation with every product, in X.index order.
correlation_product_ID = correlation_matrix[product_ID, :]
correlation_product_ID.shape

(76,)

In [None]:
# Correlation of the reference product with each product, in X.index order.
correlation_product_ID

array([-0.0544976 ,  0.39413114,  0.22411961,  0.52138115,  0.42087322,
       -0.03507138, -0.0173883 ,  0.70706858, -0.05876908, -0.15227008,
       -0.89703201, -0.94760036, -0.71975416, -0.72267818,  0.14979677,
       -0.48927974, -0.88020176, -0.82044335,  0.20413594, -0.79931172,
        0.98035797, -0.72437307, -0.77251366,  0.65312721,  0.5559973 ,
        0.46808241, -0.06513157,  0.3472852 ,  0.61584896,  0.55354128,
       -0.83098142, -0.49076408,  0.11798875,  0.89187611,  0.80574292,
        0.90373559,  0.7076074 ,  0.69719514, -0.11574652,  0.43555142,
        0.36248461,  0.28018373,  0.77129196, -0.28216657,  0.78438189,
       -0.85282936, -0.0608713 , -0.95759697,  0.54322109,  0.68055707,
        0.57125334, -0.26640018,  0.53188441,  0.79292298,  0.56229441,
       -0.66788827, -0.59482709,  0.55089395, -0.63187342,  0.34514776,
       -0.2763528 ,  0.015119  , -0.42717716, -0.09222394,  0.67289704,
        0.50451399,  0.38776654, -0.3491662 , -0.03507591, -0.92

In [None]:
# Candidate products: Pearson correlation with the reference product `i` above
# 0.65, excluding `i` itself (the product that was already purchased).
# Filtering here, instead of calling list.remove(i) afterwards, also avoids a
# ValueError when `i` is not among the candidates (e.g. a NaN self-correlation).
candidates = [
    (product, score)
    for product, score in zip(X.index, correlation_product_ID)
    if score > 0.65 and product != i
]
# Rank by correlation, highest first, so that the slice below really is the
# "top 24". The original list was in X.index order, i.e. sorted by product ID.
# list.sort is stable, so ties keep their X.index order.
candidates.sort(key=lambda pair: pair[1], reverse=True)
Recommend = [product for product, _ in candidates]
Recommend[0:24]  # recommend the 24 most similar products

['1400532736',
 '9984984354',
 'B000001OM5',
 'B00000J0D8',
 'B00000J1EJ',
 'B00000J1EP',
 'B00000J1EQ',
 'B00000J1F3',
 'B00000J1TX',
 'B00000J1UQ',
 'B00000J3UJ',
 'B00000J4GE',
 'B00000JFE3',
 'B00000JMUG',
 'B00000JYLO']

In [None]:
# Full list of recommended product IDs (not truncated to 24).
Recommend

['1400532736',
 '9984984354',
 'B000001OM5',
 'B00000J0D8',
 'B00000J1EJ',
 'B00000J1EP',
 'B00000J1EQ',
 'B00000J1F3',
 'B00000J1TX',
 'B00000J1UQ',
 'B00000J3UJ',
 'B00000J4GE',
 'B00000JFE3',
 'B00000JMUG',
 'B00000JYLO']