# 7. 하이브리드 추천 시스템
 - 다수의 추천 알고리즘을 결합하는 것이 더 정확
 - 복수의 추천 알고리즘을 결합해서 사용하는 것을 하이브리드 추천 알고리즘이라고 함

## 7.1 하이브리드 추천 시스템의 강점
 - 랜덤포레스트가 단일 트리모델보다 더 좋은 성능을 가지는 이유와 같음

## 7.2 하이브리드 추천 시스템의 원리
 - 가상의 추천엔진 결합하는 코드 실습

In [8]:
from utility import *
from sklearn.utils import shuffle
import random

In [10]:
_, _, ratings = getData()
ratings.reset_index(inplace=True)
ratings.drop('timestamp',axis=1, inplace=True)

In [11]:
# train_test split
ratings = shuffle(ratings)
TRAIN_SIZE = 0.75
cutoff = int(TRAIN_SIZE*len(ratings))
ratings_train = ratings.iloc[:cutoff]
ratings_test = ratings.iloc[cutoff:]

In [12]:
# dummy recommender 
def recommender0(recom_list):
    recommendations = []
    for pair in recom_list:
        recommendations.append(random.random()*4 +1)
    return np.array(recommendations)

def recommender1(recom_list):
    recommendations = []
    for pari in recom_list:
        recommendations.append(random.random()*4+1)
    return np.array(recommendations)

weight = [0.8,0.2]
recom_list = np.array(ratings_test)
predictions0 = recommender0(recom_list)
predictions1 = recommender1(recom_list)
predictions = weight[0]*predictions0 + weight[1]*predictions1
RMSE(recom_list[:,2],predictions)

1.571932488700433

## 7.3 하이브리드 추천 시스템(CF와 MF의 결합)

In [13]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

In [14]:
users, movies, ratings = getData()

ratings.drop('timestamp', axis=1, inplace=True)
ratings.reset_index(inplace=True)

x = ratings.copy()
y = ratings['user_id']
x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size = 0.25)

rating_matrix = x_train.pivot(index = 'user_id',columns = 'movie_id',values = 'rating')

In [15]:
matrix_dummy = rating_matrix.copy().fillna(0)
user_similarity = cosine_similarity(matrix_dummy, matrix_dummy)
user_similarity = pd.DataFrame(user_similarity, index = rating_matrix.index, columns = rating_matrix.index)

In [16]:
rating_mean = rating_matrix.mean(axis=1)
rating_bias = (rating_matrix.T - rating_mean).T

def CF_knn_bias(user_id, movie_id, neighbor_size = 0):
    if movie_id in rating_bias:
        sim_scores = user_similarity[user_id].copy()
        movie_ratings = rating_bias[movie_id].copy()
        none_rating_idx = movie_ratings[movie_ratings.isnull()].index
        movie_ratings = movie_ratings.drop(none_rating_idx)
        sim_scores = sim_scores.drop(none_rating_idx)
        
        if neighbor_size == 0:
            prediction = np.dot(sim_scores,movie_ratings)/sim_scores.sum()
            prediction += rating_mean[user_id]
        
        else:
            if len(sim_scores)>1:
                neighbor_size = min(neighbor_size, len(sim_scores))
                sim_scores = np.array(sim_scores)
                movie_ratings = np.array(movie_ratings)
                user_idx = np.argsort(sim_scores)
                sim_scores = sim_scores[user_idx][-neighbor_size:]
                movie_ratings = movie_ratings[user_idx][-neighbor_size:]
                prediction = np.dot(sim_scores,movie_ratings)/sim_scores.sum()
                prediction += rating_mean[user_id]
            else:
                prediction = rating_mean[user_id]
    else:
        prediction = rating_mean[user_id]
    return prediction

In [17]:
R_temp = ratings.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)
mf = NEW_MF(R_temp, K=200, alpha=0.001, beta=0.02, iterations=250, verbose=True)
test_set = mf.set_test(ratings_test)
result = mf.test()

Iteration: 10 ; Train RMSE = 0.9655 ; Test RMSE = 0.9878
Iteration: 20 ; Train RMSE = 0.9413 ; Test RMSE = 0.9672
Iteration: 30 ; Train RMSE = 0.9307 ; Test RMSE = 0.9588
Iteration: 40 ; Train RMSE = 0.9247 ; Test RMSE = 0.9543
Iteration: 50 ; Train RMSE = 0.9208 ; Test RMSE = 0.9515
Iteration: 60 ; Train RMSE = 0.9181 ; Test RMSE = 0.9496
Iteration: 70 ; Train RMSE = 0.9160 ; Test RMSE = 0.9483
Iteration: 80 ; Train RMSE = 0.9142 ; Test RMSE = 0.9473
Iteration: 90 ; Train RMSE = 0.9124 ; Test RMSE = 0.9465
Iteration: 100 ; Train RMSE = 0.9105 ; Test RMSE = 0.9458
Iteration: 110 ; Train RMSE = 0.9082 ; Test RMSE = 0.9448
Iteration: 120 ; Train RMSE = 0.9049 ; Test RMSE = 0.9436
Iteration: 130 ; Train RMSE = 0.9003 ; Test RMSE = 0.9419
Iteration: 140 ; Train RMSE = 0.8938 ; Test RMSE = 0.9393
Iteration: 150 ; Train RMSE = 0.8851 ; Test RMSE = 0.9360
Iteration: 160 ; Train RMSE = 0.8744 ; Test RMSE = 0.9324
Iteration: 170 ; Train RMSE = 0.8622 ; Test RMSE = 0.9287
Iteration: 180 ; Train 

In [18]:
def recommender0(recomm_list, mf):
    recommendations = np.array([mf.get_one_prediction(user, movie) for (user, movie) in recomm_list])
    return recommendations

def recommender1(recomm_list, neighbor_size=0):
    recommendations = np.array([CF_knn_bias(user, movie, neighbor_size) for (user, movie) in recomm_list])
    return recommendations

In [19]:
recomm_list = np.array(ratings_test.iloc[:,[0,1]])
predictions0 = recommender0(recomm_list,mf)
RMSE(ratings_test.iloc[:,2], predictions0)
predictions1 = recommender1(recomm_list,37)
RMSE(ratings_test.iloc[:,2], predictions1)

weight = [0.8,0.2]
predictions = predictions0 * weight[0] + predictions1 * weight[1]
RMSE(ratings_test.iloc[:,2], predictions)

0.8908946156532926

In [24]:
for i in np.arange(0,1,0.01):
    weight = [i, 1-i]
    predictions = predictions0*weight[0] + predictions1*weight[1]
    print(f'Weights - {weight[0]:.2f} : {weight[1]:.2f} ; RMSE = {RMSE(ratings_test.iloc[:,2],predictions):.7f}')

Weights - 0.00 : 1.00 ; RMSE = 0.8597621
Weights - 0.01 : 0.99 ; RMSE = 0.8596684
Weights - 0.02 : 0.98 ; RMSE = 0.8595872
Weights - 0.03 : 0.97 ; RMSE = 0.8595183
Weights - 0.04 : 0.96 ; RMSE = 0.8594619
Weights - 0.05 : 0.95 ; RMSE = 0.8594178
Weights - 0.06 : 0.94 ; RMSE = 0.8593862
Weights - 0.07 : 0.93 ; RMSE = 0.8593669
Weights - 0.08 : 0.92 ; RMSE = 0.8593601
Weights - 0.09 : 0.91 ; RMSE = 0.8593656
Weights - 0.10 : 0.90 ; RMSE = 0.8593836
Weights - 0.11 : 0.89 ; RMSE = 0.8594140
Weights - 0.12 : 0.88 ; RMSE = 0.8594568
Weights - 0.13 : 0.87 ; RMSE = 0.8595120
Weights - 0.14 : 0.86 ; RMSE = 0.8595796
Weights - 0.15 : 0.85 ; RMSE = 0.8596596
Weights - 0.16 : 0.84 ; RMSE = 0.8597519
Weights - 0.17 : 0.83 ; RMSE = 0.8598567
Weights - 0.18 : 0.82 ; RMSE = 0.8599739
Weights - 0.19 : 0.81 ; RMSE = 0.8601034
Weights - 0.20 : 0.80 ; RMSE = 0.8602453
Weights - 0.21 : 0.79 ; RMSE = 0.8603996
Weights - 0.22 : 0.78 ; RMSE = 0.8605662
Weights - 0.23 : 0.77 ; RMSE = 0.8607452
Weights - 0.24 :