In [76]:
import pandas as pd
import numpy as np
import pickle

dataframes = pd.read_pickle('recom_data/user_category_dummy3.pkl')

df_counts = dataframes['view_counts']
df_users = dataframes['users']

with open('recom_data/category_dict.pkl', 'rb') as handle:
    category_dict = pickle.load(handle)

item_similarity = pd.read_pickle('recom_data/item_similarity.pkl')

In [77]:
# IBCF에서 사용할 유저 dummy데이터입니다.
db_user_data = [[1, 'F', 26]]
df_db_user_data = pd.DataFrame(db_user_data, columns=['user', 'gender', 'age'])
new_df_users = df_users.append(df_db_user_data)

# 이 유저는 1, 2, 3, 4, 31, 32, 33번 카테고리의 제품을 봤네요..
db_view_data = [[1, 1, 2], [1, 2, 2], [1, 3, 1], [1, 4, 3], [1, 31, 1]]
df_db_view_data = pd.DataFrame(db_view_data, columns=['user', 'category', 'count'])
new_df_counts = df_counts.append(df_db_view_data)

view_matrix = new_df_counts.pivot(index='user', columns='category', values='count')

In [78]:
from sklearn.model_selection import train_test_split
x = new_df_counts.copy()
y = new_df_counts['user']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, stratify=y)

In [28]:
def recom_category(user_id, n_items=20):
    for category in view_matrix:
        # 모든 카테고리에 대해서 예측값을 만들어줍니다.
        user_category.loc[category] = IBCF_model(user_id, category)
    category_sort = user_category.sort_values(ascending=False)[:n_items]
    # 예측값을 sorting하고.... n_items만큼만 뽑아줍니다.
    return category_sort

def IBCF_model(user, category):
    """
        예측값을 만들어주는 Model입니당
        특정 유저 id와 특정 카테고리 no를 토대로 예측값을 만들어줍니다.
        item_similarity: 미리 만들어둔 코사인 유사도로 계산한 category의 유사도입니다.
        user_category: 해당 user의 카테고리별 조회수입니다.
    """
    sim_scores = item_similarity[category]
    # 해당 카테고리와 다른 카테고리의 유사도들입니다.
    user_viewing = user_category.T
    # 해당 user의 카테고리별 조회수입니다. (계산을 위해 전치행렬로 만듭니다.)
    non_viewing_idx = user_viewing[user_viewing.isnull()].index
    # user가 조회하지 않은 카테고리의 index를 찾습니다.
    user_viewing = user_viewing.dropna()
    sim_scores = sim_scores.drop(non_viewing_idx)
    # user_viewing과 sim_scores에서 비어있는거 제거합니다. (계산속도 향상용)
    pred_viewing = np.dot(sim_scores, user_viewing) / sim_scores.sum()
    # 예측값은 유저의 조회 기록을 해당 카테고리와의 유사도로 가중평균한 값입니다.
    return pred_viewing

test_user = {
    'user_id': 1,
    'gender': 'F',
    'age': 26
}

user_category = view_matrix.loc[test_user['user_id']].copy()
# test_user의 카테고리별 조회수입니다.
pred_user_category = recom_category(user_id=test_user['user_id'], n_items=10)
# 추천 카테고리를 가져옵니다. (위 함수 참조)
recommend_category = pd.DataFrame(pred_user_category).reset_index()
recommend_category['category'] = recommend_category['category'].map(lambda x: category_dict[x])
# 카테고리 no를 카테고리명으로 바꿉니다.
print(recommend_category)

     category         1
0       남성 구두  1.616786
1    기타 신발 용품  1.610745
2       남성 하의  1.608433
3     헤어 스프레이  1.602009
4       헤어 왁스  1.600886
5    기타 농구 용품  1.600811
6          아대  1.600481
7         단지갑  1.600388
8  기타 헤어 스타일링  1.598769
9       남성 향수  1.598581


In [67]:
def recom_category(user_id, n_items=20, neighbor_size=28):
    for category in view_matrix:
        # 모든 카테고리에 대해서 예측값을 만들어줍니다.
        user_category.loc[category] = IBCF_model(user_id, category, neighbor_size)
    category_sort = user_category.sort_values(ascending=False)[:n_items]
    # 예측값을 sorting하고.... n_items만큼만 뽑아줍니다.
    return category_sort

def IBCF_model(user, category, neighbor_size):
    """
        예측값을 만들어주는 Model입니당
        특정 유저 id와 특정 카테고리 no를 토대로 예측값을 만들어줍니다.
        item_similarity: 미리 만들어둔 코사인 유사도로 계산한 category의 유사도입니다.
        user_category: 해당 user의 카테고리별 조회수입니다.
    """
    sim_scores = item_similarity[category]
    sim_scores[category] = 0
    # 해당 카테고리와 다른 카테고리의 유사도들입니다.
    user_viewing = source_category.T
    # 해당 user의 카테고리별 조회수입니다. (계산을 위해 전치행렬로 만듭니다.)
    non_viewing_idx = user_viewing[user_viewing.isnull()].index
    # user가 조회하지 않은 카테고리의 index를 찾습니다.
    user_viewing = user_viewing.dropna()
    sim_scores = sim_scores.drop(non_viewing_idx)
    # user_viewing과 sim_scores에서 비어있는거 제거합니다. (계산속도 향상용)

    if sim_scores.sum() >= 1.5:
        neighbor_size = min(neighbor_size, len(sim_scores))
        sim_scores = np.array(sim_scores)
        user_viewing = np.array(user_viewing)
        user_idx = np.argsort(sim_scores)
        sim_scores = sim_scores[user_idx][-neighbor_size:]
        user_viewing = user_viewing[user_idx][-neighbor_size:]
        pred_viewing = np.dot(sim_scores, user_viewing) / sim_scores.sum()
    else:
        pred_viewing = 0
                
    return pred_viewing

test_user = {
    'user_id': 1,
    'gender': 'F',
    'age': 26
}

# user_category = view_matrix.loc[test_user['user_id']].copy()
# # test_user의 카테고리별 조회수입니다.
# pred_user_category = recom_category(user_id=test_user['user_id'], n_items=10, neighbor_size=28)
# # 추천 카테고리를 가져옵니다. (위 함수 참조)

test = 90001
user_category = view_matrix.loc[test].copy()
source_category = view_matrix.loc[test].copy()

pred_user_category = recom_category(user_id=test, n_items=10, neighbor_size=28)

recommend_category = pd.DataFrame(pred_user_category).reset_index()
recommend_category['category'] = recommend_category['category'].map(lambda x: category_dict[x])
# 카테고리 no를 카테고리명으로 바꿉니다.
print(recommend_category)

  category     90001
0    화방 용품  1.840165
1       기타  1.711164
2      DVD  1.698906
3    기타 음반  1.660707
4   유아동 도서  1.660676
5       잡지  1.642778
6     블루레이  1.625693
7       음반  1.607777
8    필기 용품  1.527456
9    공용 시계  1.519275


In [14]:
# def recom_category(user_id, n_items=20, neighbor_size=28):
#     for category in view_matrix:
#         predictions.loc[category] = IBCF_model(user_id, category, neighbor_size)
#     print(predictions)
#     category_sort = predictions.sort_values(by='count', ascending=False)[:n_items]
#     return category_sort

# def IBCF_model(user, category, neighbor_size):
#     sim_scores = item_similarity[category]
#     # 해당 카테고리와 다른 카테고리의 유사도들입니다.
#     user_viewing = user_category.T
#     # 해당 user의 카테고리별 조회수입니다. (계산을 위해 전치행렬로 만듭니다.)
#     non_viewing_idx = user_viewing[user_viewing.isnull()].index
#     # user가 조회하지 않은 카테고리의 index를 찾습니다.
#     user_viewing = user_viewing.dropna()
#     sim_scores = sim_scores.drop(non_viewing_idx)
#     # user_viewing과 sim_scores에서 비어있는거 제거합니다. (계산속도 향상용)

#     neighbor_size = min(neighbor_size, len(sim_scores))
#     sim_scores = np.array(sim_scores)
#     user_viewing = np.array(user_viewing)
#     user_idx = np.argsort(sim_scores)
#     sim_scores = sim_scores[user_idx][-neighbor_size:]
#     user_viewing = user_viewing[user_idx][-neighbor_size:]
# #     print(category, sim_scores.sum())
# #     print(sim_scores)
# #     print(user_viewing)
#     print(category, sim_scores.sum())
#     if sim_scores.sum():
#         pred_viewing = np.dot(sim_scores, user_viewing) / sim_scores.sum()
#     else:
#         pred_viewing = 1
# #     print(pred_viewing)
# #     print('-' * 80)

#     return pred_viewing

# user_category = view_matrix.loc[1].copy()
# predictions = pd.DataFrame(0, index=view_matrix.columns, columns=['count'])
# # test_user의 카테고리별 조회수입니다.
# pred_user_category = recom_category(user_id=1, n_items=10, neighbor_size=423)
# print(pred_user_category)
# # # 추천 카테고리를 가져옵니다. (위 함수 참조)
# recommend_category = pd.DataFrame(pred_user_category).reset_index()
# recommend_category['category'] = recommend_category['category'].map(lambda x: category_dict[x])
# # # 카테고리 no를 카테고리명으로 바꿉니다.
# print(recommend_category)

1 0.1150792564678299
2 0.16609921481802295
3 0.0
4 0.0
11 0.2873341998588751
12 0.3108583865108603
13 0.0
14 0.0
21 0.0
31 0.114310178826243
32 0.14185817992826882
33 0.15129115643863061
34 0.14651801650185395
35 0.0
41 0.3854387608742813
42 0.24888747625608315
43 0.35536914257654517
44 0.03540577904883633
45 0.0
51 0.0
52 0.28165824057607836
61 0.0
62 0.0
63 0.23569410516098613
71 0.13106951779914555
72 0.30866316009703093
73 0.0
81 0.0
82 0.1023308916689615
83 0.1602005485708777
91 0.16429237749983602
92 0.28143519731442745
93 0.15200661715051755
101 0.0
102 0.23687627752279194
103 0.18921025749702639
104 0.0
105 0.0
111 0.20390202292486884
112 0.1521962688290729
113 0.23698557205793186
114 0.08735190033131082
115 0.104785888973495
116 0.10555040544981656
121 0.12556569972983392
122 0.1933863315952284
123 0.0
131 0.10868744955542235
132 0.1809689546439121
133 0.0
141 0.17107596098536743
142 0.0
151 0.0
152 0.10207868833811343
153 0.0
161 0.12695033114402746
162 0.17501598667729315
16

1094 0.10903031794716125
1101 0.032965800152035964
1102 0.0734706460386654
             count
category          
1         1.586954
2         1.571111
3         1.000000
4         1.000000
11        1.556328
...            ...
1092      1.230385
1093      1.000000
1094      1.000000
1101      1.000000
1102      1.000000

[423 rows x 1 columns]
          count
category       
943         2.0
1034        2.0
992         2.0
991         2.0
983         2.0
982         2.0
261         2.0
262         2.0
263         2.0
264         2.0
         category  count
0             스쿼시    2.0
1  기타 오토바이/스쿠터 용품    2.0
2          자전거 부품    2.0
3             자전거    2.0
4     기타 스키/보드 용품    2.0
5           스키 장비    2.0
6          바디 클렌저    2.0
7           바디 로션    2.0
8        바디 크림/오일    2.0
9        기타 바디 케어    2.0


In [None]:
# 이건 피어슨 상관계수로 한번 해봤습니다..........
item_similarity2 = pd.read_pickle('recom_data/item_similarity_pearson.pkl')

def recom_category(user_id, n_items=20):
    for category in view_matrix:
        user_category.loc[category] = IBCF_model(user_id, category)
    category_sort = user_category.sort_values(ascending=False)[:n_items]
    return category_sort

def IBCF_model(user, category):
    sim_scores = item_similarity2[category]
    user_viewing = user_category.T
    non_viewing_idx = user_viewing[user_viewing.isnull()].index
    user_viewing = user_viewing.dropna()
    sim_scores = sim_scores.drop(non_viewing_idx)
    pred_viewing = np.dot(sim_scores, user_viewing) / sim_scores.sum()
    return pred_viewing

user_category = view_matrix.loc[test_user['user_id']].copy()

pred_user_category = recom_category(user_id=test_user['user_id'], n_items=10)
recommend_category = pd.DataFrame(pred_user_category).reset_index()
recommend_category['category'] = recommend_category['category'].map(lambda x: category_dict[x])
print(recommend_category)

In [None]:
#####################################
# 이하는 Model 정확도 측정입니다.   #
#####################################

In [None]:
# RMSE 계산해주는 함수
def RMSE(y_true, y_pred):
    return np.sqrt(np.mean((np.array(y_true)- np.array(y_pred)) ** 2))

# 모델별 RMSE 계산
def score(model):
    id_pairs = zip(x_test['user'], x_test['category'])
    y_pred = np.array([model(user, category) for (user, category) in id_pairs])
    y_true = np.array(x_test['count'])
    return RMSE(y_true, y_pred)

train_view_matrix = x_train.pivot(index='user', columns='category', values='count')
view_matrix_t = np.transpose(train_view_matrix)

In [None]:
def IBCF_model(user, category):
    sim_scores = item_similarity[category]
    user_viewing = view_matrix_t[user]
    non_viewing_idx = user_viewing[user_viewing.isnull()].index
    user_viewing = user_viewing.dropna()
    sim_scores = sim_scores.drop(non_viewing_idx)
    pred_viewing = np.dot(sim_scores, user_viewing) / total_sim_mean * item_similarity[category].sum()
    return pred_viewing

total_sim_mean = item_similarity.sum().mean()
# 아마..없는 값을 평균으로 안 쓰고 0으로 넣어서 오차가 큰가봐요
print(score(IBCF_model))

In [79]:
# RMSE 계산해주는 함수
def RMSE(y_true, y_pred):
    return np.sqrt(np.mean((np.array(y_true)- np.array(y_pred)) ** 2))

# 모델별 RMSE 계산
def score(model, neighbor_size):
    id_pairs = zip(x_test['user'], x_test['category'])
    y_pred = np.array([model(user, category, neighbor_size) for (user, category) in id_pairs])
    y_true = np.array(x_test['count'])
    return RMSE(y_true, y_pred)

train_view_matrix = x_train.pivot(index='user', columns='category', values='count')
view_matrix_t = np.transpose(train_view_matrix)

In [80]:
def IBCF_model(user, category, neighbor_size):
    sim_scores = item_similarity[category]
    user_viewing = view_matrix_t[user]
    non_viewing_idx = user_viewing[user_viewing.isnull()].index
    user_viewing = user_viewing.dropna()
    sim_scores = sim_scores.drop(non_viewing_idx)

    if sim_scores.sum() >= 1.5:
        neighbor_size = min(neighbor_size, len(sim_scores))
        sim_scores = np.array(sim_scores)
        user_viewing = np.array(user_viewing)
        user_idx = np.argsort(sim_scores)
        sim_scores = sim_scores[user_idx][-neighbor_size:]
        user_viewing = user_viewing[user_idx][-neighbor_size:]
        pred_viewing = np.dot(sim_scores, user_viewing) / sim_scores.sum()
    else:
        pred_viewing = 0
    
    return pred_viewing

# print(score(IBCF_model, neighbor_size=423))

In [83]:
view_matrix = new_df_counts.pivot(index='user', columns='category', values='count')
view_matrix_t = view_matrix.T

In [None]:
def make_pred(user):
#     pred_matrix.loc[user] = view_matrix_t[user]
    viewed_categories = view_matrix_t[user].dropna()
    temp_pred = pd.DataFrame(0, index=view_matrix.columns, columns=['count'])
    for category in viewed_categories.index:
        if viewed_categories[category]:
            temp_pred['count'] = temp_pred['count'] + item_similarity[category] * viewed_categories[category] / total_sim_mean * item_similarity[category].sum() 
    pred_matrix.loc[user] = temp_pred['count']
    
pred_matrix = pd.DataFrame(0, index=view_matrix.index, columns=view_matrix.columns)
total_sim_mean = item_similarity.sum().mean()

for user in view_matrix.index:
    make_pred(user)
print(pred_matrix)

In [None]:
print(pred_matrix.loc[90001].sort_values())

In [None]:
results = []
for size in range(1, 351, 20):
    error = score(IBCF_model, neighbor_size=size)
    print('size: ', size, 'error: ', error)
    results.append([size, error])

size:  1 error:  6.142782756621763e-17
size:  21 error:  1.011052057187185
size:  41 error:  1.0442725543579574
size:  61 error:  1.054173375662542


In [None]:
# import matplotlib.pyplot as plt
# %matplotlib inline

# xs = [result[0] for result in results[20:]]
# ys = [result[1] for result in results[20:]]
# plt.plot(xs, ys)
# plt.show()

In [None]:
##################################
# 언제 전환할 것인가? 선택하기   #
##################################
"""
    기존 그룹화 추천의 경우 데이터가 없는 경우는 Error가 낮게 나올 수밖에 없음
    모델과 에러가 큰 차이가 안 날 경우 전환하는 걸로 얘기해야 될 듯....
    그러니까요.. 음....
    데이터를 지금 몇개 넣어놨는데... IBCF로 예측값을 만들고...
    그 데이터랑 비교해서 Error가 어느정도 이하면 타협하고 넘어가는 걸로 합시다!
    기본 추천 시스템 vs IBCF 비교는 어렵네요.
    
    아 아닌가;;
    나중에 할게요!! 지금 중요한거아닙니당
"""

In [None]:
dataframes = pd.read_pickle('recom_data/user_category_dummy3.pkl')

df_counts = dataframes['view_counts']
df_users = dataframes['users']

with open('recom_data/category_dict.pkl', 'rb') as handle:
    category_dict = pickle.load(handle)

item_similarity = pd.read_pickle('recom_data/item_similarity.pkl')

# IBCF에서 사용할 유저 dummy데이터입니다.
db_user_data = [[1, 'F', 26]]
df_db_user_data = pd.DataFrame(db_user_data, columns=['user', 'gender', 'age'])
new_df_users = df_users.append(df_db_user_data)

# 이 유저는 1, 2, 3, 4, 31, 32, 33번 카테고리의 제품을 봤네요..
db_view_data = [[1, 1, 1], [1, 11, 1]]
df_db_view_data = pd.DataFrame(db_view_data, columns=['user', 'category', 'count'])
new_df_counts = df_counts.append(df_db_view_data)

view_matrix = new_df_counts.pivot(index='user', columns='category', values='count')

from sklearn.model_selection import train_test_split
x = new_df_counts.copy()
y = new_df_counts['user']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, stratify=y)

In [None]:
def IBCF_model(user, category, neighbor_size=28):
    sim_scores = item_similarity[category]
    user_viewing = view_matrix_t[user]
    non_viewing_idx = user_viewing[user_viewing.isnull()].index
    user_viewing = user_viewing.dropna()
    sim_scores = sim_scores.drop(non_viewing_idx)
    
    neighbor_size = min(neighbor_size, len(sim_scores))
    sim_scores = np.array(sim_scores)
    user_viewing = np.array(user_viewing)
    category_idx = np.argsort(sim_scores)
    sim_scores = sim_scores[category_idx][-neighbor_size:]
    user_viewing = user_viewing[category_idx][-neighbor_size:]
    pred_viewing = np.dot(sim_scores, user_viewing) / sim_scores.sum()
    
    return pred_viewing

test_user = {
    'user_id': 1,
    'gender': 'F',
    'age': 26
}

user_category = view_matrix.loc[test_user['user_id']].copy()

pred_data_df = new_df_counts[new_df_counts['user'] == test_user['user_id']].copy()
pred_data_df['pred_ibcf'] = pred_data_df['category'].apply(lambda category: IBCF_model(test_user['user_id'], category))

print(pred_data_df)

In [None]:
"""
    현재 작성 중입니다... 기본 모델로 예측값 만들거에용...
"""

merged_df = pd.merge(new_df_counts, new_df_users, left_on='user', right_on='user')
# print(merged_df)
test2222222 = (merged_df.groupby(['gender', 'age', 'category'])['count']
               .agg(['sum', 'size']))
test2222222['mean'] = test2222222['sum'] / test2222222['size']
# print(test2222222)

total_avg = (merged_df.groupby(['category'])['count']
            .agg(['sum', 'size']))
total_avg['mean'] = total_avg['sum'] / total_avg['size']

def pred_category_view(category):
    try:
        return test2222222.loc['F'].loc[20].loc[category]['mean']
    except:
        return total_avg.loc[category]['mean']
    
pred_data_df['pred_basic'] = pred_data_df['category'].apply(pred_category_view)

print(pred_data_df)

pred_data_df['error_ibcf'] = ((pred_data_df['count'] - pred_data_df['pred_ibcf']) ** 2) ** 0.5
pred_data_df['error_basic'] = ((pred_data_df['count'] - pred_data_df['pred_basic']) ** 2) ** 0.5

print(pred_data_df['error_ibcf'].mean())
print(pred_data_df['error_basic'].mean())

In [None]:
dataframes = pd.read_pickle('recom_data/user_category_dummy3.pkl')

df_counts = dataframes['view_counts']
df_users = dataframes['users']

with open('recom_data/category_dict.pkl', 'rb') as handle:
    category_dict = pickle.load(handle)

item_similarity = pd.read_pickle('recom_data/item_similarity.pkl')

# IBCF에서 사용할 유저 dummy데이터입니다.
db_user_data = [[1, 'F', 26]]
df_db_user_data = pd.DataFrame(db_user_data, columns=['user', 'gender', 'age'])
new_df_users = df_users.append(df_db_user_data)

# 이 유저는 1, 2, 3, 4, 31, 32, 33번 카테고리의 제품을 봤네요..
dummy_datas = [
    [[1, 1, 3], [1, 11, 1]],
    [[1, 1, 2], [1, 11, 2]],
    [[1, 1, 2], [1, 11, 2], [1, 41, 3]],
    [[1, 1, 2], [1, 11, 2], [1, 41, 3], [1, 51, 2], [1, 81, 1]],
    [[1, 1, 2], [1, 11, 2], [1, 31, 2], [1, 801, 2]],
    [[1, 1, 2], [1, 11, 2], [1, 31, 1], [1, 801, 3]],
    [[1, 1, 2], [1, 11, 1], [1, 31, 1], [1, 801, 4]],
    [[1, 1, 1], [1, 11, 1], [1, 31, 1], [1, 801, 5]],
]

for dummy_data in dummy_datas:
    print(dummy_data)
    db_view_data = dummy_data
    df_db_view_data = pd.DataFrame(db_view_data, columns=['user', 'category', 'count'])
    new_df_counts = df_counts.append(df_db_view_data)

    view_matrix = new_df_counts.pivot(index='user', columns='category', values='count')

    from sklearn.model_selection import train_test_split
    x = new_df_counts.copy()
    y = new_df_counts['user']
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, stratify=y)

    def IBCF_model(user, category, neighbor_size=28):
        sim_scores = item_similarity[category]
        user_viewing = view_matrix_t[user]
        non_viewing_idx = user_viewing[user_viewing.isnull()].index
        user_viewing = user_viewing.dropna()
        sim_scores = sim_scores.drop(non_viewing_idx)

        neighbor_size = min(neighbor_size, len(sim_scores))
        sim_scores = np.array(sim_scores)
        user_viewing = np.array(user_viewing)
        category_idx = np.argsort(sim_scores)
        sim_scores = sim_scores[category_idx][-neighbor_size:]
        user_viewing = user_viewing[category_idx][-neighbor_size:]
        pred_viewing = np.dot(sim_scores, user_viewing) / sim_scores.sum()

        return pred_viewing

    test_user = {
        'user_id': 1,
        'gender': 'F',
        'age': 26
    }

    user_category = view_matrix.loc[test_user['user_id']].copy()

    pred_data_df = new_df_counts[new_df_counts['user'] == test_user['user_id']].copy()
    pred_data_df['pred_ibcf'] = pred_data_df['category'].apply(lambda category: IBCF_model(test_user['user_id'], category))

    merged_df = pd.merge(new_df_counts, new_df_users, left_on='user', right_on='user')
    # print(merged_df)
    test2222222 = (merged_df.groupby(['gender', 'age', 'category'])['count']
                   .agg(['sum', 'size']))
    test2222222['mean'] = test2222222['sum'] / test2222222['size']
    # print(test2222222)

    total_avg = (merged_df.groupby(['category'])['count']
                .agg(['sum', 'size']))
    total_avg['mean'] = total_avg['sum'] / total_avg['size']

    def pred_category_view(category):
        try:
            return test2222222.loc['F'].loc[20].loc[category]['mean']
        except:
            return total_avg.loc[category]['mean']

    pred_data_df['pred_basic'] = pred_data_df['category'].apply(pred_category_view)

    pred_data_df['error_ibcf'] = ((pred_data_df['count'] - pred_data_df['pred_ibcf']) ** 2) ** 0.5
    pred_data_df['error_basic'] = ((pred_data_df['count'] - pred_data_df['pred_basic']) ** 2) ** 0.5

    print(pred_data_df['error_ibcf'].mean())
    print(pred_data_df['error_basic'].mean())