<font color='tomato'><font color="#CC3D3D"><p>
# NCF using Implicit Ratings

### Import modules

In [1]:
import pandas as pd
import numpy as np
import pickle, random, os
import warnings; warnings.filterwarnings("ignore")
import matplotlib.pylab as plt
%matplotlib inline

import tensorflow as tf
from tensorflow import keras
print(tf.__version__)

2.13.0


### Set random seeds to make your results reproducible

In [2]:
# Run this cell before every modelling session to obtain the same results on each run.

def reset_seeds(reset_graph_with_backend=None):
    """Re-seed NumPy, Python's ``random`` module and TensorFlow with fixed values.

    Args:
        reset_graph_with_backend: Optional object exposing ``clear_session()``
            (presumably the Keras backend module -- confirm at the call site).
            When it is not ``None`` its session is cleared and the TF1-compat
            default graph is reset before the seeds are applied.

    Returns:
        None. A confirmation line is printed for each step that is performed.
    """
    backend = reset_graph_with_backend
    if backend is not None:
        backend.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")

    # One fixed seed per random number source.
    np.random.seed(1)
    random.seed(2)
    tf.compat.v1.set_random_seed(3)
    # Left disabled by the author (GPU-related switch):
    #    os.environ['CUDA_VISIBLE_DEVICES'] = ''
    print("RANDOM SEEDS RESET")


reset_seeds()

RANDOM SEEDS RESET


### Step 1: Load and process the data

##### Read data

In [3]:
# Book-sales transactions that were converted, via negative sampling, into
# implicit-feedback training/evaluation data. The pickle is unpacked into a
# (train, test) pair.
# The path is relative, so the file must sit in the current working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
train, test = pd.read_pickle('binary_ratings.pkl')

# Quick sanity check: sizes of both splits and the first training rows.
print(train.shape,test.shape)
train.head()

FileNotFoundError: [Errno 2] No such file or directory: 'binary_ratings.pkl'

### Step 2: Define the model architecture

##### Stack layers from input to output

In [None]:
# Size constants used by the embedding layers.
# NOTE(review): "max ID + 1" assumes USER/ITEM are non-negative integer IDs -- TODO confirm.
K = 8                         # number of latent factors
M = train['USER'].max() + 1   # number of users (largest user ID + 1)
N = train['ITEM'].max() + 1   # number of items (largest item ID + 1)

M, N

In [None]:
# Builds a two-branch network (GMF + MLP) whose outputs are concatenated and
# fed to a single sigmoid unit.

# Input layer: one ID per sample for the user and one for the item
user = keras.Input(shape=(1, ))
item = keras.Input(shape=(1, ))

# Embedding layer
# GMF branch: K-dimensional embeddings for users (M rows) and items (N rows)
MF_Embedding_User = keras.layers.Embedding(M, K)(user)
MF_Embedding_Item = keras.layers.Embedding(N, K)(item)

# MLP branch: separate embedding tables from the GMF branch
MLP_Embedding_User = keras.layers.Embedding(M, 8)(user)  # 8 => following the NCF paper's reference implementation
MLP_Embedding_Item = keras.layers.Embedding(N, 8)(item)

# GMF part
user_latent = keras.layers.Flatten()(MF_Embedding_User)
item_latent = keras.layers.Flatten()(MF_Embedding_Item)

gmf_output = keras.layers.Multiply()([user_latent, item_latent]) # element-wise product

# MLP part
# user_latent / item_latent are rebound here to the MLP embeddings;
# the GMF tensors above are already consumed by gmf_output.
user_latent = keras.layers.Flatten()(MLP_Embedding_User)
item_latent = keras.layers.Flatten()(MLP_Embedding_Item)

x = keras.layers.Concatenate()([user_latent, item_latent])
x = keras.layers.Dense(16, activation='elu')(x)  # switched to a more recent activation function
x = keras.layers.Dense(8, activation='elu')(x)
mlp_output = keras.layers.Dense(4, activation='elu')(x)

# Concatenate MF and MLP parts
x = keras.layers.Concatenate()([gmf_output, mlp_output])
x = keras.layers.Dropout(0.3)(x)   # Dropout added to reduce overfitting

# Final prediction layer: a single sigmoid unit, i.e. a score in (0, 1)
output = keras.layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs=[user, item], outputs=output)

##### Summarize & visualize the model

In [None]:
# Print the textual summary of the assembled model.
model.summary()

In [None]:
# Visualize the layer connection graph, including tensor shapes (rankdir="BT").
tf.keras.utils.plot_model(model, show_shapes=True, rankdir="BT")

### Step 3: Choose the optimizer and the cost function

In [None]:
# Configure the loss, optimizer and metrics.
model.compile(
    loss='binary_crossentropy',   # loss function for binary classification
    # RMSprop chosen as the optimizer. `learning_rate` is the supported keyword;
    # `lr` is only a deprecated alias of it.
    optimizer=keras.optimizers.legacy.RMSprop(learning_rate=1e-3),
    metrics=["accuracy"]
)

### Step 4: Train the model

##### Set learning conditions & fit the model 

In [None]:
# Train with early stopping on the validation loss.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss when early stopping fires; without it the model keeps the weights
# of the last (patience-th non-improving) epoch, which are then used for all
# predictions further down.
# NOTE(review): validation_split holds out the trailing 10% of the rows and
# shuffle=False feeds batches in stored row order -- confirm the training
# frame is already in random order.
hist = model.fit(
    x=[train.USER, train.ITEM],
    y=train.RATING,
    validation_split=0.1,
    batch_size=256,
    epochs=20,
    callbacks=[
        keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True,
        )
    ],
    shuffle=False,
    verbose=2
)

##### Visualize training history 

In [None]:
# Plot the training and validation loss per epoch
# (the compiled loss is binary cross-entropy, not RMSE).

fig, ax = plt.subplots()
ax.plot(hist.history['loss'], label="train")
ax.plot(hist.history['val_loss'], label="valid")
ax.set_xlabel('epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Histogram of the predicted ratings (0~1) on the test pairs
test_scores = model.predict([test['USER'], test['ITEM']])
plt.hist(test_scores)
plt.show()

### Step 5: Make recommendations

##### Make top-K recommendations

In [None]:
top_k = 10        # number of items to recommend per user
test_users = 100  # number of users to generate recommendations for

# Build every (user, item) pair to score: each selected user is paired with
# every distinct test item. np.repeat / np.tile yield the same ordering as a
# nested "for user: for item" loop without per-element Python appends.
# NOTE(review): candidates are not filtered against items the user already has
# in `train` -- confirm whether that is intended.
candidate_users = np.sort(test.USER.unique()[:test_users])
candidate_items = test.ITEM.unique()
target_users = np.repeat(candidate_users, len(candidate_items))
target_items = np.tile(candidate_items, len(candidate_users))

# Score all pairs and keep the top_k highest-scoring items of each user in a
# DataFrame laid out for the evaluation step.
pred_scores = model.predict([target_users, target_items]).flatten()
recommendations = (pd.DataFrame({'USER':target_users, 'ITEM':target_items, 'SCORE':pred_scores})
                   .sort_values(by=['USER','SCORE'], ascending=False)
                   .groupby('USER')
                   .head(top_k))
recommendations

##### Evaluate the recommendation performance

In [None]:
# Hit list: recommended (USER, ITEM) pairs that also appear in the test set
recommendations.merge(test, on=['USER', 'ITEM'])

In [None]:
# Recommender-system evaluation functions provided by the MS Recommender package
from msr.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k

# Ground truth first, predictions second -- shared by all four metrics.
# NOTE(review): if `test` also holds negative-sampled rows, check how these
# metric functions treat them -- TODO confirm.
args = [test, recommendations]
kwargs = dict(col_user='USER',
              col_item='ITEM',
              col_rating='RATING',
              col_prediction='SCORE',
              relevancy_method='top_k',
              k=top_k)   # same cut-off as the labels printed below (was a hard-coded 10)

eval_map = map_at_k(*args, **kwargs)
eval_ndcg = ndcg_at_k(*args, **kwargs)
eval_precision = precision_at_k(*args, **kwargs)
eval_recall = recall_at_k(*args, **kwargs)

print("Model: NCF",
      f"Precision@{top_k}:\t {eval_precision:f}",
      f"Recall@{top_k}:\t {eval_recall:f}",
      f"MAP@{top_k}:\t\t {eval_map:f}",
      f"NDCG@{top_k}:\t {eval_ndcg:f}", sep='\n')

<font color="#CC3D3D"><p>
# End