## data setting & 함수 setting

In [1]:
import pandas as pd
import pickle
import numpy as np
from nmf_model import NMFModel

In [2]:
# Read the train and test splits in one pass; map() yields the two frames
# in the same order as the paths, so they unpack straight into the names.
train_data, test_data = map(
    pd.read_csv,
    (
        "../datasets/assignment/train_set.csv",
        "../datasets/assignment/test_set.csv",
    ),
)

In [3]:
# Pull the (user, item, label) columns of each split out as plain Python lists.
# The column order in the tuple matches the order of the names being assigned.
user_train_input, item_train_input, label_train_input = (
    train_data[column].values.tolist()
    for column in ('userId', 'movieId', 'preference')
)

user_test_input, item_test_input, label_test_input = (
    test_data[column].values.tolist()
    for column in ('userId', 'movieId', 'preference')
)

In [4]:
# Sizes of the user / item id spaces handed to the model (the printed model
# summaries show embedding tables whose parameter counts scale with them).
# Both are computed as (largest training id + 1): counting unique user ids
# only gives a large-enough table when ids are dense (1..N), whereas
# max() + 1 stays valid for sparse id ranges and gives the same value for dense ones.
# NOTE(review): ids that occur only in the test split are not covered here --
# confirm how NMFModel.predict treats ids >= these sizes.
num_users=max(user_train_input)+1
num_items=max(item_train_input)+1

In [5]:
# Build the shared NMFModel wrapper from the training triples and the two
# id-space sizes; every gmf / mlp / nmf cell below goes through this object.
model_all=NMFModel(user_train_input, item_train_input, label_train_input, num_users, num_items)

In [6]:
def cal_result(pred_result, label_test_input):
    """Score predictions by sign agreement with the true preference labels.

    Each sample contributes:
      * +1 when the label is 1 and the prediction is strictly positive,
      * +1 when the label is -1 and the prediction is strictly negative,
      * -1 for any other non-zero label outcome (including a prediction of exactly 0),
      *  0 when the label itself is 0 (the sample is skipped).

    Args:
        pred_result: indexable sequence of numeric predictions.
        label_test_input: indexable sequence of labels, at least as long as
            ``pred_result``.

    Returns:
        int: the net score (correct minus incorrect) over all scored samples.

    Raises:
        IndexError: if ``label_test_input`` is shorter than ``pred_result``.
    """
    ans=0
    for i, pred in enumerate(pred_result):
        label=label_test_input[i]
        # Compare the per-sample label, not the whole container, so that
        # neutral (0) labels are actually skipped instead of penalised.
        if label==0:
            continue
        elif label==1 and pred>0:
            ans+=1
        elif label==-1 and pred<0:
            ans+=1
        else:
            ans-=1
    return ans

## gmf

In [7]:
# Create the GMF variant through the shared wrapper and keep a handle to it.
gmf_model=model_all.get_gmf_model()

In [8]:
# Print the layer-by-layer summary of the GMF model.
model_all.print_graph('gmf')

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 16)        2215904     user_input[0][0]                 
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 16)        2087856     item_input[0][0]                 
______________________________________________________________________________________________

In [10]:
# Train the GMF model; the recorded log shows 5 epochs with the model saved
# to this path after each one. The return value is kept as the training result.
gmf_hist=model_all.fit_model('gmf', './pretrained/training_2/gmf')

Train on 15971866 samples
Epoch 1/5
Epoch 00001: saving model to ./pretrained/training_2/gmf
Epoch 2/5
Epoch 00002: saving model to ./pretrained/training_2/gmf
Epoch 3/5
Epoch 00003: saving model to ./pretrained/training_2/gmf
Epoch 4/5
Epoch 00004: saving model to ./pretrained/training_2/gmf
Epoch 5/5
Epoch 00005: saving model to ./pretrained/training_2/gmf


In [9]:
# Restore GMF weights from the same path the training cell saves to.
# NOTE(review): the recorded execution counts show this cell was run out of
# order relative to the training cell -- confirm Restart & Run All still works.
gmf_model=model_all.load_weight('gmf', './pretrained/training_2/gmf')

In [None]:
# Score every (user, item) pair of the test split with the GMF model.
gmf_pred=model_all.predict('gmf', user_test_input, item_test_input)


### gmf 예측 점수

In [None]:
# Net sign-agreement score of the GMF predictions against the test labels.
cal_result(gmf_pred, label_test_input)

928167

## mlp

In [10]:
# Create the MLP variant through the shared wrapper, requesting 3 layers.
mlp_model=model_all.get_mlp_model(num_layers=3)

In [11]:
# Print the layer-by-layer summary of the MLP model.
model_all.print_graph('mlp')

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 32)        4431808     user_input[0][0]                 
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 32)        4175712     item_input[0][0]                 
____________________________________________________________________________________________

In [12]:
# Restore MLP weights from the path the MLP training cell saves to.
# NOTE(review): this load cell sits before the training cell in the notebook;
# presumably it needs an existing checkpoint at this path -- confirm on a fresh run.
mlp_model=model_all.load_weight('mlp', './pretrained/training_2/mlp')

In [15]:
# Train the MLP model; the recorded log shows 5 epochs with the model saved
# to this path after each one. The return value is kept as the training result.
mlp_hist=model_all.fit_model('mlp', './pretrained/training_2/mlp')

Train on 15971866 samples
Epoch 1/5
Epoch 00001: saving model to ./pretrained/training_2/mlp
Epoch 2/5
Epoch 00002: saving model to ./pretrained/training_2/mlp
Epoch 3/5
Epoch 00003: saving model to ./pretrained/training_2/mlp
Epoch 4/5
Epoch 00004: saving model to ./pretrained/training_2/mlp
Epoch 5/5
Epoch 00005: saving model to ./pretrained/training_2/mlp


In [None]:
# Score every (user, item) pair of the test split with the MLP model.
mlp_pred=model_all.predict('mlp', user_test_input, item_test_input)


### mlp 예측 점수

In [None]:
# Net sign-agreement score of the MLP predictions against the test labels.
cal_result(mlp_pred, label_test_input)

902315

## nmf

In [13]:
# Create the combined NMF variant through the shared wrapper.
nmf_model=model_all.get_nmf_model()

In [14]:
# Print the layer-by-layer summary of the NMF model.
model_all.print_graph('nmf')

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
MLP_user_embedding (Embedding)  (None, 1, 32)        4431808     user_input[0][0]                 
__________________________________________________________________________________________________
MLP_item_embedding (Embedding)  (None, 1, 32)        4175712     item_input[0][0]                 
____________________________________________________________________________________________

In [15]:
# Train the NMF model; the recorded log shows 5 epochs with the model saved
# to this path after each one. The result is stored as nmf_hist, matching
# gmf_hist / mlp_hist, so the training result is never mistaken for the
# predictions that a later cell assigns to nmf_pred.
nmf_hist=model_all.fit_model('nmf', './pretrained/training_2/nmf')

Train on 15971866 samples
Epoch 1/5
Epoch 00001: saving model to ./pretrained/training_2/nmf
Epoch 2/5
Epoch 00002: saving model to ./pretrained/training_2/nmf
Epoch 3/5
Epoch 00003: saving model to ./pretrained/training_2/nmf
Epoch 4/5
Epoch 00004: saving model to ./pretrained/training_2/nmf
Epoch 5/5
Epoch 00005: saving model to ./pretrained/training_2/nmf


In [None]:
# Score every (user, item) pair of the test split with the NMF model.
nmf_pred=model_all.predict('nmf', user_test_input, item_test_input)


### nmf 예측 점수

In [None]:
# Net sign-agreement score of the NMF predictions against the test labels.
cal_result(nmf_pred, label_test_input)

871565

## gmf, mlp, nmf ensemble

In [22]:
# Inspect the raw NMF predictions (a 1-D float32 array per the recorded output).
nmf_pred

array([0.31575507, 0.6557944 , 0.9454955 , ..., 0.55694944, 0.22265647,
       0.91134447], dtype=float32)

In [23]:
# Inspect the raw GMF predictions (a 1-D float32 array per the recorded output).
gmf_pred

array([0.28996745, 0.58728915, 0.74631387, ..., 0.31497043, 0.45203266,
       0.8878511 ], dtype=float32)

In [24]:
# Inspect the raw MLP predictions; unlike the two outputs above, the recorded
# values here include a negative score.
mlp_pred

array([-0.08990841,  0.5454825 ,  0.82680434, ...,  0.2829082 ,
        0.01542157,  0.8529042 ], dtype=float32)

### 세 모델의 regression 결과 합으로 ensemble

In [27]:
# Stack the three prediction vectors as the rows of a single 2-D matrix.
# NOTE(review): NumPy's documentation no longer recommends np.matrix; moving to
# np.array would also require updating the cells that index / convert the sum.
x=np.matrix([gmf_pred, mlp_pred, nmf_pred])

In [28]:
# Sum down the rows: one combined score per test sample. Because x is an
# np.matrix, the result stays 2-D (a single-row matrix).
s=x.sum(axis=0)

In [38]:
# Indexing an np.matrix row returns another 2-D matrix, so this shows the whole
# single-row matrix (see the recorded output) rather than one scalar.
s[0][0]

matrix([[0.5158141, 1.7885661, 2.5186138, ..., 1.1548281, 0.6901107,
         2.6520998]], dtype=float32)

In [42]:
# Convert the summed matrix to a plain ndarray (still 2-D, with a single row).
arr=s.getA()

In [44]:
# The only row of arr: the 1-D vector of summed ensemble scores.
arr[0]

array([0.5158141, 1.7885661, 2.5186138, ..., 1.1548281, 0.6901107,
       2.6520998], dtype=float32)

### ensemble 예측 점수

In [45]:
# Net sign-agreement score of the ensemble (sum of the three models' outputs)
# against the test labels.
cal_result(arr[0], label_test_input)

952481