In [1]:
import pandas as pd
from nmf_model import NMFModel

In [2]:
train_data=pd.read_csv("../datasets/assignment/train_set.csv")
test_data=pd.read_csv("../datasets/assignment/test_set.csv")

In [3]:
user_train_input=train_data['userId'].values.tolist()
user_test_input=test_data['userId'].values.tolist()

item_train_input=train_data['movieId'].values.tolist()
item_test_input=test_data['movieId'].values.tolist()

label_train_input=train_data['preference'].values.tolist()
label_test_input=test_data['preference'].values.tolist()

In [4]:
num_users=len(train_data.userId.unique())+1
num_items=max(item_train_input)+1

In [5]:
model_all=NMFModel(user_train_input, item_train_input, label_train_input, num_users, num_items)

In [6]:
def cal_result(pred_result, label_test_input):
    ans=0
    for i in range(len(pred_result)):
        if label_test_input==0:
            continue
        elif label_test_input[i]==1 and pred_result[i]>0:
            ans+=1
        elif label_test_input[i]==-1 and pred_result[i]<0:
            ans+=1
        else:
            ans-=1
    return ans

### mlp

In [12]:
mlp_model=model_all.get_mlp_model(num_layers=3)

In [13]:
model_all.print_graph('mlp')

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 32)        4431808     user_input[0][0]                 
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 32)        4175712     item_input[0][0]                 
____________________________________________________________________________________________

In [14]:
model_all.fit_model(mlp_model, './pretrained/training_1/mlp', n_epochs=1)

Train on 15971866 samples
Epoch 00001: saving model to ./pretrained/training_1/mlp


<tensorflow.python.keras.callbacks.History at 0x26cff46d8>

In [16]:
mlp_pred=model_all.predict('mlp', user_test_input, item_test_input)


In [19]:
cal_result(mlp_pred, label_test_input)

717393

### gmf

In [7]:
gmf_model=model_all.get_gmf_model()

In [8]:
model_all.load_weight('gmf', './pretrained/training_1')

ValueError: Shapes (1,) and (128,) are incompatible

In [21]:
model_all.print_graph('gmf')

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 16)        2215904     user_input[0][0]                 
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 16)        2087856     item_input[0][0]                 
____________________________________________________________________________________________

In [23]:
model_all.fit_model(gmf_model, './pretrained/training_1', n_epochs=1)

Train on 15971866 samples
Epoch 00001: saving model to ./pretrained/training_1


<tensorflow.python.keras.callbacks.History at 0x29b6a0128>

In [25]:
gmf_pred=model_all.predict('gmf', user_test_input, item_test_input)


In [26]:
cal_result(gmf_pred, label_test_input)

382161

In [28]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras import regularizers

In [57]:
user_input_layer=layers.Input(shape=(1,), dtype='int32', name='user_input')
item_input_layer=layers.Input(shape=(1,), dtype='int32', name='item_input')
        
#num_users=self.num_users
#num_items=self.num_items
        
num_factors=16
num_layers=3
layer1_dim=num_factors*(2**(num_layers-1))


# GMF embedding layer
GMF_user_embedding=layers.Embedding(input_dim=num_users, output_dim=(int)(num_factors), embeddings_regularizer=regularizers.l2(0.), name='GMF_user_embedding', input_length=1)
GMF_item_embedding=layers.Embedding(input_dim=num_items, output_dim=(int)(num_factors), embeddings_regularizer=regularizers.l2(0.), name='GMF_item_embedding', input_length=1)

# MLP embedding layer
MLP_user_embedding=layers.Embedding(input_dim=num_users, output_dim=(int)(layer1_dim/2), embeddings_regularizer=regularizers.l2(0.), name='MLP_user_embedding', input_length=1)
MLP_item_embedding=layers.Embedding(input_dim=num_items, output_dim=(int)(layer1_dim/2), embeddings_regularizer=regularizers.l2(0.), name='MLP_item_embedding', input_length=1)

#flatten GMF embedding vector
GMF_user_latent=layers.Flatten()(GMF_user_embedding(user_input_layer))
GMF_item_latent=layers.Flatten()(GMF_item_embedding(item_input_layer))

# flatten MLP embeddding vector
MLP_user_latent=layers.Flatten()(MLP_user_embedding(user_input_layer))
MLP_item_latent=layers.Flatten()(MLP_item_embedding(item_input_layer))
        
# gmf - element wise product
GMF_vector=layers.multiply([GMF_user_latent, GMF_item_latent])
GMF_vector=layers.BatchNormalization()(GMF_vector)
# 나중에 삭제해도 될 듯(잘못만든듯)
GMF_vector=layers.Dense(128, activation='tanh')(GMF_vector)
        
# mlp
MLP_vector=layers.concatenate([MLP_user_latent, MLP_item_latent])
MLP_vector=layers.BatchNormalization()(MLP_vector)
        
for i in range(num_layers-1):
    MLP_vector=layers.Dense((int)(layer1_dim/(2**i)), activation='tanh', name='layer%s' %str(i+1))(MLP_vector)
    MLP_vector=layers.BatchNormalization()(MLP_vector)

#NeuMF layer
NeuMF_vector=layers.concatenate([GMF_vector, MLP_vector])
prediction=layers.Dense(1, activation='tanh', name='prediction')(NeuMF_vector)
        
model=Model([user_input_layer, item_input_layer], prediction)


In [58]:
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
MLP_user_embedding (Embedding)  (None, 1, 32)        4431808     user_input[0][0]                 
__________________________________________________________________________________________________
MLP_item_embedding (Embedding)  (None, 1, 32)        4175712     item_input[0][0]                 
____________________________________________________________________________________________

In [66]:
## get weight
#GMF embedding
gmf_user_embedding=gmf_model.get_layer('user_embedding').get_weights()
gmf_item_embedding=gmf_model.get_layer('item_embedding').get_weights()
model.get_layer('GMF_user_embedding').set_weights(gmf_user_embedding)
model.get_layer('GMF_item_embedding').set_weights(gmf_item_embedding)

#MLP embedding
mlp_user_embedding=mlp_model.get_layer('user_embedding').get_weights()
mlp_item_embedding=mlp_model.get_layer('item_embedding').get_weights()
model.get_layer('MLP_user_embedding').set_weights(mlp_user_embedding)
model.get_layer('MLP_item_embedding').set_weights(mlp_item_embedding)
        
for i in range(num_layers-1):
    tmp=str(i+1)
    mlp_layer=mlp_model.get_layer('layer%s' %tmp).get_weights()
    model.get_layer('layer%s' %tmp).set_weights(mlp_layer)

#prediction layer
gmf_prediction=gmf_model.get_layer('prediction').get_weights()
mlp_prediction=mlp_model.get_layer('prediction').get_weights()
        
new_weights=np.concatenate((gmf_prediction[0], mlp_prediction[0]), axis=0)
#new_b=gmf_prediction[1]+ mlp_prediction[1]
model.get_layer('prediction').set_weights([0.5*new_weights, 0.5*new_b])


In [None]:
model_all.fit_model(model, './pretrained/training_1/nmf', n_epochs=1)

Train on 15971866 samples