In [17]:
import pandas as pd
import numpy as np
import tensorflow as tf
import gc
from typing import List

In [18]:
# Toy interaction log: 4 users x 5 items, one row per (user, item) pair.
# The first 11 rows are negatives (interaction=0), the last 9 are positives (interaction=1).
df_train = dict(
    user_id=[0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 0, 0, 1, 1, 2, 2, 2, 3, 3],
    item_id=[2, 3, 4, 0, 2, 3, 3, 4, 0, 1, 2, 0, 1, 1, 4, 0, 1, 2, 3, 4],
    interaction=[0] * 11 + [1] * 9,
)

In [19]:
# Turn the column -> values mapping into a DataFrame (one column per key).
df_train = pd.DataFrame(data=df_train)

In [20]:
# Take an independent copy: a plain assignment would only create a second name
# for the same DataFrame object, so columns added to df_test later would also
# show up in df_train.
df_test = df_train.copy()

In [21]:
# Display the training frame (columns: user_id, item_id, interaction).
df_train

Unnamed: 0,user_id,item_id,interaction
0,0,2,0
1,0,3,0
2,0,4,0
3,1,0,0
4,1,2,0
5,1,3,0
6,2,3,0
7,2,4,0
8,3,0,0
9,3,1,0


In [22]:
import tensorflow.keras as keras
from tensorflow.keras.layers import (
    Concatenate,
    Dense,
    Embedding,
    Flatten,
    Input,
    Multiply,
)
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2


def create_ncf(
    number_of_users: int,
    number_of_items: int,
    latent_dim_mf: int = 64,
    latent_dim_mlp: int = 4,
    reg_mf: float = 0,
    reg_mlp: float = 0.01,
    dense_layers: List[int] = [8, 4],
    reg_layers: List[float] = [0.01, 0.01],
    activation_dense: str = "relu",
) -> "keras.Model":
    """Build a neural collaborative filtering (NCF) model.

    The model has two branches that share the same two scalar int32 inputs,
    named ``user_id`` and ``item_id``:

    * MF branch: element-wise product of a user and an item embedding.
    * MLP branch: concatenation of a second pair of user/item embeddings,
      passed through a stack of ``Dense`` layers.

    Both branch outputs are concatenated and fed to a single sigmoid unit
    named ``interaction``.

    :param number_of_users: number of rows in the user embedding tables
    :param number_of_items: number of rows in the item embedding tables
    :param latent_dim_mf: embedding width of the MF branch
    :param latent_dim_mlp: embedding width (per side) of the MLP branch
    :param reg_mf: L2 factor applied to the MF embeddings
    :param reg_mlp: L2 factor applied to the MLP embeddings
    :param dense_layers: number of units of each dense layer in the MLP branch
    :param reg_layers: L2 factor for each dense layer; needs at least one entry
        per element of ``dense_layers`` (extra entries are ignored)
    :param activation_dense: activation used by the dense layers
    :return: an uncompiled Keras model mapping ``[user_id, item_id]`` to one
        sigmoid output
    :raises ValueError: if ``reg_layers`` has fewer entries than ``dense_layers``
    """
    # Fail before any layer is created instead of raising an IndexError
    # half-way through building the graph.
    # (The default lists in the signature are only read, never mutated.)
    if len(reg_layers) < len(dense_layers):
        raise ValueError(
            "reg_layers needs one entry per dense layer: got %d for %d layers"
            % (len(reg_layers), len(dense_layers))
        )

    def _embedding(name, input_dim, output_dim, reg):
        # `input_length` is deliberately not passed: the inputs are scalar ids
        # (shape=()), so there is no sequence length to declare, and the
        # argument is deprecated in recent Keras releases.
        return Embedding(
            input_dim=input_dim,
            output_dim=output_dim,
            name=name,
            embeddings_initializer="RandomNormal",
            embeddings_regularizer=l2(reg),
        )

    # input layer: one scalar id per sample for each side
    user = Input(shape=(), dtype="int32", name="user_id")
    item = Input(shape=(), dtype="int32", name="item_id")

    # embedding layers: separate tables for the MF and the MLP branch
    mf_user_embedding = _embedding(
        "mf_user_embedding", number_of_users, latent_dim_mf, reg_mf
    )
    mf_item_embedding = _embedding(
        "mf_item_embedding", number_of_items, latent_dim_mf, reg_mf
    )
    mlp_user_embedding = _embedding(
        "mlp_user_embedding", number_of_users, latent_dim_mlp, reg_mlp
    )
    mlp_item_embedding = _embedding(
        "mlp_item_embedding", number_of_items, latent_dim_mlp, reg_mlp
    )

    # MF vector: element-wise product of the two embeddings
    mf_user_latent = Flatten()(mf_user_embedding(user))
    mf_item_latent = Flatten()(mf_item_embedding(item))
    mf_cat_latent = Multiply()([mf_user_latent, mf_item_latent])

    # MLP vector: concatenated embeddings pushed through the dense stack
    mlp_user_latent = Flatten()(mlp_user_embedding(user))
    mlp_item_latent = Flatten()(mlp_item_embedding(item))
    mlp_vector = Concatenate()([mlp_user_latent, mlp_item_latent])

    # NOTE(review): `activity_regularizer` penalizes each layer's output, not
    # its weights - confirm this (rather than `kernel_regularizer`) is intended.
    for i, (units, reg) in enumerate(zip(dense_layers, reg_layers)):
        mlp_vector = Dense(
            units,
            activity_regularizer=l2(reg),
            activation=activation_dense,
            name="layer%d" % i,
        )(mlp_vector)

    # Merge both branches and squash to a single probability-like score.
    predict_layer = Concatenate()([mf_cat_latent, mlp_vector])
    output = Dense(
        1, activation="sigmoid", kernel_initializer="lecun_uniform", name="interaction"
    )(predict_layer)

    return Model(
        inputs=[user, item],
        outputs=[output],
    )

In [23]:
from tensorflow.keras.optimizers import Adam

# Size the embedding tables from the data instead of hard-coding them: the ids
# are fed to the model as embedding indices, so each table needs
# (largest id + 1) rows. For the toy data above this gives 4 users and 5 items.
n_users = int(df_train["user_id"].max()) + 1
n_items = int(df_train["item_id"].max()) + 1
ncf_model = create_ncf(n_users, n_items)

# Binary cross-entropy on the sigmoid output, plus confusion-matrix counts and
# the usual binary classification metrics.
ncf_model.compile(
    optimizer=Adam(),
    loss="binary_crossentropy",
    metrics=[
        tf.keras.metrics.TruePositives(name="tp"),
        tf.keras.metrics.FalsePositives(name="fp"),
        tf.keras.metrics.TrueNegatives(name="tn"),
        tf.keras.metrics.FalseNegatives(name="fn"),
        tf.keras.metrics.BinaryAccuracy(name="accuracy"),
        tf.keras.metrics.Precision(name="precision"),
        tf.keras.metrics.Recall(name="recall"),
        tf.keras.metrics.AUC(name="auc"),
    ],
)
# Rename the model for the summary header (this writes the private `_name`
# attribute directly).
ncf_model._name = "neural_collaborative_filtering"
ncf_model.summary()

Model: "neural_collaborative_filtering"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_id (InputLayer)            [(None,)]            0                                            
__________________________________________________________________________________________________
item_id (InputLayer)            [(None,)]            0                                            
__________________________________________________________________________________________________
mlp_user_embedding (Embedding)  (None, 4)            16          user_id[0][0]                    
__________________________________________________________________________________________________
mlp_item_embedding (Embedding)  (None, 4)            20          item_id[0][0]                    
_____________________________________________________________________

In [24]:
def make_tf_dataset(
    df: pd.DataFrame,
    targets: List[str],
    val_split: float = 0.1,
    batch_size: int = 4,
    seed=42,
):
    """Make TensorFlow datasets (train and validation) from a Pandas DataFrame.

    Each dataset element is a ``(features, targets)`` pair of dicts keyed by
    column name. The first ``round(len(df) * val_split)`` rows (after the
    optional shuffle) form the validation set, the rest the training set.

    :param df: input DataFrame - only contains features and target(s)
    :param targets: list of columns names corresponding to targets
    :param val_split: fraction of the data that should be used for validation,
        between 0 and 1 inclusive
    :param batch_size: batch size applied to both returned datasets
    :param seed: random seed for shuffling data - `None` won't shuffle the data
    :return: tuple ``(ds_train, ds_val)`` of batched datasets
    :raises ValueError: if ``val_split`` is outside ``[0, 1]``
    :raises KeyError: if a name in ``targets`` is not a column of ``df``
    """
    # A negative fraction would yield a negative element count, which
    # take()/skip() do not interpret as "zero elements"; reject it up front.
    if not 0 <= val_split <= 1:
        raise ValueError("val_split must be between 0 and 1, got %r" % (val_split,))

    n_val = round(df.shape[0] * val_split)

    # Compare against None explicitly: 0 is a valid seed and must still shuffle.
    shuffled = df if seed is None else df.sample(frac=1, random_state=seed)

    # Split the columns into a features dict and a targets dict.
    features = shuffled.to_dict("series")
    labels = {t: features.pop(t) for t in targets}
    ds = tf.data.Dataset.from_tensor_slices((features, labels))

    ds_val = ds.take(n_val).batch(batch_size)
    ds_train = ds.skip(n_val).batch(batch_size)
    return ds_train, ds_val

In [25]:
# Train/validation datasets: rows are shuffled with the default seed and the
# default validation fraction is held out.
ds_train, ds_val = make_tf_dataset(df_train, ["interaction"])
# Test dataset: no shuffling (seed=None) and no hold-out (val_split=0), so the
# elements stay in df_test row order. The second return value (an empty
# validation dataset) is bound to a named throwaway rather than `_`, because
# assigning to `_` in a notebook overwrites IPython's last-output variable.
ds_test, _unused_val = make_tf_dataset(
    df_test, ["interaction"], val_split=0, seed=None
)

In [26]:
import datetime
import os

# TensorBoard run directory under ./logs, stamped with the current local time
# so every launch writes to its own folder.
logdir = os.path.join("logs", f"{datetime.datetime.now():%Y%m%d-%H%M%S}")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)
# Early stopping (monitor="val_loss", patience=0) is currently switched off;
# TensorBoard is the only callback attached, so all 40 epochs are run.

train_hist = ncf_model.fit(
    x=ds_train,
    epochs=40,
    verbose=1,
    validation_data=ds_val,
    callbacks=[tensorboard_callback],
)

Epoch 1/40




Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [27]:
# Score every row of the test dataset; the model has a single sigmoid output,
# so predict() yields one value per row.
ncf_predictions = ncf_model.predict(ds_test)
# Attach the scores as a flat 1-D column on a *new* frame instead of adding the
# column in place, so any other name bound to the same DataFrame object is left
# unmodified and re-running this cell gives the same result.
df_test = df_test.assign(predictions=ncf_predictions.ravel())

In [28]:
# Display the test frame together with the model's `predictions` column.
df_test

Unnamed: 0,user_id,item_id,interaction,predictions
0,0,2,0,0.546499
1,0,3,0,0.303653
2,0,4,0,0.317334
3,1,0,0,0.410535
4,1,2,0,0.372464
5,1,3,0,0.457902
6,2,3,0,0.301893
7,2,4,0,0.321278
8,3,0,0,0.310026
9,3,1,0,0.31899


In [29]:
# Neural collaborative filtering predictions, reshaped from the long
# (user_id, item_id, predictions) table into a user x item score matrix.
data = df_test.pivot(values="predictions", columns="item_id", index="user_id")
data

item_id,0,1,2,3,4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0.661501,0.677724,0.546499,0.303653,0.317334
1,0.410535,0.53292,0.372464,0.457902,0.60344
2,0.657189,0.67861,0.539955,0.301893,0.321278
3,0.310026,0.31899,0.348193,0.65545,0.645988


In [30]:
# Observed interactions in the same user x item layout, for side-by-side
# comparison with the predicted scores.
(
    df_train
    .pivot(values="interaction", columns="item_id", index="user_id")
)

item_id,0,1,2,3,4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1,1,0,0,0
1,0,1,0,0,1
2,1,1,1,0,0
3,0,0,0,1,1
