## Define Model
 - x1: item categorical feature
- x2: user categorical feature
- x3: item numeric features
- x4: user numeric features

In [29]:
import tensorflow as tf
import tensorflow.keras as keras

In [30]:
from tensorboard.plugins.hparams import api as hp

## Feature extraction


In [8]:
import numpy as np
import pandas as pd

In [35]:
def get_item_numeric_features(df):
    numeric_feature_names = [
        "all_rating_min_max",
        "members_min_max",
        "aired_from_min_max",
        "aired_to_min_max",
    ]

    num_df = df[numeric_feature_names]
    return num_df.to_numpy()


def get_user_numeric_features(df):
    numeric_feature_names = [
        "user_rating_ave_min_max",
        "user_rating_std_min_max",
        "user_aired_from_ave_min_max",
        "user_aired_to_ave_min_max",
    ]

    num_df = df[numeric_feature_names]
    return num_df.to_numpy()


def get_multihot_feature(df, feat_name):
    feat_df = df[[feat_name]]
    feat_vecs = feat_df.to_numpy()
    feat_vec = np.apply_along_axis(lambda v: v[0], 1, feat_vecs)
    return feat_vec


def get_label(df):
    label_df = df[["label"]]
    return label_df.to_numpy()


def get_all_features(df):
    return (
        get_multihot_feature(df, "genres_multihot"),
        get_multihot_feature(df, "user_liked_genres_multihot"),
        get_item_numeric_features(df),
        get_user_numeric_features(df),
    )

In [10]:
import os

In [11]:
def data_files():
    filenames = []
    for root, dirs, files in os.walk("../anime-data/dnn_feat_eng"):
        for file in files:
            if file.endswith(".parquet"):
                filenames.append(os.path.join(root, file))

    return filenames

In [12]:
filenames = data_files()
filenames

['../anime-data/dnn_feat_eng/part-00008-d84f2fb3-ad0e-403f-8f8c-278b9701b758-c000.snappy.parquet',
 '../anime-data/dnn_feat_eng/part-00003-d84f2fb3-ad0e-403f-8f8c-278b9701b758-c000.snappy.parquet',
 '../anime-data/dnn_feat_eng/part-00004-d84f2fb3-ad0e-403f-8f8c-278b9701b758-c000.snappy.parquet',
 '../anime-data/dnn_feat_eng/part-00012-d84f2fb3-ad0e-403f-8f8c-278b9701b758-c000.snappy.parquet',
 '../anime-data/dnn_feat_eng/part-00005-d84f2fb3-ad0e-403f-8f8c-278b9701b758-c000.snappy.parquet',
 '../anime-data/dnn_feat_eng/part-00009-d84f2fb3-ad0e-403f-8f8c-278b9701b758-c000.snappy.parquet',
 '../anime-data/dnn_feat_eng/part-00002-d84f2fb3-ad0e-403f-8f8c-278b9701b758-c000.snappy.parquet',
 '../anime-data/dnn_feat_eng/part-00010-d84f2fb3-ad0e-403f-8f8c-278b9701b758-c000.snappy.parquet',
 '../anime-data/dnn_feat_eng/part-00007-d84f2fb3-ad0e-403f-8f8c-278b9701b758-c000.snappy.parquet',
 '../anime-data/dnn_feat_eng/part-00000-d84f2fb3-ad0e-403f-8f8c-278b9701b758-c000.snappy.parquet',
 '../anime

## Load Data and Train


In [31]:
def build_model_v0(x1_shape, x2_shape, x3_shape, x4_shape):
    x1_input = keras.layers.Input(shape=(x1_shape,))
    x2_input = keras.layers.Input(shape=(x2_shape,))
    x3_input = keras.layers.Input(shape=(x3_shape,))
    x4_input = keras.layers.Input(shape=(x4_shape,))

    # compact embedding for x1 and x2
    compact_x1 = keras.layers.Dense(10)(x1_input)  # 降维到10维
    compact_x2 = keras.layers.Dense(10)(x2_input)  # 降维到10维

    # concat all
    merge = keras.layers.concatenate([compact_x1, compact_x2, x3_input, x4_input])

    # hidden layers (n-th)
    h1 = keras.layers.Dense(128, activation="relu")(merge)
    h2 = keras.layers.Dense(128, activation="relu")(h1)

    # output (yes / no => sigmoid)
    output = keras.layers.Dense(1, activation="sigmoid")(h2)

    model = keras.models.Model(
        inputs=[x1_input, x2_input, x3_input, x4_input], outputs=output
    )

    print(model.summary())

    return model

In [32]:
model = build_model_v0(43, 43, 4, 4)

None


In [33]:
filenames = data_files()

In [37]:
test_x1s = []
test_x2s = []
test_x3s = []
test_x4s = []
test_ys = []

model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

for filename in filenames:
    df = pd.read_parquet(filename)

    train_df = df.sample(frac=0.8, random_state=1)
    test_df = df.drop(train_df.index)

    train_x1, train_x2, train_x3, train_x4 = get_all_features(train_df)
    test_x1, test_x2, test_x3, test_x4 = get_all_features(test_df)

    train_y = get_label(train_df)
    test_y = get_label(test_df)

    test_x1s.append(test_x1)
    test_x2s.append(test_x2)
    test_x3s.append(test_x3)
    test_x4s.append(test_x4)
    test_ys.append(test_y)

    model.fit([train_x1, train_x2, train_x3, train_x4], train_y, 
        batch_size=16, epochs=2, verbose=1)

Epoch 1/2
[1m26013/26013[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 431us/step - accuracy: 0.7158 - loss: 0.5516
Epoch 2/2
[1m26013/26013[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 429us/step - accuracy: 0.7422 - loss: 0.5188
Epoch 1/2
[1m25922/25922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 423us/step - accuracy: 0.7440 - loss: 0.5159
Epoch 2/2
[1m25922/25922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 422us/step - accuracy: 0.7455 - loss: 0.5134
Epoch 1/2
[1m25750/25750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 440us/step - accuracy: 0.7424 - loss: 0.5169
Epoch 2/2
[1m25750/25750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 448us/step - accuracy: 0.7439 - loss: 0.5163
Epoch 1/2
[1m9852/9852[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 474us/step - accuracy: 0.7438 - loss: 0.5137
Epoch 2/2
[1m9852/9852[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 459us/step - accuracy: 0.7455 - loss: 0.511

In [38]:
test_x1 = np.vstack(test_x1s)
test_x2 = np.vstack(test_x2s)
test_x3 = np.vstack(test_x3s)
test_x4 = np.vstack(test_x4s)
test_y = np.vstack(test_ys)

In [39]:
test_loss, test_accuracy = model.evaluate([test_x1, test_x2, test_x3, test_x4], test_y)

print("\n\nTest Loss {}, Test Accuracy {}".format(test_loss, test_accuracy))

[1m39608/39608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 253us/step - accuracy: 0.7453 - loss: 0.5135


Test Loss 0.5131593942642212, Test Accuracy 0.7452802658081055


## Save Model


In [42]:
# model.save("mlp_model.keras")
# model.save("mlp_model.h5")



In [43]:
tf.saved_model.save(model, "./mlp_model")

INFO:tensorflow:Assets written to: ./mlp_model/assets


INFO:tensorflow:Assets written to: ./mlp_model/assets
