Reference: DeepCTR documentation, "Regression: Movielens" example — https://deepctr-doc.readthedocs.io/en/latest/Examples.html#regression-movielens

In [None]:
import os

# Directory that holds the DeepCTR example data files.
# The home directory is resolved with expanduser() rather than
# os.environ["HOME"], so the lookup does not raise KeyError on platforms
# where the HOME variable is unset (e.g. Windows). Path components are
# joined individually so the native separator is used throughout.
DIR_DATA = os.path.join(
    os.path.expanduser("~"),
    "workspace",
    "third_party",
    "shenweichen",
    "DeepCTR",
    "examples",
)

In [None]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, get_feature_names

In [None]:
# Load the MovieLens sample file from the example data directory.
data = pd.read_csv(
    filepath_or_buffer=os.path.join(DIR_DATA, "movielens_sample.txt"),
)

# Columns that are fed to the model as sparse (categorical) features.
sparse_features = [
    "movie_id",
    "user_id",
    "gender",
    "age",
    "occupation",
    "zip",
]

# Label column(s); kept as a list so frame[target] selects a DataFrame.
target = ["rating"]

In [None]:
# 1. Label-encode the sparse features: every listed column is overwritten
#    with the integer codes produced by its own, freshly fitted LabelEncoder.
for feat in sparse_features:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(y=data[feat])

In [None]:
# 2. Describe each sparse field as a SparseFeat column.
#    The vocabulary size is the largest value in the column plus one
#    (assumes the column holds zero-based integer ids, e.g. after label
#    encoding); every field uses an embedding dimension of 4.
fixlen_feature_columns = [
    SparseFeat(
        name=column,
        vocabulary_size=data[column].max() + 1,
        embedding_dim=4,
    )
    for column in sparse_features
]

In [None]:
# The linear and DNN feature-column names are both bound to the very same
# list object as fixlen_feature_columns (aliases, not copies).
linear_feature_columns = dnn_feature_columns = fixlen_feature_columns

In [None]:
# Names of the model inputs, derived from the concatenated linear + DNN
# feature columns.
# NOTE(review): presumably get_feature_names collapses names that appear in
# both lists — confirm against the DeepCTR documentation.
feature_names = get_feature_names(
    [*linear_feature_columns, *dnn_feature_columns]
)

In [None]:
# 3. Generate the input data for the model.
#    Hold out 20% of the rows as a test set; the fixed random_state keeps
#    the split repeatable between runs.
train, test = train_test_split(data, test_size=0.2, random_state=2020)

# For each split, build a dict mapping every feature name to that column's
# underlying value array.
train_model_input, test_model_input = (
    {column: split[column].values for column in feature_names}
    for split in (train, test)
)

In [None]:
# 4. Define the model, then train, predict and evaluate (subsequent cells).
#    DeepFM is built from the linear and DNN feature columns and set up for
#    a regression task.
model = DeepFM(
    linear_feature_columns=linear_feature_columns,
    dnn_feature_columns=dnn_feature_columns,
    task="regression",
)

In [None]:
# Configure training: Adam optimizer, mean-squared-error loss, and MSE
# additionally tracked as a reported metric.
model.compile(optimizer="adam", loss="mse", metrics=["mse"])

In [None]:
# Train on the training split. The labels are the target column(s) of the
# training frame; a fraction of 0.2 of the supplied training data is passed
# as validation_split. The returned object is kept so its per-epoch
# `history` can be inspected afterwards.
history = model.fit(
    x=train_model_input,
    y=train[target].values,
    batch_size=256,
    epochs=80,
    verbose=2,
    validation_split=0.2,
)

In [None]:
# Predict on the held-out test inputs, in batches of 256.
pred_ans = model.predict(x=test_model_input, batch_size=256)

In [None]:
# Report the mean squared error between the test labels and the
# predictions, rounded to four decimal places.
print(
    "test MSE",
    round(
        mean_squared_error(y_true=test[target].values, y_pred=pred_ans),
        4,
    ),
)

In [None]:
# Tabulate the recorded training history as a DataFrame.
history_df = pd.DataFrame(data=history.history)

In [None]:
# Plot the 'mse' and 'val_mse' columns of the history table together.
history_df[["mse", "val_mse"]].plot(title="MSE")