# Run DeepCTR DeepFM on SageMaker

In [2]:
#environment settings
import json
import time
import boto3
import os
import glob
import pandas as pd

import sagemaker
from sagemaker import get_execution_role

# SageMaker session: source of the active region and the default S3 bucket.
sess = sagemaker.Session()
aws_region = sess.boto_region_name
default_bucket = sess.default_bucket()

# Execution role reported by the SDK; it is handed to the estimator below.
role = get_execution_role()

# Low-level boto3 clients for direct S3 / SageMaker API calls.
s3_client = boto3.client('s3')
client = boto3.client('sagemaker')


In [3]:
#train.py
!mkdir code

In [4]:
%%writefile code/train.py
import pandas as pd
import tensorflow as tf
import os
import argparse
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from deepctr.models import *
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names

if __name__ == "__main__":
    parser = argparse.ArgumentParser()

#     parser.add_argument("--learning-rate", type=float, default=0.01)
#     parser.add_argument("--batch-size", type=int, default=128)
#     parser.add_argument("--batch-norm", type=bool, default=False)
#     parser.add_argument("--dnn-hidden-units", type=str, default="128,64,32")
#     parser.add_argument("--dropout-rate", type=float, default=0.0)

#     parser.add_argument("--checkpoint", type=str, default=None)
    parser.add_argument('--model_dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))

    args, _ = parser.parse_known_args()
    
    
    data = pd.read_csv('/opt/ml/input/data/train/criteo_sample.txt')

    sparse_features = ['C' + str(i) for i in range(1, 27)]
    dense_features = ['I' + str(i) for i in range(1, 14)]

    data[sparse_features] = data[sparse_features].fillna('-1', )
    data[dense_features] = data[dense_features].fillna(0, )
    target = ['label']

    # 1.Label Encoding for sparse features,and do simple Transformation for dense features
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    mms = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = mms.fit_transform(data[dense_features])

    # 2.count #unique features for each sparse field,and record dense feature field name

    fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4)
                              for i, feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, )
                                                                            for feat in dense_features]

    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3.generate input data for model

    train, test = train_test_split(data, test_size=0.2, random_state=2020)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    # 4.Define Model,train,predict and evaluate
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam", "binary_crossentropy",
                  metrics=['binary_crossentropy'], )

    history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
    model.summary()
    model.save('/opt/ml/model/deepctr-deepfm')
    pred_ans = model.predict(test_model_input, batch_size=256)
    print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
    print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))

Writing code/train.py


In [5]:
%%writefile code/requirements.txt
# Extra packages installed in the training container before train.py runs.
pandas
deepctr
# The PyPI distribution is "scikit-learn"; "sklearn" is a deprecated placeholder name.
scikit-learn

Writing code/requirements.txt


In [9]:
from sagemaker.tensorflow import TensorFlow

model_name = 'deepctr'
model_output_path = f"s3://{default_bucket}/deepctr"

# Single-instance, File-mode TensorFlow 2.3 job that runs code/train.py.
tf_estmator = TensorFlow(
    role=role,
    source_dir="code",
    entry_point="train.py",
    framework_version="2.3",
    py_version='py37',
    instance_type="ml.c5.4xlarge",
    instance_count=1,
    volume_size=250,
    input_mode="File",
    output_path=model_output_path,
    disable_profiler=True,
    keep_alive_period_in_seconds=1800,  # warm pool
)

# One input channel named "train", backed by the dataset prefix in the default bucket.
channels = {"train": f's3://{default_bucket}/datasets/deepctr/'}

# Job name = model name + local timestamp, so every run gets a fresh name.
tf_estmator.fit(
    inputs=channels,
    job_name=f"{model_name}{time.strftime('%Y%m%d%H%M%S')}",
)


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: deepctr20230109164620


2023-01-09 16:46:21 Starting - Starting the training job...
2023-01-09 16:46:36 Starting - Preparing the instances for training......
2023-01-09 16:47:51 Downloading - Downloading input data
2023-01-09 16:47:51 Training - Downloading the training image...
2023-01-09 16:47:56 Training - Training image download completed. Training in progress..[34m2023-01-09 16:48:21.808081: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.[0m
[34m2023-01-09 16:48:21.808221: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.[0m
[34m2023-01-09 16:48:21.833953: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.[0m
[34m2023-01-09 16:48:22,925 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2023-01-09 16:48:22,933