# Comparing DCN, DeepFM and xDeepFM in PyTorch

![Process flow](https://github.com/RecoHut-Stanzas/S516304/raw/main/images/process_flow.svg)

In [None]:
# Create a `recohut` directory, clone the S516304 repository into it, and make
# it the working directory for the rest of the notebook.
# NOTE(review): the saved output below reports a clone into 'stanza', so it
# appears to come from an earlier version of this command — confirm.
!mkdir recohut && git clone https://github.com/RecoHut-Stanzas/S516304.git recohut
%cd recohut

Cloning into 'stanza'...
remote: Enumerating objects: 1016, done.[K
remote: Counting objects: 100% (1016/1016), done.[K
remote: Compressing objects: 100% (837/837), done.[K
remote: Total 1016 (delta 259), reused 832 (delta 149), pack-reused 0[K
Receiving objects: 100% (1016/1016), 185.63 MiB | 24.93 MiB/s, done.
Resolving deltas: 100% (259/259), done.
/content/stanza


In [None]:
import logging
from datetime import datetime
import sys
import os

from recohut.code.pytorch.models import DCN, DeepFM, xDeepFM
from recohut.code.pytorch.utils import seed_everything
from recohut.code.datasets import data_generator
from recohut.code.datasets.taobao import FeatureEncoder
from recohut.code.utils import set_logger, print_to_json

In [None]:
# Put the current working directory at the front of the module search path.
# NOTE(review): this cell sits after the import cell, so on a fresh
# top-to-bottom run it cannot influence how those imports resolve — confirm
# whether it should run first or is still needed at all.
sys.path.insert(0,'.')

## DCN

In [None]:
if __name__ == '__main__':
    # ---- Feature schema ---------------------------------------------------
    # One feature group: every listed column is declared as an active,
    # string-typed categorical field. The label column is 'clk' (float).
    feature_cols = [dict(
        name=[
            "userid", "adgroup_id", "pid", "cate_id", "campaign_id",
            "customer", "brand", "cms_segid", "cms_group_id",
            "final_gender_code", "age_level", "pvalue_level",
            "shopping_level", "occupation",
        ],
        active=True,
        dtype='str',
        type='categorical',
    )]
    label_col = dict(name='clk', dtype=float)

    # ---- Experiment configuration -----------------------------------------
    # The whole dict is forwarded to the logger setup and to the model
    # constructor; selected entries are also handed to the encoder and the
    # data generator below.
    params = dict(
        # identifiers and file locations
        model_id='DCN_demo',
        dataset_id='tiny_data_demo',
        train_data='./data/tiny_data/train_sample.csv',
        valid_data='./data/tiny_data/valid_sample.csv',
        test_data='./data/tiny_data/test_sample.csv',
        model_root='../checkpoints/',
        data_root='./data/',
        feature_cols=feature_cols,
        label_col=label_col,
        # network settings
        embedding_regularizer=0,
        net_regularizer=0,
        dnn_hidden_units=[64, 64],
        dnn_activations="relu",
        crossing_layers=3,
        learning_rate=1e-3,
        net_dropout=0,
        batch_norm=False,
        optimizer='adam',
        task='binary_classification',
        loss='binary_crossentropy',
        metrics=['logloss', 'AUC'],
        min_categr_count=1,
        embedding_dim=10,
        # training loop settings
        batch_size=64,
        epochs=3,
        shuffle=True,
        seed=2019,
        monitor='AUC',
        monitor_mode='max',
        use_hdf5=True,
        pickle_feature_encoder=True,
        save_best_only=True,
        every_x_epochs=1,
        patience=2,
        workers=1,
        verbose=0,
        version='pytorch',
        gpu=-1,  # presumably selects CPU execution; confirm against the model code
    )

    # ---- Logging and seeding ----------------------------------------------
    set_logger(params)
    logging.info('Start the demo...')
    logging.info(print_to_json(params))
    seed_everything(seed=params['seed'])

    # ---- Feature encoding -------------------------------------------------
    # Each keyword argument is read from `params` under the same name.
    feature_encoder = FeatureEncoder(
        feature_cols,
        label_col,
        **{key: params[key] for key in ('dataset_id', 'data_root', 'version')})
    feature_encoder.fit(
        **{key: params[key] for key in ('train_data', 'min_categr_count')})

    # ---- Data generators --------------------------------------------------
    train_gen, valid_gen, test_gen = data_generator(
        feature_encoder,
        **{key: params[key] for key in ('train_data', 'valid_data', 'test_data',
                                        'batch_size', 'shuffle', 'use_hdf5')})

    # ---- Train, reload the saved checkpoint, evaluate ---------------------
    model = DCN(feature_encoder.feature_map, **params)
    model.fit_generator(
        train_gen,
        validation_data=valid_gen,
        epochs=params['epochs'],
        verbose=params['verbose'])
    model.load_weights(model.checkpoint)

    logging.info('***** validation/test results *****')
    model.evaluate_generator(valid_gen)
    model.evaluate_generator(test_gen)

2021-12-02 16:33:53,369 P322 INFO Start the demo...
2021-12-02 16:33:53,371 P322 INFO {
    "batch_norm": "False",
    "batch_size": "64",
    "crossing_layers": "3",
    "data_root": "./data/",
    "dataset_id": "tiny_data_demo",
    "dnn_activations": "relu",
    "dnn_hidden_units": "[64, 64]",
    "embedding_dim": "10",
    "embedding_regularizer": "0",
    "epochs": "3",
    "every_x_epochs": "1",
    "feature_cols": "[{'name': ['userid', 'adgroup_id', 'pid', 'cate_id', 'campaign_id', 'customer', 'brand', 'cms_segid', 'cms_group_id', 'final_gender_code', 'age_level', 'pvalue_level', 'shopping_level', 'occupation'], 'active': True, 'dtype': 'str', 'type': 'categorical'}]",
    "gpu": "-1",
    "label_col": "{'name': 'clk', 'dtype': <class 'float'>}",
    "learning_rate": "0.001",
    "loss": "binary_crossentropy",
    "metrics": "['logloss', 'AUC']",
    "min_categr_count": "1",
    "model_id": "DCN_demo",
    "model_root": "../checkpoints/",
    "monitor": "AUC",
    "monitor_mode"

## DeepFM

In [None]:
if __name__ == '__main__':
    # ---- Feature schema ---------------------------------------------------
    # One feature group: every listed column is declared as an active,
    # string-typed categorical field. The label column is 'clk' (float).
    feature_cols = [dict(
        name=[
            "userid", "adgroup_id", "pid", "cate_id", "campaign_id",
            "customer", "brand", "cms_segid", "cms_group_id",
            "final_gender_code", "age_level", "pvalue_level",
            "shopping_level", "occupation",
        ],
        active=True,
        dtype='str',
        type='categorical',
    )]
    label_col = dict(name='clk', dtype=float)

    # ---- Experiment configuration -----------------------------------------
    # The whole dict is forwarded to the logger setup and to the model
    # constructor; selected entries are also handed to the encoder and the
    # data generator below.
    params = dict(
        # identifiers and file locations
        model_id='DeepFM_demo',
        dataset_id='tiny_data_demo',
        train_data='./data/tiny_data/train_sample.csv',
        valid_data='./data/tiny_data/valid_sample.csv',
        test_data='./data/tiny_data/test_sample.csv',
        model_root='../checkpoints/',
        data_root='./data/',
        feature_cols=feature_cols,
        label_col=label_col,
        # network settings
        embedding_regularizer=0,
        net_regularizer=0,
        hidden_units=[64, 64],
        hidden_activations="relu",
        learning_rate=1e-3,
        net_dropout=0,
        batch_norm=False,
        optimizer='adam',
        task='binary_classification',
        loss='binary_crossentropy',
        metrics=['logloss', 'AUC'],
        min_categr_count=1,
        embedding_dim=10,
        # training loop settings
        batch_size=16,
        epochs=3,
        shuffle=True,
        seed=2019,
        monitor='AUC',
        monitor_mode='max',
        use_hdf5=True,
        pickle_feature_encoder=True,
        save_best_only=True,
        every_x_epochs=1,
        patience=2,
        workers=1,
        verbose=0,
        version='pytorch',
        gpu=-1,  # presumably selects CPU execution; confirm against the model code
    )

    # ---- Logging and seeding ----------------------------------------------
    set_logger(params)
    logging.info('Start the demo...')
    logging.info(print_to_json(params))
    seed_everything(seed=params['seed'])

    # ---- Feature encoding -------------------------------------------------
    # Each keyword argument is read from `params` under the same name.
    feature_encoder = FeatureEncoder(
        feature_cols,
        label_col,
        **{key: params[key] for key in ('dataset_id', 'data_root', 'version')})
    feature_encoder.fit(
        **{key: params[key] for key in ('train_data', 'min_categr_count')})

    # ---- Data generators --------------------------------------------------
    train_gen, valid_gen, test_gen = data_generator(
        feature_encoder,
        **{key: params[key] for key in ('train_data', 'valid_data', 'test_data',
                                        'batch_size', 'shuffle', 'use_hdf5')})

    # ---- Train, reload the saved checkpoint, evaluate ---------------------
    model = DeepFM(feature_encoder.feature_map, **params)
    model.fit_generator(
        train_gen,
        validation_data=valid_gen,
        epochs=params['epochs'],
        verbose=params['verbose'])
    model.load_weights(model.checkpoint)

    logging.info('***** validation/test results *****')
    model.evaluate_generator(valid_gen)
    model.evaluate_generator(test_gen)

2021-12-02 16:35:58,268 P322 INFO Start the demo...
2021-12-02 16:35:58,270 P322 INFO {
    "batch_norm": "False",
    "batch_size": "16",
    "data_root": "./data/",
    "dataset_id": "tiny_data_demo",
    "embedding_dim": "10",
    "embedding_regularizer": "0",
    "epochs": "3",
    "every_x_epochs": "1",
    "feature_cols": "[{'name': ['userid', 'adgroup_id', 'pid', 'cate_id', 'campaign_id', 'customer', 'brand', 'cms_segid', 'cms_group_id', 'final_gender_code', 'age_level', 'pvalue_level', 'shopping_level', 'occupation'], 'active': True, 'dtype': 'str', 'type': 'categorical'}]",
    "gpu": "-1",
    "hidden_activations": "relu",
    "hidden_units": "[64, 64]",
    "label_col": "{'name': 'clk', 'dtype': <class 'float'>}",
    "learning_rate": "0.001",
    "loss": "binary_crossentropy",
    "metrics": "['logloss', 'AUC']",
    "min_categr_count": "1",
    "model_id": "DeepFM_demo",
    "model_root": "../checkpoints/",
    "monitor": "AUC",
    "monitor_mode": "max",
    "net_dropout"

## xDeepFM

In [None]:
if __name__ == '__main__':
    # ---- Feature schema ---------------------------------------------------
    # One feature group: every listed column is declared as an active,
    # string-typed categorical field. The label column is 'clk' (float).
    feature_cols = [dict(
        name=[
            "userid", "adgroup_id", "pid", "cate_id", "campaign_id",
            "customer", "brand", "cms_segid", "cms_group_id",
            "final_gender_code", "age_level", "pvalue_level",
            "shopping_level", "occupation",
        ],
        active=True,
        dtype='str',
        type='categorical',
    )]
    label_col = dict(name='clk', dtype=float)

    # ---- Experiment configuration -----------------------------------------
    # The whole dict is forwarded to the logger setup and to the model
    # constructor; selected entries are also handed to the encoder and the
    # data generator below.
    params = dict(
        # identifiers and file locations
        model_id='xDeepFM_demo',
        dataset_id='tiny_data_demo',
        train_data='./data/tiny_data/train_sample.csv',
        valid_data='./data/tiny_data/valid_sample.csv',
        test_data='./data/tiny_data/test_sample.csv',
        model_root='../checkpoints/',
        data_root='./data/',
        feature_cols=feature_cols,
        label_col=label_col,
        # network settings
        embedding_regularizer=0,
        net_regularizer=0,
        dnn_hidden_units=[64, 64],
        dnn_activations="relu",
        learning_rate=1e-3,
        net_dropout=0,
        cin_layer_units=[16, 16, 16],
        batch_norm=False,
        optimizer='adam',
        task='binary_classification',
        loss='binary_crossentropy',
        metrics=['logloss', 'AUC'],
        min_categr_count=1,
        embedding_dim=10,
        # training loop settings
        batch_size=16,
        epochs=3,
        shuffle=True,
        seed=2019,
        monitor='AUC',
        monitor_mode='max',
        use_hdf5=True,
        pickle_feature_encoder=True,
        save_best_only=True,
        every_x_epochs=1,
        patience=2,
        workers=1,
        verbose=0,
        version='pytorch',
        gpu=-1,  # presumably selects CPU execution; confirm against the model code
    )

    # ---- Logging and seeding ----------------------------------------------
    set_logger(params)
    logging.info('Start the demo...')
    logging.info(print_to_json(params))
    seed_everything(seed=params['seed'])

    # ---- Feature encoding -------------------------------------------------
    # Each keyword argument is read from `params` under the same name.
    feature_encoder = FeatureEncoder(
        feature_cols,
        label_col,
        **{key: params[key] for key in ('dataset_id', 'data_root', 'version')})
    feature_encoder.fit(
        **{key: params[key] for key in ('train_data', 'min_categr_count')})

    # ---- Data generators --------------------------------------------------
    train_gen, valid_gen, test_gen = data_generator(
        feature_encoder,
        **{key: params[key] for key in ('train_data', 'valid_data', 'test_data',
                                        'batch_size', 'shuffle', 'use_hdf5')})

    # ---- Train, reload the saved checkpoint, evaluate ---------------------
    model = xDeepFM(feature_encoder.feature_map, **params)
    model.fit_generator(
        train_gen,
        validation_data=valid_gen,
        epochs=params['epochs'],
        verbose=params['verbose'])
    model.load_weights(model.checkpoint)

    # Unlike the two cells above, this one also evaluates on the training split.
    logging.info('***** Train/validation/test results *****')
    model.evaluate_generator(train_gen)
    model.evaluate_generator(valid_gen)
    model.evaluate_generator(test_gen)

2021-12-02 16:36:53,603 P322 INFO Start the demo...
2021-12-02 16:36:53,606 P322 INFO {
    "batch_norm": "False",
    "batch_size": "16",
    "cin_layer_units": "[16, 16, 16]",
    "data_root": "./data/",
    "dataset_id": "tiny_data_demo",
    "dnn_activations": "relu",
    "dnn_hidden_units": "[64, 64]",
    "embedding_dim": "10",
    "embedding_regularizer": "0",
    "epochs": "3",
    "every_x_epochs": "1",
    "feature_cols": "[{'name': ['userid', 'adgroup_id', 'pid', 'cate_id', 'campaign_id', 'customer', 'brand', 'cms_segid', 'cms_group_id', 'final_gender_code', 'age_level', 'pvalue_level', 'shopping_level', 'occupation'], 'active': True, 'dtype': 'str', 'type': 'categorical'}]",
    "gpu": "-1",
    "label_col": "{'name': 'clk', 'dtype': <class 'float'>}",
    "learning_rate": "0.001",
    "loss": "binary_crossentropy",
    "metrics": "['logloss', 'AUC']",
    "min_categr_count": "1",
    "model_id": "xDeepFM_demo",
    "model_root": "../checkpoints/",
    "monitor": "AUC",
   

---

In [None]:
# Install the `tree` command-line utility used for the directory listing in the next cell.
!apt-get -qq install tree

In [None]:
# List the log and checkpoint files written under ../checkpoints, with their sizes.
!tree -h --du ../checkpoints

../checkpoints
└── [321K]  tiny_data_demo
    ├── [6.7K]  DCN_demo.log
    ├── [ 83K]  DCN_demo_model.ckpt
    ├── [6.0K]  DeepFM_demo.log
    ├── [ 88K]  DeepFM_demo_model.ckpt
    ├── [6.1K]  xDeepFM_demo.log
    └── [128K]  xDeepFM_demo_model.ckpt

 325K used in 1 directory, 6 files


In [None]:
# Record the run environment: install and (re)load the watermark extension,
# then print the author, last-updated timestamp, machine details and the
# versions of the imported packages.
# NOTE(review): the install is unpinned, so the watermark version may differ between runs.
!pip install -q watermark
%reload_ext watermark
%watermark -a "Sparsh A." -m -iv -u -t -d

Author: Sparsh A.

Last updated: 2021-12-02 16:51:41

Compiler    : GCC 7.5.0
OS          : Linux
Release     : 5.4.104+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 2
Architecture: 64bit

logging: 0.5.1.2
IPython: 5.5.0
sys    : 3.7.12 (default, Sep 10 2021, 00:21:48) 
[GCC 7.5.0]



---

**END**