<a href="https://colab.research.google.com/github/RecoHut-Projects/recohut/blob/US632593/tutorials/modeling/Training_13_CTR_models_on_Taobao_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q pytorch-lightning

!rm -r recohut
!git clone --branch v0.0.11.post2 https://github.com/RecoHut-Projects/recohut.git
!pip install -U ./recohut

In [1]:
from recohut.datasets.taobao import TaobaoDataModule
from recohut.trainers.pl_trainer import pl_trainer

import warnings
# Suppress every warning category process-wide; this stays in effect for all later cells.
warnings.filterwarnings('ignore')

In [2]:
# Settings handed to TaobaoDataModule as keyword arguments in the next cell.
# dict(...) keyword form: a repeated key is a SyntaxError instead of a silent overwrite.
params = dict(
    # run identity and paths
    model_id='DCN_demo',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # network settings
    dnn_hidden_units=[64, 64],
    dnn_activations='relu',
    crossing_layers=3,
    learning_rate=1e-3,
    net_dropout=0,
    batch_norm=False,
    optimizer='adamw',
    # objective and reported metrics
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    embedding_dim=10,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [3]:
!rm -r /content/data
ds = TaobaoDataModule(**params)
ds.prepare_data()

Downloading https://github.com/RecoHut-Datasets/sample_ctr/raw/v1/train_sample.csv
Downloading https://github.com/RecoHut-Datasets/sample_ctr/raw/v1/valid_sample.csv
Downloading https://github.com/RecoHut-Datasets/sample_ctr/raw/v1/test_sample.csv
Processing...
Done!


---

## DeepCrossing

In [5]:
from recohut.models.deepcrossing import DeepCrossing

In [6]:
# Hyperparameters for the DeepCrossing run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='DeepCrossing',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # network settings
    dnn_hidden_units=[64, 64],
    dnn_activations='relu',
    learning_rate=1e-3,
    net_dropout=0,
    batch_norm=False,
    optimizer='adamw',
    use_residual=True,
    residual_blocks=[500, 500, 500],
    # objective and reported metrics
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    embedding_dim=10,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [7]:
# Instantiate DeepCrossing from the data module's feature map plus the params dict above.
model = DeepCrossing(ds.dataset.feature_map, **params)

In [8]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type           | Params
-----------------------------------------------------
0 | embedding_layer   | EmbeddingLayer | 4.8 K 
1 | crossing_layer    | Sequential     | 422 K 
2 | output_activation | Sigmoid        | 0     
-----------------------------------------------------
426 K     Trainable params
0         Non-trainable params
426 K     Total params
1.707     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.041101 - AUC: 1.000000
[Metrics] logloss: 0.143616 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.041101 - AUC: 1.000000
[Metrics] logloss: 0.143616 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.0780)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.0780)}}]

---

## AFN

In [9]:
from recohut.models.afn import AFN

In [10]:
# Hyperparameters for the AFN run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='AFN',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    batch_norm=False,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    embedding_dim=10,
    # AFN-specific settings
    logarithmic_neurons=1200,
    afn_hidden_units=[400, 400, 400],
    afn_activations='relu',
    afn_dropout=0,
    ensemble_dnn=False,
    dnn_hidden_units=[400, 400, 400],
    dnn_activations='relu',
    dnn_dropout=0,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [11]:
# Instantiate AFN from the data module's feature map plus the params dict above.
model = AFN(ds.dataset.feature_map, **params)

In [12]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type           | Params
-----------------------------------------------------
0 | embedding_layer   | EmbeddingLayer | 4.8 K 
1 | coefficient_W     | Linear         | 16.8 K
2 | dense_layer       | MLP_Layer      | 5.1 M 
3 | log_batch_norm    | BatchNorm1d    | 28    
4 | exp_batch_norm    | BatchNorm1d    | 2.4 K 
5 | output_activation | Sigmoid        | 0     
-----------------------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 M     Total params
20.582    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.251845 - AUC: 0.500000
[Metrics] logloss: 1.343175 - AUC: 0.500000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.251845 - AUC: 0.500000
[Metrics] logloss: 1.343175 - AUC: 0.500000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(0.5000), 'logloss': tensor(0.6447)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(0.5000), 'logloss': tensor(0.6447)}}]

## AutoInt

In [13]:
from recohut.models.autoint import AutoInt

In [14]:
# Hyperparameters for the AutoInt run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='AutoInt',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    embedding_dim=10,
    # AutoInt-specific settings
    dnn_hidden_units=[400, 400],
    dnn_activations='relu',
    net_dropout=0,
    num_heads=2,
    attention_layers=3,
    attention_dim=40,
    use_residual=True,
    batch_norm=False,
    layer_norm=False,
    use_scale=False,
    use_wide=False,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [15]:
# Instantiate AutoInt from the data module's feature map plus the params dict above.
model = AutoInt(ds.dataset.feature_map, **params)

In [16]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type           | Params
-----------------------------------------------------
0 | embedding_layer   | EmbeddingLayer | 4.8 K 
1 | dnn               | MLP_Layer      | 217 K 
2 | self_attention    | Sequential     | 41.6 K
3 | fc                | Linear         | 1.1 K 
4 | output_activation | Sigmoid        | 0     
-----------------------------------------------------
264 K     Trainable params
0         Non-trainable params
264 K     Total params
1.059     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.073161 - AUC: 1.000000
[Metrics] logloss: 0.314330 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.073161 - AUC: 1.000000
[Metrics] logloss: 0.314330 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.1600)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.1600)}}]

## CCPM

In [17]:
from recohut.models.ccpm import CCPM

In [18]:
# Hyperparameters for the CCPM run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='CCPM',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    embedding_dim=10,
    # CCPM-specific settings
    channels=[3, 3],
    kernel_heights=[3, 3],
    activation='Tanh',
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [19]:
# Instantiate CCPM from the data module's feature map plus the params dict above.
model = CCPM(ds.dataset.feature_map, **params)

In [20]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type           | Params
-----------------------------------------------------
0 | embedding_layer   | EmbeddingLayer | 4.8 K 
1 | conv_layer        | CCPM_ConvLayer | 42    
2 | fc                | Linear         | 91    
3 | output_activation | Sigmoid        | 0     
-----------------------------------------------------
4.9 K     Trainable params
0         Non-trainable params
4.9 K     Total params
0.020     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.303745 - AUC: 0.793651
[Metrics] logloss: 0.379345 - AUC: 0.818182


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.303745 - AUC: 0.793651
[Metrics] logloss: 0.379345 - AUC: 0.818182
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(0.8025), 'logloss': tensor(0.3310)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(0.8025), 'logloss': tensor(0.3310)}}]

## DeepFM

In [21]:
from recohut.models.deepfm import DeepFM

In [22]:
# Hyperparameters for the DeepFM run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='DeepFM',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    embedding_dim=10,
    # DeepFM-specific settings
    hidden_units=[300, 300, 300],
    hidden_activations='relu',
    net_regularizer=0,
    embedding_regularizer=0,
    batch_norm=False,
    net_dropout=0,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [23]:
# Instantiate DeepFM from the data module's feature map plus the params dict above.
model = DeepFM(ds.dataset.feature_map, **params)

In [24]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type           | Params
-----------------------------------------------------
0 | embedding_layer   | EmbeddingLayer | 4.8 K 
1 | fm_layer          | FM_Layer       | 854   
2 | dnn               | MLP_Layer      | 223 K 
3 | output_activation | Sigmoid        | 0     
-----------------------------------------------------
228 K     Trainable params
378       Non-trainable params
228 K     Total params
0.915     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.080842 - AUC: 1.000000
[Metrics] logloss: 0.377655 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.080842 - AUC: 1.000000
[Metrics] logloss: 0.377655 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.1877)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.1877)}}]

## WideDeep

In [25]:
from recohut.models.widedeep import WideDeep

In [26]:
# Hyperparameters for the WideDeep run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='WideDeep',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    embedding_dim=10,
    # WideDeep-specific settings
    hidden_units=[300, 300, 300],
    hidden_activations='relu',
    net_regularizer=0,
    embedding_regularizer=0,
    batch_norm=False,
    net_dropout=0,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [27]:
# Instantiate WideDeep from the data module's feature map plus the params dict above.
model = WideDeep(ds.dataset.feature_map, **params)

In [28]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type           | Params
-----------------------------------------------------
0 | embedding_layer   | EmbeddingLayer | 4.8 K 
1 | lr_layer          | LR_Layer       | 476   
2 | dnn               | MLP_Layer      | 223 K 
3 | output_activation | Sigmoid        | 0     
-----------------------------------------------------
228 K     Trainable params
0         Non-trainable params
228 K     Total params
0.914     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.082667 - AUC: 1.000000
[Metrics] logloss: 0.334438 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.082667 - AUC: 1.000000
[Metrics] logloss: 0.334438 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.1733)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.1733)}}]

## xDeepFM

In [29]:
from recohut.models.xdeepfm import xDeepFM

In [30]:
# Hyperparameters for the xDeepFM run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='xDeepFM',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    embedding_dim=10,
    # xDeepFM-specific settings
    dnn_hidden_units=[500, 500, 500],
    cin_layer_units=[32, 32, 32],
    hidden_activations='relu',
    net_dropout=0,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [31]:
# Instantiate xDeepFM from the data module's feature map plus the params dict above.
model = xDeepFM(ds.dataset.feature_map, **params)

In [32]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type                     | Params
---------------------------------------------------------------
0 | embedding_layer   | EmbeddingLayer           | 4.8 K 
1 | dnn               | MLP_Layer                | 572 K 
2 | lr_layer          | LR_Layer                 | 476   
3 | cin               | CompressedInteractionNet | 35.1 K
4 | output_activation | Sigmoid                  | 0     
---------------------------------------------------------------
612 K     Trainable params
0         Non-trainable params
612 K     Total params
2.449     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.049958 - AUC: 1.000000
[Metrics] logloss: 0.209271 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.049958 - AUC: 1.000000
[Metrics] logloss: 0.209271 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.1073)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.1073)}}]

## DeepIM

In [33]:
from recohut.models.deepim import DeepIM

In [34]:
# Hyperparameters for the DeepIM run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='DeepIM',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    embedding_dim=10,
    # DeepIM-specific settings
    im_order=2,
    im_batch_norm=False,
    hidden_units=[300, 300, 300],
    hidden_activations='relu',
    net_regularizer=0,
    embedding_regularizer=0,
    net_batch_norm=False,
    net_dropout=0,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [35]:
# Instantiate DeepIM from the data module's feature map plus the params dict above.
model = DeepIM(ds.dataset.feature_map, **params)

In [36]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type               | Params
---------------------------------------------------------
0 | embedding_layer   | EmbeddingLayer     | 4.8 K 
1 | im_layer          | InteractionMachine | 21    
2 | dnn               | MLP_Layer          | 223 K 
3 | output_activation | Sigmoid            | 0     
---------------------------------------------------------
227 K     Trainable params
0         Non-trainable params
227 K     Total params
0.912     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.059543 - AUC: 1.000000
[Metrics] logloss: 0.268423 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.059543 - AUC: 1.000000
[Metrics] logloss: 0.268423 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.1347)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.1347)}}]

## FFM

In [37]:
from recohut.models.ffm import FFM

In [38]:
# Hyperparameters for the FFM run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='FFM',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    # FFM-specific settings (note the smaller embedding_dim than the other configs)
    embedding_dim=2,
    regularizer=0,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [39]:
# Instantiate FFM from the data module's feature map plus the params dict above.
model = FFM(ds.dataset.feature_map, **params)

In [40]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type       | Params
-------------------------------------------------
0 | lr_layer          | LR_Layer   | 477   
1 | embedding_layers  | ModuleList | 12.4 K
2 | output_activation | Sigmoid    | 0     
-------------------------------------------------
12.9 K    Trainable params
0         Non-trainable params
12.9 K    Total params
0.051     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.495900 - AUC: 1.000000
[Metrics] logloss: 0.520576 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.495900 - AUC: 1.000000
[Metrics] logloss: 0.520576 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.5048)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.5048)}}]

## PNN

In [41]:
from recohut.models.pnn import PNN

In [42]:
# Hyperparameters for the PNN run; expanded into the model constructor below.
# The original literal listed 'optimizer' twice (the second silently overwrote the
# first with the same value); the duplicate is removed here. dict(...) keyword form
# turns any future repeated key into a SyntaxError.
# NOTE(review): unlike the other configs, no embedding_dim is set, so the model's own
# default applies — the duplicated entry may have been meant to be embedding_dim; confirm.
params = dict(
    # run identity and paths
    model_id='PNN',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    # PNN-specific settings
    hidden_units=[300, 300, 300],
    hidden_activations='relu',
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [43]:
# Instantiate PNN from the data module's feature map plus the params dict above.
model = PNN(ds.dataset.feature_map, **params)

In [44]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name                | Type              | Params
----------------------------------------------------------
0 | embedding_layer     | EmbeddingLayer    | 4.8 K 
1 | inner_product_layer | InnerProductLayer | 378   
2 | dnn                 | MLP_Layer         | 250 K 
----------------------------------------------------------
255 K     Trainable params
378       Non-trainable params
255 K     Total params
1.023     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.096395 - AUC: 1.000000
[Metrics] logloss: 0.429098 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.096395 - AUC: 1.000000
[Metrics] logloss: 0.429098 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.2162)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.2162)}}]

## LorentzFM

In [4]:
from recohut.models.lorentzfm import LorentzFM

In [5]:
# Hyperparameters for the LorentzFM run; expanded into the model constructor below.
# The original literal listed 'optimizer' twice (the second silently overwrote the
# first with the same value); the duplicate is removed here. dict(...) keyword form
# turns any future repeated key into a SyntaxError.
# NOTE(review): unlike the other configs, no embedding_dim is set, so the model's own
# default applies — the duplicated entry may have been meant to be embedding_dim; confirm.
params = dict(
    # run identity and paths
    model_id='LorentzFM',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    # LorentzFM-specific settings
    regularizer=0,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [6]:
# Instantiate LorentzFM from the data module's feature map plus the params dict above.
model = LorentzFM(ds.dataset.feature_map, **params)

In [7]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name                | Type              | Params
----------------------------------------------------------
0 | embedding_layer     | EmbeddingLayer    | 4.8 K 
1 | inner_product_layer | InnerProductLayer | 378   
2 | output_activation   | Sigmoid           | 0     
----------------------------------------------------------
4.8 K     Trainable params
378       Non-trainable params
5.1 K     Total params
0.021     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.513260 - AUC: 1.000000
[Metrics] logloss: 0.527023 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.513260 - AUC: 1.000000
[Metrics] logloss: 0.527023 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.5182)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.5182)}}]

## HOFM

In [8]:
from recohut.models.hofm import HOFM

In [9]:
# Hyperparameters for the HOFM run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='HOFM',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    # HOFM-specific settings
    regularizer=0,
    order=4,
    embedding_dim=10,
    reuse_embedding=False,
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [10]:
# Instantiate HOFM from the data module's feature map plus the params dict above.
model = HOFM(ds.dataset.feature_map, **params)

In [11]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name                | Type              | Params
----------------------------------------------------------
0 | embedding_layers    | ModuleList        | 14.3 K
1 | inner_product_layer | InnerProductLayer | 378   
2 | lr_layer            | LR_Layer          | 477   
3 | output_activation   | Sigmoid           | 0     
----------------------------------------------------------
14.8 K    Trainable params
378       Non-trainable params
15.1 K    Total params
0.061     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.508917 - AUC: 1.000000
[Metrics] logloss: 0.530008 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.508917 - AUC: 1.000000
[Metrics] logloss: 0.530008 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.5165)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.5165)}}]

## FmFM

In [55]:
from recohut.models.fmfm import FmFM

In [56]:
# Hyperparameters for the FmFM run; expanded into the model constructor below.
params = dict(
    # run identity and paths
    model_id='FmFM',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimiser, objective and reported metrics
    learning_rate=1e-3,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    # FmFM-specific settings
    embedding_dim=10,
    regularizer=0,
    field_interaction_type='matrixed',
    # batching, epochs, seeding, misc
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [57]:
# Instantiate FmFM from the data module's feature map plus the params dict above.
model = FmFM(ds.dataset.feature_map, **params)

In [58]:
# Run the recohut Lightning helper on the model and data module.
# NOTE(review): max_epochs=5 here, while params['epochs'] is 3 — confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type           | Params
-----------------------------------------------------
0 | embedding_layer   | EmbeddingLayer | 4.8 K 
1 | lr_layer          | LR_Layer       | 477   
2 | output_activation | Sigmoid        | 0     
-----------------------------------------------------
14.3 K    Trainable params
0         Non-trainable params
14.3 K    Total params
0.057     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Metrics] logloss: 0.495687 - AUC: 1.000000
[Metrics] logloss: 0.521538 - AUC: 1.000000


Testing: 0it [00:00, ?it/s]

[Metrics] logloss: 0.495687 - AUC: 1.000000
[Metrics] logloss: 0.521538 - AUC: 1.000000
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.5050)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(1.), 'logloss': tensor(0.5050)}}]