In [1]:
from omegaconf import OmegaConf


# Encoder: names of the embedding layer and of the backbone.
encoder_config = OmegaConf.create(
    dict(
        embedding=dict(name="FeatureEmbedding"),
        backbone=dict(name="FTTransformerBackbone"),
    )
)

# Model settings (learning rate, scheduler, ...); only the model name is set here.
model_config = OmegaConf.create(dict(name="MLPHeadModel"))

# Trainer settings (number of epochs, GPU count, ...).
# Marked as "not necessary" by the original author.
trainer_config = OmegaConf.create(dict(gpus=0, max_epochs=1))

In [2]:
import os
import sys

# Put the parent directory on the import path so `deep_table` can be imported
# from where this notebook lives.
sys.path.append(os.path.abspath(".."))

from deep_table.data.data_module import TabularDatamodule
from deep_table.data.datasets import Adult


# Load the Adult dataset (files are stored under ../data) and fetch its
# processed dataframes, indexed below by "train" / "val" / "test".
adult_dataset = Adult(root="../data")
adult_dataframes = adult_dataset.processed_dataframes()

# Hand the three splits plus the dataset's own metadata to the datamodule.
datamodule = TabularDatamodule(
    **{split: adult_dataframes[split] for split in ("train", "val", "test")},
    task=adult_dataset.task,
    dim_out=adult_dataset.dim_out,
    categorical_columns=adult_dataset.categorical_columns,
    continuous_columns=adult_dataset.continuous_columns,
    target=adult_dataset.target_columns,
    num_categories=adult_dataset.num_categories,
)


Downloading https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data
Using downloaded and verified file: ../data/Adult/raw/adult.data

Downloading https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test
Using downloaded and verified file: ../data/Adult/raw/adult.test



In [3]:
from deep_table.estimators.base import Estimator
from deep_table.utils import get_scores


# Build an estimator from the encoder / model / trainer configs defined in the
# first cell and fit it on the Adult datamodule.
estimator = Estimator(encoder_config, model_config, trainer_config)
estimator.fit(datamodule)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn(
Global seed set to 0

  | Name    | Type              | Params
----------------------------------------------
0 | encoder | Encoder           | 30.8 K
1 | mlp     | Sequential        | 4.6 K 
2 | loss    | BCEWithLogitsLoss | 0     
----------------------------------------------
35.4 K    Trainable params
0         Non-trainable params
35.4 K    Total params
0.142     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 0


Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [5]:
# Predict on the test split, then score those predictions against a
# dataloader over the same split.
predict = estimator.predict(datamodule.dataloader(split="test"))
get_scores(
    predict,
    target=datamodule.dataloader(split="test"),
    task="binary",
)

Predicting: 191it [00:00, ?it/s]

{'accuracy': 0.8492107364412506,
 'AUC': 0.9056332241226922,
 'F1 score': 0.9063656127235974,
 'cross_entropy': 0.3209269697953147}

In [7]:
# Pretraining stage: same encoder and trainer configs as before, with the
# model config swapped for SAINTPretrainModel.
pretrain_model_config = OmegaConf.create(dict(name="SAINTPretrainModel"))
pretrain_model = Estimator(encoder_config, pretrain_model_config, trainer_config)
pretrain_model.fit(datamodule)

# Second stage: a new MLPHeadModel estimator, fitted with the pretrained
# estimator passed via `from_pretrained`.
# NOTE: this rebinds `estimator`, replacing the one created in the earlier cell.
estimator = Estimator(encoder_config, model_config, trainer_config)
estimator.fit(datamodule, from_pretrained=pretrain_model)


GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn(
Global seed set to 0

  | Name               | Type             | Params
--------------------------------------------------------
0 | encoder            | Encoder          | 30.8 K
1 | cutmix             | Cutmix           | 0     
2 | mixup              | Mixup            | 0     
3 | g1                 | SimpleMLPLayer   | 62.2 K
4 | g2                 | SimpleMLPLayer   | 62.2 K
5 | feature_wise_mlp   | ModuleList       | 1.2 M 
6 | contranstive_loss  | InfoNCELoss      | 0     
7 | mse_loss           | MSELoss          | 0     
8 | cross_entropy_loss | CrossEntropyLoss | 0     
--------------------------------------------------------
1.3 M     Trainable params
0         Non-trainable params
1.3 M     Total params
5.347     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 0


Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Global seed set to 0

  | Name    | Type              | Params
----------------------------------------------
0 | encoder | Encoder           | 30.8 K
1 | mlp     | Sequential        | 4.6 K 
2 | loss    | BCEWithLogitsLoss | 0     
----------------------------------------------
35.4 K    Trainable params
0         Non-trainable params
35.4 K    Total params
0.142     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 0


Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [8]:
# Score the estimator fitted with `from_pretrained` on the test split, using
# the same calls as the earlier evaluation cell.
predict = estimator.predict(datamodule.dataloader(split="test"))
get_scores(
    predict,
    target=datamodule.dataloader(split="test"),
    task="binary",
)

Predicting: 191it [00:00, ?it/s]

{'accuracy': 0.8535716479331736,
 'AUC': 0.9061936735611765,
 'F1 score': 0.9071867943626878,
 'cross_entropy': 0.3190468799196811}