In [1]:
#pip install seaborn
import warnings
warnings.filterwarnings('ignore')
from evaluation.generalevaluator import *
from modelsdefinition.GATE import GATE
from factory import create_data_loader
import pandas as pd

# pip install pytorch_tabular[extra]
from pytorch_tabular import TabularModel
from pytorch_tabular.config import (
    DataConfig,  # ExperimentConfig,
    OptimizerConfig,
    TrainerConfig,
)
from pytorch_tabular.models import GatedAdditiveTreeEnsembleConfig


In [44]:
data_loader = create_data_loader('ageconditions', test_size=0.2, normalize_features = "mean_std", return_extra_info = True)
X_train, X_val, y_train, y_val, extra_info = data_loader.load_data()

0 Index([], dtype='object')


In [6]:
model = TabularModel.load_model(dir = "../output/modelsaves/ageconditions/GATE/202307-1622-2744-6b41faa3-9de4-4bf3-9e73-62cac06161ec", strict = False)

2023-07-16 22:29:46,508 - {pytorch_tabular.tabular_model:129} - INFO - Experiment Tracking is turned off
2023-07-16 22:29:46,512 - {pytorch_tabular.tabular_model:268} - INFO - Preparing the Trainer
  rank_zero_deprecation(
Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.rich_model_summary.RichModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


# GATE Tests

In [3]:
help(GatedAdditiveTreeEnsembleConfig)

Help on class GatedAdditiveTreeEnsembleConfig in module pytorch_tabular.models.gate.config:

class GatedAdditiveTreeEnsembleConfig(pytorch_tabular.config.config.ModelConfig)
 |  GatedAdditiveTreeEnsembleConfig(task: str, head: Optional[str] = 'LinearHead', head_config: Optional[Dict] = <factory>, embedding_dims: Optional[List] = None, embedding_dropout: float = 0.0, batch_norm_continuous_input: bool = True, learning_rate: float = 0.001, loss: Optional[str] = None, metrics: Optional[List[str]] = None, metrics_prob_input: Optional[List[bool]] = None, metrics_params: Optional[List] = None, target_range: Optional[List] = None, seed: int = 42, _module_src: str = 'models.gate', _model_name: str = 'GatedAdditiveTreeEnsembleModel', _backbone_name: str = 'GatedAdditiveTreesBackbone', _config_name: str = 'GatedAdditiveTreeEnsembleConfig', gflu_stages: int = 6, gflu_dropout: float = 0.0, tree_depth: int = 4, num_trees: int = 10, binning_activation: str = 'sparsemoid', feature_mask_function: str =

In [4]:
help(OptimizerConfig)

Help on class OptimizerConfig in module pytorch_tabular.config.config:

class OptimizerConfig(builtins.object)
 |  OptimizerConfig(optimizer: str = 'Adam', optimizer_params: Dict = <factory>, lr_scheduler: Optional[str] = None, lr_scheduler_params: Optional[Dict] = <factory>, lr_scheduler_monitor_metric: Optional[str] = 'valid_loss') -> None
 |  
 |  Optimizer and Learning Rate Scheduler configuration.
 |  Args:
 |      optimizer (str): Any of the standard optimizers from
 |              [torch.optim](https://pytorch.org/docs/stable/optim.html#algorithms).
 |  
 |      optimizer_params (Dict): The parameters for the optimizer. If left blank, will use default
 |              parameters.
 |  
 |      lr_scheduler (Optional[str]): The name of the LearningRateScheduler to use, if any, from
 |              [torch.optim.lr_scheduler](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-
 |              rate). If None, will not use any scheduler. Defaults to `None`
 |  
 |      l

In [5]:
help(TrainerConfig)

Help on class TrainerConfig in module pytorch_tabular.config.config:

class TrainerConfig(builtins.object)
 |  TrainerConfig(batch_size: int = 64, data_aware_init_batch_size: int = 2000, fast_dev_run: bool = False, max_epochs: int = 10, min_epochs: Optional[int] = 1, max_time: Optional[int] = None, gpus: Optional[int] = None, accelerator: Optional[str] = 'auto', devices: Optional[int] = None, devices_list: Optional[List[int]] = None, accumulate_grad_batches: int = 1, auto_lr_find: bool = False, auto_select_gpus: bool = True, check_val_every_n_epoch: int = 1, gradient_clip_val: float = 0.0, overfit_batches: float = 0.0, deterministic: bool = False, profiler: Optional[str] = None, early_stopping: Optional[str] = 'valid_loss', early_stopping_min_delta: float = 0.001, early_stopping_mode: str = 'min', early_stopping_patience: int = 3, early_stopping_kwargs: Optional[Dict[str, Any]] = <factory>, checkpoints: Optional[str] = 'valid_loss', checkpoints_path: str = 'saved_models', checkpoints_e

In [59]:
import torch.optim as optim
data_config = DataConfig(
    target=['target'],
    continuous_cols= [i for i in extra_info["num_col_names"] if i != "target"],
    categorical_cols=extra_info["cat_col_names"],
    #num_workers = 4
)
trainer_config = TrainerConfig(
    auto_lr_find=False, # Runs the LRFinder to automatically derive a learning rate
    batch_size=100,
    max_epochs=10,
    early_stopping="valid_accuracy", # Monitor valid_loss for early stopping
    early_stopping_mode = "min", # Set the mode as min because for val_loss, lower is better
    early_stopping_patience=2, # No. of epochs of degradation training will wait before terminating
    checkpoints="valid_loss", # Save best checkpoint monitoring val_loss
    load_best=True, # After training, load the best checkpoint
)

optimizer_config = OptimizerConfig(
    optimizer="Adam",
    optimizer_params={
        "weight_decay": 0.001,
        #"lr":0.001
    },
    lr_scheduler="ReduceLROnPlateau",
    lr_scheduler_params={
        "mode": "min",
        "factor": 0.1,
        "patience": 5,
        "verbose": True
    },
    lr_scheduler_monitor_metric="valid_loss"
)

model_config = GatedAdditiveTreeEnsembleConfig(
    task="classification",
    tree_depth  =  5,
    num_trees   =  5,
    chain_trees = False, # akin to bagging, True is akin to boosting
    gflu_stages =  2,
    learning_rate = 0.001,
    metrics=['accuracy', "auroc"],
    metrics_params=[dict(task="multiclass", num_classes=2), dict(task="multiclass", num_classes=2)]
)

tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)

In [60]:
# Calculate the weighted loss
from pytorch_tabular.utils import get_class_weighted_cross_entropy
weighted_loss = get_class_weighted_cross_entropy(y_train.values.ravel(), mu =1.4)



In [61]:
X_train.shape

(493, 56)

In [62]:
y_train

99     0
256    1
577    0
76     1
599    0
      ..
200    0
450    0
606    0
149    0
471    1
Name: target, Length: 493, dtype: int64

In [63]:
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)

# Merge X_train and y_train  
train = pd.concat([X_train, y_train], axis=1)

# Merge X_val and y_val
validation = pd.concat([X_val, y_val], axis=1)

tabular_model.fit(
    train=train, 
    validation=validation,
    loss=weighted_loss)

Global seed set to 42


Auto select gpus: [0]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

ValueError: Expected `preds` to have one more dimension than `target` but got 1 and 1

In [23]:
train

Unnamed: 0,AB,AF,AH,AM,AR,AX,AY,AZ,BC,BD,...,FL,FR,FS,GB,GE,GF,GH,GI,GL,target
99,-0.632923,-0.949501,-0.263144,-0.354858,-0.201277,0.083184,-0.030176,-0.258262,-0.039161,0.013384,...,-0.438346,-0.036026,0.194507,-0.106732,1.054971,0.004009,-0.481870,1.424759,1.316702,0
256,0.110749,0.717663,-0.062110,-0.454650,-0.201277,0.148114,0.019607,-0.416150,0.076005,-0.831181,...,-0.438346,-0.063017,-0.185328,-0.521587,-0.394262,-0.714218,-0.799016,-1.143319,1.316702,1
577,-0.537802,-0.476063,-0.263144,-0.293009,-0.201277,-0.767734,0.098212,-0.691327,-0.071581,-0.446374,...,-0.438346,-0.063017,-0.255668,-0.920874,-0.394262,-0.163882,0.413069,-0.676792,1.316702,0
76,0.326933,1.097547,-0.130946,3.038008,2.391755,-0.487512,-0.082579,0.152247,-0.103421,-0.135086,...,1.073151,-0.057340,-0.213464,-0.444661,-0.143927,-0.741026,-0.242316,2.795991,-0.821683,1
599,1.260848,-0.085977,0.025847,0.100712,-0.201277,1.026370,-0.082579,-0.156011,-0.103421,0.383064,...,-0.416992,-0.051529,0.883837,-0.011489,-0.394262,-0.026810,1.177307,-0.804475,-0.669534,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-0.312971,0.905750,0.238654,-0.341811,-0.201277,-0.183368,-0.082579,1.110102,-0.103421,0.710945,...,-0.438346,-0.052274,-0.255668,0.342008,0.060801,-0.465036,-0.315387,-0.137747,1.316702,0
450,-0.468624,1.558569,-0.263144,-0.457458,-0.201277,-0.883923,-0.082579,-1.728878,-0.103421,-0.980172,...,-0.438346,-0.052150,-0.255668,-1.236824,-0.394262,0.023072,0.517403,-1.380618,1.316702,0
606,-0.010314,-0.197598,-0.263144,-0.133638,-0.201277,-0.870254,-0.082579,0.504112,-0.103421,0.108919,...,-0.061630,-0.048342,-0.068095,0.690926,-0.394262,0.263109,-1.005424,-1.129180,-0.796277,0
149,-0.615629,1.832286,-0.263144,-0.274264,-0.201277,0.011420,-0.082579,0.347728,-0.103421,-0.098937,...,-0.217110,-0.046168,0.142924,-0.114974,0.078609,-0.761871,-0.587711,1.771031,-0.764117,0


In [14]:
y_true = validation["target"]
y_pred = tabular_model.predict(validation)["prediction"]
evaluator = Evaluator(
                y_true=y_true,
                y_pred=y_pred,
                run_metrics=["mse", "f1", "accuracy"],
                metric="mse",
                problem_type="multiclass_classification",
            )
output_metrics = evaluator.evaluate_model()

Output()

In [15]:
output_metrics

{'mse': 0.0, 'accuracy': 1.0, 'f1': 1.0}