Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ci:

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: end-of-file-fixer
exclude: "setup.cfg"
Expand Down Expand Up @@ -48,15 +48,15 @@ repos:
)

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.2
rev: v0.6.4
hooks:
- id: ruff
args: ["--fix"]
- id: ruff-format
- id: ruff

- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
rev: v3.1.0
hooks:
- id: prettier
files: \.(json|yml|yaml|toml)
Expand Down
112 changes: 61 additions & 51 deletions examples/PyTorch Tabular with Bank Marketing Dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.datasets import fetch_openml\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, log_loss"
"from sklearn.metrics import accuracy_score, log_loss\n",
"from sklearn.model_selection import train_test_split"
]
},
{
Expand Down Expand Up @@ -55,12 +54,23 @@
"metadata": {},
"outputs": [],
"source": [
"cat_cols = ['job', 'marital', 'education', 'default', 'housing',\n",
" 'loan', 'contact', 'day', 'month', 'campaign',\n",
" 'previous', 'poutcome']\n",
"cat_cols = [\n",
" \"job\",\n",
" \"marital\",\n",
" \"education\",\n",
" \"default\",\n",
" \"housing\",\n",
" \"loan\",\n",
" \"contact\",\n",
" \"day\",\n",
" \"month\",\n",
" \"campaign\",\n",
" \"previous\",\n",
" \"poutcome\",\n",
"]\n",
"\n",
"num_cols = ['age', 'balance', 'duration', 'pdays']\n",
"target=[\"y\"]"
"num_cols = [\"age\", \"balance\", \"duration\", \"pdays\"]\n",
"target = [\"y\"]"
]
},
{
Expand Down Expand Up @@ -96,8 +106,8 @@
"test_enc = test.copy()\n",
"for col in cat_cols:\n",
" enc = OrdinalEncoder(handle_unknown=\"use_encoded_value\", encoded_missing_value=np.nan, unknown_value=np.nan)\n",
" train_enc[col] = enc.fit_transform(train_enc[col].values.reshape(-1,1))\n",
" test_enc[col] = enc.transform(test_enc[col].values.reshape(-1,1))"
" train_enc[col] = enc.fit_transform(train_enc[col].values.reshape(-1, 1))\n",
" test_enc[col] = enc.transform(test_enc[col].values.reshape(-1, 1))"
]
},
{
Expand Down Expand Up @@ -153,15 +163,15 @@
"outputs": [],
"source": [
"from pytorch_tabular import TabularModel\n",
"from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig\n",
"from pytorch_tabular.models import (\n",
" CategoryEmbeddingModelConfig, \n",
" FTTransformerConfig, \n",
" TabNetModelConfig, \n",
" GatedAdditiveTreeEnsembleConfig, \n",
" TabTransformerConfig, \n",
" AutoIntConfig\n",
" AutoIntConfig,\n",
" CategoryEmbeddingModelConfig,\n",
" FTTransformerConfig,\n",
" GatedAdditiveTreeEnsembleConfig,\n",
" TabNetModelConfig,\n",
" TabTransformerConfig,\n",
")\n",
"from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig\n",
"from pytorch_tabular.models.common.heads import LinearHeadConfig"
]
},
Expand All @@ -183,29 +193,29 @@
"outputs": [],
"source": [
"data_config = DataConfig(\n",
" target=target, #target should always be a list.\n",
" target=target, # target should always be a list.\n",
" continuous_cols=num_cols,\n",
" categorical_cols=cat_cols,\n",
")\n",
"\n",
"trainer_config = TrainerConfig(\n",
"# auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate\n",
" # auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate\n",
" batch_size=256,\n",
" max_epochs=500,\n",
" early_stopping=\"valid_loss\", # Monitor valid_loss for early stopping\n",
" early_stopping_mode = \"min\", # Set the mode as min because for val_loss, lower is better\n",
" early_stopping_patience=5, # No. of epochs of degradation training will wait before terminating\n",
" checkpoints=\"valid_loss\", # Save best checkpoint monitoring val_loss\n",
" load_best=True, # After training, load the best checkpoint\n",
" early_stopping=\"valid_loss\", # Monitor valid_loss for early stopping\n",
" early_stopping_mode=\"min\", # Set the mode as min because for val_loss, lower is better\n",
" early_stopping_patience=5, # No. of epochs of degradation training will wait before terminating\n",
" checkpoints=\"valid_loss\", # Save best checkpoint monitoring val_loss\n",
" load_best=True, # After training, load the best checkpoint\n",
")\n",
"\n",
"optimizer_config = OptimizerConfig()\n",
"\n",
"head_config = LinearHeadConfig(\n",
" layers=\"\", # No additional layer in head, just a mapping layer to output_dim\n",
" layers=\"\", # No additional layer in head, just a mapping layer to output_dim\n",
" dropout=0.1,\n",
" initialization=\"kaiming\"\n",
").__dict__ # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)"
" initialization=\"kaiming\",\n",
").__dict__ # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)"
]
},
{
Expand Down Expand Up @@ -442,10 +452,10 @@
"model_config = CategoryEmbeddingModelConfig(\n",
" task=\"classification\",\n",
" layers=\"64-32\", # Number of nodes in each layer\n",
" activation=\"ReLU\", # Activation between each layers\n",
" learning_rate = 1e-3,\n",
" head = \"LinearHead\", #Linear Head\n",
" head_config = head_config, # Linear Head Config\n",
" activation=\"ReLU\", # Activation between each layers\n",
" learning_rate=1e-3,\n",
" head=\"LinearHead\", # Linear Head\n",
" head_config=head_config, # Linear Head Config\n",
")\n",
"\n",
"tabular_model = TabularModel(\n",
Expand All @@ -455,7 +465,7 @@
" trainer_config=trainer_config,\n",
")\n",
"tabular_model.fit(train=train)\n",
"tabular_model.evaluate(test)\n"
"tabular_model.evaluate(test)"
]
},
{
Expand Down Expand Up @@ -709,9 +719,9 @@
"source": [
"model_config = GatedAdditiveTreeEnsembleConfig(\n",
" task=\"classification\",\n",
" learning_rate = 1e-3,\n",
" head = \"LinearHead\", #Linear Head\n",
" head_config = head_config, # Linear Head Config\n",
" learning_rate=1e-3,\n",
" head=\"LinearHead\", # Linear Head\n",
" head_config=head_config, # Linear Head Config\n",
")\n",
"\n",
"tabular_model = TabularModel(\n",
Expand Down Expand Up @@ -983,13 +993,13 @@
"source": [
"model_config = GatedAdditiveTreeEnsembleConfig(\n",
" task=\"classification\",\n",
" learning_rate = 1e-3,\n",
" head = \"LinearHead\", #Linear Head\n",
" head_config = head_config, # Linear Head Config\n",
" learning_rate=1e-3,\n",
" head=\"LinearHead\", # Linear Head\n",
" head_config=head_config, # Linear Head Config\n",
" gflu_stages=4,\n",
" num_trees=30,\n",
" tree_depth=5,\n",
" chain_trees=False\n",
" chain_trees=False,\n",
")\n",
"\n",
"tabular_model = TabularModel(\n",
Expand Down Expand Up @@ -1265,9 +1275,9 @@
"source": [
"model_config = FTTransformerConfig(\n",
" task=\"classification\",\n",
" learning_rate = 1e-3,\n",
" head = \"LinearHead\", #Linear Head\n",
" head_config = head_config, # Linear Head Config\n",
" learning_rate=1e-3,\n",
" head=\"LinearHead\", # Linear Head\n",
" head_config=head_config, # Linear Head Config\n",
")\n",
"\n",
"tabular_model = TabularModel(\n",
Expand Down Expand Up @@ -1543,9 +1553,9 @@
"source": [
"model_config = TabTransformerConfig(\n",
" task=\"classification\",\n",
" learning_rate = 1e-3,\n",
" head = \"LinearHead\", #Linear Head\n",
" head_config = head_config, # Linear Head Config\n",
" learning_rate=1e-3,\n",
" head=\"LinearHead\", # Linear Head\n",
" head_config=head_config, # Linear Head Config\n",
")\n",
"\n",
"tabular_model = TabularModel(\n",
Expand Down Expand Up @@ -1819,9 +1829,9 @@
"source": [
"model_config = AutoIntConfig(\n",
" task=\"classification\",\n",
" learning_rate = 1e-3,\n",
" head = \"LinearHead\", #Linear Head\n",
" head_config = head_config, # Linear Head Config\n",
" learning_rate=1e-3,\n",
" head=\"LinearHead\", # Linear Head\n",
" head_config=head_config, # Linear Head Config\n",
")\n",
"\n",
"tabular_model = TabularModel(\n",
Expand Down Expand Up @@ -2095,9 +2105,9 @@
"source": [
"model_config = TabNetModelConfig(\n",
" task=\"classification\",\n",
" learning_rate = 1e-3,\n",
" head = \"LinearHead\", #Linear Head\n",
" head_config = head_config, # Linear Head Config\n",
" learning_rate=1e-3,\n",
" head=\"LinearHead\", # Linear Head\n",
" head_config=head_config, # Linear Head Config\n",
")\n",
"\n",
"tabular_model = TabularModel(\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/__only_for_dev__/to_test_classification.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split

# from torch.utils import data
from pytorch_tabular.config import DataConfig, ExperimentConfig, OptimizerConfig, TrainerConfig
Expand All @@ -9,7 +10,6 @@

# import wget
from pytorch_tabular.utils import get_class_weighted_cross_entropy
from sklearn.model_selection import train_test_split

# torch.manual_seed(0)
# np.random.seed(0)
Expand Down
3 changes: 2 additions & 1 deletion examples/__only_for_dev__/to_test_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing, fetch_covtype

from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models.node import NodeConfig
from pytorch_tabular.tabular_model import TabularModel
from sklearn.datasets import fetch_california_housing, fetch_covtype


def regression_data():
Expand Down
3 changes: 2 additions & 1 deletion examples/__only_for_dev__/to_test_regression.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import pandas as pd
import torch
from sklearn.datasets import fetch_california_housing

from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models.category_embedding.config import CategoryEmbeddingModelConfig
from pytorch_tabular.tabular_model import TabularModel
from sklearn.datasets import fetch_california_housing

# from pytorch_tabular.models.mixture_density import (
# CategoryEmbeddingMDNConfig,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import torch
import torch.nn as nn
from omegaconf import DictConfig
from sklearn.datasets import fetch_california_housing

from pytorch_tabular.config import DataConfig, ModelConfig, OptimizerConfig, TrainerConfig

# from pytorch_tabular.models.deep_gmm import (
Expand All @@ -14,7 +16,6 @@

# from pytorch_tabular.models.node import utils as utils
from pytorch_tabular.tabular_model import TabularModel
from sklearn.datasets import fetch_california_housing


@dataclass
Expand Down
3 changes: 2 additions & 1 deletion examples/covertype_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

import pandas as pd
import wget
from sklearn.model_selection import train_test_split

from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models import CategoryEmbeddingModelConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig
from pytorch_tabular.tabular_model import TabularModel
from sklearn.model_selection import train_test_split

BASE_DIR = Path.home().joinpath("data")
datafile = BASE_DIR.joinpath("covtype.data.gz")
Expand Down
3 changes: 2 additions & 1 deletion examples/covertype_classification_using_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import pandas as pd
import wget
from pytorch_tabular.tabular_model import TabularModel
from sklearn.model_selection import train_test_split

from pytorch_tabular.tabular_model import TabularModel

BASE_DIR = Path.home().joinpath("data")
datafile = BASE_DIR.joinpath("covtype.data.gz")
datafile.parent.mkdir(parents=True, exist_ok=True)
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python

"""The setup script."""

import os

from setuptools import find_packages, setup
Expand Down
1 change: 1 addition & 0 deletions src/pytorch_tabular/categorical_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# For license information, see LICENSE.TXT
# Modified https://github.com/tcassou/mlencoders/blob/master/mlencoders/base_encoder.py to suit NN encoding
"""Category Encoders."""

from pandas import DataFrame, Series, unique

try:
Expand Down
1 change: 1 addition & 0 deletions src/pytorch_tabular/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Author: Manu Joseph <manujoseph@gmail.com>
# For license information, see LICENSE.TXT
"""Config."""

import os
import re
from dataclasses import MISSING, dataclass, field
Expand Down
1 change: 1 addition & 0 deletions src/pytorch_tabular/models/autoint/autoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# For license information, see LICENSE.TXT
# Inspired by https://github.com/rixwew/pytorch-fm/blob/master/torchfm/model/afi.py
"""AutomaticFeatureInteraction Model."""

import torch
import torch.nn as nn
from omegaconf import DictConfig
Expand Down
1 change: 1 addition & 0 deletions src/pytorch_tabular/models/autoint/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Author: Manu Joseph <manujoseph@gmail.com>
# For license information, see LICENSE.TXT
"""AutomaticFeatureInteraction Config."""

from dataclasses import dataclass, field
from typing import Optional

Expand Down
1 change: 1 addition & 0 deletions src/pytorch_tabular/models/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Author: Manu Joseph <manujoseph@gmail.com>
# For license information, see LICENSE.TXT
"""Base Model."""

import importlib
import warnings
from abc import ABCMeta, abstractmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Author: Manu Joseph <manujoseph@gmail.com>
# For license information, see LICENSE.TXT
"""Category Embedding Model."""

import torch
import torch.nn as nn
from omegaconf import DictConfig
Expand Down
1 change: 1 addition & 0 deletions src/pytorch_tabular/models/category_embedding/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Author: Manu Joseph <manujoseph@gmail.com>
# For license information, see LICENSE.TXT
"""Category Embedding Model Config."""

from dataclasses import dataclass, field

from pytorch_tabular.config import ModelConfig
Expand Down
Loading