4 changes: 2 additions & 2 deletions examples/__only_for_dev__/to_test_node.py
@@ -55,7 +55,7 @@ def test_regression(
continuous_feature_transform=continuous_feature_transform,
normalize_continuous_features=normalize_continuous_features,
)
model_config_params = dict(task="regression", depth=2, embed_categorical=embed_categorical)
model_config_params = {"task": "regression", "depth": 2, "embed_categorical": embed_categorical}
model_config = NodeConfig(**model_config_params)
# model_config_params = dict(task="regression")
# model_config = NodeConfig(**model_config_params)
@@ -98,7 +98,7 @@ def test_classification(
continuous_feature_transform=continuous_feature_transform,
normalize_continuous_features=normalize_continuous_features,
)
model_config_params = dict(task="classification", depth=2, embed_categorical=embed_categorical)
model_config_params = {"task": "classification", "depth": 2, "embed_categorical": embed_categorical}
model_config = NodeConfig(**model_config_params)
trainer_config = TrainerConfig(max_epochs=1, checkpoints=None, early_stopping=None)
optimizer_config = OptimizerConfig()
2 changes: 1 addition & 1 deletion examples/__only_for_dev__/to_test_regression.py
@@ -54,7 +54,7 @@
# batch_norm_continuous_input=True,
# attention_pooling=True,
# )
-model_config = CategoryEmbeddingModelConfig(task="regression", dropout=0.2, head_config=dict(layers="32-16"))
+model_config = CategoryEmbeddingModelConfig(task="regression", dropout=0.2, head_config={"layers": "32-16"})

trainer_config = TrainerConfig(
# checkpoints=None,
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -22,6 +22,12 @@ select = [
# "D", # see: https://pypi.org/project/pydocstyle
# "N", # see: https://pypi.org/project/pep8-naming
]
+extend-select = [
+"C4", # see: https://pypi.org/project/flake8-comprehensions
+# "SIM", # see: https://pypi.org/project/flake8-simplify
+# "RET", # see: https://pypi.org/project/flake8-return
+# "PT", # see: https://pypi.org/project/flake8-pytest-style
+]
ignore = [
"E731", # Do not assign a lambda expression, use a def
]
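Note: the `C4` ruleset enabled above is flake8-comprehensions, which drives most of the rewrites in this diff. A rough, illustrative sketch of the two patterns it flags most often here (the snippet below is not from the repository; the rule codes C408 and C419 are taken from the flake8-comprehensions docs):

```python
pairs = [(10, 4), (7, 2)]

# C408: dict() call with keyword arguments -> use a dict literal
config = dict(task="regression", depth=2)      # flagged
config = {"task": "regression", "depth": 2}    # preferred

# C419: list comprehension inside any()/all() -> generator expression
ok = all([len(t) == 2 for t in pairs])         # flagged: builds a throwaway list
ok = all(len(t) == 2 for t in pairs)           # preferred: lazy, can short-circuit

print(config, ok)
```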
6 changes: 3 additions & 3 deletions src/pytorch_tabular/config/config.py
@@ -231,7 +231,7 @@ class InferredConfig:
def __post_init__(self):
if self.embedding_dims is not None:
assert all(
-[(isinstance(t, Iterable) and len(t) == 2) for t in self.embedding_dims]
+(isinstance(t, Iterable) and len(t) == 2) for t in self.embedding_dims
), "embedding_dims must be a list of tuples (cardinality, embedding_dim)"
self.embedded_cat_dim = sum([t[1] for t in self.embedding_dims])
else:
@@ -468,7 +468,7 @@ class TrainerConfig:
metadata={"help": "The number of epochs to wait until there is no further improvements in loss/metric"},
)
early_stopping_kwargs: Optional[Dict[str, Any]] = field(
-default_factory=lambda: dict(),
+default_factory=lambda: {},
metadata={
"help": "Additional keyword arguments for the early stopping callback."
" See the documentation for the PyTorch Lightning EarlyStopping callback for more details."
@@ -505,7 +505,7 @@ class TrainerConfig:
metadata={"help": "The number of best models to save"},
)
checkpoints_kwargs: Optional[Dict[str, Any]] = field(
-default_factory=lambda: dict(),
+default_factory=lambda: {},
metadata={
"help": "Additional keyword arguments for the checkpoints callback. See the documentation"
" for the PyTorch Lightning ModelCheckpoint callback for more details."
2 changes: 1 addition & 1 deletion src/pytorch_tabular/feature_extractor.py
@@ -67,7 +67,7 @@ def transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
continue
batch[k] = v.to(self.tabular_model.model.device)
if self.tabular_model.config.task == "ssl":
-ret_value = dict(backbone_features=self.tabular_model.model.predict(batch, ret_model_output=True))
+ret_value = {"backbone_features": self.tabular_model.model.predict(batch, ret_model_output=True)}
else:
_, ret_value = self.tabular_model.model.predict(batch, ret_model_output=True)
for k in self.extract_keys:
2 changes: 1 addition & 1 deletion src/pytorch_tabular/models/base_model.py
@@ -486,7 +486,7 @@ def create_plotly_histogram(self, arr, name, bin_dict=None):
# Overlay both histograms
fig.update_layout(
barmode="overlay",
-legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
)
# Reduce opacity to see both histograms
fig.update_traces(opacity=0.5)
2 changes: 1 addition & 1 deletion src/pytorch_tabular/models/common/layers.py
@@ -150,7 +150,7 @@ def __init__(
def forward(self, x):
h = self.n_heads
q, k, v = self.to_qkv(x).chunk(3, dim=-1)
-q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=h), (q, k, v))
+q, k, v = (rearrange(t, "b n (h d) -> b h n d", h=h) for t in (q, k, v))
sim = einsum("b h i d, b h j d -> b h i j", q, k) * self.scale

attn = sim.softmax(dim=-1)
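The `layers.py` change above is the related "unnecessary map usage" pattern (C417 in flake8-comprehensions): a `map` over a `lambda` becomes an equivalent generator expression. A minimal, self-contained illustration of the equivalence, using plain numbers in place of the repository's tensors:

```python
values = (1, 2, 3)

# map + lambda, as in the old code (flagged as unnecessary map usage)
a, b, c = map(lambda t: t * 2, values)

# generator expression, as in the new code -- same laziness, same results
x, y, z = (t * 2 for t in values)

assert (a, b, c) == (x, y, z) == (2, 4, 6)
```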
16 changes: 8 additions & 8 deletions src/pytorch_tabular/models/ft_transformer/config.py
@@ -254,7 +254,7 @@ def __post_init__(self):
" and `out_ff_initialization` as head_config is passed."
)
else:
-if any([p is not None for p in deprecated_args]):
+if any(p is not None for p in deprecated_args):
warnings.warn(
"The `out_ff_layers`, `out_ff_activation`, `out_ff_dropoout`, and `out_ff_initialization`"
" arguments are deprecated and will be removed next release."
@@ -263,13 +263,13 @@ def __post_init__(self):
)
# TODO: Remove this once we deprecate the old config
# Fill the head_config using deprecated parameters
-self.head_config = dict(
-layers=ifnone(self.out_ff_layers, ""),
-activation=ifnone(self.out_ff_activation, "ReLU"),
-dropout=ifnone(self.out_ff_dropout, 0.0),
-use_batch_norm=False,
-initialization=ifnone(self.out_ff_initialization, "kaiming"),
-)
+self.head_config = {
+"layers": ifnone(self.out_ff_layers, ""),
+"activation": ifnone(self.out_ff_activation, "ReLU"),
+"dropout": ifnone(self.out_ff_dropout, 0.0),
+"use_batch_norm": False,
+"initialization": ifnone(self.out_ff_initialization, "kaiming"),
+}

return super().__post_init__()

6 changes: 3 additions & 3 deletions src/pytorch_tabular/models/mixture_density/mdn.py
@@ -86,8 +86,8 @@ def _build_network(self):
def forward(self, x: Dict):
if isinstance(self.backbone, TabTransformerBackbone):
if self.hparams.categorical_dim > 0:
-x_cat = self.embed_input(dict(categorical=x["categorical"]))
-x = self.compute_backbone(dict(categorical=x_cat, continuous=x["continuous"]))
+x_cat = self.embed_input({"categorical": x["categorical"]})
+x = self.compute_backbone({"categorical": x_cat, "continuous": x["continuous"]})
else:
x = self.embedding_layer(x)
x = self.compute_backbone(x)
@@ -230,7 +230,7 @@ def validation_epoch_end(self, outputs) -> None:
commit=False,
)
if self.head.hparams.log_debug_plot:
-fig = self.create_plotly_histogram(pi, "pi", bin_dict=dict(start=0.0, end=1.0, size=0.1))
+fig = self.create_plotly_histogram(pi, "pi", bin_dict={"start": 0.0, "end": 1.0, "size": 0.1})
wandb.log(
{
"valid_pi": fig,
16 changes: 8 additions & 8 deletions src/pytorch_tabular/models/tab_transformer/config.py
@@ -243,7 +243,7 @@ def __post_init__(self):
" and `out_ff_initialization` as head_config is passed."
)
else:
-if any([p is not None for p in deprecated_args]):
+if any(p is not None for p in deprecated_args):
warnings.warn(
"The `out_ff_layers`, `out_ff_activation`, `out_ff_dropoout`, and `out_ff_initialization`"
" arguments are deprecated and will be removed next release."
@@ -252,13 +252,13 @@ def __post_init__(self):
)
# TODO: Remove this once we deprecate the old config
# Fill the head_config using deprecated parameters
-self.head_config = dict(
-layers=ifnone(self.out_ff_layers, ""),
-activation=ifnone(self.out_ff_activation, "ReLU"),
-dropout=ifnone(self.out_ff_dropout, 0.0),
-use_batch_norm=False,
-initialization=ifnone(self.out_ff_initialization, "kaiming"),
-)
+self.head_config = {
+"layers": ifnone(self.out_ff_layers, ""),
+"activation": ifnone(self.out_ff_activation, "ReLU"),
+"dropout": ifnone(self.out_ff_dropout, 0.0),
+"use_batch_norm": False,
+"initialization": ifnone(self.out_ff_initialization, "kaiming"),
+}
return super().__post_init__()


4 changes: 2 additions & 2 deletions src/pytorch_tabular/models/tab_transformer/tab_transformer.py
@@ -114,10 +114,10 @@ def _build_network(self):
# Redefining forward because this model flow is slightly different
def forward(self, x: Dict):
if self.hparams.categorical_dim > 0:
-x_cat = self.embed_input(dict(categorical=x["categorical"]))
+x_cat = self.embed_input({"categorical": x["categorical"]})
else:
x_cat = None
-x = self.compute_backbone(dict(categorical=x_cat, continuous=x["continuous"]))
+x = self.compute_backbone({"categorical": x_cat, "continuous": x["continuous"]})
return self.compute_head(x)

# Redefining compute_backbone because this model flow is slightly different
2 changes: 1 addition & 1 deletion src/pytorch_tabular/models/tabnet/tabnet_model.py
@@ -26,7 +26,7 @@ def _build_network(self):
n_a=self.hparams.n_a,
n_steps=self.hparams.n_steps,
gamma=self.hparams.gamma,
-cat_idxs=[i for i in range(self.hparams.categorical_dim)],
+cat_idxs=list(range(self.hparams.categorical_dim)),
cat_dims=[cardinality for cardinality, _ in self.hparams.embedding_dims],
cat_emb_dim=[embed_dim for _, embed_dim in self.hparams.embedding_dims],
n_independent=self.hparams.n_independent,
2 changes: 1 addition & 1 deletion src/pytorch_tabular/ssl_models/common/heads.py
@@ -24,7 +24,7 @@ def __init__(self, in_features, n_binary=0, n_categorical=0, n_numerical=0, card
self.numerical_linear = nn.Linear(in_features, n_numerical) if n_numerical else None

def forward(self, features):
-outputs = dict()
+outputs = {}

if self.binary_linear:
outputs["binary"] = self.binary_linear(features)
2 changes: 1 addition & 1 deletion src/pytorch_tabular/ssl_models/dae/config.py
@@ -71,7 +71,7 @@ class DenoisingAutoEncoderConfig(SSLModelConfig):
)
# Union not supported by omegaconf. Currently Union[float, Dict[str, float]]
noise_probabilities: Dict[str, float] = field(
-default_factory=lambda: dict(),
+default_factory=lambda: {},
metadata={
"help": "Dict of individual probabilities to corrupt the input features with swap/zero noise."
" Key should be the feature name and if any feature is missing,"
2 changes: 1 addition & 1 deletion src/pytorch_tabular/ssl_models/dae/dae.py
@@ -158,7 +158,7 @@ def forward(self, x: Dict):
reconstructed_in = self.reconstruction(z_hat)
# mask reconstruction
reconstructed_mask = self.mask_reconstruction(z_hat)
-output_dict = dict(mask=self.output_tuple(mask, reconstructed_mask))
+output_dict = {"mask": self.output_tuple(mask, reconstructed_mask)}
if "continuous" in reconstructed_in.keys():
output_dict["continuous"] = self.output_tuple(
torch.cat(
12 changes: 6 additions & 6 deletions src/pytorch_tabular/tabular_datamodule.py
@@ -37,19 +37,19 @@ class TabularDatamodule(pl.LightningDataModule):
CONTINUOUS_TRANSFORMS = {
"quantile_uniform": {
"callable": QuantileTransformer,
"params": dict(output_distribution="uniform", random_state=None),
"params": {"output_distribution": "uniform", "random_state": None},
},
"quantile_normal": {
"callable": QuantileTransformer,
"params": dict(output_distribution="normal", random_state=None),
"params": {"output_distribution": "normal", "random_state": None},
},
"box-cox": {
"callable": PowerTransformer,
"params": dict(method="box-cox", standardize=False),
"params": {"method": "box-cox", "standardize": False},
},
"yeo-johnson": {
"callable": PowerTransformer,
"params": dict(method="yeo-johnson", standardize=False),
"params": {"method": "yeo-johnson", "standardize": False},
},
}

@@ -224,7 +224,7 @@ def _label_encode_target(self, data: pd.DataFrame, stage: str) -> pd.DataFrame:
def _target_transform(self, data: pd.DataFrame, stage: str) -> pd.DataFrame:
if self.config.task == "regression":
# target transform only for regression
-if all([col in data.columns for col in self.config.target]):
+if all(col in data.columns for col in self.config.target):
if self.do_target_transform:
if stage == "fit":
target_transforms = []
@@ -607,7 +607,7 @@ def prepare_inference_dataloader(self, df: pd.DataFrame, batch_size: Optional[in
categorical_cols=self.config.categorical_cols,
continuous_cols=self.config.continuous_cols,
embed_categorical=(not self.do_leave_one_out_encoder()),
-target=self.target if all([col in df.columns for col in self.target]) else None,
+target=self.target if all(col in df.columns for col in self.target) else None,
)
return DataLoader(
dataset,
10 changes: 5 additions & 5 deletions src/pytorch_tabular/tabular_model.py
@@ -155,8 +155,8 @@ def _run_validation(self):
if self.config.target_range is not None:
if (
(len(self.config.target_range) != len(self.config.target))
-or any([len(range_) != 2 for range_ in self.config.target_range])
-or any([range_[0] > range_[1] for range_ in self.config.target_range])
+or any(len(range_) != 2 for range_ in self.config.target_range)
+or any(range_[0] > range_[1] for range_ in self.config.target_range)
):
raise ValueError(
"Targe Range, if defined, should be list tuples of length two(min,max)."
@@ -1159,7 +1159,7 @@ def predict(
" Please set it explicitly.",
DeprecationWarning,
)
-assert all([q <= 1 and q >= 0 for q in quantiles]), "Quantiles should be a decimal between 0 and 1"
+assert all(q <= 1 and q >= 0 for q in quantiles), "Quantiles should be a decimal between 0 and 1"
if device is not None:
if isinstance(device, str):
device = torch.device(device)
@@ -1328,7 +1328,7 @@ def save_model_for_inference(
self,
path: Union[str, Path],
kind: str = "pytorch",
-onnx_export_params: Dict = dict(opset_version=12),
+onnx_export_params: Dict = {"opset_version": 12},
) -> bool:
"""Saves the model for inference.

@@ -1362,7 +1362,7 @@ def save_model_for_inference(
len(self.config.continuous_cols),
requires_grad=True,
)
-x = dict(continuous=cont, categorical=cat)
+x = {"continuous": cont, "categorical": cat}
torch.onnx.export(self.model, x, str(path), **onnx_export_params)
return True
else:
2 changes: 1 addition & 1 deletion src/pytorch_tabular/utils.py
@@ -34,7 +34,7 @@ def get_logger(name):


def _make_smooth_weights_for_balanced_classes(y_train, mu=1.0):
-labels_dict = {label: count for label, count in zip(np.unique(y_train), np.bincount(y_train))}
+labels_dict = dict(zip(np.unique(y_train), np.bincount(y_train)))
total = np.sum(list(labels_dict.values()))
keys = sorted(labels_dict.keys())
weight = []
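The `utils.py` change is the same idea applied to a dict comprehension: when a comprehension only re-pairs the output of `zip`, `dict(zip(...))` is the direct equivalent. A small sketch with plain lists standing in for the numpy arrays used in the repository:

```python
labels = ["cat", "dog"]
counts = [3, 5]

labels_dict = {label: count for label, count in zip(labels, counts)}  # flagged as unnecessary
labels_dict = dict(zip(labels, counts))                               # equivalent and shorter

assert labels_dict == {"cat": 3, "dog": 5}
```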
4 changes: 2 additions & 2 deletions tests/test_autoint.py
@@ -42,7 +42,7 @@ def test_regression(
continuous_feature_transform=continuous_feature_transform,
normalize_continuous_features=normalize_continuous_features,
)
model_config_params = dict(task="regression")
model_config_params = {"task": "regression"}
if target_range:
_target_range = []
for target in data_config.target:
@@ -112,7 +112,7 @@ def test_classification(
continuous_feature_transform=continuous_feature_transform,
normalize_continuous_features=normalize_continuous_features,
)
model_config_params = dict(task="classification")
model_config_params = {"task": "classification"}
model_config_params["deep_layers"] = deep_layers
model_config_params["batch_norm_continuous_input"] = batch_norm_continuous_input
model_config = AutoIntConfig(**model_config_params)
10 changes: 5 additions & 5 deletions tests/test_categorical_embedding.py
@@ -73,7 +73,7 @@ def test_regression(
continuous_feature_transform=continuous_feature_transform,
normalize_continuous_features=normalize_continuous_features,
)
model_config_params = dict(task="regression")
model_config_params = {"task": "regression"}
if target_range:
_target_range = []
for target in data_config.target:
@@ -86,7 +86,7 @@ def test_regression(
model_config_params["target_range"] = _target_range
if custom_head_config is not None:
model_config_params["head"] = "LinearHead"
model_config_params["head_config"] = dict(layers=custom_head_config)
model_config_params["head_config"] = {"layers": custom_head_config}
model_config = CategoryEmbeddingModelConfig(**model_config_params)
trainer_config = TrainerConfig(
max_epochs=3,
@@ -152,7 +152,7 @@ def test_classification(
continuous_feature_transform=continuous_feature_transform,
normalize_continuous_features=normalize_continuous_features,
)
model_config_params = dict(task="classification")
model_config_params = {"task": "classification"}
model_config = CategoryEmbeddingModelConfig(**model_config_params)
trainer_config = TrainerConfig(
max_epochs=3,
@@ -192,7 +192,7 @@ def test_embedding_transformer(regression_data):
],
categorical_cols=["HouseAgeBin"],
)
model_config_params = dict(task="regression")
model_config_params = {"task": "regression"}
model_config = CategoryEmbeddingModelConfig(**model_config_params)
trainer_config = TrainerConfig(
max_epochs=1,
@@ -215,4 +215,4 @@ def test_embedding_transformer(regression_data):
train_transform = transformer.fit_transform(train)
embed_cols = [col for col in train_transform.columns if "HouseAgeBin_embed_dim" in col]
assert len(train["HouseAgeBin"].unique()) + 1 == len(transformer._mapping["HouseAgeBin"].keys())
-assert all([val.shape[0] == len(embed_cols) for val in transformer._mapping["HouseAgeBin"].values()])
+assert all(val.shape[0] == len(embed_cols) for val in transformer._mapping["HouseAgeBin"].values())