Bugfixes #123

Merged: 53 commits, Sep 9, 2023

Commits (53)
2c1412e
changed logging
D1MK4real Jul 19, 2023
bb95575
lint fix
D1MK4real Jul 19, 2023
3d76b96
changed logging
D1MK4real Jul 19, 2023
697ebfd
Added timm cv-library, bug-fix in multilabel linear model, added soft…
D1MK4real Jul 25, 2023
7f2616d
Merge branch 'bugfix/blender_weights' into NODE
D1MK4real Jul 31, 2023
af4eb74
Merge branch 'master' into timm_cv
dev-rinchin Aug 2, 2023
a9d1466
Added NODE neural network, added NODE example with tunning params
D1MK4real Aug 3, 2023
0608183
Merge branch 'master' into timm_cv
D1MK4real Aug 3, 2023
6415c28
added tutorial run
D1MK4real Aug 3, 2023
a2680de
new example
D1MK4real Aug 3, 2023
cc269dd
Merge branch 'master' into NODE
D1MK4real Aug 3, 2023
1f4b29d
chnged autonlp params
D1MK4real Aug 7, 2023
12c54b6
Merge branch 'timm_cv' into autoint++
D1MK4real Aug 8, 2023
81c444f
add autoint
D1MK4real Aug 10, 2023
eaa9001
added flatten versions of embeddings
D1MK4real Aug 10, 2023
294383f
not done still
D1MK4real Aug 15, 2023
984f4b0
not done still
D1MK4real Aug 15, 2023
fbc5076
not done still
D1MK4real Aug 15, 2023
3a62edb
-Lambda + MP
D1MK4real Aug 15, 2023
dc003fa
changed on comments
D1MK4real Aug 15, 2023
e7c9c91
Merge branch 'NODE' into autoint++
D1MK4real Aug 15, 2023
32bae0a
changes on comments
D1MK4real Aug 15, 2023
ade7c46
Merge branch 'timm_cv' into autoint++
D1MK4real Aug 15, 2023
a58df59
added changes on comments
D1MK4real Aug 15, 2023
4038d9e
resolve merge conflicts
D1MK4real Aug 15, 2023
788d381
resolve merge conflicts
D1MK4real Aug 15, 2023
f6fdb58
resolve merge conflicts
D1MK4real Aug 15, 2023
a7fe9be
PLR + SOFTEmb
D1MK4real Aug 16, 2023
f58b4c5
no-changes
D1MK4real Aug 28, 2023
e4e56ea
resolve merge conflicts
D1MK4real Aug 29, 2023
a50a90f
beautiful CV tutorial
D1MK4real Aug 29, 2023
c66b3c2
added some changes on comments
D1MK4real Aug 29, 2023
6b8648e
resolve mc
D1MK4real Aug 29, 2023
d045b8e
removed useless function
D1MK4real Aug 29, 2023
eb12a7f
Merge branch 'autoint++' into tabnet
D1MK4real Aug 29, 2023
382b385
removed for-for
D1MK4real Aug 29, 2023
4955b2e
WeightedEmbedder bugfix
D1MK4real Aug 30, 2023
6b332d5
Merge branch 'autoint++' into tabnet
D1MK4real Aug 30, 2023
1bdf9d5
delete unused import
D1MK4real Aug 30, 2023
b51e4de
changed link
D1MK4real Aug 30, 2023
439ed48
Merge branch 'autoint++' into tabnet
D1MK4real Aug 30, 2023
6d4a74e
add tabnet/plr/softemb
D1MK4real Aug 30, 2023
2557c4c
bugfix
D1MK4real Sep 1, 2023
42fd85f
changed import links
D1MK4real Sep 1, 2023
98aa0c7
changed import links
D1MK4real Sep 1, 2023
b462b01
resolve merge conflicts
D1MK4real Sep 6, 2023
7a8bf65
changed import links
D1MK4real Sep 6, 2023
94fdd76
bugfix
D1MK4real Sep 6, 2023
4e1aa5c
bugfix
D1MK4real Sep 6, 2023
1c4170e
some new changes
D1MK4real Sep 7, 2023
2035113
now we dont count VC for cat features for every embedding
D1MK4real Sep 7, 2023
0afe07f
no embedder bugfix
D1MK4real Sep 8, 2023
39beb9e
scheduler params
D1MK4real Sep 8, 2023
3 changes: 2 additions & 1 deletion lightautoml/automl/presets/base.py
@@ -37,8 +37,9 @@ def upd_params(old: dict, new: dict) -> dict:
Updated parameters.

"""
not_updatable_params = ["scheduler_params"]
for k in new:
if type(new[k]) is dict and k in old and type(old[k]) is dict:
if type(new[k]) is dict and k in old and type(old[k]) is dict and k not in not_updatable_params:
upd_params(old[k], new[k])
else:
old[k] = new[k]
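The behavioural effect of the base.py change above, as a minimal standalone sketch (the function and variable names here are illustrative, not the library API): keys listed as not updatable are replaced wholesale instead of being deep-merged, so a user-supplied scheduler_params no longer inherits leftover defaults.

NOT_UPDATABLE = ["scheduler_params"]  # mirrors not_updatable_params in the hunk above

def upd_params_sketch(old: dict, new: dict) -> dict:
    """Deep-merge nested dicts, except keys that must be replaced as a whole."""
    for k in new:
        if isinstance(new[k], dict) and isinstance(old.get(k), dict) and k not in NOT_UPDATABLE:
            upd_params_sketch(old[k], new[k])  # ordinary nested dicts: merge recursively
        else:
            old[k] = new[k]  # scalars and "not updatable" dicts: overwrite entirely
    return old

defaults = {"sch": "ReduceLROnPlateau", "scheduler_params": {"patience": 5, "factor": 0.5}}
user = {"scheduler_params": {"patience": 10}}
print(upd_params_sketch(defaults, user))
# {'sch': 'ReduceLROnPlateau', 'scheduler_params': {'patience': 10}}
# note that 'factor' is dropped: the user dict replaces the default dict wholesale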
2 changes: 1 addition & 1 deletion lightautoml/automl/presets/tabular_config.yml
@@ -184,7 +184,7 @@ nn_params:
# scheduler
sch: ReduceLROnPlateau
# params of ReduceLROnPlateau scheduler
scheduler_params: {} #{ 'patience': 5, 'factor': 0.5, 'min_lr': 0.00001 }
scheduler_params: { 'patience': 5, 'factor': 0.5, 'min_lr': 0.00001 }
# using snapshot ensembles
# https://arxiv.org/abs/1704.00109
is_snap: false
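For reference, a small sketch of how the new default scheduler_params would typically be passed to PyTorch's ReduceLROnPlateau; the model and optimizer below are placeholders, not LightAutoML internals.

import torch

model = torch.nn.Linear(10, 1)                              # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, patience=5, factor=0.5, min_lr=0.00001       # the yml defaults above
)
# during training: scheduler.step(val_loss) halves the learning rate after
# 5 epochs without improvement, but never drops it below min_lr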
4 changes: 3 additions & 1 deletion lightautoml/ml_algo/base.py
@@ -240,7 +240,9 @@ def fit_predict(self, train_valid_iterator: TrainValidIterator) -> NumpyDataset:
iterator_len = len(train_valid_iterator)
if iterator_len > 1:
logger.info("Start fitting \x1b[1m{}\x1b[0m ...".format(self._name))
logger.debug(f"Training params: {self.params}")
stop_params = ["cat_features", "cont_features", "cat_dims", "cat_vc"]
printable_params = {key: value for key, value in self.params.items() if key not in stop_params}
logger.debug(f"Training params: {printable_params}")

# save features names
self._features = train_valid_iterator.features
31 changes: 20 additions & 11 deletions lightautoml/ml_algo/dl_model.py
@@ -115,7 +115,11 @@
"cat_no_dropout": BasicCatEmbeddingFlat,
"weighted": WeightedCatEmbeddingFlat,
}
cat_embedder_by_name = {"cat_no_dropout": BasicCatEmbedding, "weighted": WeightedCatEmbedding}
cat_embedder_by_name = {
"cat_no_dropout": BasicCatEmbedding,
"cat_no_dropout": BasicCatEmbedding,
"weighted": WeightedCatEmbedding,
}

cont_embedder_by_name_flat = {
"cont": ContEmbedder,
@@ -124,9 +128,13 @@
"plr": PLREmbeddingFlat,
"soft": SoftEmbeddingFlat,
}
cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding, "plr": PLREmbedding, "soft": SoftEmbedding}
cont_embedder_by_name_flat = {"cont": ContEmbedder, "linear": LinearEmbeddingFlat, "dense": DenseEmbeddingFlat}
cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding}
cont_embedder_by_name = {
"cont": LinearEmbedding,
"linear": LinearEmbedding,
"dense": DenseEmbedding,
"plr": PLREmbedding,
"soft": SoftEmbedding,
}


class TorchModel(TabularMLAlgo):
@@ -301,7 +309,7 @@ def _infer_params(self):
net_params={
"task": self.task,
"cont_embedder_": cont_embedder_by_name.get(params["cont_embedder"], LinearEmbedding)
if input_type_by_name[params["model"]] == "seq"
if input_type_by_name[params["model"]] == "seq" and is_cont
else cont_embedder_by_name_flat.get(params["cont_embedder"], ContEmbedder)
if is_cont
else None,
@@ -314,7 +322,7 @@ def _infer_params(self):
if is_cont
else None,
"cat_embedder_": cat_embedder_by_name.get(params["cat_embedder"], BasicCatEmbedding)
if input_type_by_name[params["model"]] == "seq"
if input_type_by_name[params["model"]] == "seq" and is_cat
else cat_embedder_by_name_flat.get(params["cat_embedder"], CatEmbedder)
if is_cat
else None,
@@ -423,11 +431,12 @@ def _init_params_on_input(self, train_valid_iterator) -> dict:
)
+ 1
)
values, counts = np.unique(
np.concatenate([train_valid_iterator.train[:, cat_feature].data, valid[:, cat_feature].data]),
return_counts=True,
)
cat_value_counts.append(dict(zip(values, counts)))
if params["cat_embedder"] == "weighted":
values, counts = np.unique(
np.concatenate([train_valid_iterator.train[:, cat_feature].data, valid[:, cat_feature].data]),
return_counts=True,
)
cat_value_counts.append(dict(zip(values, counts)))
cat_dims.append(num_unique_categories)
new_params["cat_dims"] = cat_dims
new_params["cat_vc"] = cat_value_counts
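The guard added in _init_params_on_input means per-category value counts are computed only when the weighted categorical embedder is selected. A rough standalone sketch of that idea follows; the helper is illustrative, not the library API.

import numpy as np

def cat_value_counts_if_needed(train_col, valid_col, cat_embedder: str):
    """Return {category: count} only for the 'weighted' embedder; skip the pass otherwise."""
    if cat_embedder != "weighted":
        return None  # avoids an np.unique pass over every categorical feature
    values, counts = np.unique(np.concatenate([train_col, valid_col]), return_counts=True)
    return dict(zip(values, counts))

print(cat_value_counts_if_needed(np.array([0, 1, 1]), np.array([1, 2]), "weighted"))
# counts for categories 0, 1 and 2
print(cat_value_counts_if_needed(np.array([0, 1, 1]), np.array([1, 2]), "cat"))
# None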
15 changes: 8 additions & 7 deletions lightautoml/ml_algo/torch_based/nn_models.py
@@ -154,8 +154,8 @@ def __init__(
dropout_first: bool = True,
bn_momentum: float = 0.1,
ghost_batch: Optional[int] = 64,
leaky_gate: bool = True,
use_skip: bool = True,
leaky_gate: bool = True,
weighted_sum: bool = True,
device: torch.device = torch.device("cuda:0"),
**kwargs,
@@ -180,7 +180,7 @@ def __init__(
self.features.add_module("dense0", nn.Linear(n_in, num_features))

if leaky_gate:
self.features.add_module("leakygate0", LeakyGate(n_in))
self.features.add_module("leakygate0", LeakyGate(num_features))

if dropout_first and drop_rate[0] > 0:
self.features.add_module("dropout0", nn.Dropout(drop_rate[0]))
@@ -228,7 +228,7 @@ def forward(self, X: torch.Tensor) -> torch.Tensor:
x = X
input = x.detach().clone()
for name, layer in self.features.named_children():
if name != "denseblock1" and name != "dense0" and self.concat_input:
if name not in ["dropout0", "leakygate0", "denseblock1", "dense0"] and self.concat_input:
x = torch.cat([x, input], 1)
x = layer(x)
out = self.fc(x)
@@ -976,6 +976,7 @@ def __init__(
use_skip=mlp_use_skip,
device=device,
)
self.use_skip = True
if weighted_sum:
self.mix = nn.Parameter(torch.tensor([0.0], device=device))
else:
@@ -1127,16 +1128,16 @@ def __init__(
self,
n_in,
n_out,
n_d=8,
n_a=8,
n_steps=3,
n_d=32,
n_a=32,
n_steps=1,
gamma=1.3,
n_independent=2,
n_shared=2,
epsilon=1e-15,
virtual_batch_size=128,
momentum=0.02,
mask_type="sparsemax",
mask_type="entemax",
group_attention_matrix=None,
**kwargs,
):
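The LeakyGate fix above matters because the gate sits after dense0, which maps n_in features to num_features, so the gate has to match the post-dense width. A toy sketch with a stand-in gate module (the real LeakyGate lives in the library; the class below is only an assumption for illustration):

import torch
import torch.nn as nn
import torch.nn.functional as F

class LeakyGateSketch(nn.Module):
    """Elementwise learned gate followed by a leaky ReLU (stand-in for the library class)."""

    def __init__(self, n_features: int):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(n_features))  # one gate weight per feature

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return F.leaky_relu(x * self.weight)

n_in, num_features = 16, 64
features = nn.Sequential(
    nn.Linear(n_in, num_features),   # "dense0": n_in -> num_features
    LeakyGateSketch(num_features),   # must match dense0's output width;
)                                    # LeakyGateSketch(n_in) would fail to broadcast here
print(features(torch.randn(8, n_in)).shape)  # torch.Size([8, 64])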
3 changes: 2 additions & 1 deletion lightautoml/text/embed.py
@@ -12,6 +12,7 @@
import torch.nn as nn
from torch import Tensor
import operator
import numpy as np

try:
from transformers import AutoModel
@@ -572,7 +573,7 @@ def get_out_shape(self) -> int:

def forward(self, x: Tensor) -> Tensor:
"""Forward-pass."""
x = self._cos_sin(2 * torch.pi * self.coefficients[None] * x[..., None])
x = self._cos_sin(2 * np.pi * self.coefficients[None] * x[..., None])
if self.flatten_output:
return x.view(x.shape[0], -1)
return x
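The embed.py change swaps torch.pi for np.pi inside the periodic (PLR-style) embedding; np.pi is a plain Python float, so the expression also works on torch builds that predate torch.pi. A compact sketch of the pattern, where the tensor shapes and module layout are assumptions for illustration:

import numpy as np
import torch
import torch.nn as nn

class PeriodicSketch(nn.Module):
    """Map each continuous feature to cos/sin of learned random frequencies."""

    def __init__(self, n_features: int, k: int = 4, sigma: float = 1.0):
        super().__init__()
        self.coefficients = nn.Parameter(sigma * torch.randn(n_features, k))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        z = 2 * np.pi * self.coefficients[None] * x[..., None]  # (batch, n_features, k)
        return torch.cat([torch.cos(z), torch.sin(z)], dim=-1)  # (batch, n_features, 2k)

emb = PeriodicSketch(n_features=3)
print(emb(torch.randn(5, 3)).shape)  # torch.Size([5, 3, 8])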
57 changes: 44 additions & 13 deletions lightautoml/text/nn_model.py
@@ -162,13 +162,34 @@ def __init__(
)

if bias is not None:
try:
last_layer = list(
filter(
lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential),
list(self.torch_model.children()),
)
)[-1]
self._set_last_layer(self.torch_model, bias)

self.сlump = Clump()
self.sig = nn.Sigmoid()
self.softmax = nn.Softmax(dim=1)

def _set_last_layer(self, torch_model, bias):
try:
use_skip = torch_model.use_skip
self._init_last_layers(torch_model, bias, use_skip)
except:
self._init_last_layers(torch_model, bias, False)

def _init_last_layers(self, torch_model, bias, use_skip=False):
try:
all_layers = list(torch_model.children())
layers = list(
filter(
lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential),
all_layers,
)
)
if len(layers) == 0:
last_layer = all_layers[-1]
self._set_last_layer(last_layer, bias)

else:
last_layer = layers[-1]
while isinstance(last_layer, nn.Sequential):
last_layer = list(
filter(lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential), last_layer)
@@ -177,12 +198,22 @@ def __init__(
last_layer.bias.data = bias
shape = last_layer.weight.data.shape
last_layer.weight.data = torch.zeros(shape[0], shape[1], requires_grad=True)
except:
logger.info3("Last linear layer not founded, so init_bias=False")

self.сlump = Clump()
self.sig = nn.Sigmoid()
self.softmax = nn.Softmax(dim=1)
if use_skip:
if len(layers) <= 1:
last_layer = all_layers[-2]
self._set_last_layer(last_layer, bias)
else:
pre_last_layer = layers[-2]
while isinstance(last_layer, nn.Sequential):
pre_last_layer = list(
filter(lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential), pre_last_layer)
)[-1]
bias = torch.Tensor(bias)
pre_last_layer.bias.data = bias
shape = pre_last_layer.weight.data.shape
pre_last_layer.weight.data = torch.zeros(shape[0], shape[1], requires_grad=True)
except:
logger.info3("Last linear layer not founded, so init_bias=False")

def get_logits(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor:
"""Forward-pass of model with embeddings."""
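The refactor above recursively searches the model for the last linear layer (descending into nn.Sequential containers) before seeding its bias, with an extra branch for models that expose use_skip. A simplified sketch of the core search-and-init step; the function name and fallback behaviour are assumptions, not the library API.

import torch
import torch.nn as nn

def init_last_linear_bias(model: nn.Module, bias: torch.Tensor) -> bool:
    """Find the last nn.Linear (descending into nn.Sequential) and seed its bias."""
    candidates = [m for m in model.children() if isinstance(m, (nn.Linear, nn.Sequential))]
    if not candidates:
        return False  # mirrors the logged "init_bias=False" fallback
    last = candidates[-1]
    while isinstance(last, nn.Sequential):
        inner = [m for m in last if isinstance(m, (nn.Linear, nn.Sequential))]
        if not inner:
            return False
        last = inner[-1]
    last.bias.data = bias.clone()
    last.weight.data = torch.zeros_like(last.weight.data)  # start predictions from the prior alone
    return True

net = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 2))
print(init_last_linear_bias(net, torch.tensor([0.2, -0.2])))  # True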