Bugfixes #123

Merged: 53 commits, Sep 9, 2023

Commits (53)
2c1412e
changed logging
D1MK4real Jul 19, 2023
bb95575
lint fix
D1MK4real Jul 19, 2023
3d76b96
changed logging
D1MK4real Jul 19, 2023
697ebfd
Added timm cv-library, bug-fix in multilabel linear model, added soft…
D1MK4real Jul 25, 2023
7f2616d
Merge branch 'bugfix/blender_weights' into NODE
D1MK4real Jul 31, 2023
af4eb74
Merge branch 'master' into timm_cv
dev-rinchin Aug 2, 2023
a9d1466
Added NODE neural network, added NODE example with tunning params
D1MK4real Aug 3, 2023
0608183
Merge branch 'master' into timm_cv
D1MK4real Aug 3, 2023
6415c28
added tutorial run
D1MK4real Aug 3, 2023
a2680de
new example
D1MK4real Aug 3, 2023
cc269dd
Merge branch 'master' into NODE
D1MK4real Aug 3, 2023
1f4b29d
chnged autonlp params
D1MK4real Aug 7, 2023
12c54b6
Merge branch 'timm_cv' into autoint++
D1MK4real Aug 8, 2023
81c444f
add autoint
D1MK4real Aug 10, 2023
eaa9001
added flatten versions of embeddings
D1MK4real Aug 10, 2023
294383f
not done still
D1MK4real Aug 15, 2023
984f4b0
not done still
D1MK4real Aug 15, 2023
fbc5076
not done still
D1MK4real Aug 15, 2023
3a62edb
-Lambda + MP
D1MK4real Aug 15, 2023
dc003fa
changed on comments
D1MK4real Aug 15, 2023
e7c9c91
Merge branch 'NODE' into autoint++
D1MK4real Aug 15, 2023
32bae0a
changes on comments
D1MK4real Aug 15, 2023
ade7c46
Merge branch 'timm_cv' into autoint++
D1MK4real Aug 15, 2023
a58df59
added changes on comments
D1MK4real Aug 15, 2023
4038d9e
resolve merge conflicts
D1MK4real Aug 15, 2023
788d381
resolve merge conflicts
D1MK4real Aug 15, 2023
f6fdb58
resolve merge conflicts
D1MK4real Aug 15, 2023
a7fe9be
PLR + SOFTEmb
D1MK4real Aug 16, 2023
f58b4c5
no-changes
D1MK4real Aug 28, 2023
e4e56ea
resolve merge conflicts
D1MK4real Aug 29, 2023
a50a90f
beautiful CV tutorial
D1MK4real Aug 29, 2023
c66b3c2
added some changes on comments
D1MK4real Aug 29, 2023
6b8648e
resolve mc
D1MK4real Aug 29, 2023
d045b8e
removed useless function
D1MK4real Aug 29, 2023
eb12a7f
Merge branch 'autoint++' into tabnet
D1MK4real Aug 29, 2023
382b385
removed for-for
D1MK4real Aug 29, 2023
4955b2e
WeightedEmbedder bugfix
D1MK4real Aug 30, 2023
6b332d5
Merge branch 'autoint++' into tabnet
D1MK4real Aug 30, 2023
1bdf9d5
delete unused import
D1MK4real Aug 30, 2023
b51e4de
changed link
D1MK4real Aug 30, 2023
439ed48
Merge branch 'autoint++' into tabnet
D1MK4real Aug 30, 2023
6d4a74e
add tabnet/plr/softemb
D1MK4real Aug 30, 2023
2557c4c
bugfix
D1MK4real Sep 1, 2023
42fd85f
changed import links
D1MK4real Sep 1, 2023
98aa0c7
changed import links
D1MK4real Sep 1, 2023
b462b01
resolve merge conflicts
D1MK4real Sep 6, 2023
7a8bf65
changed import links
D1MK4real Sep 6, 2023
94fdd76
bugfix
D1MK4real Sep 6, 2023
4e1aa5c
bugfix
D1MK4real Sep 6, 2023
1c4170e
some new changes
D1MK4real Sep 7, 2023
2035113
now we dont count VC for cat features for every embedding
D1MK4real Sep 7, 2023
0afe07f
no embedder bugfix
D1MK4real Sep 8, 2023
39beb9e
scheduler params
D1MK4real Sep 8, 2023
3 changes: 2 additions & 1 deletion lightautoml/automl/presets/base.py
@@ -37,8 +37,9 @@ def upd_params(old: dict, new: dict) -> dict:
Updated parameters.

"""
not_updatable_params = ["scheduler_params"]
for k in new:
if type(new[k]) is dict and k in old and type(old[k]) is dict:
if type(new[k]) is dict and k in old and type(old[k]) is dict and k not in not_updatable_params:
upd_params(old[k], new[k])
else:
old[k] = new[k]
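The behavioural effect of the base.py change above, as a minimal standalone sketch (the function and variable names here are illustrative, not the library API): keys listed as not updatable are replaced wholesale instead of being deep-merged, so a user-supplied scheduler_params no longer inherits leftover defaults.

NOT_UPDATABLE = ["scheduler_params"]  # mirrors not_updatable_params in the hunk above

def upd_params_sketch(old: dict, new: dict) -> dict:
    """Deep-merge nested dicts, except keys that must be replaced as a whole."""
    for k in new:
        if isinstance(new[k], dict) and isinstance(old.get(k), dict) and k not in NOT_UPDATABLE:
            upd_params_sketch(old[k], new[k])  # ordinary nested dicts: merge recursively
        else:
            old[k] = new[k]  # scalars and "not updatable" dicts: overwrite entirely
    return old

defaults = {"sch": "ReduceLROnPlateau", "scheduler_params": {"patience": 5, "factor": 0.5}}
user = {"scheduler_params": {"patience": 10}}
print(upd_params_sketch(defaults, user))
# {'sch': 'ReduceLROnPlateau', 'scheduler_params': {'patience': 10}}
# note that 'factor' is dropped: the user dict replaces the default dict wholesale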
2 changes: 1 addition & 1 deletion lightautoml/automl/presets/tabular_config.yml
@@ -184,7 +184,7 @@ nn_params:
# scheduler
sch: ReduceLROnPlateau
# params of ReduceLROnPlateau scheduler
scheduler_params: {} #{ 'patience': 5, 'factor': 0.5, 'min_lr': 0.00001 }
scheduler_params: { 'patience': 5, 'factor': 0.5, 'min_lr': 0.00001 }
# using snapshot ensembles
# https://arxiv.org/abs/1704.00109
is_snap: false
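For reference, a small sketch of how the new default scheduler_params would typically be passed to PyTorch's ReduceLROnPlateau; the model and optimizer below are placeholders, not LightAutoML internals.

import torch

model = torch.nn.Linear(10, 1)                              # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, patience=5, factor=0.5, min_lr=0.00001       # the yml defaults above
)
# during training: scheduler.step(val_loss) halves the learning rate after
# 5 epochs without improvement, but never drops it below min_lr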
4 changes: 3 additions & 1 deletion lightautoml/ml_algo/base.py
@@ -240,7 +240,9 @@ def fit_predict(self, train_valid_iterator: TrainValidIterator) -> NumpyDataset:
iterator_len = len(train_valid_iterator)
if iterator_len > 1:
logger.info("Start fitting \x1b[1m{}\x1b[0m ...".format(self._name))
logger.debug(f"Training params: {self.params}")
stop_params = ["cat_features", "cont_features", "cat_dims", "cat_vc"]
printable_params = {key: value for key, value in self.params.items() if key not in stop_params}
logger.debug(f"Training params: {printable_params}")

# save features names
self._features = train_valid_iterator.features
31 changes: 20 additions & 11 deletions lightautoml/ml_algo/dl_model.py
@@ -115,7 +115,11 @@
"cat_no_dropout": BasicCatEmbeddingFlat,
"weighted": WeightedCatEmbeddingFlat,
}
cat_embedder_by_name = {"cat_no_dropout": BasicCatEmbedding, "weighted": WeightedCatEmbedding}
cat_embedder_by_name = {
"cat_no_dropout": BasicCatEmbedding,
"cat_no_dropout": BasicCatEmbedding,
"weighted": WeightedCatEmbedding,
}

cont_embedder_by_name_flat = {
"cont": ContEmbedder,
@@ -124,9 +128,13 @@
"plr": PLREmbeddingFlat,
"soft": SoftEmbeddingFlat,
}
cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding, "plr": PLREmbedding, "soft": SoftEmbedding}
cont_embedder_by_name_flat = {"cont": ContEmbedder, "linear": LinearEmbeddingFlat, "dense": DenseEmbeddingFlat}
cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding}
cont_embedder_by_name = {
"cont": LinearEmbedding,
"linear": LinearEmbedding,
"dense": DenseEmbedding,
"plr": PLREmbedding,
"soft": SoftEmbedding,
}


class TorchModel(TabularMLAlgo):
@@ -301,7 +309,7 @@ def _infer_params(self):
net_params={
"task": self.task,
"cont_embedder_": cont_embedder_by_name.get(params["cont_embedder"], LinearEmbedding)
if input_type_by_name[params["model"]] == "seq"
if input_type_by_name[params["model"]] == "seq" and is_cont
else cont_embedder_by_name_flat.get(params["cont_embedder"], ContEmbedder)
if is_cont
else None,
@@ -314,7 +322,7 @@ def _infer_params(self):
if is_cont
else None,
"cat_embedder_": cat_embedder_by_name.get(params["cat_embedder"], BasicCatEmbedding)
if input_type_by_name[params["model"]] == "seq"
if input_type_by_name[params["model"]] == "seq" and is_cat
else cat_embedder_by_name_flat.get(params["cat_embedder"], CatEmbedder)
if is_cat
else None,
@@ -423,11 +431,12 @@ def _init_params_on_input(self, train_valid_iterator) -> dict:
)
+ 1
)
values, counts = np.unique(
np.concatenate([train_valid_iterator.train[:, cat_feature].data, valid[:, cat_feature].data]),
return_counts=True,
)
cat_value_counts.append(dict(zip(values, counts)))
if params["cat_embedder"] == "weighted":
values, counts = np.unique(
np.concatenate([train_valid_iterator.train[:, cat_feature].data, valid[:, cat_feature].data]),
return_counts=True,
)
cat_value_counts.append(dict(zip(values, counts)))
cat_dims.append(num_unique_categories)
new_params["cat_dims"] = cat_dims
new_params["cat_vc"] = cat_value_counts
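The guard added in _init_params_on_input means per-category value counts are computed only when the weighted categorical embedder is selected. A rough standalone sketch of that idea follows; the helper is illustrative, not the library API.

import numpy as np

def cat_value_counts_if_needed(train_col, valid_col, cat_embedder: str):
    """Return {category: count} only for the 'weighted' embedder; skip the pass otherwise."""
    if cat_embedder != "weighted":
        return None  # avoids an np.unique pass over every categorical feature
    values, counts = np.unique(np.concatenate([train_col, valid_col]), return_counts=True)
    return dict(zip(values, counts))

print(cat_value_counts_if_needed(np.array([0, 1, 1]), np.array([1, 2]), "weighted"))
# counts for categories 0, 1 and 2
print(cat_value_counts_if_needed(np.array([0, 1, 1]), np.array([1, 2]), "cat"))
# None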
15 changes: 8 additions & 7 deletions lightautoml/ml_algo/torch_based/nn_models.py
@@ -154,8 +154,8 @@ def __init__(
dropout_first: bool = True,
bn_momentum: float = 0.1,
ghost_batch: Optional[int] = 64,
leaky_gate: bool = True,
use_skip: bool = True,
leaky_gate: bool = True,
weighted_sum: bool = True,
device: torch.device = torch.device("cuda:0"),
**kwargs,
@@ -180,7 +180,7 @@ def __init__(
self.features.add_module("dense0", nn.Linear(n_in, num_features))

if leaky_gate:
self.features.add_module("leakygate0", LeakyGate(n_in))
self.features.add_module("leakygate0", LeakyGate(num_features))

if dropout_first and drop_rate[0] > 0:
self.features.add_module("dropout0", nn.Dropout(drop_rate[0]))
@@ -228,7 +228,7 @@ def forward(self, X: torch.Tensor) -> torch.Tensor:
x = X
input = x.detach().clone()
for name, layer in self.features.named_children():
if name != "denseblock1" and name != "dense0" and self.concat_input:
if name not in ["dropout0", "leakygate0", "denseblock1", "dense0"] and self.concat_input:
x = torch.cat([x, input], 1)
x = layer(x)
out = self.fc(x)
@@ -976,6 +976,7 @@ def __init__(
use_skip=mlp_use_skip,
device=device,
)
self.use_skip = True
if weighted_sum:
self.mix = nn.Parameter(torch.tensor([0.0], device=device))
else:
@@ -1127,16 +1128,16 @@ def __init__(
self,
n_in,
n_out,
n_d=8,
n_a=8,
n_steps=3,
n_d=32,
n_a=32,
n_steps=1,
gamma=1.3,
n_independent=2,
n_shared=2,
epsilon=1e-15,
virtual_batch_size=128,
momentum=0.02,
mask_type="sparsemax",
mask_type="entemax",
group_attention_matrix=None,
**kwargs,
):
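The LeakyGate fix above matters because the gate sits after dense0, which maps n_in features to num_features, so the gate has to match the post-dense width. A toy sketch with a stand-in gate module (the real LeakyGate lives in the library; the class below is only an assumption for illustration):

import torch
import torch.nn as nn
import torch.nn.functional as F

class LeakyGateSketch(nn.Module):
    """Elementwise learned gate followed by a leaky ReLU (stand-in for the library class)."""

    def __init__(self, n_features: int):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(n_features))  # one gate weight per feature

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return F.leaky_relu(x * self.weight)

n_in, num_features = 16, 64
features = nn.Sequential(
    nn.Linear(n_in, num_features),   # "dense0": n_in -> num_features
    LeakyGateSketch(num_features),   # must match dense0's output width;
)                                    # LeakyGateSketch(n_in) would fail to broadcast here
print(features(torch.randn(8, n_in)).shape)  # torch.Size([8, 64])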
3 changes: 2 additions & 1 deletion lightautoml/text/embed.py
@@ -12,6 +12,7 @@
import torch.nn as nn
from torch import Tensor
import operator
import numpy as np

try:
from transformers import AutoModel
@@ -572,7 +573,7 @@ def get_out_shape(self) -> int:

def forward(self, x: Tensor) -> Tensor:
"""Forward-pass."""
x = self._cos_sin(2 * torch.pi * self.coefficients[None] * x[..., None])
x = self._cos_sin(2 * np.pi * self.coefficients[None] * x[..., None])
if self.flatten_output:
return x.view(x.shape[0], -1)
return x
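The embed.py change swaps torch.pi for np.pi inside the periodic (PLR-style) embedding; np.pi is a plain Python float, so the expression also works on torch builds that predate torch.pi. A compact sketch of the pattern, where the tensor shapes and module layout are assumptions for illustration:

import numpy as np
import torch
import torch.nn as nn

class PeriodicSketch(nn.Module):
    """Map each continuous feature to cos/sin of learned random frequencies."""

    def __init__(self, n_features: int, k: int = 4, sigma: float = 1.0):
        super().__init__()
        self.coefficients = nn.Parameter(sigma * torch.randn(n_features, k))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        z = 2 * np.pi * self.coefficients[None] * x[..., None]  # (batch, n_features, k)
        return torch.cat([torch.cos(z), torch.sin(z)], dim=-1)  # (batch, n_features, 2k)

emb = PeriodicSketch(n_features=3)
print(emb(torch.randn(5, 3)).shape)  # torch.Size([5, 3, 8])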
57 changes: 44 additions & 13 deletions lightautoml/text/nn_model.py
@@ -162,13 +162,34 @@ def __init__(
)

if bias is not None:
try:
last_layer = list(
filter(
lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential),
list(self.torch_model.children()),
)
)[-1]
self._set_last_layer(self.torch_model, bias)

self.сlump = Clump()
self.sig = nn.Sigmoid()
self.softmax = nn.Softmax(dim=1)

def _set_last_layer(self, torch_model, bias):
try:
use_skip = torch_model.use_skip
self._init_last_layers(torch_model, bias, use_skip)
except:
self._init_last_layers(torch_model, bias, False)

def _init_last_layers(self, torch_model, bias, use_skip=False):
try:
all_layers = list(torch_model.children())
layers = list(
filter(
lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential),
all_layers,
)
)
if len(layers) == 0:
last_layer = all_layers[-1]
self._set_last_layer(last_layer, bias)

else:
last_layer = layers[-1]
while isinstance(last_layer, nn.Sequential):
last_layer = list(
filter(lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential), last_layer)
@@ -177,12 +198,22 @@ def __init__(
last_layer.bias.data = bias
shape = last_layer.weight.data.shape
last_layer.weight.data = torch.zeros(shape[0], shape[1], requires_grad=True)
except:
logger.info3("Last linear layer not founded, so init_bias=False")

self.сlump = Clump()
self.sig = nn.Sigmoid()
self.softmax = nn.Softmax(dim=1)
if use_skip:
if len(layers) <= 1:
last_layer = all_layers[-2]
self._set_last_layer(last_layer, bias)
else:
pre_last_layer = layers[-2]
while isinstance(last_layer, nn.Sequential):
pre_last_layer = list(
filter(lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential), pre_last_layer)
)[-1]
bias = torch.Tensor(bias)
pre_last_layer.bias.data = bias
shape = pre_last_layer.weight.data.shape
pre_last_layer.weight.data = torch.zeros(shape[0], shape[1], requires_grad=True)
except:
logger.info3("Last linear layer not founded, so init_bias=False")

def get_logits(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor:
"""Forward-pass of model with embeddings."""
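The refactor above recursively searches the model for the last linear layer (descending into nn.Sequential containers) before seeding its bias, with an extra branch for models that expose use_skip. A simplified sketch of the core search-and-init step; the function name and fallback behaviour are assumptions, not the library API.

import torch
import torch.nn as nn

def init_last_linear_bias(model: nn.Module, bias: torch.Tensor) -> bool:
    """Find the last nn.Linear (descending into nn.Sequential) and seed its bias."""
    candidates = [m for m in model.children() if isinstance(m, (nn.Linear, nn.Sequential))]
    if not candidates:
        return False  # mirrors the logged "init_bias=False" fallback
    last = candidates[-1]
    while isinstance(last, nn.Sequential):
        inner = [m for m in last if isinstance(m, (nn.Linear, nn.Sequential))]
        if not inner:
            return False
        last = inner[-1]
    last.bias.data = bias.clone()
    last.weight.data = torch.zeros_like(last.weight.data)  # start predictions from the prior alone
    return True

net = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 2))
print(init_last_linear_bias(net, torch.tensor([0.2, -0.2])))  # True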