Skip to content

Commit da79bb9

Browse files
committed
fix(KDP): added fixes for the distribution estimator and tests
1 parent 27dff94 commit da79bb9

File tree

11 files changed

+812
-360
lines changed

11 files changed

+812
-360
lines changed

.ruff.toml

Whitespace-only changes.

docs/distribution_aware_encoder.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ encoder = DistributionAwareEncoder(
123123
|-----------|------|---------|-------------|
124124
| num_bins | int | 1000 | Number of bins for quantile encoding |
125125
| epsilon | float | 1e-6 | Small value for numerical stability |
126-
| detect_periodicity | bool | True | Enable periodic pattern detection |
126+
| detect_periodicity | bool | True | Enable periodic pattern detection. Remove this parameter when working with multimodal functions/distributions. |
127127
| handle_sparsity | bool | True | Enable special handling for sparse data |
128128
| adaptive_binning | bool | True | Enable adaptive bin boundaries |
129129
| mixture_components | int | 3 | Number of components for mixture models |

kdp/custom_layers.py

Lines changed: 299 additions & 82 deletions
Large diffs are not rendered by default.

kdp/features.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,11 @@ def __init__(
5353
**kwargs: Additional keyword arguments for the feature.
5454
"""
5555
self.name = name
56-
self.feature_type = FeatureType.from_string(feature_type) if isinstance(feature_type, str) else feature_type
56+
self.feature_type = (
57+
FeatureType.from_string(feature_type)
58+
if isinstance(feature_type, str)
59+
else feature_type
60+
)
5761
self.preprocessors = preprocessors or []
5862
self.kwargs = kwargs
5963

@@ -93,7 +97,9 @@ def from_string(type_str: str) -> "FeatureType":
9397
class NumericalFeature(Feature):
9498
"""NumericalFeature with dynamic kwargs passing."""
9599

96-
def __init__(self, name: str, feature_type: FeatureType = FeatureType.FLOAT, **kwargs) -> None:
100+
def __init__(
101+
self, name: str, feature_type: FeatureType = FeatureType.FLOAT, **kwargs
102+
) -> None:
97103
"""Initializes a NumericalFeature instance.
98104
99105
Args:
@@ -126,7 +132,9 @@ def __init__(
126132
"""
127133
super().__init__(name, feature_type, **kwargs)
128134
self.category_encoding = category_encoding
129-
self.dtype = tf.int32 if feature_type == FeatureType.INTEGER_CATEGORICAL else tf.string
135+
self.dtype = (
136+
tf.int32 if feature_type == FeatureType.INTEGER_CATEGORICAL else tf.string
137+
)
130138
self.kwargs = kwargs
131139

132140
def _embedding_size_rule(self, nr_categories: int) -> int:
@@ -144,7 +152,9 @@ def _embedding_size_rule(self, nr_categories: int) -> int:
144152
class TextFeature(Feature):
145153
"""TextFeature with dynamic kwargs passing."""
146154

147-
def __init__(self, name: str, feature_type: FeatureType = FeatureType.TEXT, **kwargs) -> None:
155+
def __init__(
156+
self, name: str, feature_type: FeatureType = FeatureType.TEXT, **kwargs
157+
) -> None:
148158
"""Initializes a TextFeature instance.
149159
150160
Args:
@@ -160,7 +170,9 @@ def __init__(self, name: str, feature_type: FeatureType = FeatureType.TEXT, **kw
160170
class DateFeature(Feature):
161171
"""DateFeature with dynamic kwargs passing."""
162172

163-
def __init__(self, name: str, feature_type: FeatureType = FeatureType.DATE, **kwargs) -> None:
173+
def __init__(
174+
self, name: str, feature_type: FeatureType = FeatureType.DATE, **kwargs
175+
) -> None:
164176
"""Initializes a DateFeature instance.
165177
166178
Args:

kdp/layers_factory.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818

1919
class PreprocessorLayerFactory:
2020
@staticmethod
21-
def create_layer(layer_class: str | object, name: str = None, **kwargs) -> tf.keras.layers.Layer:
21+
def create_layer(
22+
layer_class: str | object, name: str = None, **kwargs
23+
) -> tf.keras.layers.Layer:
2224
"""Create a layer using the layer class name, automatically filtering kwargs based on the layer class.
2325
2426
Args:
@@ -39,7 +41,9 @@ def create_layer(layer_class: str | object, name: str = None, **kwargs) -> tf.ke
3941
constructor_params = inspect.signature(layer_class.__init__).parameters
4042

4143
# Filter kwargs to only include those that the constructor can accept
42-
filtered_kwargs = {key: value for key, value in kwargs.items() if key in constructor_params}
44+
filtered_kwargs = {
45+
key: value for key, value in kwargs.items() if key in constructor_params
46+
}
4347

4448
# Add the 'name' argument if provided else default the class name lowercase option
4549
if name:
@@ -86,7 +90,9 @@ def distribution_aware_encoder(
8690
)
8791

8892
@staticmethod
89-
def text_preprocessing_layer(name: str = "text_preprocessing", **kwargs: dict) -> tf.keras.layers.Layer:
93+
def text_preprocessing_layer(
94+
name: str = "text_preprocessing", **kwargs: dict
95+
) -> tf.keras.layers.Layer:
9096
"""Create a TextPreprocessingLayer layer.
9197
9298
Args:
@@ -103,7 +109,9 @@ def text_preprocessing_layer(name: str = "text_preprocessing", **kwargs: dict) -
103109
)
104110

105111
@staticmethod
106-
def cast_to_float32_layer(name: str = "cast_to_float32", **kwargs: dict) -> tf.keras.layers.Layer:
112+
def cast_to_float32_layer(
113+
name: str = "cast_to_float32", **kwargs: dict
114+
) -> tf.keras.layers.Layer:
107115
"""Create a CastToFloat32Layer layer.
108116
109117
Args:
@@ -120,7 +128,9 @@ def cast_to_float32_layer(name: str = "cast_to_float32", **kwargs: dict) -> tf.k
120128
)
121129

122130
@staticmethod
123-
def date_parsing_layer(name: str = "date_parsing_layer", **kwargs: dict) -> tf.keras.layers.Layer:
131+
def date_parsing_layer(
132+
name: str = "date_parsing_layer", **kwargs: dict
133+
) -> tf.keras.layers.Layer:
124134
"""Create a DateParsingLayer layer.
125135
126136
Args:
@@ -137,7 +147,9 @@ def date_parsing_layer(name: str = "date_parsing_layer", **kwargs: dict) -> tf.k
137147
)
138148

139149
@staticmethod
140-
def date_encoding_layer(name: str = "date_encoding_layer", **kwargs: dict) -> tf.keras.layers.Layer:
150+
def date_encoding_layer(
151+
name: str = "date_encoding_layer", **kwargs: dict
152+
) -> tf.keras.layers.Layer:
141153
"""Create a DateEncodingLayer layer.
142154
143155
Args:
@@ -154,7 +166,9 @@ def date_encoding_layer(name: str = "date_encoding_layer", **kwargs: dict) -> tf
154166
)
155167

156168
@staticmethod
157-
def date_season_layer(name: str = "date_season_layer", **kwargs: dict) -> tf.keras.layers.Layer:
169+
def date_season_layer(
170+
name: str = "date_season_layer", **kwargs: dict
171+
) -> tf.keras.layers.Layer:
158172
"""Create a SeasonLayer layer.
159173
160174
Args:
@@ -171,7 +185,9 @@ def date_season_layer(name: str = "date_season_layer", **kwargs: dict) -> tf.ker
171185
)
172186

173187
@staticmethod
174-
def transformer_block_layer(name: str = "transformer", **kwargs: dict) -> tf.keras.layers.Layer:
188+
def transformer_block_layer(
189+
name: str = "transformer", **kwargs: dict
190+
) -> tf.keras.layers.Layer:
175191
"""Create a TransformerBlock layer.
176192
177193
Args:
@@ -241,7 +257,9 @@ def multi_resolution_attention_layer(
241257
)
242258

243259
@staticmethod
244-
def variable_selection_layer(name: str = "variable_selection", **kwargs: dict) -> tf.keras.layers.Layer:
260+
def variable_selection_layer(
261+
name: str = "variable_selection", **kwargs: dict
262+
) -> tf.keras.layers.Layer:
245263
"""Create a VariableSelection layer.
246264
247265
Args:

kdp/pipeline.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77

88

99
class ProcessingStep:
10-
def __init__(self, layer_creator: Callable[..., tf.keras.layers.Layer], **layer_kwargs) -> None:
10+
def __init__(
11+
self, layer_creator: Callable[..., tf.keras.layers.Layer], **layer_kwargs
12+
) -> None:
1113
"""Initialize a processing step.
1214
1315
Args:
@@ -94,7 +96,9 @@ def __init__(self, name: str) -> None:
9496
self.name = name
9597
self.pipeline = Pipeline(name=name)
9698

97-
def add_processing_step(self, layer_creator: Callable[..., tf.keras.layers.Layer] = None, **layer_kwargs) -> None:
99+
def add_processing_step(
100+
self, layer_creator: Callable[..., tf.keras.layers.Layer] = None, **layer_kwargs
101+
) -> None:
98102
"""Add a processing step to the feature preprocessor.
99103
100104
Args:

0 commit comments

Comments
 (0)