
Commit d387826

fix(KDP): changed the order of the transformers and the tabularAttention applications
1 parent da24b7b commit d387826

File tree (2 files changed: +50 −52 lines)

  kdp/processor.py
  test/test_processor.py

kdp/processor.py

Lines changed: 39 additions & 39 deletions
@@ -880,45 +880,6 @@ def _prepare_outputs(self) -> None:
             else:
                 raise ValueError("No features available for concatenation")

-            # Add transformer blocks if specified
-            if self.transfo_nr_blocks:
-                if self.transfo_placement == TransformerBlockPlacementOptions.CATEGORICAL and concat_cat is not None:
-                    logger.info(f"Adding transformer blocks to categorical features: #{self.transfo_nr_blocks}")
-                    transformed = concat_cat
-                    for block_idx in range(self.transfo_nr_blocks):
-                        transformed = PreprocessorLayerFactory.transformer_block_layer(
-                            dim_model=transformed.shape[-1],
-                            num_heads=self.transfo_nr_heads,
-                            ff_units=self.transfo_ff_units,
-                            dropout_rate=self.transfo_dropout_rate,
-                            name=f"transformer_block_{block_idx}_{self.transfo_nr_heads}heads",
-                        )(transformed)
-                    # Reshape transformer output to remove the extra dimension
-                    transformed = tf.keras.layers.Reshape(
-                        target_shape=(-1,),  # Flatten to match numeric shape
-                        name="reshape_transformer_output",
-                    )(transformed)
-
-                    # Recombine with numeric features if they exist
-                    if concat_num is not None:
-                        self.concat_all = tf.keras.layers.Concatenate(
-                            name="ConcatenateTransformed",
-                            axis=-1,
-                        )([concat_num, transformed])
-                    else:
-                        self.concat_all = transformed
-
-                elif self.transfo_placement == TransformerBlockPlacementOptions.ALL_FEATURES:
-                    logger.info(f"Adding transformer blocks to all features: #{self.transfo_nr_blocks}")
-                    for block_idx in range(self.transfo_nr_blocks):
-                        self.concat_all = PreprocessorLayerFactory.transformer_block_layer(
-                            dim_model=self.concat_all.shape[-1],
-                            num_heads=self.transfo_nr_heads,
-                            ff_units=self.transfo_ff_units,
-                            dropout_rate=self.transfo_dropout_rate,
-                            name=f"transformer_block_{block_idx}_{self.transfo_nr_heads}heads",
-                        )(self.concat_all)
-
             # Add tabular attention if specified
             if self.tabular_attention:
                 if self.tabular_attention_placement == TabularAttentionPlacementOptions.MULTI_RESOLUTION:
@@ -1047,6 +1008,45 @@ def _prepare_outputs(self) -> None:
                 else:
                     self.concat_all = concat_cat

+            # Add transformer blocks if specified
+            if self.transfo_nr_blocks:
+                if self.transfo_placement == TransformerBlockPlacementOptions.CATEGORICAL and concat_cat is not None:
+                    logger.info(f"Adding transformer blocks to categorical features: #{self.transfo_nr_blocks}")
+                    transformed = concat_cat
+                    for block_idx in range(self.transfo_nr_blocks):
+                        transformed = PreprocessorLayerFactory.transformer_block_layer(
+                            dim_model=transformed.shape[-1],
+                            num_heads=self.transfo_nr_heads,
+                            ff_units=self.transfo_ff_units,
+                            dropout_rate=self.transfo_dropout_rate,
+                            name=f"transformer_block_{block_idx}_{self.transfo_nr_heads}heads",
+                        )(transformed)
+                    # Reshape transformer output to remove the extra dimension
+                    transformed = tf.keras.layers.Reshape(
+                        target_shape=(-1,),  # Flatten to match numeric shape
+                        name="reshape_transformer_output",
+                    )(transformed)
+
+                    # Recombine with numeric features if they exist
+                    if concat_num is not None:
+                        self.concat_all = tf.keras.layers.Concatenate(
+                            name="ConcatenateTransformed",
+                            axis=-1,
+                        )([concat_num, transformed])
+                    else:
+                        self.concat_all = transformed
+
+                elif self.transfo_placement == TransformerBlockPlacementOptions.ALL_FEATURES:
+                    logger.info(f"Adding transformer blocks to all features: #{self.transfo_nr_blocks}")
+                    for block_idx in range(self.transfo_nr_blocks):
+                        self.concat_all = PreprocessorLayerFactory.transformer_block_layer(
+                            dim_model=self.concat_all.shape[-1],
+                            num_heads=self.transfo_nr_heads,
+                            ff_units=self.transfo_ff_units,
+                            dropout_rate=self.transfo_dropout_rate,
+                            name=f"transformer_block_{block_idx}_{self.transfo_nr_heads}heads",
+                        )(self.concat_all)
+
             logger.info("Concatenating outputs mode enabled")
         else:
             # Dictionary mode
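
For orientation, below is a minimal, self-contained Keras sketch (not KDP code) of the ordering this commit switches to for the ALL_FEATURES case: attention over the concatenated features runs first, and the transformer blocks are applied afterwards. MultiHeadAttention, Dense, and LayerNormalization stand in for KDP's tabular attention layers and PreprocessorLayerFactory.transformer_block_layer; the shapes and the block count of 2 are illustrative, chosen to match the updated tests.

import tensorflow as tf

# Stand-in pipeline: attention over the concatenated features first,
# transformer-style blocks afterwards (the order this commit switches to).
features_in = tf.keras.Input(shape=(6, 16), name="concat_all")  # (batch, features, dim)

# 1) tabular-attention stand-in (previously this step ran after the transformers)
attended = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=16)(features_in, features_in)

# 2) transformer-block stand-ins applied after the attention step
x = attended
for block_idx in range(2):  # transfo_nr_blocks=2, as in the updated tests
    attn = tf.keras.layers.MultiHeadAttention(
        num_heads=4, key_dim=16, name=f"block_{block_idx}_mha"
    )(x, x)
    x = tf.keras.layers.LayerNormalization()(x + attn)
    ff = tf.keras.layers.Dense(64, activation="relu")(x)  # ff_units stand-in
    x = tf.keras.layers.LayerNormalization()(x + tf.keras.layers.Dense(16)(ff))

model = tf.keras.Model(features_in, x)
model.summary()  # output shape stays (None, 6, 16)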

test/test_processor.py

Lines changed: 11 additions & 13 deletions
@@ -1004,6 +1004,7 @@ def test_preprocessor_all_features_with_transformer_and_attention(self):
             overwrite_stats=True,
             output_mode=OutputModeOptions.CONCAT,
             tabular_attention=True,
+            tabular_attention_placement="all_features",
             tabular_attention_heads=4,
             tabular_attention_dim=64,
             transfo_nr_blocks=2,
@@ -1025,7 +1026,7 @@ def test_preprocessor_all_features_with_transformer_and_attention(self):

         # Check output dimensions
         self.assertEqual(len(preprocessed.shape), 2)  # (batch_size, d_model)
-        self.assertEqual(preprocessed.shape[-1], 64)  # Example dimension
+        self.assertEqual(preprocessed.shape[-1], 1 + 4 + 12)  # The dimensions for num1, cat1, date1

     def test_preprocessor_all_features_with_transformer_and_attention_v2(self):
         """Test all feature types with both transformer and attention."""
@@ -1050,6 +1051,7 @@ def test_preprocessor_all_features_with_transformer_and_attention_v2(self):
             features_stats_path=self.features_stats_path,
             overwrite_stats=True,
             output_mode=OutputModeOptions.CONCAT,
+            tabular_attention_placement="all_features",
             tabular_attention=True,
             tabular_attention_heads=4,
             tabular_attention_dim=64,
@@ -1072,7 +1074,7 @@ def test_preprocessor_all_features_with_transformer_and_attention_v2(self):

         # Check output dimensions
         self.assertEqual(len(preprocessed.shape), 2)  # (batch_size, d_model)
-        self.assertEqual(preprocessed.shape[-1], 64)  # Example dimension
+        self.assertEqual(preprocessed.shape[-1], 65)  # Example dimension

     def test_preprocessor_all_features_with_transformer_and_attention_v3(self):
         """Test all feature types with both transformer and attention."""
@@ -1122,7 +1124,7 @@ def test_preprocessor_all_features_with_transformer_and_attention_v3(self):
         self.assertIsNotNone(preprocessed)

         # Check output dimensions
-        self.assertEqual(len(preprocessed.shape), 2)  # (batch_size, d_model)
+        self.assertEqual(len(preprocessed.shape), 3)  # (batch_size, d_model)
         self.assertEqual(preprocessed.shape[-1], 23)  # Example dimension

     def test_preprocessor_all_features_with_transformer_and_attention_v4(self):
@@ -1288,7 +1290,7 @@ def test_preprocessor_parameter_combinations(self):
            },
            {
                "tabular_attention": True,
-               "tabular_attention_placement": "categorical",
+               "tabular_attention_placement": "all_features",
                "tabular_attention_heads": 2,
                "tabular_attention_dim": 32,
                "tabular_attention_dropout": 0.1,
@@ -1363,17 +1365,13 @@ def test_preprocessor_parameter_combinations(self):

            if test_case["output_mode"] == OutputModeOptions.CONCAT:
                if test_case["tabular_attention"] == True:
-                   # Check output dimensions for concatenated output
-                   self.assertEqual(len(preprocessed.shape), 2)  # (batch_size, d_model)
-                   self.assertEqual(
-                       preprocessed.shape[-1], test_case["tabular_attention_dim"]
-                   )  # Example dimension
+                   # Check output dimensions for concatenated output with attention
+                   self.assertEqual(len(preprocessed.shape), 3)  # (batch_size, d_model)
+                   self.assertEqual(preprocessed.shape[-1], test_case["tabular_attention_dim"])
                else:
-                   # Check output dimensions for concatenated output
+                   # Check output dimensions for concatenated output without attention
                    self.assertEqual(len(preprocessed.shape), 2)  # (batch_size, d_model)
-                   self.assertEqual(
-                       preprocessed.shape[-1], 65
-                   )  # The dimension of these features at the end should be 65
+                   self.assertEqual(preprocessed.shape[-1], 65)  # Base feature dimension
            else:
                # Check output dimensions for dictionary output
                for key, tensor in preprocessed.items():
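
The updated assertions encode one rule: in CONCAT mode with tabular attention enabled, the preprocessor output is rank 3 with tabular_attention_dim on the last axis, while without attention it stays rank 2 at the plain concatenated width (65 for these test cases). A hypothetical helper capturing that rule (check_concat_output is not part of the test suite, just a sketch of the expectation) might look like:

def check_concat_output(preprocessed, tabular_attention: bool, attention_dim: int, base_width: int = 65):
    """Hypothetical shape check mirroring the updated CONCAT-mode assertions."""
    if tabular_attention:
        # With attention: (batch_size, num_features, tabular_attention_dim)
        assert len(preprocessed.shape) == 3
        assert preprocessed.shape[-1] == attention_dim
    else:
        # Without attention: (batch_size, concatenated_feature_width)
        assert len(preprocessed.shape) == 2
        assert preprocessed.shape[-1] == base_width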
