Skip to content

Commit 664023f

Browse files
committed
fix(KDP): Added get_feature_importances() method and fixed the docs.
1 parent 11f258d commit 664023f

File tree

4 files changed

+24
-18
lines changed

4 files changed

+24
-18
lines changed

.gitignore

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,4 @@ kdp/data/fake_data.csv
166166
my_tests/*
167167

168168
# derivative files
169-
data.csv
170-
sample_data.csv
171-
stats.json
169+
*.csv

complex_model.png

-311 KB
Binary file not shown.

docs/complex_example.md

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -130,15 +130,16 @@ Now if one wants to plot a block diagram of the model or get the output of t
130130
# Plot the model architecture
131131
ppr.plot_model("complex_model.png")
132132

133-
# Get predictions with an example test batch from the example data
134-
processed_data = ppr.transform(test_batch) # this returns a dict with "transformed_data" and "feature_weights"
135-
print("Output shape:", processed_data["transformed_data"].shape)
136-
137-
# Analyze feature importance if feature selection is enabled
138-
if "feature_weights" in processed_data:
139-
for feature_name in features:
140-
weights = processed_data[f"{feature_name}_weights"]
141-
print(f"Feature {feature_name} importance: {weights.mean()}")
133+
# Transform data using direct model prediction
134+
transformed_data = ppr.model.predict(test_batch)
135+
136+
# Transform data using batch_predict
137+
transformed_data = ppr.batch_predict(test_batch)
138+
transformed_batches = list(transformed_data) # For better visualization
139+
140+
# Get feature importances
141+
feature_importances = ppr.get_feature_importances()
142+
print("Feature importances:", feature_importances)
142143
```
143144

144145

kdp/processor.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,16 +1459,23 @@ def _convert_to_dataset(self, data: tf.data.Dataset | pd.DataFrame | dict) -> tf
14591459
else:
14601460
raise ValueError("Input data must be a DataFrame, dict, or TensorFlow Dataset")
14611461

1462-
def _extract_feature_weights(self) -> dict[str, np.ndarray]:
1463-
"""Extract feature importance weights from feature selection layers.
1462+
def get_feature_importances(self) -> dict[str, float]:
1463+
"""Get feature importance scores from feature selection layers.
14641464
14651465
Returns:
1466-
dict[str, np.ndarray]: Dictionary mapping feature names to their importance weights.
1466+
dict[str, float]: Dictionary mapping feature names to their importance scores,
1467+
where scores are averaged across all dimensions.
14671468
"""
1468-
weights = {}
1469+
feature_importances = {}
1470+
14691471
for layer in self.model.layers:
14701472
if "feature_selection" in layer.name:
14711473
layer_weights = layer.get_weights()
14721474
for i, feature_name in enumerate(self.features_specs.keys()):
1473-
weights[f"{feature_name}_weights"] = layer_weights[0][:, i]
1474-
return weights
1475+
weights = layer_weights[0][:, i]
1476+
feature_importances[feature_name] = float(np.mean(weights))
1477+
1478+
if not feature_importances:
1479+
logger.warning("No feature selection layers found in the model")
1480+
1481+
return feature_importances

0 commit comments

Comments
 (0)