
Commit 519bb06

feat(KDP): adding new layers / features to timeseries
1 parent 13e4511 commit 519bb06

31 files changed: +5831 −749 lines

docs/features/time_series_features.md

Lines changed: 234 additions & 100 deletions
Large diffs are not rendered by default.

examples/custom_preprocessing_example.py

Lines changed: 5 additions & 3 deletions
@@ -4,19 +4,21 @@
 This example demonstrates how to define and use custom preprocessing pipelines
 for various feature types in the KDP framework.
 """
+# ruff: noqa: E402

 import os
 import sys
+
+# Add the project root to the Python path to allow module imports
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 import numpy as np
 import pandas as pd
 import logging
 import tensorflow as tf
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error

-# Add the project root to the Python path to allow module imports
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
 from kdp.processor import PreprocessingModel
 from kdp.features import (
     NumericalFeature,

examples/dynamic_pipeline_examples.py

Lines changed: 15 additions & 13 deletions
@@ -4,6 +4,7 @@
 This script demonstrates how to use the DynamicPreprocessingPipeline to create
 a flexible pipeline of preprocessing layers, with customizable transformations.
 """
+# ruff: noqa: E402

 import numpy as np
 import tensorflow as tf
@@ -25,6 +26,7 @@
 np.random.seed(42)
 tf.random.set_seed(42)

+
 # Example 1: Basic Custom Layers
 class ScalingLayer(tf.keras.layers.Layer):
     """Custom layer to scale numeric input by a factor."""
@@ -294,54 +296,54 @@ def data_generator():
 def example_5_normalize_transform():
     """Create a pipeline that normalizes data and then applies a log transform."""
     print("\n=== Example 5: Normalize and Transform Pipeline ===")
-
+
     # Generate random data - lognormal distribution (right-skewed)
     data = np.random.lognormal(mean=0, sigma=1, size=(1000, 1)).astype(np.float32)
-
+
     # Create a normalization layer
     normalize_layer = tf.keras.layers.Normalization(name="normalize")
     normalize_layer.adapt(data)
-
+
     # Create a log transform layer using our factory
     log_transform = PreprocessorLayerFactory.distribution_transform_layer(
         transform_type="log", name="log_transform"
     )
-
+
     # Create our pipeline with both layers
     pipeline = DynamicPreprocessingPipeline([normalize_layer, log_transform])
-
+
     # Create a dataset
     dataset = tf.data.Dataset.from_tensor_slices({"normalize": data}).batch(32)
-
+
     # Process the data
     processed_data = pipeline.process(dataset)
-
+
     # Examine the results
     for batch in processed_data.take(1):
         original_mean = np.mean(data)
         transformed_mean = batch["log_transform"].numpy().mean()
-
+
         print(f"Original data mean: {original_mean:.4f}")
         print(f"Transformed data mean: {transformed_mean:.4f}")
-
+
         # Visualize the transformation
         plt.figure(figsize=(12, 5))
-
+
         plt.subplot(1, 2, 1)
         plt.hist(data, bins=50, alpha=0.7)
         plt.title("Original Data Distribution")
         plt.xlabel("Value")
         plt.ylabel("Frequency")
-
+
         plt.subplot(1, 2, 2)
         plt.hist(batch["log_transform"].numpy(), bins=50, alpha=0.7)
         plt.title("Normalized + Log Transformed Data")
         plt.xlabel("Value")
         plt.ylabel("Frequency")
-
+
         plt.tight_layout()
         plt.show()
-
+
     return pipeline
Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,181 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Example of using the new time series feature layers in keras-data-processor.
+
+This example demonstrates how to use the WaveletTransformLayer and TSFreshFeatureLayer
+for extracting features from time series data.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, Dense, Concatenate
+
+from kdp.layers.time_series import (
+    WaveletTransformLayer,
+    TSFreshFeatureLayer,
+    LagFeatureLayer,
+)
+
+
+def generate_sample_data(n_samples=1000, n_features=1):
+    """Generate a sample time series dataset."""
+    np.random.seed(42)
+
+    # Time steps
+    t = np.linspace(0, 10 * np.pi, n_samples)
+
+    # Base sine wave with increasing frequency
+    base_signal = np.sin(t * (1 + t / (10 * np.pi)))
+
+    # Add trends and seasonality for complexity
+    trend = 0.3 * t / (10 * np.pi)
+    seasonality = 0.5 * np.sin(0.5 * t)
+
+    # Create signal with noise
+    signal = base_signal + trend + seasonality + np.random.normal(0, 0.2, n_samples)
+
+    # Normalize
+    signal = (signal - np.mean(signal)) / np.std(signal)
+
+    # For multiple features, create variations
+    if n_features > 1:
+        signals = [signal]
+        for i in range(1, n_features):
+            # Create different variations with phase shifts and scaling
+            variation = np.sin(t * (1 + t / (10 * np.pi) + i * 0.2)) + trend * (
+                1.0 + 0.1 * i
+            )
+            variation = (variation - np.mean(variation)) / np.std(variation)
+            signals.append(variation)
+        signal = np.column_stack(signals)
+
+    # Create test/train split
+    train_size = int(0.8 * n_samples)
+    X_train = signal[:train_size]
+    X_test = signal[train_size:-1]  # drop the last row so X_test aligns with the next-step targets in y_test
+
+    # Create target variable (for regression task)
+    # We'll predict the next value in the series
+    y_train = (
+        signal[1 : train_size + 1, 0] if n_features > 1 else signal[1 : train_size + 1]
+    )
+    y_test = signal[train_size + 1 :, 0] if n_features > 1 else signal[train_size + 1 :]
+
+    return X_train, y_train, X_test, y_test
+
+
+def build_model_with_feature_layers(input_shape):
+    """Build a model that uses various time series feature layers."""
+    inputs = Input(shape=input_shape)
+
+    # 1. Extract wavelet transform features
+    wavelet_features = WaveletTransformLayer(
+        levels=3, window_sizes=[4, 8, 16], flatten_output=True
+    )(inputs)
+
+    # 2. Extract statistical features using TSFreshFeatureLayer
+    tsfresh_features = TSFreshFeatureLayer(
+        features=["mean", "std", "min", "max", "median", "skewness", "kurtosis"],
+        normalize=True,
+    )(inputs)
+
+    # 3. Extract lag features for temporal patterns
+    lag_features = LagFeatureLayer(
+        lag_indices=[1, 2, 3, 5, 7, 14, 21],
+        drop_na=False,  # We'll get zeros for missing values
+    )(inputs)
+
+    # Combine all features
+    combined_features = Concatenate()(
+        [wavelet_features, tsfresh_features, lag_features]
+    )
+
+    # Dense layers for prediction
+    x = Dense(64, activation="relu")(combined_features)
+    x = Dense(32, activation="relu")(x)
+    outputs = Dense(1)(x)
+
+    model = Model(inputs=inputs, outputs=outputs)
+    model.compile(optimizer="adam", loss="mse", metrics=["mae"])
+
+    return model
+
+
+def main():
+    """Run the example."""
+    # Generate sample data
+    X_train, y_train, X_test, y_test = generate_sample_data(
+        n_samples=1000, n_features=2
+    )
+
+    print(f"X_train shape: {X_train.shape}")
+    print(f"y_train shape: {y_train.shape}")
+
+    # Reshape for the model (add batch dimension if not already present)
+    if len(X_train.shape) == 1:
+        X_train = X_train.reshape(-1, 1)
+        X_test = X_test.reshape(-1, 1)
+
+    # Build model
+    model = build_model_with_feature_layers(input_shape=(X_train.shape[1],))
+
+    # Print model summary
+    model.summary()
+
+    # Train model
+    history = model.fit(
+        X_train,
+        y_train,
+        validation_data=(X_test, y_test),
+        epochs=50,
+        batch_size=32,
+        verbose=1,
+    )
+
+    # Plot training history
+    plt.figure(figsize=(12, 4))
+
+    plt.subplot(1, 2, 1)
+    plt.plot(history.history["loss"])
+    plt.plot(history.history["val_loss"])
+    plt.title("Model loss")
+    plt.ylabel("Loss (MSE)")
+    plt.xlabel("Epoch")
+    plt.legend(["Train", "Validation"], loc="upper right")
+
+    plt.subplot(1, 2, 2)
+    plt.plot(history.history["mae"])
+    plt.plot(history.history["val_mae"])
+    plt.title("Model MAE")
+    plt.ylabel("MAE")
+    plt.xlabel("Epoch")
+    plt.legend(["Train", "Validation"], loc="upper right")
+
+    plt.tight_layout()
+    plt.savefig("time_series_features_training.png")
+    print("Training plot saved as 'time_series_features_training.png'")
+
+    # Evaluate on test set
+    test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
+    print(f"Test Loss (MSE): {test_loss:.4f}")
+    print(f"Test MAE: {test_mae:.4f}")
+
+    # Make predictions and plot
+    predictions = model.predict(X_test)
+
+    plt.figure(figsize=(12, 6))
+    plt.plot(y_test, label="Actual")
+    plt.plot(predictions, label="Predicted")
+    plt.title("Time Series Prediction with Feature Layers")
+    plt.xlabel("Time Step")
+    plt.ylabel("Value")
+    plt.legend()
+    plt.savefig("time_series_features_prediction.png")
+    print("Prediction plot saved as 'time_series_features_prediction.png'")
+
+
+if __name__ == "__main__":
+    main()
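
Before wiring these layers into a full model, it can help to probe their output shapes on a dummy batch. A minimal smoke-test sketch: the constructor arguments are copied from the example above, but applying the layers directly to a small eager tensor is an assumption about how they behave outside a Model.

    import numpy as np
    import tensorflow as tf

    from kdp.layers.time_series import (
        WaveletTransformLayer,
        TSFreshFeatureLayer,
        LagFeatureLayer,
    )

    # Assumption: these layers accept a plain (batch, features) tensor eagerly,
    # as suggested by their use on Input(shape=(2,)) in the example above.
    x = tf.constant(np.random.randn(4, 2).astype(np.float32))

    layers = {
        "wavelet": WaveletTransformLayer(levels=3, window_sizes=[4, 8, 16], flatten_output=True),
        "tsfresh": TSFreshFeatureLayer(features=["mean", "std", "min", "max"], normalize=True),
        "lag": LagFeatureLayer(lag_indices=[1, 2, 3], drop_na=False),
    }
    for name, layer in layers.items():
        print(name, layer(x).shape)  # inspect feature dimensionality before Concatenate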

ideas.md

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+For Timeseries Features:
+
+Based on your current implementation, I can suggest several advanced features to enhance your time series preprocessing capabilities:
+
+Automatic Time Series Decomposition
+Implement seasonal-trend decomposition (STL) to separate time series into trend, seasonal, and residual components
+This would allow models to learn from each component separately, improving performance on seasonal data
+
+Dynamic Feature Generation
+Add configurable lag feature windows that automatically determine optimal lag values based on autocorrelation analysis
+Implement change point detection to identify regime shifts in time series data
+
+Advanced Signal Processing Features
+Fast Fourier Transform (FFT) layers to extract frequency-domain features
+Wavelet transforms for multi-resolution analysis of time series data
+Spectral analysis features to capture cyclical patterns
+
+Improved Missing Value Handling
+Add specialized interpolation methods for time series (cubic spline, LOCF, etc.)
+Implement a masking mechanism to handle irregular time series with missing timestamps
+
+Time-Aware Attention Mechanisms
+Implement temporal attention layers that focus on relevant time steps
+Create a positional encoding layer specifically for time series to encode temporal distance
+
+Multi-Scale Processing
+Implement automatic resampling at multiple time scales (hourly, daily, weekly)
+Create hierarchical time series preprocessors that handle different granularities
+
+
+Enhanced Seasonality Handling
+Add calendar feature generation (holidays, day of week, etc.)
+Implement multiple seasonal period detection and encoding
+
+Causal Inference Features
+Add Granger causality testing as a preprocessing step
+Implement transfer entropy calculations for multivariate time series
+
+Temporal Feature Extraction
+Add automatic feature extraction using tsfresh-inspired statistical features
+Implement shapelet detection for pattern recognition
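
Several of these ideas are straightforward to prototype outside KDP before turning them into layers. For the decomposition item, statsmodels already ships an STL implementation; a rough sketch (the helper name stl_components is hypothetical, and nothing here is part of KDP yet):

    import numpy as np
    from statsmodels.tsa.seasonal import STL  # prototype only, not a KDP layer

    def stl_components(series, period):
        """Split a 1-D series into trend, seasonal, and residual channels."""
        result = STL(series, period=period).fit()
        return {
            "trend": result.trend,
            "seasonal": result.seasonal,
            "residual": result.resid,
        }

    # Daily data with weekly seasonality.
    t = np.arange(365, dtype=float)
    series = 0.01 * t + np.sin(2 * np.pi * t / 7) + np.random.normal(0, 0.1, t.size)
    components = stl_components(series, period=7)
    print({name: comp.shape for name, comp in components.items()})

Each component could then be fed to the model as a separate numeric feature.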
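
The autocorrelation-driven lag selection could start as simply as ranking candidate lags by absolute autocorrelation; a numpy-only sketch (pick_lags is a hypothetical helper, not a KDP API):

    import numpy as np

    def pick_lags(series, max_lag=30, top_k=5):
        """Hypothetical helper: return the top_k lags with the largest absolute autocorrelation."""
        x = series - series.mean()
        denom = np.dot(x, x)
        acf = np.array([np.dot(x[:-k], x[k:]) / denom for k in range(1, max_lag + 1)])
        best = np.argsort(-np.abs(acf))[:top_k]
        return sorted(int(i) + 1 for i in best)  # shift from 0-based index to lag

    # A series with strong lag-7 structure should surface 7 and its multiples.
    t = np.arange(500)
    series = np.sin(2 * np.pi * t / 7) + np.random.normal(0, 0.3, t.size)
    print(pick_lags(series))

The selected lags could then be passed straight into LagFeatureLayer(lag_indices=...).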
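
And the calendar-feature item maps almost directly onto pandas datetime accessors; a sketch (holiday flags would need an extra package such as holidays, omitted here):

    import pandas as pd

    def calendar_features(index):
        """Sketch only, not a KDP API: derive simple calendar features from a DatetimeIndex."""
        return pd.DataFrame(
            {
                "day_of_week": index.dayofweek,  # 0 = Monday
                "day_of_month": index.day,
                "month": index.month,
                "is_weekend": (index.dayofweek >= 5).astype(int),
            },
            index=index,
        )

    idx = pd.date_range("2024-01-01", periods=10, freq="D")
    print(calendar_features(idx))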
