In [94]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import StandardScaler

In [95]:
def load_and_validate_data(transformer_predictions_file):
    """
    Load and validate transformer predictions CSV file
    
    Args:
        transformer_predictions_file (str): Path to CSV file
    
    Returns:
        pandas.DataFrame: Validated dataframe
    """
    try:
        df = pd.read_csv(transformer_predictions_file)
        
        # Basic validation checks
        required_columns = ['Sequence', 'Step', 'Predicted_Proportion', 
                            'GroundTruth_Increment', 'GroundTruth_Cumulative']
        
        for col in required_columns:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")
        
        return df
    
    except Exception as e:
        print(f"Data loading error: {e}")
        raise

In [96]:
def bin_proportions(proportions, bin_size=0.05):
    """
    Bin proportions and normalize
    
    Args:
        proportions (array-like): Original proportion values
        bin_size (float): Bin resolution
    
    Returns:
        numpy.ndarray: Binned and normalized proportions
    """
    binned_props = np.round(proportions / bin_size) * bin_size
    total = np.sum(binned_props)
    
    if abs(total - 1.0) > 0.1:
        binned_props = binned_props / total
    
    return binned_props

In [97]:
def process_transformer_predictions(transformer_predictions_file):
    """
    Process transformer predictions for LSTM model
    
    Args:
        transformer_predictions_file (str): Path to CSV file
    
    Returns:
        dict: Processed data for LSTM training
    """
    # Load data
    df = load_and_validate_data(transformer_predictions_file)
    
    # Extract unique sequences
    sequences = df['Sequence'].unique()
    
    # Initialize lists for data storage
    X_data = []        # Input features
    y_increments = []  # Ground truth increments
    y_cumulative = []  # Ground truth cumulative times
    masks = []         # Masks for valid data
    
    # Feature scaling
    scaler = StandardScaler()
    
    # Process each sequence
    for seq in sequences:
        seq_data = df[df['Sequence'] == seq].sort_values('Step')
        
        # Bin proportions
        binned_proportions = bin_proportions(seq_data['Predicted_Proportion'].values)
        
        # Feature extraction with scaling
        features = np.column_stack([
            scaler.fit_transform(binned_proportions.reshape(-1, 1)).flatten(),
            scaler.fit_transform(seq_data['GroundTruth_Increment'].values.reshape(-1, 1)).flatten(),
            scaler.fit_transform(seq_data['GroundTruth_Cumulative'].values.reshape(-1, 1)).flatten(),
            seq_data['Step'].values / max(seq_data['Step'].values)
        ])
        
        # Ground truth values
        increments = seq_data['GroundTruth_Increment'].values
        cumulative = seq_data['GroundTruth_Cumulative'].values
        
        # Add to lists
        X_data.append(features)
        y_increments.append(increments)
        y_cumulative.append(cumulative)
        masks.append(np.ones(len(features)))
    
    # Pad sequences to uniform length
    max_length = max(len(x) for x in X_data)
    
    X_padded = np.zeros((len(X_data), max_length, X_data[0].shape[1]))
    y_increments_padded = np.zeros((len(y_increments), max_length))
    y_cumulative_padded = np.zeros((len(y_cumulative), max_length))
    masks_padded = np.zeros((len(masks), max_length))
    
    for i in range(len(X_data)):
        seq_len = len(X_data[i])
        X_padded[i, :seq_len, :] = X_data[i]
        y_increments_padded[i, :seq_len] = y_increments[i]
        y_cumulative_padded[i, :seq_len] = y_cumulative[i]
        masks_padded[i, :seq_len] = masks[i]
    
    # Extract total times
    y_total_times = np.array([seq[-1] if len(seq) > 0 else 0 for seq in y_cumulative])
    
    return {
        'X': X_padded,
        'y_increments': y_increments_padded,
        'y_cumulative': y_cumulative_padded,
        'y_total_times': y_total_times,
        'masks': masks_padded,
        'sequences': sequences,
        'df': df
    }

In [98]:
def custom_masked_huber_loss(mask, delta=1.0):
    """
    Custom masked Huber loss function
    
    Args:
        mask: Tensor mask for valid tokens
        delta: Huber loss threshold
    
    Returns:
        Callable loss function
    """
    def masked_huber(y_true, y_pred):
        mask_float = tf.cast(mask, tf.float32)
        current_batch_size = tf.shape(y_true)[0]
        mask_batch = mask_float[:current_batch_size]
        
        # Compute Huber loss
        error = y_true - y_pred
        is_small_error = tf.abs(error) <= delta
        squared_loss = 0.5 * tf.square(error)
        linear_loss = delta * (tf.abs(error) - 0.5 * delta)
        
        huber_loss = tf.where(is_small_error, squared_loss, linear_loss)
        masked_loss = huber_loss * mask_batch
        
        valid_tokens = tf.reduce_sum(mask_batch, axis=1, keepdims=True)
        valid_tokens = tf.where(valid_tokens == 0, tf.ones_like(valid_tokens), valid_tokens)
        
        return tf.reduce_mean(tf.reduce_sum(masked_loss, axis=1) / valid_tokens)
    
    return masked_huber


In [99]:
class ImprovedTimeDiffLSTM(tf.keras.Model):
    def __init__(self, hidden_units=64, dropout_rate=0.3):
        super(ImprovedTimeDiffLSTM, self).__init__()
        
        # Input normalization
        self.input_normalization = layers.LayerNormalization()
        
        # LSTM layers with regularization and residual connections
        self.lstm_layer1 = layers.LSTM(
            hidden_units, 
            return_sequences=True, 
            dropout=dropout_rate,
            recurrent_dropout=dropout_rate,
            kernel_regularizer=tf.keras.regularizers.l2(0.001)
        )
        
        self.lstm_layer2 = layers.LSTM(
            hidden_units // 2, 
            return_sequences=True,
            dropout=dropout_rate,
            recurrent_dropout=dropout_rate
        )
        
        # Multi-head attention
        self.attention = layers.MultiHeadAttention(
            num_heads=4, 
            key_dim=hidden_units,
            dropout=dropout_rate
        )
        
        # Output layers with non-negativity constraints
        self.time_diff_head = layers.Dense(1, activation='relu')
        self.total_time_head = layers.Dense(1, activation='softplus')
        
        # Residual connection dense layer
        self.residual_dense = layers.Dense(hidden_units, activation='relu')
    
    def call(self, inputs):
        # Normalize inputs
        x = self.input_normalization(inputs)
        
        # First LSTM layer with residual connection
        lstm_out1 = self.lstm_layer1(x)
        residual = self.residual_dense(x)
        lstm_out1 += residual
        
        # Second LSTM layer
        lstm_out2 = self.lstm_layer2(lstm_out1)
        
        # Attention mechanism
        attn_out = self.attention(lstm_out2, lstm_out2, lstm_out2)
        
        # Time difference prediction
        time_diffs = self.time_diff_head(attn_out)
        time_diffs = tf.squeeze(time_diffs, axis=-1)
        
        # Total time prediction
        sequence_encoding = tf.reduce_mean(attn_out, axis=1)
        total_time = self.total_time_head(sequence_encoding)
        
        # Cumulative times calculation
        cumulative_times = tf.cumsum(time_diffs, axis=1)
        
        return time_diffs, total_time, cumulative_times

In [100]:
def generate_lstm_predictions_from_transformer_csv(lstm_model, data):
    """
    Generate predictions using the LSTM model trained on transformer predictions
    """
    # Get LSTM predictions
    time_diffs_pred, total_time_pred, cumulative_times_pred = lstm_model(data['X'])
    
    # Convert to numpy for further processing
    time_diffs_pred = time_diffs_pred.numpy()
    total_time_pred = total_time_pred.numpy()
    cumulative_times_pred = cumulative_times_pred.numpy()
    
    # Create results dataframe with both transformer and LSTM predictions
    results_list = []
    
    # Original dataframe with transformer predictions
    original_df = data['df']
    
    # Process each sequence
    for seq_idx, seq in enumerate(data['sequences']):
        seq_data = original_df[original_df['Sequence'] == seq].sort_values('Step')
        
        # Get number of steps in this sequence
        seq_steps = len(seq_data)
        
        # Add LSTM predictions
        seq_data = seq_data.copy()
        seq_data['LSTM_Predicted_TimeDiff'] = time_diffs_pred[seq_idx, :seq_steps]
        seq_data['LSTM_Predicted_Cumulative'] = cumulative_times_pred[seq_idx, :seq_steps]
        seq_data['LSTM_Predicted_TotalTime'] = total_time_pred[seq_idx, 0]
        
        # Calculate improvement metrics
        seq_data['TimeDiff_Improvement_Pct'] = (
            (abs(seq_data['GroundTruth_Increment'] - seq_data['Predicted_Increment']) - 
             abs(seq_data['GroundTruth_Increment'] - seq_data['LSTM_Predicted_TimeDiff'])) / 
             abs(seq_data['GroundTruth_Increment'] - seq_data['Predicted_Increment']) * 100
        ).fillna(0)
        
        seq_data['Cumulative_Improvement_Pct'] = (
            (abs(seq_data['GroundTruth_Cumulative'] - seq_data['Predicted_Cumulative']) - 
             abs(seq_data['GroundTruth_Cumulative'] - seq_data['LSTM_Predicted_Cumulative'])) / 
             abs(seq_data['GroundTruth_Cumulative'] - seq_data['Predicted_Cumulative']) * 100
        ).fillna(0)
        
        results_list.append(seq_data)
    
    # Combine all results
    results_df = pd.concat(results_list, ignore_index=True)
    results_df.to_csv('predictions_lstm_refined.csv', index=False)
    print("Combined predictions saved to predictions_lstm_refined.csv")
    
    # Calculate overall improvement statistics
    transformer_time_diff_mae = np.mean(abs(results_df['GroundTruth_Increment'] - results_df['Predicted_Increment']))
    lstm_time_diff_mae = np.mean(abs(results_df['GroundTruth_Increment'] - results_df['LSTM_Predicted_TimeDiff']))
    
    transformer_cumulative_mae = np.mean(abs(results_df['GroundTruth_Cumulative'] - results_df['Predicted_Cumulative']))
    lstm_cumulative_mae = np.mean(abs(results_df['GroundTruth_Cumulative'] - results_df['LSTM_Predicted_Cumulative']))
    
    print("\nModel Performance Comparison (Mean Absolute Error)")
    print(f"Time Differences: Transformer {transformer_time_diff_mae:.4f}, LSTM {lstm_time_diff_mae:.4f}, " 
          f"Improvement: {(1 - lstm_time_diff_mae/transformer_time_diff_mae)*100:.2f}%")
    
    print(f"Cumulative Times: Transformer {transformer_cumulative_mae:.4f}, LSTM {lstm_cumulative_mae:.4f}, "
          f"Improvement: {(1 - lstm_cumulative_mae/transformer_cumulative_mae)*100:.2f}%")
    
    return results_df

In [101]:
def train_improved_lstm(transformer_predictions_file):
    """
    Train the improved LSTM model
    
    Args:
        transformer_predictions_file (str): Path to transformer predictions CSV
    
    Returns:
        tuple: Trained model, training history, processed data
    """
    # Process data
    data = process_transformer_predictions(transformer_predictions_file)
    
    # Create improved model
    lstm_model = ImprovedTimeDiffLSTM()
    
    # Robust Huber loss function
    masked_huber_loss = custom_masked_huber_loss(data['masks'])
    
    # Advanced optimizer with learning rate decay
    optimizer = tf.keras.optimizers.AdamW(
        learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=0.001,
            decay_steps=100,
            decay_rate=0.96
        ),
        weight_decay=0.001
    )
    
    # Compile model
    lstm_model.compile(
        optimizer=optimizer,
        loss=[masked_huber_loss, 'mse', masked_huber_loss],
        loss_weights=[0.4, 0.3, 0.3]
    )
    
    # Advanced callbacks
    callbacks = [
        EarlyStopping(
            monitor='val_loss', 
            patience=10, 
            restore_best_weights=True
        ),
        ReduceLROnPlateau(
            monitor='val_loss', 
            factor=0.5, 
            patience=5
        )
    ]
    
    # Train with advanced strategy
    history = lstm_model.fit(
        data['X'],
        [data['y_increments'], data['y_total_times'], data['y_cumulative']],
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )
    
    return lstm_model, history, data

In [102]:
def main():
    """
    Main function to run the LSTM model using transformer predictions and visualize results
    
    Enhanced with robust error handling and NaN filtering
    """
    try:
        transformer_predictions_file = "predictions_transformer.csv"
        
        # Train LSTM model using transformer predictions
        print("Training LSTM model using transformer predictions...")
        lstm_model, history, data = train_lstm_from_transformer_predictions(
            transformer_predictions_file, epochs=50, batch_size=32
        )
        
        # Generate and save predictions
        print("Generating LSTM predictions...")
        results_df = generate_lstm_predictions_from_transformer_csv(lstm_model, data)
        
        # Display sample results
        print("\nSample Combined Predictions:")
        display_cols = ['Sequence', 'Step', 'SourceID', 'Predicted_Increment', 
                         'LSTM_Predicted_TimeDiff', 'GroundTruth_Increment', 
                         'TimeDiff_Improvement_Pct']
        print(results_df[display_cols].head(10))
        
        # Generate visualizations
        print("\nGenerating visualizations...")
        visualize_results(results_df, data, lstm_model)
        print("Visualization completed. Check the generated PNG files.")
        
        # Plot training history
        import matplotlib.pyplot as plt
        import numpy as np
        
        plt.figure(figsize=(12, 5))
        
        plt.subplot(1, 2, 1)
        plt.plot(history.history['loss'], label='Training Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss During Training')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        
        # Robust error calculation and filtering
        def filter_valid_errors(predictions, ground_truth):
            """
            Filter out NaN and infinite values from predictions and ground truth
            
            Args:
                predictions (np.ndarray): Model predictions
                ground_truth (np.ndarray): Ground truth values
            
            Returns:
                np.ndarray: Mean absolute errors for valid predictions
            """
            # Ensure predictions and ground truth are numpy arrays
            predictions = np.asarray(predictions)
            ground_truth = np.asarray(ground_truth)
            
            # Calculate absolute errors
            abs_errors = np.abs(predictions - ground_truth)
            
            # Filter out NaN and infinite values
            valid_mask = np.isfinite(abs_errors)
            filtered_errors = abs_errors[valid_mask]
            
            return filtered_errors
        
        # Predict on training and validation data
        try:
            # Ensure only using training portion of the data
            train_size = int(0.8 * len(data['X']))
            
            # Predict and calculate errors for training data
            train_pred = lstm_model.predict(data['X'][:train_size])
            train_errors = filter_valid_errors(train_pred[0], data['y_increments'][:train_size])
            
            # Predict and calculate errors for validation data
            val_pred = lstm_model.predict(data['X'][train_size:])
            val_errors = filter_valid_errors(val_pred[0], data['y_increments'][train_size:])
            
            # Plot error distribution
            plt.subplot(1, 2, 2)
            
            # Only plot if we have valid errors
            if len(train_errors) > 0 and len(val_errors) > 0:
                plt.hist(train_errors, bins=30, alpha=0.5, label='Training Errors')
                plt.hist(val_errors, bins=30, alpha=0.5, label='Validation Errors')
                plt.title('Error Distribution')
                plt.xlabel('Mean Absolute Error')
                plt.ylabel('Frequency')
                plt.legend()
            else:
                plt.text(0.5, 0.5, 'No valid errors to display', 
                         horizontalalignment='center', 
                         verticalalignment='center')
                plt.title('Error Distribution')
        
        except Exception as pred_error:
            print(f"Error in prediction or error calculation: {pred_error}")
            plt.subplot(1, 2, 2)
            plt.text(0.5, 0.5, f'Error in prediction: {pred_error}', 
                     horizontalalignment='center', 
                     verticalalignment='center')
            plt.title('Error Distribution')
        
        plt.tight_layout()
        plt.savefig('training_performance.png')
        print("Saved training performance visualization")
        
        return lstm_model, results_df, history, data
        
    except Exception as e:
        print(f"Error in main function: {e}")
        import traceback
        traceback.print_exc()

In [103]:
def train_improved_lstm(transformer_predictions_file):
    """
    Train the improved LSTM model with a more flexible learning rate approach
    
    Args:
        transformer_predictions_file (str): Path to transformer predictions CSV
    
    Returns:
        tuple: Trained model, training history, processed data
    """
    # Enhanced data processing
    data = process_transformer_predictions(transformer_predictions_file)
    
    # Create improved model
    lstm_model = ImprovedTimeDiffLSTM()
    
    # Robust loss functions
    masked_huber_loss = custom_masked_huber_loss(data['masks'])
    
    # Create a learning rate schedule
    learning_rate_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.001,  # Starting learning rate
        decay_steps=100,               # Decay every 100 steps
        decay_rate=0.96                # Reduce to 96% of previous rate
    )
    
    # Advanced optimizer with dynamic learning rate
    optimizer = tf.keras.optimizers.AdamW(
        learning_rate=learning_rate_schedule,  # Use the schedule directly
        weight_decay=0.001
    )
    
    # Compile model with robust loss functions
    lstm_model.compile(
        optimizer=optimizer,
        loss=[masked_huber_loss, 'mse', masked_huber_loss],
        loss_weights=[0.4, 0.3, 0.3]
    )
    
    # Advanced callbacks
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', 
            patience=10, 
            restore_best_weights=True
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', 
            factor=0.5, 
            patience=5
        )
    ]
    
    # Train with advanced strategy
    history = lstm_model.fit(
        data['X'],
        [data['y_increments'], data['y_total_times'], data['y_cumulative']],
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )
    
    return lstm_model, history, data

In [104]:
def main():
    """
    Main function to train and demonstrate the LSTM model
    """
    try:
        # Replace with your actual transformer predictions file path
        transformer_predictions_file = "predictions_transformer.csv"
        
        # Train improved LSTM model
        lstm_model, history, data = train_improved_lstm(transformer_predictions_file)
        
        # Optional: Add visualization or further analysis here
        print("Model training completed successfully!")
        
        return lstm_model, history, data
    
    except Exception as e:
        print(f"Error in main function: {e}")
        import traceback
        traceback.print_exc()

In [105]:
if __name__ == "__main__":
    main()

  binned_props = binned_props / total
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 153ms/step - loss: nan - masked_huber_loss: nan - mse_loss: nan - val_loss: nan - val_masked_huber_loss: nan - val_mse_loss: nan - learning_rate: 9.9796e-04
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: nan - masked_huber_loss: nan - mse_loss: nan - val_loss: nan - val_masked_huber_loss: nan - val_mse_loss: nan - learning_rate: 9.9593e-04
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: nan - masked_huber_loss: nan - mse_loss: nan - val_loss: nan - val_masked_huber_loss: nan - val_mse_loss: nan - learning_rate: 9.9390e-04
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: nan - masked_huber_loss: nan - mse_loss: nan - val_loss: nan - val_masked_huber_loss: nan - val_mse_loss: nan - learning_rate: 9.9187e-04
Epoch 5/100
[1m4/5[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 20ms/

Traceback (most recent call last):
  File "C:\Users\lukis\AppData\Local\Temp\ipykernel_37224\830203811.py", line 10, in main
    lstm_model, history, data = train_improved_lstm(transformer_predictions_file)
  File "C:\Users\lukis\AppData\Local\Temp\ipykernel_37224\2435613863.py", line 55, in train_improved_lstm
    history = lstm_model.fit(
  File "C:\Users\lukis\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\keras\src\utils\traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\lukis\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\keras\src\optimizers\base_optimizer.py", line 597, in learning_rate
    raise TypeError(
TypeError: This optimizer was created with a `LearningRateSchedule` object as its `learning_rate` constructor argument, hence its learning rate is not settable.