In [7]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
import os # For checking file existence
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler # For target scaling

In [None]:
# Helper function to compute increments and cumulative times from proportions and total time
def calculate_times_from_proportions(proportions_per_step, total_time_for_sequence, mask_per_step):
    """
    Calculates time increments and cumulative times from step-wise proportions and a total time.
    """
    proportions_tf = tf.cast(proportions_per_step, tf.float32)
    total_time_tf = tf.cast(total_time_for_sequence, tf.float32)
    mask_tf = tf.cast(mask_per_step, tf.float32)

    if len(tf.shape(total_time_tf)) == 1:
        total_time_tf = tf.expand_dims(total_time_tf, axis=-1)

    masked_proportions = proportions_tf * mask_tf
    row_sums = tf.reduce_sum(masked_proportions, axis=1, keepdims=True)
    row_sums = tf.where(tf.equal(row_sums, 0), tf.ones_like(row_sums), row_sums) 
    normalized_proportions = masked_proportions / row_sums
    increments = normalized_proportions * total_time_tf 
    cumulative_times = tf.cumsum(increments, axis=1)
    
    increments *= mask_tf
    cumulative_times *= mask_tf
    normalized_proportions *= mask_tf

    return normalized_proportions, increments, cumulative_times

# %%
class TotalTimeLSTM(tf.keras.Model):
    """
    Enhanced LSTM model with a manual Bahdanau-style Attention to predict the total time.
    """
    def __init__(self, hidden_units=256, 
                 dense_units_1=128, 
                 dense_units_2=64, 
                 dropout_rate=0.4):
        super(TotalTimeLSTM, self).__init__()
        
        self.hidden_units = hidden_units
        self.dense_units_1 = dense_units_1
        self.dense_units_2 = dense_units_2
        self.dropout_rate = dropout_rate
        self.bilstm_output_dim = hidden_units * 2

        # --- Layers for sequential input ---
        self.bi_lstm_layer = layers.Bidirectional(
            layers.LSTM(self.hidden_units, return_sequences=True, dropout=self.dropout_rate, recurrent_dropout=0.25),
            name="bidirectional_lstm_v16"
        )
        
        # --- Manual Attention Layers ---
        self.W1 = layers.Dense(self.bilstm_output_dim, name="attention_dense_W1")
        self.W2 = layers.Dense(self.bilstm_output_dim, name="attention_dense_W2")
        self.V = layers.Dense(1, name="attention_dense_V")
        
        # --- Layers for combined features ---
        self.concat_layer = layers.Concatenate(name="concatenate_features_v16")
        self.dense_1 = layers.Dense(
            self.dense_units_1, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001), name="dense_1_v16"
        )
        self.dropout_1 = layers.Dropout(self.dropout_rate, name="dropout_1_v16")
        self.dense_2 = layers.Dense(
            self.dense_units_2, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001), name="dense_2_v16"
        )
        self.dropout_2 = layers.Dropout(self.dropout_rate, name="dropout_2_v16")
        self.total_time_head = layers.Dense(1, activation='linear', name="total_time_dense_v16") 
    
    def call(self, inputs, training=False): 
        sequence_input, global_features_input = inputs 
        mask_bool_seq = tf.reduce_any(tf.not_equal(sequence_input, 0.0), axis=-1)
        
        lstm_output = self.bi_lstm_layer(sequence_input, mask=mask_bool_seq, training=training)
        
        query_summary = tf.reduce_mean(lstm_output, axis=1)
        query_with_time_axis = tf.expand_dims(query_summary, 1)

        score = self.V(tf.nn.tanh(self.W1(lstm_output) + self.W2(query_with_time_axis)))

        mask_for_scores = tf.expand_dims(tf.cast(mask_bool_seq, tf.float32), -1)
        masked_score = score - (1. - mask_for_scores) * 1e9 # Use subtraction for clarity
        
        attention_weights = tf.nn.softmax(masked_score, axis=1)

        context_vector = tf.reduce_sum(attention_weights * lstm_output, axis=1)
        
        combined_features = self.concat_layer([context_vector, global_features_input])
        
        x = self.dense_1(combined_features)
        x = self.dropout_1(x, training=training)
        x = self.dense_2(x)
        x = self.dropout_2(x, training=training)
        total_time_pred = self.total_time_head(x)
        
        return total_time_pred

# %%
def process_input_data_for_lstm(transformer_predictions_file):
    """
    Process the transformer predictions CSV to prepare data for LSTM training.
    Global features now exclude sequence length.
    """
    print(f"Processing data from: {transformer_predictions_file}")
    if not os.path.exists(transformer_predictions_file):
        raise FileNotFoundError(f"Transformer predictions file not found: {transformer_predictions_file}")
    df = pd.read_csv(transformer_predictions_file)
    
    required_cols = ['Predicted_Proportion', 'GroundTruth_Cumulative', 'GroundTruth_Increment', 'Sequence', 'Step']
    for col in required_cols:
        if col not in df.columns: raise ValueError(f"CSV must contain '{col}' column.")

    sequences = df['Sequence'].unique()
    
    X_sequential_data_list = []; X_global_features_list = []
    y_total_times_list = []; original_dfs_list = [] 
    transformer_proportions_list = []; ground_truth_increments_list = []
    ground_truth_cumulative_list = [] 

    for seq_id in sequences:
        seq_df = df[df['Sequence'] == seq_id].sort_values('Step').copy()
        if seq_df.empty:
            original_dfs_list.append(seq_df); continue
        original_dfs_list.append(seq_df) 

        actual_sequence_length = float(len(seq_df))
        props_for_seq = seq_df['Predicted_Proportion'].values
        
        sum_props = np.sum(props_for_seq)
        mean_props = np.mean(props_for_seq) if actual_sequence_length > 0 else 0.0
        std_props = np.std(props_for_seq) if actual_sequence_length > 1 else 0.0
        max_prop = np.max(props_for_seq) if actual_sequence_length > 0 else 0.0

        current_max_steps = seq_df['Step'].max()
        if current_max_steps == 0: current_max_steps = 1 
        
        sequential_features = np.column_stack([props_for_seq, seq_df['Step'].values / current_max_steps])
        X_sequential_data_list.append(sequential_features)
        
        # --- MODIFICATION: Removed actual_sequence_length from global features ---
        global_features_for_seq = np.array([
            sum_props,
            mean_props,
            std_props,
            max_prop
        ], dtype=np.float32)
        X_global_features_list.append(global_features_for_seq)
        
        gt_cumulative_for_seq = seq_df['GroundTruth_Cumulative'].values
        total_time_for_seq = np.max(gt_cumulative_for_seq) if len(gt_cumulative_for_seq) > 0 else 0.0
        y_total_times_list.append(total_time_for_seq)
        
        transformer_proportions_list.append(props_for_seq)
        ground_truth_increments_list.append(seq_df['GroundTruth_Increment'].values)
        ground_truth_cumulative_list.append(gt_cumulative_for_seq) 

    if not X_sequential_data_list: raise ValueError("No valid sequences processed.")

    y_total_times_array_unpadded = np.array(y_total_times_list)
    print(f"\nStatistics for TARGET y_total_times_list (max GT_Cumulative per seq, {len(y_total_times_array_unpadded)} sequences):")
    print(f"  Mean: {np.mean(y_total_times_array_unpadded):.4f}, Std Dev: {np.std(y_total_times_array_unpadded):.4f}")
    print(f"  Min: {np.min(y_total_times_array_unpadded):.4f}, Max: {np.max(y_total_times_array_unpadded):.4f}")
    print(f"  Number of zeros (<=1e-6): {np.sum(y_total_times_array_unpadded <= 1e-6)}\n")

    max_length_sequential = max(len(x) for x in X_sequential_data_list) if X_sequential_data_list else 0
    if max_length_sequential == 0: raise ValueError("Max length for sequential features is 0.")
    num_sequential_features = X_sequential_data_list[0].shape[1]
    num_global_features = X_global_features_list[0].shape[0]

    X_sequential_padded = np.zeros((len(X_sequential_data_list), max_length_sequential, num_sequential_features), dtype=np.float32)
    masks_padded_float = np.zeros((len(X_sequential_data_list), max_length_sequential), dtype=np.float32) 
    transformer_proportions_padded = np.zeros((len(X_sequential_data_list), max_length_sequential), dtype=np.float32)
    gt_increments_padded_original = np.zeros((len(X_sequential_data_list), max_length_sequential), dtype=np.float32)
    gt_cumulative_padded_original = np.zeros((len(X_sequential_data_list), max_length_sequential), dtype=np.float32)

    for i, seq_data in enumerate(X_sequential_data_list):
        seq_len = len(seq_data)
        if seq_len > 0:
            X_sequential_padded[i, :seq_len, :] = seq_data
            masks_padded_float[i, :seq_len] = 1.0 
            transformer_proportions_padded[i, :seq_len] = transformer_proportions_list[i]
            gt_increments_padded_original[i, :seq_len] = ground_truth_increments_list[i]
            gt_cumulative_padded_original[i, :seq_len] = ground_truth_cumulative_list[i]
        
    y_total_times_np = np.array(y_total_times_list, dtype=np.float32)
    X_global_features_np = np.array(X_global_features_list, dtype=np.float32)

    return {
        'X_sequential_input': X_sequential_padded, 
        'X_global_features_input': X_global_features_np, 
        'y_lstm_target_total_times': y_total_times_np,
        'masks_for_calc': masks_padded_float, 
        'sequences_ids': sequences, 
        'original_dfs': original_dfs_list, 
        'transformer_proportions_padded': transformer_proportions_padded, 
        'gt_increments_padded_original': gt_increments_padded_original,
        'gt_cumulative_padded_original': gt_cumulative_padded_original,
        'max_len_sequential': max_length_sequential,
        'num_sequential_features': num_sequential_features,
        'num_global_features': num_global_features
    }

# %%
def train_total_time_lstm(transformer_predictions_file, epochs=50, batch_size=32, val_split_ratio=0.2):
    """
    Train the enhanced LSTM model to predict total_time.
    """
    print("Processing data for LSTM training...")
    data_for_lstm = process_input_data_for_lstm(transformer_predictions_file)
    
    print(f"Num sequential features: {data_for_lstm['num_sequential_features']}, Max seq length: {data_for_lstm['max_len_sequential']}")
    print(f"Num global features: {data_for_lstm['num_global_features']}")

    lstm_model = TotalTimeLSTM(hidden_units=256, dense_units_1=128, dense_units_2=64, dropout_rate=0.4) 
    
    X_sequential_all = data_for_lstm['X_sequential_input']
    X_global_all = data_for_lstm['X_global_features_input']
    y_targets_all = data_for_lstm['y_lstm_target_total_times']
    
    if len(X_sequential_all) > 0:
        sample_seq_input_for_build = tf.convert_to_tensor(X_sequential_all[:1], dtype=tf.float32)
        sample_glob_input_for_build = tf.convert_to_tensor(X_global_all[:1], dtype=tf.float32)
        _ = lstm_model((sample_seq_input_for_build, sample_glob_input_for_build)) 
        print("\nEnhanced LSTM Model Summary (v16 - after sample call):")
        lstm_model.summary(expand_nested=True) 
    else: print("Warning: No data to build model with sample call.")

    lstm_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse' ) 
    
    if np.any(np.isnan(X_sequential_all)) or np.any(np.isinf(X_sequential_all)): print("CRITICAL WARNING: NaN/Inf in X_sequential_all.")
    if np.any(np.isnan(X_global_all)) or np.any(np.isinf(X_global_all)): print("CRITICAL WARNING: NaN/Inf in X_global_all.")
    if np.any(np.isnan(y_targets_all)) or np.any(np.isinf(y_targets_all)): print("CRITICAL WARNING: NaN/Inf in y_targets_all.")
    if len(y_targets_all) > 0 and np.all(np.abs(y_targets_all) <= 1e-6) : print("CRITICAL WARNING: All target total times are near zero.")

    target_scaler = StandardScaler()
    y_targets_all_reshaped = y_targets_all.reshape(-1, 1)
    global_feature_scaler = StandardScaler()
    indices = np.arange(len(X_sequential_all))

    if len(X_sequential_all) < 10: 
        print("Warning: Very few samples (<10), using all for training.")
        X_train_seq = X_sequential_all
        X_train_glob_scaled = global_feature_scaler.fit_transform(X_global_all)
        y_train_scaled = target_scaler.fit_transform(y_targets_all_reshaped)
        validation_data_for_fit = None
    else:
        train_indices, val_indices = train_test_split(indices, test_size=val_split_ratio, random_state=42, shuffle=True)
        X_train_seq = X_sequential_all[train_indices]; X_val_seq = X_sequential_all[val_indices]
        X_train_glob = X_global_all[train_indices]; X_val_glob = X_global_all[val_indices]
        X_train_glob_scaled = global_feature_scaler.fit_transform(X_train_glob) 
        X_val_glob_scaled = global_feature_scaler.transform(X_val_glob)     
        y_train_orig_reshaped = y_targets_all_reshaped[train_indices]; y_val_orig_reshaped = y_targets_all_reshaped[val_indices]
        y_train_scaled = target_scaler.fit_transform(y_train_orig_reshaped) 
        y_val_scaled = target_scaler.transform(y_val_orig_reshaped)         
        validation_data_for_fit = ([X_val_seq, X_val_glob_scaled], y_val_scaled) 
        print(f"\nManually split data: {len(X_train_seq)} train, {len(X_val_seq)} validation samples.")
        print(f"Training target stats (orig scale): Mean={np.mean(y_train_orig_reshaped):.2f}, Std={np.std(y_train_orig_reshaped):.2f}")
        print(f"Validation target stats (orig scale): Mean={np.mean(y_val_orig_reshaped):.2f}, Std={np.std(y_val_orig_reshaped):.2f}\n")

    callbacks_list = [
        EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True, verbose=1), 
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=15, min_lr=1e-7, verbose=1) 
    ]
    
    print("Starting LSTM model training (with scaled targets and global features)...")
    history = lstm_model.fit(
        [X_train_seq, X_train_glob_scaled], y_train_scaled,          
        epochs=epochs, batch_size=batch_size,
        validation_data=validation_data_for_fit, 
        callbacks=callbacks_list, verbose=1
    )
    
    print("LSTM training finished.")
    data_for_lstm['target_scaler'] = target_scaler
    data_for_lstm['global_feature_scaler'] = global_feature_scaler 
    return lstm_model, history, data_for_lstm

# %%
def generate_refined_predictions_with_lstm(lstm_model, processed_data):
    """
    Generate refined time predictions with a cleaned-up CSV output.
    Output CSV changed to _v16.
    """
    print("Generating refined predictions using LSTM's total time...")
    
    X_sequential_input_all = processed_data['X_sequential_input']
    X_global_features_input_all_unscaled = processed_data['X_global_features_input']
    
    global_feature_scaler = processed_data['global_feature_scaler']
    X_global_features_input_all_scaled = global_feature_scaler.transform(X_global_features_input_all_unscaled)

    lstm_predicted_scaled_total_times = lstm_model.predict(
        [X_sequential_input_all, X_global_features_input_all_scaled] 
    ) 
    
    target_scaler = processed_data['target_scaler']
    lstm_predicted_total_times_original_scale = target_scaler.inverse_transform(lstm_predicted_scaled_total_times)
    lstm_predicted_total_times_original_scale = np.squeeze(lstm_predicted_total_times_original_scale)
    lstm_predicted_total_times_original_scale = np.maximum(0, lstm_predicted_total_times_original_scale) 

    transformer_step_proportions = processed_data['transformer_proportions_padded'] 
    masks_for_calc = processed_data['masks_for_calc'] 

    _, lstm_refined_increments, lstm_refined_cumulative = calculate_times_from_proportions(
        transformer_step_proportions,
        lstm_predicted_total_times_original_scale, 
        masks_for_calc 
    )

    lstm_refined_increments_np = lstm_refined_increments.numpy()
    lstm_refined_cumulative_np = lstm_refined_cumulative.numpy()

    results_list_df = []
    original_dfs_from_processing = processed_data['original_dfs'] 
    gt_total_times_all = processed_data['y_lstm_target_total_times'] 

    for i, seq_id in enumerate(processed_data['sequences_ids']):
        if i >= len(original_dfs_from_processing): continue
        original_seq_df = original_dfs_from_processing[i]
        seq_len = len(original_seq_df)
        if seq_len == 0:
            if original_seq_df.empty: results_list_df.append(original_seq_df) 
            continue
        if i >= len(lstm_predicted_total_times_original_scale): continue

        source_ids = original_seq_df['SourceID'].values
        
        output_dict = {
            'Sequence': seq_id, 'Step': np.arange(1, seq_len + 1), 'SourceID': source_ids,
            'GroundTruth_Increment': processed_data['gt_increments_padded_original'][i, :seq_len],
            'GroundTruth_Cumulative': processed_data['gt_cumulative_padded_original'][i, :seq_len],
            'LSTM_Predicted_Increment': lstm_refined_increments_np[i, :seq_len],
            'LSTM_Predicted_Cumulative': lstm_refined_cumulative_np[i, :seq_len],
            'LSTM_Predicted_TotalTime': np.full(seq_len, np.nan, dtype=np.float32),
            'TotalTime_Difference': np.full(seq_len, np.nan, dtype=np.float32) 
        }
        
        predicted_total = lstm_predicted_total_times_original_scale[i]
        gt_total = gt_total_times_all[i]
        
        output_dict['LSTM_Predicted_TotalTime'][-1] = predicted_total
        output_dict['TotalTime_Difference'][-1] = gt_total - predicted_total 

        transformer_pred_increment = original_seq_df['Predicted_Increment'].fillna(0).values
        transformer_pred_cumulative = original_seq_df['Predicted_Cumulative'].fillna(0).values
        diff_transformer_inc = np.abs(output_dict['GroundTruth_Increment'] - transformer_pred_increment)
        diff_lstm_inc = np.abs(output_dict['GroundTruth_Increment'] - output_dict['LSTM_Predicted_Increment'])
        output_dict['Increment_Improvement_Pct'] = np.where(diff_transformer_inc > 1e-6, (diff_transformer_inc - diff_lstm_inc) / diff_transformer_inc * 100, 0 )
        diff_transformer_cum = np.abs(output_dict['GroundTruth_Cumulative'] - transformer_pred_cumulative)
        diff_lstm_cum = np.abs(output_dict['GroundTruth_Cumulative'] - output_dict['LSTM_Predicted_Cumulative'])
        output_dict['Cumulative_Improvement_Pct'] = np.where(diff_transformer_cum > 1e-6, (diff_transformer_cum - diff_lstm_cum) / diff_transformer_cum * 100, 0 )
        
        clean_seq_df = pd.DataFrame(output_dict)
        results_list_df.append(clean_seq_df)

    if not results_list_df: return pd.DataFrame()
    final_results_df = pd.concat(results_list_df, ignore_index=True)
    
    output_filename = 'predictions_lstm_refined_total_time_v16.csv' # Changed filename
    final_results_df.to_csv(output_filename, index=False)
    print(f"Combined and refined predictions saved to {output_filename}")
    
    if not final_results_df.empty:
        original_df_full = pd.concat(original_dfs_from_processing, ignore_index=True)
        merged_for_summary = pd.merge(final_results_df, original_df_full[['Sequence', 'Step', 'Predicted_Increment', 'Predicted_Cumulative']], on=['Sequence', 'Step'])
        if 'Predicted_Increment' in merged_for_summary.columns and 'LSTM_Predicted_Increment' in merged_for_summary.columns:
            mae_transformer = np.mean(np.abs(merged_for_summary['GroundTruth_Increment'] - merged_for_summary['Predicted_Increment']))
            mae_lstm = np.mean(np.abs(merged_for_summary['GroundTruth_Increment'] - merged_for_summary['LSTM_Predicted_Increment']))
            print(f"\nTransformer MAE (Increments): {mae_transformer:.4f}, LSTM-Refined MAE (Increments): {mae_lstm:.4f}")
            if mae_transformer > 1e-6: print(f"Improvement (Increments): {(mae_transformer - mae_lstm) / mae_transformer * 100:.2f}%")
        if 'Predicted_Cumulative' in merged_for_summary.columns and 'LSTM_Predicted_Cumulative' in merged_for_summary.columns:
            mae_transformer_cum = np.mean(np.abs(merged_for_summary['GroundTruth_Cumulative'] - merged_for_summary['Predicted_Cumulative']))
            mae_lstm_cum = np.mean(np.abs(merged_for_summary['GroundTruth_Cumulative'] - merged_for_summary['LSTM_Predicted_Cumulative']))
            print(f"Transformer MAE (Cumulative): {mae_transformer_cum:.4f}, LSTM-Refined MAE (Cumulative): {mae_lstm_cum:.4f}")
            if mae_transformer_cum > 1e-6: print(f"Improvement (Cumulative): {(mae_transformer_cum - mae_lstm_cum) / mae_transformer_cum * 100:.2f}%")

    gt_total_times_for_lstm_training = processed_data['y_lstm_target_total_times'] 
    if len(lstm_predicted_total_times_original_scale) == len(gt_total_times_for_lstm_training):
        mae_total_time_lstm = np.mean(np.abs(gt_total_times_for_lstm_training - lstm_predicted_total_times_original_scale))
        print(f"\nLSTM MAE for Total Time (vs Max GT Cumulative): {mae_total_time_lstm:.4f}")

    return final_results_df

# %%
def visualize_lstm_results(results_df, processed_data, lstm_model, training_history):
    if results_df.empty: print("Results DataFrame is empty, skipping visualizations."); return
    print("Generating visualizations for LSTM results...")
    plt.style.use('ggplot')
    if training_history and hasattr(training_history, 'history'):
        if 'loss' in training_history.history and 'val_loss' in training_history.history:
            plt.figure(figsize=(10, 5))
            plt.plot(training_history.history['loss'], label='Training Loss')
            plt.plot(training_history.history['val_loss'], label='Validation Loss')
            if 'lr' in training_history.history:
                ax2 = plt.gca().twinx(); ax2.plot(training_history.history['lr'], label='Learning Rate', color='g', linestyle='--')
                ax2.set_ylabel('Learning Rate'); ax2.legend(loc='upper center')
            plt.title('LSTM Model Loss (Predicting Scaled Total Time)') 
            plt.xlabel('Epoch'); plt.ylabel('Mean Squared Error (Scaled Loss)'); plt.legend(loc='upper left'); plt.tight_layout()
            plt.savefig('lstm_total_time_training_loss_v16.png'); print("Saved LSTM training loss plot."); plt.close() # v16
    else: print("Warning: Training history not available or malformed.")

    sample_sequence_ids = results_df['Sequence'].unique()
    if len(sample_sequence_ids) == 0 : print("No sequences in results_df for plotting."); return
    sample_sequence_ids = sample_sequence_ids[:min(5, len(sample_sequence_ids))]
    
    original_df_full = pd.concat(processed_data['original_dfs'], ignore_index=True)
    plot_df = pd.merge(results_df, original_df_full[['Sequence', 'Step', 'Predicted_Cumulative', 'Predicted_Increment']], on=['Sequence', 'Step'], how='left')
    
    if len(sample_sequence_ids) > 0:
        num_plots = len(sample_sequence_ids); fig_height = max(8, 3 * num_plots) 
        plt.figure(figsize=(15, fig_height)) # Cumulative Plot
        for i, seq_id in enumerate(sample_sequence_ids):
            seq_data_plot = plot_df[plot_df['Sequence'] == seq_id].sort_values('Step')
            if seq_data_plot.empty: continue
            plt.subplot(num_plots, 1, i + 1)
            plt.plot(seq_data_plot['Step'], seq_data_plot['GroundTruth_Cumulative'], 'o-', label='GT Cumul.', ms=4)
            if 'Predicted_Cumulative' in plot_df.columns and 'Predicted_Cumulative' in seq_data_plot.columns: plt.plot(seq_data_plot['Step'], seq_data_plot['Predicted_Cumulative'], 's--', label='Transformer Cumul.', ms=4)
            if 'LSTM_Predicted_Cumulative' in seq_data_plot.columns: plt.plot(seq_data_plot['Step'], seq_data_plot['LSTM_Predicted_Cumulative'], '^-.', label='LSTM-Refined Cumul.', ms=4)
            lstm_total_time_for_seq = seq_data_plot['LSTM_Predicted_TotalTime'].dropna().unique()
            if len(lstm_total_time_for_seq) == 1: plt.axhline(y=lstm_total_time_for_seq[0], color='purple', linestyle=':', label=f'LSTM Total Pred: {lstm_total_time_for_seq[0]:.2f}')
            plt.title(f'Cumulative Times: Seq {seq_id}'); plt.xlabel('Step'); plt.ylabel('Cumulative Time'); plt.legend()
        plt.tight_layout(); plt.savefig('lstm_refined_cumulative_time_comparison_v16.png'); print("Saved cumulative time comparison plot."); plt.close() # v16

        plt.figure(figsize=(15, fig_height)) # Increment Plot
        for i, seq_id in enumerate(sample_sequence_ids):
            seq_data_plot = plot_df[plot_df['Sequence'] == seq_id].sort_values('Step')
            if seq_data_plot.empty: continue
            plt.subplot(num_plots, 1, i + 1)
            plt.plot(seq_data_plot['Step'], seq_data_plot['GroundTruth_Increment'], 'o-', label='GT Incr.', ms=4)
            if 'Predicted_Increment' in plot_df.columns and 'Predicted_Increment' in seq_data_plot.columns: plt.plot(seq_data_plot['Step'], seq_data_plot['Predicted_Increment'], 's--', label='Transformer Incr.', ms=4)
            if 'LSTM_Predicted_Increment' in seq_data_plot.columns: plt.plot(seq_data_plot['Step'], seq_data_plot['LSTM_Predicted_Increment'], '^-.', label='LSTM-Refined Incr.', ms=4)
            plt.title(f'Time Increments: Seq {seq_id}'); plt.xlabel('Step'); plt.ylabel('Time Increment'); plt.legend()
        plt.tight_layout(); plt.savefig('lstm_refined_increment_comparison_v16.png'); print("Saved increment comparison plot."); plt.close() # v16

    gt_total_times_for_lstm_training = processed_data.get('y_lstm_target_total_times', np.array([])) 
    if lstm_model is not None and 'X_sequential_input' in processed_data and 'X_global_features_input' in processed_data and 'target_scaler' in processed_data:
        X_seq_tensor = tf.convert_to_tensor(processed_data['X_sequential_input'], dtype=tf.float32)
        X_glob_unscaled = processed_data['X_global_features_input']
        X_glob_scaled_for_plot = processed_data['global_feature_scaler'].transform(X_glob_unscaled)
        X_glob_tensor = tf.convert_to_tensor(X_glob_scaled_for_plot, dtype=tf.float32)
        
        lstm_pred_scaled_total_t = lstm_model.predict([X_seq_tensor, X_glob_tensor])
        lstm_pred_original_scale_total_t = processed_data['target_scaler'].inverse_transform(lstm_pred_scaled_total_t).squeeze()
        
        if lstm_pred_original_scale_total_t.ndim == 0: lstm_pred_original_scale_total_t = np.array([lstm_pred_original_scale_total_t])
            
        if gt_total_times_for_lstm_training.size > 0 and lstm_pred_original_scale_total_t.size > 0:
            plt.figure(figsize=(12, 6))
            plt.subplot(1, 2, 1); 
            plt.hist(gt_total_times_for_lstm_training, bins=30, alpha=0.7, label='GT Total Times (Max Cumul.)')
            plt.hist(lstm_pred_original_scale_total_t, bins=30, alpha=0.7, label='LSTM Pred Total Times (Original Scale)')
            plt.xlabel('Total Time'); plt.ylabel('Frequency'); plt.title('Distribution of Total Times'); plt.legend()
            plt.subplot(1, 2, 2); 
            if len(gt_total_times_for_lstm_training) == len(lstm_pred_original_scale_total_t):
                errors_total_time = gt_total_times_for_lstm_training - lstm_pred_original_scale_total_t
                plt.hist(errors_total_time, bins=30, alpha=0.7, color='red')
                plt.xlabel('Prediction Error (GT Max Cumul. - Pred)'); plt.ylabel('Frequency'); plt.title('LSTM Total Time Prediction Errors')
                if errors_total_time.size > 0: 
                    mean_error_val = errors_total_time.mean(); plt.axvline(mean_error_val, color='k', ls='--', lw=1, label=f'Mean Error: {mean_error_val:.2f}')
                plt.legend()
            else: print("Warning: Mismatch length GT total times and predictions for error histogram.")
            plt.tight_layout(); plt.savefig('lstm_total_time_prediction_analysis_v16.png'); print("Saved total time prediction analysis plot."); plt.close() # v16
        else: print("Warning: Not enough data for total time distribution plots.")
    else: print("Warning: LSTM model, input data, or scaler missing for total time prediction plot.")
    print("Visualizations for LSTM completed!")

# %%
def main_lstm_total_time_flow():
    try:
        transformer_predictions_file = "predictions_transformer_182625.csv" 
        if not os.path.exists(transformer_predictions_file):
            print(f"Error: Transformer predictions file not found: {transformer_predictions_file}")
            print("Creating DUMMY CSV for testing flow.")
            dummy_data = []
            for seq_idx in range(300): 
                num_steps = np.random.randint(10, 60) 
                steps = np.arange(1, num_steps + 1)
                gt_increments = np.random.lognormal(mean=2.0, sigma=0.7, size=num_steps) + 0.1 
                gt_increments = np.maximum(gt_increments, 0.01) 
                gt_cumulative = np.cumsum(gt_increments)
                
                raw_props = np.random.rand(num_steps) + 0.05 
                pred_proportions = raw_props / raw_props.sum() 
                
                actual_sequence_total_time = gt_cumulative[-1] if num_steps > 0 else 1.0
                actual_sequence_total_time = max(actual_sequence_total_time, 1.0) 

                dummy_transformer_effective_total_time = actual_sequence_total_time * np.random.normal(loc=1.0, scale=0.4) 
                dummy_transformer_effective_total_time = max(dummy_transformer_effective_total_time, 0.1)

                pred_increments_from_transformer = pred_proportions * dummy_transformer_effective_total_time
                pred_cumulative_from_transformer = np.cumsum(pred_increments_from_transformer)

                for s_idx in range(num_steps):
                    dummy_data.append({
                        'Sequence': seq_idx, 'Step': steps[s_idx], 'SourceID': f'MRI_DUMMY_{s_idx%5 +1}',
                        'Predicted_Proportion': pred_proportions[s_idx], 
                        'Predicted_Increment': pred_increments_from_transformer[s_idx],
                        'Predicted_Cumulative': pred_cumulative_from_transformer[s_idx], 
                        'GroundTruth_Increment': gt_increments[s_idx], 
                        'GroundTruth_Cumulative': gt_cumulative[s_idx]  })
            if not dummy_data: 
                 dummy_data.append({ 'Sequence': 0, 'Step': 1, 'SourceID': 'MRI_DUMMY_0', 'Predicted_Proportion': 1.0, 
                                     'Predicted_Increment': 10.0, 'Predicted_Cumulative': 10.0, 
                                     'GroundTruth_Increment': 10.0, 'GroundTruth_Cumulative': 10.0})
            dummy_df = pd.DataFrame(dummy_data)
            dummy_df.to_csv(transformer_predictions_file, index=False)
            print(f"Dummy '{transformer_predictions_file}' created with {len(dummy_df)} rows and {len(dummy_df['Sequence'].unique())} sequences.")
        
        lstm_model, lstm_history, processed_lstm_data = train_total_time_lstm(
            transformer_predictions_file, epochs=200, batch_size=32 ) 
        
        if lstm_model is None or processed_lstm_data is None:
            print("LSTM training failed or returned None. Exiting."); return

        refined_results_df = generate_refined_predictions_with_lstm(lstm_model, processed_lstm_data)
        if not refined_results_df.empty:
            print("\nSample of Refined Predictions (LSTM Total Time Approach - v16):")
            display_cols = [ 'Sequence', 'Step', 'SourceID', 
                             'LSTM_Predicted_Increment', 'GroundTruth_Increment', 
                             'LSTM_Predicted_Cumulative', 'GroundTruth_Cumulative',
                             'LSTM_Predicted_TotalTime', 'TotalTime_Difference', 'Increment_Improvement_Pct']
            actual_display_cols = [col for col in display_cols if col in refined_results_df.columns]
            print(refined_results_df[actual_display_cols].head(20))
            print("\nGenerating visualizations for LSTM (total time approach - v16)...")
            visualize_lstm_results(refined_results_df, processed_lstm_data, lstm_model, lstm_history)
        else: print("No refined predictions were generated by the LSTM flow.")
    except Exception as e:
        print(f"Error in LSTM (total time) main function: {e}"); import traceback; traceback.print_exc()


In [9]:
# %%
if __name__ == "__main__":
    main_lstm_total_time_flow()

Processing data for LSTM training...
Processing data from: predictions_transformer_182625.csv

Statistics for TARGET y_total_times_list (max GT_Cumulative per seq, 186 sequences):
  Mean: 374.8065, Std Dev: 348.4868
  Min: 0.0000, Max: 2900.0000
  Number of zeros (<=1e-6): 5

Num sequential features: 2, Max seq length: 42
Num global features: 5

Enhanced LSTM Model Summary (v15 - after sample call):



Manually split data: 148 train, 38 validation samples.
Training target stats (orig scale): Mean=390.04, Std=362.56
Validation target stats (orig scale): Mean=315.47, Std=279.35

Starting LSTM model training (with scaled targets and global features)...
Epoch 1/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 229ms/step - loss: 1.3462 - val_loss: 0.8809 - learning_rate: 0.0010
Epoch 2/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - loss: 1.6358 - val_loss: 0.7999 - learning_rate: 0.0010
Epoch 3/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - loss: 1.1352 - val_loss: 0.7418 - learning_rate: 0.0010
Epoch 4/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 110ms/step - loss: 1.0317 - val_loss: 0.7055 - learning_rate: 0.0010
Epoch 5/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 109ms/step - loss: 1.0657 - val_loss: 0.6854 - learning_rate: 0.0010
Epoch 6/200
[1m5/5[0m [32m━━━━━

  output_dict['Increment_Improvement_Pct'] = np.where(diff_transformer_inc > 1e-6, (diff_transformer_inc - diff_lstm_inc) / diff_transformer_inc * 100, 0 )
  output_dict['Increment_Improvement_Pct'] = np.where(diff_transformer_inc > 1e-6, (diff_transformer_inc - diff_lstm_inc) / diff_transformer_inc * 100, 0 )
  output_dict['Cumulative_Improvement_Pct'] = np.where(diff_transformer_cum > 1e-6, (diff_transformer_cum - diff_lstm_cum) / diff_transformer_cum * 100, 0 )


Combined and refined predictions saved to predictions_lstm_refined_total_time_v15.csv

Transformer MAE (Increments): 46.1613, LSTM-Refined MAE (Increments): 46.6444
Improvement (Increments): -1.05%
Transformer MAE (Cumulative): 62.9617, LSTM-Refined MAE (Cumulative): 143.8719
Improvement (Cumulative): -128.51%

LSTM MAE for Total Time (vs Max GT Cumulative): 188.7979

Sample of Refined Predictions (LSTM Total Time Approach - v15):
    Sequence  Step      SourceID  LSTM_Predicted_Increment  \
0          0     1   MRI_MSR_104                  1.692836   
1          0     2     MRI_FRR_2                 25.392542   
2          0     3   MRI_FRR_257                  8.464181   
3          0     4   MRI_FRR_264                 18.621197   
4          0     5   MRI_FRR_264                 28.778217   
5          0     6    MRI_CCS_11                 49.092247   
6          0     7    MRI_CCS_11                  1.692836   
7          0     8   MRI_FRR_257                 42.320904   
8      