In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import pandas as pd
import numpy as np
import os
import copy
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error # For evaluation

# torch-pruning
import torch_pruning as tp

# Type Hinting (Optional but good practice)
from typing import Tuple, List, Dict, Union, Optional

### Data Loading and Preprocessing Functions (Using your provided code)

In [3]:
# --- Functions from your provided code ---

# Header names handed to pandas when reading the raw (header-less) files:
# two identifier columns, three operating settings, then sensor_1..sensor_23.
column_names = [
    'unit_number',
    'time_in_cycles',
    *(f'op_setting_{idx}' for idx in range(1, 4)),
    *(f'sensor_{idx}' for idx in range(1, 24)),
]

def load_dataframe(file_path: str) -> pd.DataFrame | None:
    """Loads a single CMaps data file."""
    try:
        df = pd.read_csv(file_path, sep=' ', header=None, names=column_names)
        # Drop the last two columns if they are all NaNs (often artifacts of space delimiter)
        df.dropna(axis=1, how='all', inplace=True)
        return df
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None

def clean_data(df: pd.DataFrame) -> list:
    """List the sensor / operating-setting columns that barely vary.

    Only columns whose name contains ``'sensor'`` or ``'op_setting'`` are
    inspected, so identifier columns such as ``unit_number`` are never
    reported. Nothing is removed here; the caller decides what to drop.

    Args:
        df: Frame to inspect. ``None`` is tolerated.

    Returns:
        Names of the inspected columns whose standard deviation is below 0.02,
        in the frame's column order. An empty list when ``df`` is ``None``.
    """
    if df is None:
        return []

    low_std_cols = []
    for name in df.columns:
        is_candidate = 'sensor' in name or 'op_setting' in name
        if is_candidate and df[name].std() < 0.02:
            low_std_cols.append(name)

    print(f"Columns with std < 0.02 (potential removal): {low_std_cols}")
    return low_std_cols

def add_rul(df: pd.DataFrame) -> pd.DataFrame | None:
    """Calculates and adds the Remaining Useful Life (RUL) column."""
    if df is None:
        return None
    max_cycles = df.groupby('unit_number')['time_in_cycles'].max().reset_index()
    max_cycles.columns = ['unit_number', 'max_cycle']
    df = df.merge(max_cycles, on='unit_number', how='left')
    df['RUL'] = df['max_cycle'] - df['time_in_cycles']
    df.drop(columns=['max_cycle'], inplace=True)
    # Clip RUL (optional, common practice to limit max RUL)
    df['RUL'] = df['RUL'].clip(upper=125)
    return df

def normalize_data(df: pd.DataFrame,
                   columns_to_normalize: List[str], scaler: Optional[MinMaxScaler] = None) -> Tuple[pd.DataFrame, MinMaxScaler] | Tuple[None, None]:
    """Min-max scale the given columns of ``df``.

    The scaled values are written back into ``df`` itself, so pass a copy if
    the unscaled frame is still needed.

    Args:
        df: Frame to scale. ``None`` is tolerated.
        columns_to_normalize: Names of the columns to scale.
        scaler: A previously fitted ``MinMaxScaler`` to reuse. When ``None`` a
            new scaler is fitted on ``df[columns_to_normalize]``.

    Returns:
        ``(df, scaler)``, or ``(None, None)`` when ``df`` is ``None``.
    """
    if df is None:
        return None, None

    if scaler is None:
        scaler = MinMaxScaler()
        df[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])
        return df, scaler

    fitted_names = getattr(scaler, 'feature_names_in_', None)
    if fitted_names is None:
        # Scaler was fitted without column names, so there is nothing to match
        # against; let it validate the shape itself.
        df[columns_to_normalize] = scaler.transform(df[columns_to_normalize])
        return df, scaler

    fitted_names = list(fitted_names)
    valid_cols = [col for col in columns_to_normalize if col in fitted_names]
    if len(valid_cols) < len(columns_to_normalize):
        print("Warning: Some columns not found in the provided scaler. Skipping them.")

    if valid_cols:
        if valid_cols == fitted_names:
            df[valid_cols] = scaler.transform(df[valid_cols])
        else:
            # scaler.transform() rejects any column set other than the exact
            # one seen during fit, so apply the fitted per-column affine map
            # (x * scale_ + min_) to the requested subset directly.
            positions = [fitted_names.index(col) for col in valid_cols]
            scaled = df[valid_cols].to_numpy(dtype=float) * scaler.scale_[positions] + scaler.min_[positions]
            if getattr(scaler, 'clip', False):
                lower, upper = scaler.feature_range
                scaled = np.clip(scaled, lower, upper)
            df[valid_cols] = scaled

    return df, scaler

# --- Data Preparation Main Function ---
def prepare_cmapss_data(data_dir: str, train_file: str, test_file: str, test_rul_file: str) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, MinMaxScaler, List[str]]:
    """Load, clean and normalise the train/test files plus the test RUL file.

    Low-variance columns are identified on the training data only and dropped
    from both frames; the scaler is likewise fitted on the training data and
    reused for the test data.

    Args:
        data_dir: Directory containing the three files.
        train_file: File name of the training data.
        test_file: File name of the test data.
        test_rul_file: File name of the test-set RUL values.

    Returns:
        ``(train_df_norm, test_df_norm, test_rul_df, scaler, feature_cols)``.
        ``test_rul_df`` is returned as read, with a single ``RUL`` column.

    Raises:
        RuntimeError: If the training or test file could not be loaded.
    """
    print("--- Preparing Training Data ---")
    train_path = os.path.join(data_dir, train_file)
    train_df = load_dataframe(train_path)
    if train_df is None:
        # load_dataframe reports failures by returning None; stop here instead
        # of failing later with an AttributeError on None.
        raise RuntimeError(f"Could not load training data from {train_path}")
    train_df = add_rul(train_df)

    print("\n--- Preparing Test Data ---")
    test_path = os.path.join(data_dir, test_file)
    test_df = load_dataframe(test_path)
    if test_df is None:
        raise RuntimeError(f"Could not load test data from {test_path}")
    # Kept as-is: these values are used directly as evaluation targets later.
    test_rul_df = pd.read_csv(os.path.join(data_dir, test_rul_file), header=None, names=['RUL'])

    # Clean Data - identify columns based on TRAINING data variance
    cols_to_remove = clean_data(train_df)
    excluded = {'unit_number', 'time_in_cycles', 'RUL', *cols_to_remove}
    feature_cols = [col for col in train_df.columns if col not in excluded]
    print(f"\nUsing Features: {feature_cols}")

    # drop() returns new frames, so normalize_data's in-place scaling below
    # never touches the frames loaded above.
    train_df = train_df.drop(columns=cols_to_remove, errors='ignore')
    test_df = test_df.drop(columns=cols_to_remove, errors='ignore')

    # Normalize features based on TRAINING data, then reuse the scaler.
    print("\n--- Normalizing Data ---")
    train_df_norm, scaler = normalize_data(train_df, feature_cols, scaler=None)
    test_df_norm, _ = normalize_data(test_df, feature_cols, scaler=scaler)

    return train_df_norm, test_df_norm, test_rul_df, scaler, feature_cols

### Define MLP Model and Dataset Class


In [4]:
# --- MLP Model Definition (Using your provided class) ---
class MLPmodel(nn.Module):
    """Fully connected regressor: (Linear -> ReLU -> Dropout) per hidden layer, then a Linear head.

    Attributes:
        layers: ``nn.ModuleList`` of every ``nn.Linear``; the last entry is the output layer.
        dropouts: ``nn.ModuleList`` with one ``nn.Dropout`` per hidden layer.
        model_type: The string ``'MLP'``.
    """

    def __init__(self, layer_units: list, input_size: int, output_size: int = 1, dropout_rate: float = 0.2): # Default dropout 0.2
        """Build the network.

        Args:
            layer_units: Width of each hidden layer, in order.
            input_size: Number of input features.
            output_size: Number of outputs of the final linear layer.
            dropout_rate: Dropout probability used after every hidden layer.
        """
        super().__init__()
        self.layers = nn.ModuleList()
        self.dropouts = nn.ModuleList()
        self.model_type = 'MLP'

        # Chain the hidden layers: each one consumes the previous layer's width.
        fan_in = input_size
        for fan_out in layer_units:
            self.layers.append(nn.Linear(fan_in, fan_out))
            self.dropouts.append(nn.Dropout(dropout_rate))
            fan_in = fan_out

        # Regression head: no dropout and no activation follow it.
        self.layers.append(nn.Linear(fan_in, output_size))

        self.init_weights()

    def init_weights(self) -> None:
        """Re-initialise every linear layer: Kaiming-uniform weights, zero biases."""
        for module in self.layers:
            if not isinstance(module, nn.Linear):
                continue
            nn.init.kaiming_uniform_(module.weight, nonlinearity='relu')
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the hidden stack and return the raw output of the final linear layer."""
        hidden_stack = zip(self.layers[:-1], self.dropouts)
        for linear, dropout in hidden_stack:
            x = dropout(torch.relu(linear(x)))
        return self.layers[-1](x)

# --- PyTorch Dataset for CMaps MLP ---
class CMAPSS_MLP_Dataset(Dataset):
    """In-memory (features, target) dataset for the MLP regressor.

    Attributes:
        features: float32 tensor of the input features.
        targets: float32 tensor of shape ``[N, 1]``.
    """

    def __init__(self, features: np.ndarray, targets: np.ndarray):
        """Convert the arrays to tensors.

        Args:
            features: Feature array. A 3-D array is reduced to its last step
                along axis 1 (with a printed warning).
            targets: Target values with a single non-singleton axis, e.g.
                shape ``(N,)``, ``(N, 1)`` or ``(1, N)``.

        Raises:
            ValueError: If ``targets`` has more than one non-singleton axis
                and therefore cannot be stored as ``[N, 1]``.
        """
        if features.ndim == 3: # Handle potential sequence input by taking last step
            print("Warning: Input features seem sequential. Taking last step for MLP.")
            features = features[:, -1, :]

        targets = np.asarray(targets)
        if targets.ndim > 1:
            if targets.shape[1] > 1:
                print("Warning: Targets have more than one dimension. Squeezing.")
            if sum(dim != 1 for dim in targets.shape) > 1:
                raise ValueError(
                    f"Targets must contain one value per sample, got shape {targets.shape}."
                )
            # Flatten first: unsqueezing an (N, 1) array would give [N, 1, 1].
            targets = targets.reshape(-1)

        self.features = torch.tensor(features, dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.float32).reshape(-1, 1) # Always [N, 1]

    def __len__(self) -> int:
        """Number of samples."""
        return len(self.features)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return the ``(features, target)`` pair at ``idx``."""
        return self.features[idx], self.targets[idx]

### Training and Evaluation Functions (Adapted for Regression)

In [5]:
# --- Utility Functions ---
def save_model_state(model, path):
    """Save ``model.state_dict()`` to ``path``, creating parent directories as needed.

    Args:
        model: Module whose state dictionary is written.
        path: Destination file. May be a bare file name, in which case the
            file is written to the current working directory.
    """
    parent_dir = os.path.dirname(path)
    # dirname is '' for a bare file name, and os.makedirs('') raises.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    torch.save(model.state_dict(), path)
    print(f"Model state saved to {path}")

def load_model_state(model, path, device):
    """Restore a saved state dictionary into ``model`` and return the model.

    Args:
        model: Module to populate; its architecture must match the checkpoint.
        path: File previously written with ``torch.save``.
        device: Passed to ``torch.load`` as ``map_location``.

    Returns:
        The same ``model`` object, after its state has been loaded.
    """
    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint)
    print(f"Model state loaded from {path}")
    return model

def calculate_flops_params(model, example_input):
    """Return ``(flops, params)`` for ``model`` as reported by torch-pruning.

    Args:
        model: Module to measure.
        example_input: Sample input forwarded to ``tp.utils.count_ops_and_params``.

    Returns:
        Two-element tuple: operation count, then parameter count.
    """
    # Thin wrapper so the rest of the notebook does not depend on the
    # torch-pruning call directly.
    op_count, param_count = tp.utils.count_ops_and_params(model, example_input)
    return op_count, param_count

# --- Evaluation Function (RMSE) ---
def evaluate_model_rmse(model: nn.Module, data_loader: DataLoader, device: torch.device, example_input: torch.Tensor) -> Dict[str, float]:
    """Compute RMSE over ``data_loader`` and report model cost figures.

    Args:
        model: Regression model; it is switched to eval mode.
        data_loader: Yields ``(features, targets)`` batches.
        device: Device the features are moved to before the forward pass.
        example_input: Sample input used for the FLOPs/parameter count.

    Returns:
        Dict with keys ``'rmse'``, ``'flops'``, ``'params'`` and ``'size_mb'``.
        ``size_mb`` is ``params * 4 / 1e6``, i.e. it assumes 4 bytes per parameter.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    model.eval()
    prediction_batches = []
    target_batches = []
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            predictions = model(features)
            prediction_batches.append(predictions.cpu().numpy())
            # .cpu() is a no-op for CPU tensors and keeps this working if a
            # dataset ever hands out device tensors.
            target_batches.append(targets.cpu().numpy())

    if not prediction_batches:
        raise ValueError("data_loader produced no batches; cannot compute RMSE.")

    # reshape(-1) rather than squeeze(): squeeze() collapses a single-sample
    # evaluation to a 0-d array, which the metric function cannot handle.
    all_predictions = np.concatenate(prediction_batches).reshape(-1)
    all_targets = np.concatenate(target_batches).reshape(-1)

    rmse = float(np.sqrt(mean_squared_error(all_targets, all_predictions)))
    print(f"Evaluation RMSE: {rmse:.4f}")

    flops, params = calculate_flops_params(model, example_input.to(device))
    size_mb = params * 4 / 1e6 # Approximation

    return {
        'rmse': rmse,
        'flops': flops,
        'params': params,
        'size_mb': size_mb
    }


# --- Training Function (Modified from your ResNet version for MLP/Regression) ---
def train_mlp_model(model: nn.Module, train_loader: DataLoader, val_loader: DataLoader,
                 criterion: nn.Module, optimizer: torch.optim.Optimizer, scheduler: Optional[torch.optim.lr_scheduler._LRScheduler],
                 device: torch.device, num_epochs: int, patience: int = 10,
                 model_save_path: str = "temp_best_model.pth") -> nn.Module:
    """Train with per-epoch validation and early stopping; return the best model.

    Args:
        model: Network to train (updated in place).
        train_loader: Batches used for optimisation.
        val_loader: Batches used to compute the validation loss.
        criterion: Loss applied to ``(outputs, targets)``.
        optimizer: Optimiser stepped once per training batch.
        scheduler: Optional LR scheduler. ``ReduceLROnPlateau`` is stepped with
            the validation loss, anything else without arguments.
        device: Device batches are moved to.
        num_epochs: Maximum number of epochs.
        patience: Epochs without validation improvement before stopping.
        model_save_path: Currently unused by this function.

    Returns:
        ``model``, with the weights of the epoch that had the lowest
        validation loss restored (if any epoch improved on infinity).
    """
    best_val_loss = float('inf')
    best_model_state = None
    epochs_no_improve = 0

    print(f"Starting training on {device} with patience={patience}")

    for epoch_idx in range(num_epochs):
        epoch_label = epoch_idx + 1

        # ---- optimisation pass ----
        model.train()
        train_total = 0.0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a per-sample mean.
            train_total += batch_loss.item() * batch_x.size(0)
        epoch_train_loss = train_total / len(train_loader.dataset)

        # ---- validation pass ----
        model.eval()
        val_total = 0.0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)
                batch_loss = criterion(model(batch_x), batch_y)
                val_total += batch_loss.item() * batch_x.size(0)
        epoch_val_loss = val_total / len(val_loader.dataset)

        current_lr = optimizer.param_groups[0]['lr']
        print(f"Epoch {epoch_label}/{num_epochs}: Train Loss={epoch_train_loss:.4f}, Val Loss={epoch_val_loss:.4f}, LR={current_lr:.1e}")

        # ---- bookkeeping for early stopping ----
        improved = epoch_val_loss < best_val_loss
        if not improved:
            epochs_no_improve += 1
            print(f"Val loss did not improve for {epochs_no_improve} epoch(s).")
        else:
            best_val_loss = epoch_val_loss
            epochs_no_improve = 0
            # Deep copy: state_dict() hands out references to live tensors.
            best_model_state = copy.deepcopy(model.state_dict())
            print(f"*** New best validation loss: {best_val_loss:.4f} (Epoch {epoch_label}) ***")

        if epochs_no_improve >= patience:
            print(f"Early stopping triggered after {patience} epochs without improvement.")
            break

        # ---- learning-rate schedule (skipped on the epoch that stops early) ----
        if not scheduler:
            continue
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(epoch_val_loss)
        else:
            scheduler.step()

    print(f"Training finished. Best validation loss: {best_val_loss:.4f}")
    if not best_model_state:
        print("Warning: No improvement in validation loss observed. Returning model from last epoch.")
    else:
        model.load_state_dict(best_model_state)
        print("Loaded best model state based on validation loss.")

    return model

### Pruning Function (Adapted for MLP)

In [15]:
def prune_mlp_model(model_to_prune: nn.Module, example_input: torch.Tensor, strategy: Dict,
                    target_sparsity: float = 0.5, iterative_steps: int = 1, # Note: iterative_steps > 1 handled below
                    importance: Optional[tp.importance.Importance] = None) -> nn.Module:
    """Prune the linear layers of an MLP in place with a torch-pruning pruner.

    The last ``nn.Linear`` in ``model_to_prune.layers`` (when that attribute is
    a non-empty ``nn.ModuleList``) is treated as the output layer and excluded
    from pruning.

    Args:
        model_to_prune: Model to prune; moved to ``example_input``'s device and
            put in eval mode.
        example_input: Sample input used for graph tracing, FLOPs counting and,
            for ``TaylorImportance``, the gradient pass.
        strategy: Dict with a ``'pruner'`` class and an ``'importance'`` instance.
        target_sparsity: Value passed to the pruner as ``ch_sparsity``.
        iterative_steps: Passed to the pruner as ``iterative_steps``.
        importance: Optional importance instance that overrides
            ``strategy['importance']``.

    Returns:
        The same model object after pruning, with ``requires_grad`` re-enabled
        on every parameter.

    Raises:
        Exception: Whatever the pruner constructor, the Taylor backward pass or
            ``pruner.step()`` raises is re-raised after a diagnostic print.
    """
    device = example_input.device # Get device from input
    model_to_prune.eval().to(device) # Ensure model is on correct device and in eval mode

    # --- Determine ignored layer ---
    ignored_layers = []
    if hasattr(model_to_prune, 'layers') and isinstance(model_to_prune.layers, nn.ModuleList) and len(model_to_prune.layers) > 0:
        # Assuming the last layer in the ModuleList is the output layer
        output_layer = model_to_prune.layers[-1]
        if isinstance(output_layer, nn.Linear): # Check if it's actually Linear
            ignored_layers.append(output_layer)
            print(f"Ignoring output layer during pruning: {output_layer}")
        else:
            print(f"Warning: Last layer is not nn.Linear ({type(output_layer)}), not ignoring automatically.")

    # --- Select Importance Metric ---
    # An explicitly supplied importance wins over the one stored in the strategy.
    importance_metric = strategy['importance'] if importance is None else importance
    importance_name = type(importance_metric).__name__
    print(f"Using Importance Metric: {importance_name}")

    # --- Instantiate Pruner ---
    pruner_class = strategy['pruner']
    print(f"Using Pruner Class: {pruner_class.__name__}")

    # NOTE(review): `ch_sparsity` is the keyword this notebook was written
    # against; confirm it is still accepted by the installed torch-pruning.
    try:
        pruner = pruner_class(
            model=model_to_prune,
            example_inputs=example_input.to(device), # Ensure input on correct device
            importance=importance_metric,
            iterative_steps=iterative_steps,
            ch_sparsity=target_sparsity,   # Sparsity target
            root_module_types=[nn.Linear], # Focus on Linear layers
            ignored_layers=ignored_layers,
        )
    except TypeError as e:
        print(f"Warning: Error initializing pruner {pruner_class.__name__} with standard args: {e}")
        print("Attempting initialization with fewer args...")
        # Fallback for potentially simpler pruner constructors
        try:
            pruner = pruner_class(
                model=model_to_prune,
                example_inputs=example_input.to(device),
                importance=importance_metric,
                ch_sparsity=target_sparsity,
                ignored_layers=ignored_layers,
            )
        except Exception:
            print(f"ERROR: Could not initialize pruner {pruner_class.__name__}")
            raise

    # --- Calculate Initial State ---
    flops_before, params_before = calculate_flops_params(model_to_prune, example_input.to(device))
    print(f"State Before Pruning: FLOPs={flops_before/1e6:.3f}M, Params={params_before/1e6:.3f}M")
    # Report the importance actually in use, not the strategy default.
    print(f"Starting pruning with {importance_name}, Target Sparsity: {target_sparsity:.2f}")

    # --- Special handling BEFORE pruner.step() for specific Importance types ---
    if isinstance(importance_metric, tp.importance.TaylorImportance):
        # TaylorImportance reads parameter gradients, so produce some first.
        model_to_prune.train()
        input_on_device = example_input.to(device)
        output = model_to_prune(input_on_device)
        # Dummy regression loss against a zero target.
        loss = torch.sum(output**2)
        model_to_prune.zero_grad() # Zero gradients before backward
        try:
            loss.backward()
            print("Calculated gradients for TaylorImportance.")
        except Exception as e:
            print(f"ERROR: Could not perform backward pass for TaylorImportance: {e}")
            raise
        finally:
            model_to_prune.eval() # Always switch back to eval mode

    # --- Execute Pruning ---
    try:
        pruner.step() # Execute pruning based on the pruner's internal logic
    except AttributeError as e:
        # Catch cases where 'step' might not be the correct method or has issues
        print(f"ERROR: During pruner.step() for {pruner_class.__name__}: {e}")
        print("This pruner might require interactive=True or have a different API.")
        raise
    except Exception as e:
        print(f"ERROR: An unexpected error occurred during pruner.step() for {pruner_class.__name__}: {e}")
        raise

    # --- Calculate Final State ---
    flops_after, params_after = calculate_flops_params(model_to_prune, example_input.to(device))
    print(f"Pruning finished. Final FLOPs: {flops_after/1e6:.3f}M, Params: {params_after/1e6:.3f}M")
    # Guard the percentages: a zero baseline would otherwise raise ZeroDivisionError.
    flops_reduction = (flops_before - flops_after) / flops_before * 100 if flops_before else 0.0
    params_reduction = (params_before - params_after) / params_before * 100 if params_before else 0.0
    print(f"FLOPs Reduction: {flops_reduction:.2f}%")
    print(f"Params Reduction: {params_reduction:.2f}%")

    # Ensure parameters require gradients for fine-tuning
    num_params_fixed = 0
    for name, param in model_to_prune.named_parameters():
        if not param.requires_grad:
            param.requires_grad = True
            num_params_fixed += 1
    if num_params_fixed > 0:
        print(f"Set requires_grad=True for {num_params_fixed} parameters.")

    return model_to_prune

### Comparison and Plotting (Adapted for Regression)

In [11]:
import os # Ensure imported
from typing import Dict # Ensure imported

def compare_results_and_plot_rmse(results: Dict[str, Dict[str, float]], output_dir: str):
    """Print a comparison table and save one bar chart per metric.

    Args:
        results: Maps a strategy name to its metrics dict. The keys read are
            ``'flops'``, ``'params'``, ``'size_mb'`` and ``'rmse'``; missing
            ones are tolerated. An ``'initial'`` entry, when present, is
            plotted first and drawn as a dashed reference line.
        output_dir: Directory (created if needed) that receives
            ``mlp_<metric>_comparison.png`` files.
    """
    print("\n=== Pruning Strategy Comparison (RMSE) ===")
    print(f"{'Strategy':<12} | {'FLOPs':<12} | {'Params':<10} | {'Size (MB)':<10} | {'RMSE':<10}")
    print("-" * 65)
    # Sort strategies by RMSE (lower is better); entries without RMSE go last.
    sorted_strategies = sorted(results.keys(), key=lambda s: results[s].get('rmse', float('inf')))

    for strategy in sorted_strategies:
        metrics = results[strategy]
        # Build "<number>M" first, then pad, so the unit stays next to the number.
        flops_cell = f"{metrics.get('flops', 0) / 1e6:.2f}M"
        params_cell = f"{metrics.get('params', 0) / 1e6:.2f}M"
        size_mb_val = metrics.get('size_mb', 0)
        rmse_val = metrics.get('rmse', float('nan'))
        print(f"{strategy:<12} | {flops_cell:<12} | {params_cell:<10} | {size_mb_val:<10.2f} | {rmse_val:<10.4f}")

    os.makedirs(output_dir, exist_ok=True)

    # Put 'initial' first only when it was actually evaluated; adding it
    # unconditionally would plot an empty NaN bar.
    plot_strategies = [s for s in sorted_strategies if s != 'initial']
    if 'initial' in results:
        plot_strategies.insert(0, 'initial')
    if not plot_strategies:
        print("No results to plot.")
        return

    metrics_to_plot = ['flops', 'params', 'size_mb', 'rmse']
    titles = {'flops': 'FLOPs Comparison', 'params': 'Parameters Comparison',
              'size_mb': 'Model Size (MB) Comparison', 'rmse': 'RMSE Comparison (Lower is Better)'}
    y_labels = {'flops': 'FLOPs (Millions)', 'params': 'Parameters (Millions)',
                'size_mb': 'Size (MB)', 'rmse': 'RMSE'}

    colors = plt.cm.viridis(np.linspace(0, 1, len(plot_strategies)))

    for metric_name in metrics_to_plot:
        if not any(metric_name in results[s] for s in plot_strategies):
            print(f"Skipping plot for '{metric_name}', data not found in results.")
            continue

        # FLOPs and parameter counts are shown in millions.
        scale = 1e6 if metric_name in ('flops', 'params') else 1.0
        label_format = '.4f' if metric_name == 'rmse' else '.2f'
        values = [results[s].get(metric_name, np.nan) / scale for s in plot_strategies]

        plt.figure(figsize=(12, 6))
        bars = plt.bar(plot_strategies, values, color=colors)
        plt.xlabel('Strategy')
        plt.ylabel(y_labels[metric_name])
        plt.title(titles[metric_name])
        plt.xticks(rotation=45, ha='right')

        # Value label on top of each bar that has a value.
        for bar in bars:
            yval = bar.get_height()
            if not np.isnan(yval):
                plt.text(bar.get_x() + bar.get_width()/2., yval, f'{yval:{label_format}}',
                         ha='center', va='bottom', fontsize=9)

        # Dashed reference line at the unpruned model's value.
        initial_raw = results.get('initial', {}).get(metric_name, np.nan)
        if not np.isnan(initial_raw):
            initial_value = initial_raw / scale
            initial_line_label = f"Initial ({initial_value:{label_format}})"
            plt.axhline(y=initial_value, color='r', linestyle='--', label=initial_line_label)
            plt.legend()

        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, f'mlp_{metric_name}_comparison.png'))
        plt.close()

    print(f"Comparison plots saved to {output_dir}")

### Main Workflow Configuration

In [8]:
# --- Configuration ---
DATA_DIR = './data/CMaps/' # <<< IMPORTANT: Set path to your NASA CMaps data directory
OUTPUT_DIR = './output_mlp_pruning/fd001/'
TRAIN_FILE = 'train_FD001.txt'
TEST_FILE = 'test_FD001.txt'
TEST_RUL_FILE = 'RUL_FD001.txt'

# Create output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Reproducibility: weight initialisation, dropout, DataLoader shuffling and
# RandomImportance all draw from torch's RNG, so seed it once up front.
# (Seeding alone does not guarantee bit-identical results on every GPU.)
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Model Config
MLP_HIDDEN_UNITS = [128, 64, 32] # Example MLP structure
DROPOUT_RATE = 0.2

# Training Config
INITIAL_TRAIN_EPOCHS = 100 # Train longer initially
FINETUNE_EPOCHS = 100    # Fine-tune potentially as long
BATCH_SIZE = 128
INITIAL_LR = 0.001
FINETUNE_LR = 0.0005
PATIENCE = 15 # Patience for early stopping
VAL_SPLIT_RATIO = 0.2 # Use 20% of training engines for validation

# Pruning Config
PRUNING_TARGET_SPARSITY = 0.5 # Target 50% sparsity
PRUNING_ITERATIVE_STEPS = 1 # For structured pruning, 1 step is common
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pruning strategies (can reuse from ResNet example).
# Each entry pairs a pruner class with the importance criterion it ranks by.
# Note: BNScalePruner/GroupNormPruner less applicable to MLP without BatchNorm layers
pruning_strategies = {
    'magnitude': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.MagnitudeImportance(p=2)},
    #'bn_scale': {'pruner': tp.pruner.BNScalePruner, 'importance': tp.importance.BNScaleImportance()}, # If you add BN layers
    'random': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.RandomImportance()},
    'Taylor': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.TaylorImportance()},
    #'Hessian': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.GroupHessianImportance()}, # Slow, requires grads
    'lamp': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.LAMPImportance(p=2)},
    #'geometry': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.FPGMImportance()} # More geometric
}

### Data Loading and Preparation Execution

In [9]:
# --- Load and Prepare Data ---
train_df_norm, test_df_norm, test_rul_df, scaler, feature_cols = prepare_cmapss_data(
    DATA_DIR, TRAIN_FILE, TEST_FILE, TEST_RUL_FILE
)

# The network's input width follows from however many features survived cleaning.
INPUT_SIZE = len(feature_cols)
print(f"MLP Input Size determined as: {INPUT_SIZE}")

# --- Split Training Data into Train/Validation (by engine unit) ---
# Whole engines go to one side or the other, so no engine's trajectory is
# shared between training and validation.
train_units = train_df_norm['unit_number'].unique()
np.random.seed(42) # For reproducible split
np.random.shuffle(train_units)
split_idx = int(len(train_units) * (1 - VAL_SPLIT_RATIO))
train_unit_ids, val_unit_ids = train_units[:split_idx], train_units[split_idx:]

in_train = train_df_norm['unit_number'].isin(train_unit_ids)
in_val = train_df_norm['unit_number'].isin(val_unit_ids)
df_train_split = train_df_norm[in_train]
df_val_split = train_df_norm[in_val]

print(f"Training data split: {len(df_train_split)} samples ({len(train_unit_ids)} engines)")
print(f"Validation data split: {len(df_val_split)} samples ({len(val_unit_ids)} engines)")

# --- Prepare MLP Inputs/Outputs ---
# Training and validation: every time step is a sample.
X_train, y_train = df_train_split[feature_cols].values, df_train_split['RUL'].values
X_val, y_val = df_val_split[feature_cols].values, df_val_split['RUL'].values

# Test: one sample per engine, taken from that engine's LAST recorded row.
test_engine_ids = test_df_norm['unit_number'].unique()
X_test = np.array([
    test_df_norm[test_df_norm['unit_number'] == eng_id][feature_cols].iloc[-1].values
    for eng_id in test_engine_ids
])
# Targets come straight from the RUL file; slice in case it has extra lines.
y_test = test_rul_df['RUL'].values[:len(X_test)]

print(f"Prepared MLP data shapes:")
print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"X_val: {X_val.shape}, y_val: {y_val.shape}")
print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")


# --- Create DataLoaders ---
train_dataset = CMAPSS_MLP_Dataset(X_train, y_train)
val_dataset = CMAPSS_MLP_Dataset(X_val, y_val)
test_dataset = CMAPSS_MLP_Dataset(X_test, y_test)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Single random sample used for pruning graph tracing and FLOPs counting.
example_input_tensor = torch.randn(1, INPUT_SIZE).to(DEVICE)

--- Preparing Training Data ---

--- Preparing Test Data ---
Columns with std < 0.02 (potential removal): ['op_setting_1', 'op_setting_2', 'op_setting_3', 'sensor_1', 'sensor_5', 'sensor_6', 'sensor_10', 'sensor_16', 'sensor_18', 'sensor_19']

Using Features: ['sensor_2', 'sensor_3', 'sensor_4', 'sensor_7', 'sensor_8', 'sensor_9', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_17', 'sensor_20', 'sensor_21']

--- Normalizing Data ---
MLP Input Size determined as: 14
Training data split: 16340 samples (80 engines)
Validation data split: 4291 samples (20 engines)
Prepared MLP data shapes:
X_train: (16340, 14), y_train: (16340,)
X_val: (4291, 14), y_val: (4291,)
X_test: (100, 14), y_test: (100,)


### Main Pruning Workflow Execution

In [16]:
# --- Main Workflow ---
all_results = {}
initial_model_path = os.path.join(OUTPUT_DIR, "mlp_initial.pth")
model = None # Define model variable

# --- 1. Initial Training ---
if not os.path.exists(initial_model_path):
    print("\n--- Training Initial MLP Model ---")
    model = MLPmodel(layer_units=MLP_HIDDEN_UNITS, input_size=INPUT_SIZE, dropout_rate=DROPOUT_RATE).to(DEVICE)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=INITIAL_LR)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=int(PATIENCE/2), verbose=True)

    model = train_mlp_model(
        model=model, train_loader=train_loader, val_loader=val_loader,
        criterion=criterion, optimizer=optimizer, scheduler=scheduler,
        device=DEVICE, num_epochs=INITIAL_TRAIN_EPOCHS, patience=PATIENCE
    )
    save_model_state(model, initial_model_path)
else:
    print(f"\n--- Loading Initial MLP Model from {initial_model_path} ---")
    # Instantiate model first, then load state
    model = MLPmodel(layer_units=MLP_HIDDEN_UNITS, input_size=INPUT_SIZE, dropout_rate=DROPOUT_RATE).to(DEVICE)
    model = load_model_state(model, initial_model_path, DEVICE)

# --- 2. Evaluate Initial Model ---
print("\n--- Evaluating Initial MLP Model on Test Set ---")
initial_metrics = evaluate_model_rmse(model, test_loader, DEVICE, example_input_tensor)
all_results['initial'] = initial_metrics

# --- 3. Pruning and Fine-tuning Loop ---
# For every configured strategy: restore the initial weights, prune, save the
# pruned model, fine-tune it, evaluate on the test set, record the metrics in
# all_results[strategy_name], and save the fine-tuned model.
for strategy_name, strategy_details in pruning_strategies.items():
    print(f"\n--- Pruning MLP with Strategy: {strategy_name} ---")
    # Load a fresh copy of the initial model for each strategy so that no
    # strategy starts from a model another strategy has already pruned.
    pruned_model = MLPmodel(layer_units=MLP_HIDDEN_UNITS, input_size=INPUT_SIZE, dropout_rate=DROPOUT_RATE).to(DEVICE)
    pruned_model = load_model_state(pruned_model, initial_model_path, DEVICE)

    # Only the 'Taylor' strategy gets a pre-built (stateful) importance object;
    # every other strategy passes None through to prune_mlp_model.
    current_importance = None
    if strategy_name == 'Taylor':
        # Gradient calculation was moved inside prune_mlp_model (per the
        # original note), so only the importance object is created here.
        current_importance = tp.importance.TaylorImportance()

    # Prune the model. A failure is reported and the strategy is skipped, which
    # means it gets no entry in all_results and no files are saved for it.
    try:
        pruned_model = prune_mlp_model(
            model_to_prune=pruned_model,
            example_input=example_input_tensor,
            strategy=strategy_details,
            target_sparsity=PRUNING_TARGET_SPARSITY,
            iterative_steps=PRUNING_ITERATIVE_STEPS,
            importance=current_importance,  # Pass stateful importance if created
        )
    except Exception as e:
        print(f"!!!!!! Pruning failed for strategy {strategy_name}: {e} !!!!!!")
        continue  # Skip to next strategy if pruning fails

    pruned_model_path = os.path.join(OUTPUT_DIR, f"mlp_{strategy_name}_pruned.pth")
    save_model_state(pruned_model, pruned_model_path)

    # Fine-tune the pruned model
    print(f"\n--- Fine-tuning MLP after {strategy_name} Pruning ---")
    # Ensure requires_grad is True on every parameter before creating the optimizer
    for param in pruned_model.parameters():
        param.requires_grad = True

    optimizer_ft = torch.optim.Adam(pruned_model.parameters(), lr=FINETUNE_LR)
    # The scheduler reacts after half the early-stopping patience. The
    # `verbose` argument is intentionally not passed: it is deprecated since
    # PyTorch 2.2, and the per-epoch training log already prints the current LR.
    scheduler_ft = ReduceLROnPlateau(optimizer_ft, mode='min', factor=0.5, patience=int(PATIENCE / 2))
    criterion_ft = nn.MSELoss()

    fine_tuned_model = train_mlp_model(
        model=pruned_model, train_loader=train_loader, val_loader=val_loader,
        criterion=criterion_ft, optimizer=optimizer_ft, scheduler=scheduler_ft,
        device=DEVICE, num_epochs=FINETUNE_EPOCHS, patience=PATIENCE
    )

    # Evaluate the fine-tuned model
    print(f"\n--- Evaluating Fine-tuned MLP ({strategy_name}) on Test Set ---")
    final_metrics = evaluate_model_rmse(fine_tuned_model, test_loader, DEVICE, example_input_tensor)
    all_results[strategy_name] = final_metrics

    # Save the final fine-tuned model
    final_model_path = os.path.join(OUTPUT_DIR, f"mlp_{strategy_name}_final.pth")
    save_model_state(fine_tuned_model, final_model_path)


# --- 4. Compare Results ---
# Hand everything collected in all_results (the 'initial' entry plus one entry
# per strategy whose pruning did not fail) to the comparison helper.
# OUTPUT_DIR is presumably where the helper writes its plot(s) -- TODO confirm
# against compare_results_and_plot_rmse.
print("\n--- Final Results Comparison ---")
compare_results_and_plot_rmse(all_results, OUTPUT_DIR)

print("\nMLP Pruning Workflow Completed!")


--- Training Initial MLP Model ---
Starting training on cuda with patience=15




Epoch 1/100: Train Loss=4277.9776, Val Loss=831.4228, LR=1.0e-03
*** New best validation loss: 831.4228 (Epoch 1) ***
Epoch 2/100: Train Loss=836.0778, Val Loss=434.3090, LR=1.0e-03
*** New best validation loss: 434.3090 (Epoch 2) ***
Epoch 3/100: Train Loss=699.9373, Val Loss=421.8724, LR=1.0e-03
*** New best validation loss: 421.8724 (Epoch 3) ***
Epoch 4/100: Train Loss=664.5800, Val Loss=395.3857, LR=1.0e-03
*** New best validation loss: 395.3857 (Epoch 4) ***
Epoch 5/100: Train Loss=646.5040, Val Loss=396.7813, LR=1.0e-03
Val loss did not improve for 1 epoch(s).
Epoch 6/100: Train Loss=636.8023, Val Loss=385.8818, LR=1.0e-03
*** New best validation loss: 385.8818 (Epoch 6) ***
Epoch 7/100: Train Loss=618.2175, Val Loss=378.6449, LR=1.0e-03
*** New best validation loss: 378.6449 (Epoch 7) ***
Epoch 8/100: Train Loss=618.8014, Val Loss=389.7877, LR=1.0e-03
Val loss did not improve for 1 epoch(s).
Epoch 9/100: Train Loss=598.5212, Val Loss=376.3227, LR=1.0e-03
*** New best validation



Epoch 1/100: Train Loss=1438.0322, Val Loss=396.6622, LR=5.0e-04
*** New best validation loss: 396.6622 (Epoch 1) ***
Epoch 2/100: Train Loss=594.2083, Val Loss=365.4766, LR=5.0e-04
*** New best validation loss: 365.4766 (Epoch 2) ***
Epoch 3/100: Train Loss=581.5642, Val Loss=362.1640, LR=5.0e-04
*** New best validation loss: 362.1640 (Epoch 3) ***
Epoch 4/100: Train Loss=586.5283, Val Loss=358.3324, LR=5.0e-04
*** New best validation loss: 358.3324 (Epoch 4) ***
Epoch 5/100: Train Loss=591.5512, Val Loss=356.9907, LR=5.0e-04
*** New best validation loss: 356.9907 (Epoch 5) ***
Epoch 6/100: Train Loss=587.9350, Val Loss=357.0493, LR=5.0e-04
Val loss did not improve for 1 epoch(s).
Epoch 7/100: Train Loss=598.0355, Val Loss=356.1322, LR=5.0e-04
*** New best validation loss: 356.1322 (Epoch 7) ***
Epoch 8/100: Train Loss=582.0053, Val Loss=358.2220, LR=5.0e-04
Val loss did not improve for 1 epoch(s).
Epoch 9/100: Train Loss=573.7702, Val Loss=356.3579, LR=5.0e-04
Val loss did not improv



Epoch 1/100: Train Loss=3234.1270, Val Loss=913.2397, LR=5.0e-04
*** New best validation loss: 913.2397 (Epoch 1) ***
Epoch 2/100: Train Loss=988.9687, Val Loss=438.1702, LR=5.0e-04
*** New best validation loss: 438.1702 (Epoch 2) ***
Epoch 3/100: Train Loss=858.1054, Val Loss=403.5121, LR=5.0e-04
*** New best validation loss: 403.5121 (Epoch 3) ***
Epoch 4/100: Train Loss=848.1945, Val Loss=402.7371, LR=5.0e-04
*** New best validation loss: 402.7371 (Epoch 4) ***
Epoch 5/100: Train Loss=829.8641, Val Loss=388.6739, LR=5.0e-04
*** New best validation loss: 388.6739 (Epoch 5) ***
Epoch 6/100: Train Loss=812.6633, Val Loss=388.0064, LR=5.0e-04
*** New best validation loss: 388.0064 (Epoch 6) ***
Epoch 7/100: Train Loss=798.8382, Val Loss=389.9772, LR=5.0e-04
Val loss did not improve for 1 epoch(s).
Epoch 8/100: Train Loss=807.3219, Val Loss=382.3556, LR=5.0e-04
*** New best validation loss: 382.3556 (Epoch 8) ***
Epoch 9/100: Train Loss=800.3400, Val Loss=382.4948, LR=5.0e-04
Val loss di



Epoch 1/100: Train Loss=1924.6356, Val Loss=510.5144, LR=5.0e-04
*** New best validation loss: 510.5144 (Epoch 1) ***
Epoch 2/100: Train Loss=726.7381, Val Loss=381.5013, LR=5.0e-04
*** New best validation loss: 381.5013 (Epoch 2) ***
Epoch 3/100: Train Loss=672.2033, Val Loss=370.7648, LR=5.0e-04
*** New best validation loss: 370.7648 (Epoch 3) ***
Epoch 4/100: Train Loss=652.6088, Val Loss=368.9616, LR=5.0e-04
*** New best validation loss: 368.9616 (Epoch 4) ***
Epoch 5/100: Train Loss=641.9424, Val Loss=369.1669, LR=5.0e-04
Val loss did not improve for 1 epoch(s).
Epoch 6/100: Train Loss=646.9728, Val Loss=370.4775, LR=5.0e-04
Val loss did not improve for 2 epoch(s).
Epoch 7/100: Train Loss=637.8505, Val Loss=373.9305, LR=5.0e-04
Val loss did not improve for 3 epoch(s).
Epoch 8/100: Train Loss=631.7592, Val Loss=367.1711, LR=5.0e-04
*** New best validation loss: 367.1711 (Epoch 8) ***
Epoch 9/100: Train Loss=619.2146, Val Loss=367.6930, LR=5.0e-04
Val loss did not improve for 1 epoc



Epoch 2/100: Train Loss=594.4565, Val Loss=365.9210, LR=5.0e-04
*** New best validation loss: 365.9210 (Epoch 2) ***
Epoch 3/100: Train Loss=590.9945, Val Loss=360.4965, LR=5.0e-04
*** New best validation loss: 360.4965 (Epoch 3) ***
Epoch 4/100: Train Loss=585.2041, Val Loss=360.5169, LR=5.0e-04
Val loss did not improve for 1 epoch(s).
Epoch 5/100: Train Loss=589.7443, Val Loss=357.6806, LR=5.0e-04
*** New best validation loss: 357.6806 (Epoch 5) ***
Epoch 6/100: Train Loss=584.9153, Val Loss=356.8022, LR=5.0e-04
*** New best validation loss: 356.8022 (Epoch 6) ***
Epoch 7/100: Train Loss=588.0763, Val Loss=357.6678, LR=5.0e-04
Val loss did not improve for 1 epoch(s).
Epoch 8/100: Train Loss=589.5545, Val Loss=355.8854, LR=5.0e-04
*** New best validation loss: 355.8854 (Epoch 8) ***
Epoch 9/100: Train Loss=576.8014, Val Loss=360.3429, LR=5.0e-04
Val loss did not improve for 1 epoch(s).
Epoch 10/100: Train Loss=581.2550, Val Loss=356.3161, LR=5.0e-04
Val loss did not improve for 2 epoc