# Pruning Experiment for LSTM on Energy Prediction Dataset

In [2]:
import torch
import torch.nn as nn
import torch_pruning as tp
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from torch.utils.data import Dataset, DataLoader
from torch import optim
import os
import time
import copy

### 1. LSTM Model Definition

In [2]:
class TimeSeriesLSTM(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, num_layers: int, output_size: int, dropout_rate: float = 0.5):
        super(TimeSeriesLSTM, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        self.lstm.flatten_parameters()
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = out[:, -1, :]
        out = self.dropout(out)
        out = self.fc(out)
        return out

In [16]:
class TimeSeriesLSTM_MoreLayers(nn.Module): # Renamed for clarity
    def __init__(self, input_size, hidden_size, num_layers,
                 intermediate_size_1=32, # Size for the first intermediate layer
                 intermediate_size_2=24, # Size for the second
                 intermediate_size_3=16, # Size for the third
                 output_size=1,
                 dropout_prob=0.2):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, dropout=dropout_prob if num_layers > 1 else 0)

        # Define the intermediate Linear layers
        # Layer 1
        self.intermediate_fc1 = nn.Linear(hidden_size, intermediate_size_1)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout_prob)

        # Layer 2
        self.intermediate_fc2 = nn.Linear(intermediate_size_1, intermediate_size_2)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout_prob)

        # Layer 3
        self.intermediate_fc3 = nn.Linear(intermediate_size_2, intermediate_size_3)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(dropout_prob)

        # Final output layer
        self.fc_final = nn.Linear(intermediate_size_3, output_size)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        last_time_step_out = lstm_out[:, -1, :] # Shape: (batch, hidden_size)

        # Pass through intermediate layers
        x = self.intermediate_fc1(last_time_step_out)
        x = self.relu1(x)
        x = self.dropout1(x) # Shape: (batch, intermediate_size_1)

        x = self.intermediate_fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x) # Shape: (batch, intermediate_size_2)

        x = self.intermediate_fc3(x)
        x = self.relu3(x)
        x = self.dropout3(x) # Shape: (batch, intermediate_size_3)

        final_out = self.fc_final(x) # Shape: (batch, output_size)
        return final_out

#### Seed block

In [3]:
class IntermediateBlock(nn.Module):
    def __init__(self, in_features, out_features, dropout_prob=0.2):
        """
        A block consisting of a Linear layer, ReLU activation, and Dropout.
        Args:
            in_features (int): Number of input features to the Linear layer.
            out_features (int): Number of output features from the Linear layer.
            dropout_prob (float): Dropout probability.
        """
        super(IntermediateBlock, self).__init__()
        self.fc = nn.Linear(in_features, out_features)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, x):
        x = self.fc(x)
        x = self.relu(x)
        x = self.dropout(x)
        return x

#### LSTM with Intermidiate Blocks

In [4]:
class TimeSeriesLSTM_WithBlocks(nn.Module):
    def __init__(self, input_size, lstm_hidden_size, num_lstm_layers,
                 block_configs, # List of output sizes for each intermediate block
                 output_size=1,
                 lstm_dropout_prob=0.2, # Dropout for LSTM layers if num_layers > 1
                 block_dropout_prob=0.2): # Dropout within each IntermediateBlock
        """
        LSTM model followed by a sequence of IntermediateBlocks.
        Args:
            input_size (int): Number of features per time step for LSTM.
            lstm_hidden_size (int): Hidden size of the LSTM layer(s).
            num_lstm_layers (int): Number of stacked LSTM layers.
            block_configs (list of int): A list where each integer is the
                                         'out_features' for an IntermediateBlock.
                                         e.g., [48, 32, 24] for three blocks.
            output_size (int): Final output dimension (e.g., 1 for regression).
            lstm_dropout_prob (float): Dropout probability for LSTM (if num_layers > 1).
            block_dropout_prob (float): Dropout probability for each IntermediateBlock.
        """
        super(TimeSeriesLSTM_WithBlocks, self).__init__()
        self.input_size = input_size
        self.lstm_hidden_size = lstm_hidden_size
        self.num_lstm_layers = num_lstm_layers
        self.output_size = output_size

        self.lstm = nn.LSTM(input_size, lstm_hidden_size, num_lstm_layers,
                            batch_first=True,
                            dropout=lstm_dropout_prob if num_lstm_layers > 1 else 0)

        # Create intermediate blocks dynamically
        self.intermediate_blocks = nn.ModuleList()
        current_in_features = lstm_hidden_size # Input to the first block is LSTM's output

        for block_out_features in block_configs:
            self.intermediate_blocks.append(
                IntermediateBlock(current_in_features, block_out_features, block_dropout_prob)
            )
            current_in_features = block_out_features # Output of this block is input to the next

        # Final output layer takes input from the last intermediate block
        self.fc_final = nn.Linear(current_in_features, output_size)

    def forward(self, x):
        # LSTM part
        lstm_out, _ = self.lstm(x)
        # Use the output from the last time step of the LSTM
        x = lstm_out[:, -1, :] # Shape: (batch, lstm_hidden_size)

        # Pass through intermediate blocks
        for block in self.intermediate_blocks:
            x = block(x)
        # After loop, x shape: (batch, block_configs[-1]) if block_configs is not empty
        # or (batch, lstm_hidden_size) if block_configs is empty

        # Final output
        final_out = self.fc_final(x)
        return final_out

### 2. Data Handling for Appliances Energy Dataset

#### --- Data Configuration ---

In [5]:
DATASET_PATH = './data/energydata_complete.csv' # ADJUST PATH AS NEEDED
SEQUENCE_LENGTH = 6 * 12 # Use 12 hours of past data (12 hours * 6 samples/hour)
TARGET_COLUMN = 'Appliances'
# Features to use (excluding target, date, and others)
FEATURE_COLUMNS = [
    'lights', 'T1', 'RH_1', 'T2', 'RH_2', 'T3', 'RH_3', 'T4', 'RH_4', 'T5', 'RH_5',
    'T6', 'RH_6', 'T7', 'RH_7', 'T8', 'RH_8', 'T9', 'RH_9', 'T_out',
    'Press_mm_hg', 'RH_out', 'Windspeed', 'Visibility', 'Tdewpoint'
]

#### --- Helper function to create sequences ---

In [6]:
def create_sequences(input_data, target_data, seq_length):
    sequences = []
    targets = []
    # Stop seq_length steps early to ensure target data is available
    for i in range(len(input_data) - seq_length):
        sequences.append(input_data[i:i + seq_length])
        targets.append(target_data[i + seq_length])
    return np.array(sequences), np.array(targets)

#### --- Custom Dataset ---

In [7]:
class EnergyDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = torch.tensor(sequences, dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.float32).unsqueeze(1) # Target shape [N, 1]

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return self.sequences[idx], self.targets[idx]

#### --- Main Data Loading Function ---

In [8]:
def get_energy_data_loaders(
    file_path=DATASET_PATH,
    feature_cols=FEATURE_COLUMNS,
    target_col=TARGET_COLUMN,
    seq_length=SEQUENCE_LENGTH,
    batch_size=64,
    test_size=0.2,
    val_size=0.1 # Proportion of the *remaining* data after test split
    ):
    """Loads, preprocesses, scales, and creates sequences for the energy dataset."""
    print(f"Loading dataset from: {file_path}")
    try:
        # 1. Load Data & Initial Processing
        df = pd.read_csv(file_path)
        print(f"Original data shape: {df.shape}")
        df['date'] = pd.to_datetime(df['date']) # Parse date
        df = df.sort_values('date') # Ensure chronological order
        df = df.set_index('date') # Optional: use date index
        df = df[feature_cols + [target_col]].dropna() # Select columns and drop NaNs
        print(f"Data shape after selecting columns & dropping NaNs: {df.shape}")
        if df.empty:
            raise ValueError("DataFrame is empty after selecting columns and dropping NaNs.")

        # 2. Separate Features and Target
        X = df[feature_cols].values
        y = df[[target_col]].values # Keep as 2D: [N, 1]

        # 3. Splitting (Chronological)
        n_total = len(X)
        n_test = int(n_total * test_size)
        n_val = int((n_total - n_test) * val_size)
        n_train = n_total - n_test - n_val

        if n_train <= seq_length or n_val <= seq_length or n_test <= seq_length:
             raise ValueError(f"Not enough data for sequence length {seq_length} after splitting. "
                              f"Train={n_train}, Val={n_val}, Test={n_test}")


        X_train, y_train = X[:n_train], y[:n_train]
        X_val, y_val = X[n_train:n_train + n_val], y[n_train:n_train + n_val]
        X_test, y_test = X[n_train + n_val:], y[n_train + n_val:]

        print(f"Data split: Train={X_train.shape[0]}, Val={X_val.shape[0]}, Test={X_test.shape[0]}")

        # 4. Scaling
        scaler_features = MinMaxScaler()
        scaler_target = MinMaxScaler()

        X_train_scaled = scaler_features.fit_transform(X_train)
        X_val_scaled = scaler_features.transform(X_val)
        X_test_scaled = scaler_features.transform(X_test)

        y_train_scaled = scaler_target.fit_transform(y_train)
        # Flatten y for sequence creation, EnergyDataset will unsqueeze later
        y_val_scaled = scaler_target.transform(y_val)
        y_test_scaled = scaler_target.transform(y_test)

        # 5. Create Sequences
        X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train_scaled.flatten(), seq_length)
        X_val_seq, y_val_seq = create_sequences(X_val_scaled, y_val_scaled.flatten(), seq_length)
        X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test_scaled.flatten(), seq_length)

        # 6. Create Datasets and DataLoaders
        train_dataset = EnergyDataset(X_train_seq, y_train_seq)
        val_dataset = EnergyDataset(X_val_seq, y_val_seq)
        test_dataset = EnergyDataset(X_test_seq, y_test_seq)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        input_size = X_train_scaled.shape[1] # Number of features
        scalers = {'features': scaler_features, 'target': scaler_target} # Store scalers

        print(f"Data loaded successfully:")
        print(f"  Input size (features): {input_size}")
        print(f"  Sequence length: {seq_length}")
        print(f"  Train sequences: {len(train_dataset)}")
        print(f"  Validation sequences: {len(val_dataset)}")
        print(f"  Test sequences: {len(test_dataset)}")

        return train_loader, val_loader, test_loader, input_size, seq_length, scalers

    except FileNotFoundError:
        print(f"Error: Dataset file not found at {file_path}")
    except ValueError as ve:
        print(f"ValueError during data processing: {ve}")
    except Exception as e:
        print(f"An unexpected error occurred during data loading: {e}")
        import traceback
        traceback.print_exc()

    return None, None, None, 0, 0, None # Return None on error


### 3. Training Function (Regression)

In [9]:
def train_model_regression(model, train_loader, criterion, optimizer, device, num_epochs, val_loader=None, model_path_prefix="best_model", grad_clip=None):
    best_val_loss = float('inf')
    train_losses, val_losses = [], []

    print(f"Starting training for {num_epochs} epochs...")
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        epoch_start_time = time.time()

        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            if grad_clip:
                 torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=grad_clip)
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        train_losses.append(epoch_loss)
        epoch_time = time.time() - epoch_start_time
        log_msg = f"Epoch {epoch+1}/{num_epochs}: Train Loss={epoch_loss:.6f}, Time: {epoch_time:.2f}s"

        # Validation Step
        if val_loader:
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for inputs_val, labels_val in val_loader:
                    inputs_val, labels_val = inputs_val.to(device), labels_val.to(device)
                    outputs_val = model(inputs_val)
                    val_loss += criterion(outputs_val, labels_val).item() * inputs_val.size(0)
            val_loss /= len(val_loader.dataset)
            val_losses.append(val_loss)
            log_msg += f", Val Loss={val_loss:.6f}"

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                save_path = f"{model_path_prefix}_best_val.pth"
                try:
                    torch.save(model.state_dict(), save_path)
                    log_msg += f" (Best model saved)"
                except Exception as e:
                    log_msg += f" (Error saving model: {e})"
            model.train() # Switch back

        print(log_msg)

    print("Training finished.")
    return model, train_losses, val_losses

### 4. Evaluation Function (Regression)

In [10]:
def calculate_macs_params(model, example_input):
    # Ensure example_input is on the right device
    device = next(model.parameters()).device
    example_input = example_input.to(device)
    # tp.utils.count_ops_and_params can fail with LSTMs sometimes. Use torchinfo as fallback.
    try:
         macs, params = tp.utils.count_ops_and_params(model, example_input)
         return macs, params
    except Exception as e:
        print(f"Warning: torch_pruning MACs calculation failed ({e}). Falling back to torchinfo estimate.")
        try:
             from torchinfo import summary
             # Correct input format for torchinfo might be needed depending on version
             # Try with tuple (common format) or just the tensor
             input_data_shape = example_input.shape
             model_summary = summary(model, input_size=input_data_shape, verbose=0)
             params = model_summary.total_params
             macs = model_summary.total_mult_adds
             print(f"torchinfo estimate: Params={params}, MACs={macs}")
             return macs, params
        except Exception as e2:
            print(f"Warning: torchinfo calculation also failed ({e2}). Returning 0 for MACs/Params.")
            return 0, sum(p.numel() for p in model.parameters()) # Return at least params

def evaluate_model_regression(model, test_loader, example_input, device, scalers=None):
    model.eval()
    macs, params = calculate_macs_params(model, example_input) # Handles potential LSTM issues
    size_mb = params * 4 / 1e6 # Assumes float32

    all_outputs = []
    all_labels = []

    print("Evaluating on test set...")
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            all_outputs.append(outputs.cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    all_outputs = np.concatenate(all_outputs, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    outputs_eval = all_outputs
    labels_eval = all_labels

    # Inverse transform for interpretable metrics
    if scalers and 'target' in scalers:
        try:
            outputs_eval = scalers['target'].inverse_transform(all_outputs)
            labels_eval = scalers['target'].inverse_transform(all_labels)
            print("Metrics calculated on original scale.")
        except Exception as e:
            print(f"Warning: Could not inverse transform. Metrics on scaled data. Error: {e}")
    else:
         print("Warning: Target scaler not provided. Metrics calculated on scaled data.")


    mse = mean_squared_error(labels_eval, outputs_eval)
    mae = mean_absolute_error(labels_eval, outputs_eval)
    r2 = r2_score(labels_eval, outputs_eval)
    rmse = np.sqrt(mse)
    # MAPE calculation - handle potential zeros in labels_eval
    epsilon = 1e-8 # Small value to avoid division by zero
    mape = np.mean(np.abs((labels_eval - outputs_eval) / (labels_eval + epsilon))) * 100


    print(f"Evaluation Metrics: MSE={mse:.4f}, MAE={mae:.4f}, RMSE={rmse:.4f}, R2={r2:.4f}, MAPE={mape:.2f}%")

    return {
        'macs': macs,
        'params': params,
        'size_mb': size_mb,
        'mse': mse,
        'mae': mae,
        'rmse': rmse,
        'r2': r2,
        'mape': mape,
        'performance': mse # Use MSE as the primary performance metric (lower is better)
    }

### 5. Pruning Function (Adapted for LSTM)

In [12]:
def prune_lstm_model_by_threshold(
    model,
    example_input_bs1, # BS=1 for MACs/Params calc & non-grad strategies
    target_macs,
    target_params,
    strategy,
    max_iterations=50,
    step_pruning_ratio=0.1,
    gradient_batch=None, # Dict {'inputs': T, 'labels': T} with BS > 1
    prunable_modules=None # List of specific layers (e.g., [model.fc])
    ):
    device = next(model.parameters()).device

    needs_gradient = isinstance(strategy['importance'], (
        tp.importance.TaylorImportance,
        tp.importance.GroupHessianImportance
    ))
    if needs_gradient:
        if gradient_batch is None: raise ValueError(f"Strategy needs 'gradient_batch'.")
        if gradient_batch['inputs'].shape[0] <= 1: raise ValueError(f"Need BS > 1 in gradient_batch")
        gradient_inputs = gradient_batch['inputs'].to(device)
        gradient_labels = gradient_batch['labels'].to(device)

    print(f"--- Starting Pruning ({strategy['importance'].__class__.__name__}) ---")
    print(f"Target MACs: <= {target_macs:,.0f}, Target Params: <= {target_params:,.0f}")
    print(f"Step Ratio: {step_pruning_ratio:.2f}, Max Iter: {max_iterations}")

    # Determine layers to prune/ignore
    if not prunable_modules:
         prunable_modules = [m for m in model.modules() if isinstance(m, nn.Linear)]
         print(f"Defaulting to pruning nn.Linear layers: {[m.__class__.__name__ for m in prunable_modules]}")
    else:
         print(f"Targeting specific modules for pruning: {[m.__class__.__name__ for m in prunable_modules]}")

    modules_to_ignore = [m for m in model.modules() if isinstance(m, (nn.Linear, nn.Conv2d, nn.LSTM)) and m not in prunable_modules]
    root_types = list(set(type(m) for m in prunable_modules))
    if not root_types:
        print("Warning: No prunable module types identified. Pruning may fail.")
        root_types = [nn.Linear] # Fallback guess


    pruner = strategy['pruner'](
        model,
        example_input_bs1.to(device),
        importance=strategy['importance'],
        pruning_ratio=step_pruning_ratio,
        root_module_types=root_types,
        ignored_layers=modules_to_ignore,
    )

    initial_macs, initial_params = calculate_macs_params(model, example_input_bs1.to(device))
    current_macs, current_params = initial_macs, initial_params
    print(f"Initial State | MACs: {current_macs:,.0f}, Params: {current_params:,.0f}")
    # --- Remove or comment out these two lines ---
    # prunable_layer_names = [layer.__class__.__name__ for layer in pruner.get_pruning_layers()] # <--- ERROR HERE
    # print(f"Detected Prunable Layers by tp: {prunable_layer_names}")
    # --------------------------------------------
    if initial_macs == 0 and initial_params == 0: # Check if initial calc failed
         print("Warning: Initial MACs/Params calculation failed or returned zero. Cannot proceed.")
         return model # Or raise error

    # Existing check (adjust slightly): If no prunable layers are implicitly handled by pruner.step, it will just return empty groups.
    # We don't need the explicit check here anymore. The loop checking `if not pruning_groups:` will handle it.
    # if not prunable_layer_names and initial_macs > 0:
    #     print("Warning: torch-pruning did not detect any prunable layers matching criteria.")


    iteration = 0
    model.eval()
    criterion = nn.MSELoss().to(device) # Loss for gradient calculation

    while (current_macs > target_macs or current_params > target_params) and iteration < max_iterations:
        iteration += 1
        macs_before_step = current_macs
        params_before_step = current_params

        if needs_gradient:
            model.train()
            input_for_grad = gradient_inputs.detach().clone()
            labels_for_grad = gradient_labels.detach().clone()
            try:
                for param in model.parameters(): param.requires_grad_(True)
                outputs = model(input_for_grad)
                loss = criterion(outputs, labels_for_grad)
                model.zero_grad()
                loss.backward()
            except Exception as e:
                print(f"\nError during backward (Iter {iteration}): {e}. Stopping.")
                model.eval() ; break
            finally:
                model.eval() ; model.zero_grad(set_to_none=True)

        try:
            pruning_groups = list(pruner.step(interactive=True))
        except Exception as e:
            print(f"\nError during pruner.step() (Iter {iteration}): {e}. Stopping.")
            break

        if not pruning_groups:
            print(f"Iter {iteration}: No more candidates found by pruner. Stopping.")
            break

        for group in pruning_groups: group.prune()

        current_macs, current_params = calculate_macs_params(model, example_input_bs1.to(device))
        macs_reduced_pct = (macs_before_step - current_macs) / initial_macs * 100 if initial_macs > 0 else 0
        params_reduced_pct = (params_before_step - current_params) / initial_params * 100 if initial_params > 0 else 0

        print(
            f"Iter {iteration: >3}/{max_iterations} | "
            f"MACs: {current_macs:,.0f} ({macs_reduced_pct:+6.1f}% R) | "
            f"Params: {current_params:,.0f} ({params_reduced_pct:+6.1f}% R)"
        )

        if current_macs >= macs_before_step and current_params >= params_before_step:
            if not (current_macs <= target_macs and current_params <= target_params):
                print(f"Iter {iteration}: No reduction. Stopping.") ; break
            else: break # Targets met

    # --- Final Report ---
    print(f"--- Finished Pruning ({strategy['importance'].__class__.__name__}) ---")
    if iteration >= max_iterations: print(f"Warning: Reached max iterations ({max_iterations}).")
    final_macs, final_params = calculate_macs_params(model, example_input_bs1.to(device))
    macs_reduction = (initial_macs - final_macs) / initial_macs * 100 if initial_macs > 0 else 0
    params_reduction = (initial_params - final_params) / initial_params * 100 if initial_params > 0 else 0
    print(f"Initial | MACs: {initial_macs:,.0f}, Params: {initial_params:,.0f}")
    print(f"Final   | MACs: {final_macs:,.0f} (Reduction: {macs_reduction:.2f}%)")
    print(f"        | Params: {final_params:,.0f} (Reduction: {params_reduction:.2f}%)")
    print(f"Target  | MACs <= {target_macs:,.0f}, Params <= {target_params:,.0f}")
    if final_macs > target_macs or final_params > target_params:
         print("Warning: Pruning finished, but target threshold(s) were not fully met.")

    model.eval()
    return model

### 6. Comparison and Plotting Function (Regression)

In [13]:
def compare_results_and_plot_regression(results, metric_key='mse', lower_is_better=True, output_dir='output'):
    if not results: print("No results to plot.") ; return
    valid_results = {k: v for k, v in results.items() if isinstance(v, dict) and all(m in v for m in ['macs', 'params', metric_key])}
    if not valid_results: print("No valid results entries found for plotting.") ; return

    strategy_order = []
    if 'initial' in valid_results: strategy_order.append('initial')
    strategy_order.extend([s for s in valid_results if s != 'initial'])
    if not strategy_order: print("No strategies to plot."); return

    # --- Print Table ---
    metric_name = metric_key.upper()
    print(f"\n=== Pruning Strategy Comparison (Metric: {metric_name}) ===")
    header = f"{'Strategy':<15} | {'MACs (M)':<10} | {'Params (K)':<10} | {'Size (MB)':<10} | {metric_name:<12}"
    print(header); print("-" * len(header))
    for strategy in strategy_order:
        metrics = valid_results[strategy]
        macs_m = metrics['macs']/1e6 if metrics['macs'] is not None else 0
        params_k = metrics['params']/1e3 if metrics['params'] is not None else 0
        print(f"{strategy:<15} | {macs_m:<10.2f} | {params_k:<10.1f} | {metrics.get('size_mb', 0):>10.2f} | {metrics[metric_key]:>12.4f}")

    # --- Generate Bar Charts ---
    os.makedirs(output_dir, exist_ok=True)
    metrics_to_plot = ['macs', 'params', 'size_mb', metric_key]
    base_titles = {'macs': 'MACs', 'params': 'Parameters', 'size_mb': 'Model Size (MB)', metric_key: metric_name}
    plot_titles = {k: f'{v} Comparison (Lower is Better)' for k, v in base_titles.items()}
    if not lower_is_better:
        plot_titles[metric_key] = f'{base_titles[metric_key]} Comparison (Higher is Better)'

    colors = plt.cm.viridis(np.linspace(0, 1, len(strategy_order)))
    initial_metrics = valid_results.get('initial', None)

    for plot_metric in metrics_to_plot:
        if not all(plot_metric in valid_results[s] for s in strategy_order):
             print(f"Skipping plot for {plot_metric} as it's missing from some results.")
             continue
        values = [valid_results[strategy][plot_metric] for strategy in strategy_order]

        plt.figure(figsize=(12, 7))
        bars = plt.bar(strategy_order, values, color=colors)
        plt.ylabel(base_titles[plot_metric])
        plt.title(plot_titles[plot_metric])
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', linestyle='--', alpha=0.7)

        # Add value labels
        max_val = max(values) if values else 0
        for i, bar in enumerate(bars):
            yval = bar.get_height()
            label = ""
            # ... (Use the formatting logic from previous `compare_results_and_plot_regression` function) ...
            if plot_metric == 'macs': label = f'{yval/1e6:.2f}M' if yval > 1e5 else f'{yval:,.0f}' # Adjust format
            elif plot_metric == 'params': label = f'{yval/1e3:.1f}K' if yval > 100 else f'{yval:,.0f}' # Adjust format
            elif plot_metric == 'size_mb': label = f'{yval:.2f}'
            else: label = f'{yval:.4f}' # Regression metric
            plt.text(bar.get_x() + bar.get_width()/2., yval + 0.01 * max_val, label, ha='center', va='bottom', fontsize=8, rotation=0)

        if initial_metrics and plot_metric in initial_metrics:
             initial_value = initial_metrics[plot_metric]
             plt.axhline(y=initial_value, color='r', linestyle='--', label=f'Initial Value')
             plt.legend()

        plt.tight_layout()
        save_path = os.path.join(output_dir, f'lstm_energy_{plot_metric}_comparison.png')
        try:
            plt.savefig(save_path)
        except Exception as e:
             print(f"Error saving plot {save_path}: {e}")
        plt.close()
    print(f"Comparison plots saved to {output_dir}")

In [14]:
import torch
import torch.onnx

# [...] other imports

def save_model_as_onnx(model, example_input, output_path, opset_version=13):
    """Saves the PyTorch model as ONNX."""
    # Ensure model is on the same device as the example input for export
    device = example_input.device
    model.to(device)
    model.eval() # Ensure model is in evaluation mode

    print(f"Attempting to save model to ONNX: {output_path}")
    print(f"Using example input shape: {example_input.shape}")

    try:
        torch.onnx.export(
            model,
            example_input, # Must have the correct shape (batch, seq_len, features)
            output_path,
            export_params=True,       # Store the trained parameter weights inside the model file
            opset_version=opset_version,    # The ONNX version to export the model to
            do_constant_folding=True, # Optional: optimizes the model
            input_names=['input'],    # Specify names for input nodes
            output_names=['output'],  # Specify names for output nodes
            dynamic_axes={            # Allow variable batch size
                'input': {0: 'batch_size'},
                'output': {0: 'batch_size'}
            }
        )
        print(f"✅ Model successfully saved as ONNX to {output_path}")
        return True

    except Exception as e:
        print(f"❌ Failed to save model as ONNX: {e}")
        import traceback
        traceback.print_exc()
        return False

### 7. Main Workflow (LSTM Energy Prediction)

In [21]:
def main_lstm():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    lstm_hidden_size_val = 128
    # --- Configuration ---
    config = {
        'strategies': {
            'Magnitude_L1': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.MagnitudeImportance(p=1)},
            'Magnitude_L2': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.MagnitudeImportance(p=2)},
            'Random': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.RandomImportance()},
            'Taylor': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.TaylorImportance()},
            'FPGM': {'pruner': tp.pruner.MagnitudePruner, 'importance': tp.importance.FPGMImportance()},
        },
        # --- LSTM Hyperparameters ---
        'lstm_hidden_size': lstm_hidden_size_val,    # Example, adjust as needed
        'num_lstm_layers': 1,
        'lstm_dropout': 0.5,

        # --- Configuration for 7 Intermediate Blocks ---
        # Defines the output size of each block. The number of elements defines the number of blocks.
        # Example: Tapering down, starting relatively large after LSTM
        'block_output_features': [
            lstm_hidden_size_val // 2,
            lstm_hidden_size_val // 2,
            lstm_hidden_size_val // 3,
            lstm_hidden_size_val // 3,
            lstm_hidden_size_val // 4,
            lstm_hidden_size_val // 4,
            lstm_hidden_size_val // 5
        ],
        # 'block_output_features': [96, 80, 64, 48, 32, 24, 16], # Alternative: More explicit tapering

        'block_dropout': 0.6,       # Dropout for the intermediate blocks

        # --- Training ---
        'train_epochs': 50,         # May need more for a deeper model
        'fine_tune_epochs': 60,     # Also potentially more
        'batch_size': 128,
        'learning_rate_initial': 0.001, # May need adjustment for deeper model
        'learning_rate_finetune': 0.0002,
        'grad_clip': 1.0,

        # --- Paths & Pruning ---
        'output_dir': './output/lstm_energy_7blocks', # New output directory
        'pruning_max_iterations': 100, # More iterations might be needed for deeper models
        'pruning_step_ratio': 0.15,    # Adjust step ratio
        'pruning_primary_metric': 'mse',

        # --- TARGETS FOR PRUNING ---
        'target_macs_absolute': 3_000_000_000, # 3 Billion MACs
        'target_params_absolute': None,      # Use size_mb for params target, effectively
        'target_size_mb_absolute': 5.0,    # 5 MB target size

        # Sparsity targets (will be overridden if absolute targets are set and initial model is larger)
        'target_macs_sparsity': 0.5,   # Target 50% MAC reduction
        'target_params_sparsity': 0.5, # Target 50% Params reduction
    }
    os.makedirs(config['output_dir'], exist_ok=True)

    # --- Initialize Data ---
    print("Loading and processing data...")
    train_loader, val_loader, test_loader, input_size, seq_length, scalers = get_energy_data_loaders(
        batch_size=config['batch_size'], seq_length=SEQUENCE_LENGTH
    )
    if train_loader is None: return # Exit if data loading failed

    # --- Initialize Model ---
    """
    model = TimeSeriesLSTM(
        input_size=input_size, hidden_size=config['lstm_hidden_size'],
        num_layers=config['lstm_num_layers'], output_size=1,
        dropout_rate=config['lstm_dropout']
    ).to(device)

    model = TimeSeriesLSTM_MoreLayers(
        input_size=input_size,
        hidden_size=config['lstm_hidden_size'],
        num_layers=config['lstm_num_layers'],
        output_size=1,
        dropout_prob=config['lstm_dropout']
    ).to(device)
    """
    model = TimeSeriesLSTM_WithBlocks(
        input_size=input_size,
        lstm_hidden_size=config['lstm_hidden_size'],
        num_lstm_layers=config['num_lstm_layers'], # Assuming this key exists in your config
        block_configs=config.get('block_output_features'), # Pass the list of block output sizes
        output_size=1,
        lstm_dropout_prob=config.get('lstm_dropout'),
        block_dropout_prob=config.get('block_dropout')
    ).to(device)
    print("\nModel Architecture:")
    print(model)

    # --- Create Example Inputs ---
    example_input_bs1 = torch.randn(1, seq_length, input_size) .to(device) # No need to send to device yet
    example_gradient_batch = None
    try:
        grad_batch_data = next(iter(train_loader))
        if grad_batch_data[0].shape[0] > 1:
            example_gradient_batch = {'inputs': grad_batch_data[0], 'labels': grad_batch_data[1]}
            print(f"Obtained gradient batch with BS={example_gradient_batch['inputs'].shape[0]}")
        else: print("Warning: First train batch has BS=1, cannot use for gradient importance.")
    except Exception as e: print(f"Could not get gradient batch: {e}")


     # --- Initial Training ---
    initial_model_pth_path = os.path.join(config['output_dir'], "lstm_energy_initial.pth") # Last epoch
    initial_best_model_pth_path = os.path.join(config['output_dir'], "lstm_energy_initial_best_val.pth") # Best validation
    initial_model_onnx_path = os.path.join(config['output_dir'], "lstm_energy_initial_best_val.onnx") # ONNX for the best initial

    if not os.path.exists(initial_best_model_pth_path):
        print("\n--- Initial Training ---")
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate_initial'])
        # Assuming train_model_regression saves the best model based on val_loader
        model, _, _ = train_model_regression(
            model=model, train_loader=train_loader, criterion=nn.MSELoss().to(device),
            optimizer=optimizer, device=device, num_epochs=config['train_epochs'],
            val_loader=val_loader,
            model_path_prefix=os.path.join(config['output_dir'], "lstm_energy_initial"), # prefix for _best_val.pth
            grad_clip=config.get('grad_clip', None)
        )
        # Save the *last* epoch model state as well
        torch.save(model.state_dict(), initial_model_pth_path)
        print(f"Last epoch of initial training saved to {initial_model_pth_path}")

        # Determine which model to use for ONNX (best validated if available, else last epoch)
        if os.path.exists(initial_best_model_pth_path):
            print(f"Best initial model saved during training: {initial_best_model_pth_path}")
            # Load the best model to ensure 'model' variable holds the best weights
            model.load_state_dict(torch.load(initial_best_model_pth_path, map_location=device))
            print(f"Loaded best initial model ({initial_best_model_pth_path}) for ONNX export and pruning base.")
            onnx_base_model_pth = initial_best_model_pth_path
        else:
            print(f"Warning: Best validation model not saved. Using last epoch model from {initial_model_pth_path} for ONNX.")
            # 'model' already holds the last epoch weights here
            onnx_base_model_pth = initial_model_pth_path # Should actually be model, already holding last epoch

        # ---> SAVE INITIAL MODEL AS ONNX <---
        print(f"\nSaving best initial model (from {os.path.basename(onnx_base_model_pth)}) as ONNX...")
        # Ensure 'model' currently holds the weights we want to export (i.e., the best initial)
        save_model_as_onnx(model, example_input_bs1, initial_model_onnx_path)
        # ----------------------------------

    else: # If initial_best_model_pth_path already exists
        print(f"\nLoading best initial model from {initial_best_model_pth_path}")
        model.load_state_dict(torch.load(initial_best_model_pth_path, map_location=device))
        # ---> SAVE INITIAL MODEL AS ONNX (if not already done, or overwrite if preferred) <---
        if not os.path.exists(initial_model_onnx_path): # Only save if it doesn't exist
            print(f"Saving loaded best initial model ({os.path.basename(initial_best_model_pth_path)}) as ONNX...")
            save_model_as_onnx(model, example_input_bs1, initial_model_onnx_path)
        else:
            print(f"ONNX for initial model already exists at {initial_model_onnx_path}. Skipping.")
        # ----------------------------------


     # --- Evaluate Initial Model ---
    results = {}
    print("\n--- Evaluating Initial Model ---")
    results['initial'] = evaluate_model_regression(model, test_loader, example_input_bs1, device, scalers)
    initial_macs = results['initial']['macs']
    initial_params = results['initial']['params']
    initial_size_mb = results['initial']['size_mb']
    print(f"Initial Model Performance: MACs={initial_macs:,.0f}, Params={initial_params:,.0f}, SizeMB={initial_size_mb:.2f}, MSE={results['initial']['mse']:.4f}")

    # --- Calculate Targets for Pruning ---
    # Priority to absolute targets if defined
    target_macs_from_sparsity = initial_macs * (1 - config['target_macs_sparsity'])
    target_macs_absolute_config = config.get('target_macs_absolute', None) # Get value or None
    target_macs_absolute = target_macs_absolute_config if target_macs_absolute_config is not None else float('inf') # Handle None
    target_macs_value = min(target_macs_from_sparsity, target_macs_absolute)

    if initial_macs <= target_macs_value:
        print(f"Initial MACs ({initial_macs:,.0f}) already meet/are below target MACs ({target_macs_value:,.0f}). Setting target slightly lower than initial for pruning attempt.")
        target_macs_value = initial_macs * 0.98

    # Target params from target size_mb
    target_size_mb_absolute_config = config.get('target_size_mb_absolute', None)
    if target_size_mb_absolute_config is not None:
        target_params_from_size_mb = int((target_size_mb_absolute_config * 1024 * 1024) / 4)
    else:
        target_params_from_size_mb = float('inf')

    target_params_from_sparsity = initial_params * (1 - config['target_params_sparsity'])

    # --- CORRECTED PART for target_params_absolute ---
    target_params_absolute_config = config.get('target_params_absolute', None) # Get config value (which is None) or default to None if key is missing
    target_params_absolute = target_params_absolute_config if target_params_absolute_config is not None else float('inf') # If it's None, use infinity for min comparison
    # ----------------------------------------------------

    # Choose the most restrictive params target among sparsity, size_mb, and absolute_params
    target_params_value = min(target_params_from_sparsity, target_params_from_size_mb, target_params_absolute)

    if initial_params <= target_params_value:
        print(f"Initial Params ({initial_params:,.0f}) already meet/are below target Params ({target_params_value:,.0f}). Setting target slightly lower.")
        target_params_value = initial_params * 0.98


    print(f"\nFinal Pruning Targets:")
    print(f"  Targeting MACs <= {target_macs_value:,.0f} (Initial: {initial_macs:,.0f})")
    if target_size_mb_absolute_config is not None: # Use the config value for printing
        print(f"  Targeting Size <= {target_size_mb_absolute_config:.2f} MB (Implies Params <= {target_params_from_size_mb:,.0f})")
    print(f"  Targeting Params <= {target_params_value:,.0f} (Initial: {initial_params:,.0f})")


    # --- Pruning and Fine-tuning Loop ---
    for strategy_name, strategy_config in config['strategies'].items():
        print(f"\n===== Processing Strategy: {strategy_name} =====")
        model_to_prune = TimeSeriesLSTM_WithBlocks(
             input_size=input_size,
             lstm_hidden_size=config['lstm_hidden_size'],
             num_lstm_layers=config['num_lstm_layers'],
             block_configs=config.get('block_output_features'),
             output_size=1,
             lstm_dropout_prob=config.get('lstm_dropout'),
             block_dropout_prob=config.get('block_dropout')
        ).to(device)
        # Load the *best initial* weights into the fresh model
        model_to_prune.load_state_dict(torch.load(initial_best_model_pth_path, map_location=device))
        model_to_prune.eval()

        # Gradient batch check
        needs_grad = isinstance(strategy_config['importance'], tp.importance.TaylorImportance)
        grad_batch = example_gradient_batch if needs_grad else None
        if needs_grad and not grad_batch:
            print(f"Skipping {strategy_name}: requires gradient batch, but none available.")
            continue




        # --- > SELECT PRUNABLE LAYERS FROM BLOCKS < ---
        # To target layers *inside* the blocks, you need to access them via intermediate_blocks
        # e.g., model_to_prune.intermediate_blocks[0].fc is the Linear layer of the first block
        prunable_layers_list = []
        if hasattr(model_to_prune, 'intermediate_blocks') and len(model_to_prune.intermediate_blocks) > 0:
            # Example: Prune the Linear layers within all intermediate blocks
            for i in range(len(model_to_prune.intermediate_blocks)):
                prunable_layers_list.append(model_to_prune.intermediate_blocks[i].fc)
        else:
            # Fallback if no blocks (or if you also want to prune a global fc after blocks, if any)
            # prunable_layers_list.append(model_to_prune.some_other_fc_if_exists)
            print("Warning: No intermediate blocks found to prune. Check model architecture and 'block_output_features' config.")


        # Check if any prunable layers were actually added
        if not prunable_layers_list:
            print(f"No prunable layers selected for strategy {strategy_name}. Skipping pruning for this strategy.")
            # Optionally evaluate the unpruned model here or mark as failed
            results[strategy_name] = results['initial'].copy() # Copy initial if not pruning
            results[strategy_name]['notes'] = "Pruning skipped: No prunable layers found/selected."
            continue


        print(f"Targeting for pruning: {[type(layer).__name__ + ' with out_features=' + str(layer.out_features) for layer in prunable_layers_list]}")



        # Perform Pruning
        try:
            pruned_model = prune_lstm_model_by_threshold(
                model=model_to_prune,
                example_input_bs1=example_input_bs1,
                target_macs=target_macs_value,
                target_params=target_params_value,
                strategy=strategy_config,
                max_iterations=config['pruning_max_iterations'],
                step_pruning_ratio=config['pruning_step_ratio'],
                gradient_batch=grad_batch,
                prunable_modules=prunable_layers_list  # Updated to include LSTM # Explicitly prune only FC
            )
            pruned_path = os.path.join(config['output_dir'], f"lstm_{strategy_name}_pruned.pth")
            torch.save(pruned_model.state_dict(), pruned_path)
        except Exception as e:
            print(f"\nERROR during pruning ({strategy_name}): {e}")
            import traceback; traceback.print_exc()
            results[strategy_name] = {'error': str(e)} # Mark failure
            continue # Skip to next strategy

        # Fine-tune
        print(f"\n--- Fine-tuning ({strategy_name}) ---")
        ft_prefix = os.path.join(config['output_dir'], f"lstm_{strategy_name}_ft")
        ft_best_path = ft_prefix + "_best_val.pth"
        optimizer_ft = optim.Adam(pruned_model.parameters(), lr=config['learning_rate_finetune'])
        fine_tuned_model, _, _ = train_model_regression(
            model=pruned_model, train_loader=train_loader, criterion=nn.MSELoss().to(device),
            optimizer=optimizer_ft, device=device, num_epochs=config['fine_tune_epochs'],
            val_loader=val_loader, model_path_prefix=ft_prefix, grad_clip=config['grad_clip']
        )

        # Load the best fine-tuned model
        if os.path.exists(ft_best_path):
            print(f"Loading best fine-tuned model from {ft_best_path}")
            fine_tuned_model.load_state_dict(torch.load(ft_best_path, map_location=device))
        else:
            print(f"Warning: Best fine-tuned model path not found ({ft_best_path}). Using last epoch.")

        # Evaluate Final
        print(f"\n--- Evaluating Fine-tuned Model ({strategy_name}) ---")
        results[strategy_name] = evaluate_model_regression(
            fine_tuned_model, test_loader, example_input_bs1, device, scalers
        )

        # Save final model state dict (.pth)
        final_path = os.path.join(config['output_dir'], f"lstm_{strategy_name}_final.pth")
        torch.save(fine_tuned_model.state_dict(), final_path)
        print(f"Final PyTorch model saved to {final_path}")

        # ---> ADD ONNX SAVING HERE <---
        onnx_path = final_path.replace('.pth', '.onnx')
        # Ensure the fine_tuned_model is used, and example_input_bs1 has the correct shape/device
        save_model_as_onnx(fine_tuned_model, example_input_bs1, onnx_path)

        # --- (Optional) Intermediate Comparison Plot ---
        compare_results_and_plot_regression(
            results, metric_key=config['pruning_primary_metric'],
            lower_is_better=True, # True for MSE/MAE
            output_dir=config['output_dir']
        )

    # --- Final Comparison ---
    print("\n===== Final Comparison =====")
    compare_results_and_plot_regression(
         results, metric_key=config['pruning_primary_metric'],
         lower_is_better=True, output_dir=config['output_dir']
     )
    print("\nWorkflow completed!")

### Run the main function

In [22]:
if __name__ == "__main__":
    main_lstm()

Using device: cuda
Loading and processing data...
Loading dataset from: ./data/energydata_complete.csv
Original data shape: (19735, 29)
Data shape after selecting columns & dropping NaNs: (19735, 26)
Data split: Train=14210, Val=1578, Test=3947
Data loaded successfully:
  Input size (features): 25
  Sequence length: 72
  Train sequences: 14138
  Validation sequences: 1506
  Test sequences: 3875

Model Architecture:
TimeSeriesLSTM_WithBlocks(
  (lstm): LSTM(25, 128, batch_first=True)
  (intermediate_blocks): ModuleList(
    (0): IntermediateBlock(
      (fc): Linear(in_features=128, out_features=64, bias=True)
      (relu): ReLU()
      (dropout): Dropout(p=0.6, inplace=False)
    )
    (1): IntermediateBlock(
      (fc): Linear(in_features=64, out_features=64, bias=True)
      (relu): ReLU()
      (dropout): Dropout(p=0.6, inplace=False)
    )
    (2): IntermediateBlock(
      (fc): Linear(in_features=64, out_features=42, bias=True)
      (relu): ReLU()
      (dropout): Dropout(p=0.6, 

  result = _VF.lstm(
  result = _VF.lstm(


Epoch 1/60: Train Loss=0.009885, Time: 0.52s, Val Loss=0.007208 (Best model saved)
Epoch 2/60: Train Loss=0.009873, Time: 0.51s, Val Loss=0.007217
Epoch 3/60: Train Loss=0.009893, Time: 0.28s, Val Loss=0.007209
Epoch 4/60: Train Loss=0.009907, Time: 0.30s, Val Loss=0.007215
Epoch 5/60: Train Loss=0.009918, Time: 0.48s, Val Loss=0.007215
Epoch 6/60: Train Loss=0.009896, Time: 0.34s, Val Loss=0.007213
Epoch 7/60: Train Loss=0.009894, Time: 0.35s, Val Loss=0.007218
Epoch 8/60: Train Loss=0.009869, Time: 0.41s, Val Loss=0.007211
Epoch 9/60: Train Loss=0.009871, Time: 0.33s, Val Loss=0.007203 (Best model saved)
Epoch 10/60: Train Loss=0.009882, Time: 0.41s, Val Loss=0.007213
Epoch 11/60: Train Loss=0.009857, Time: 0.32s, Val Loss=0.007224
Epoch 12/60: Train Loss=0.009869, Time: 0.29s, Val Loss=0.007206
Epoch 13/60: Train Loss=0.009868, Time: 0.27s, Val Loss=0.007210
Epoch 14/60: Train Loss=0.009908, Time: 0.27s, Val Loss=0.007210
Epoch 15/60: Train Loss=0.009890, Time: 0.27s, Val Loss=0.007



Comparison plots saved to ./output/lstm_energy_7blocks

===== Processing Strategy: Magnitude_L2 =====
Targeting for pruning: ['Linear with out_features=64', 'Linear with out_features=64', 'Linear with out_features=42', 'Linear with out_features=42', 'Linear with out_features=32', 'Linear with out_features=32', 'Linear with out_features=25']
--- Starting Pruning (MagnitudeImportance) ---
Target MACs: <= 72,586,268, Target Params: <= 49,798
Step Ratio: 0.15, Max Iter: 100
Targeting specific modules for pruning: ['Linear', 'Linear', 'Linear', 'Linear', 'Linear', 'Linear', 'Linear']
Initial State | MACs: 145,172,536, Params: 99,595
Iter   1/100 | MACs: 145,167,712 (  +0.0% R) | Params: 94,819 (  +4.8% R)
Iter 2: No more candidates found by pruner. Stopping.
--- Finished Pruning (MagnitudeImportance) ---
Initial | MACs: 145,172,536, Params: 99,595
Final   | MACs: 145,167,712 (Reduction: 0.00%)
        | Params: 94,819 (Reduction: 4.80%)
Target  | MACs <= 72,586,268, Params <= 49,798

--- Fi

  result = _VF.lstm(


Epoch 1/60: Train Loss=0.009912, Time: 0.27s, Val Loss=0.007207 (Best model saved)
Epoch 2/60: Train Loss=0.009871, Time: 0.27s, Val Loss=0.007213
Epoch 3/60: Train Loss=0.009895, Time: 0.27s, Val Loss=0.007208
Epoch 4/60: Train Loss=0.009827, Time: 0.26s, Val Loss=0.007202 (Best model saved)
Epoch 5/60: Train Loss=0.009896, Time: 0.26s, Val Loss=0.007214
Epoch 6/60: Train Loss=0.009838, Time: 0.27s, Val Loss=0.007211
Epoch 7/60: Train Loss=0.009906, Time: 0.26s, Val Loss=0.007208
Epoch 8/60: Train Loss=0.009897, Time: 0.25s, Val Loss=0.007218
Epoch 9/60: Train Loss=0.009908, Time: 0.26s, Val Loss=0.007213
Epoch 10/60: Train Loss=0.009899, Time: 0.26s, Val Loss=0.007207
Epoch 11/60: Train Loss=0.009896, Time: 0.25s, Val Loss=0.007229
Epoch 12/60: Train Loss=0.009903, Time: 0.26s, Val Loss=0.007212
Epoch 13/60: Train Loss=0.009852, Time: 0.27s, Val Loss=0.007226
Epoch 14/60: Train Loss=0.009866, Time: 0.27s, Val Loss=0.007214
Epoch 15/60: Train Loss=0.009883, Time: 0.26s, Val Loss=0.007



Comparison plots saved to ./output/lstm_energy_7blocks

===== Processing Strategy: Random =====
Targeting for pruning: ['Linear with out_features=64', 'Linear with out_features=64', 'Linear with out_features=42', 'Linear with out_features=42', 'Linear with out_features=32', 'Linear with out_features=32', 'Linear with out_features=25']
--- Starting Pruning (RandomImportance) ---
Target MACs: <= 72,586,268, Target Params: <= 49,798
Step Ratio: 0.15, Max Iter: 100
Targeting specific modules for pruning: ['Linear', 'Linear', 'Linear', 'Linear', 'Linear', 'Linear', 'Linear']
Initial State | MACs: 145,172,536, Params: 99,595
Iter   1/100 | MACs: 145,167,712 (  +0.0% R) | Params: 94,819 (  +4.8% R)
Iter 2: No more candidates found by pruner. Stopping.
--- Finished Pruning (RandomImportance) ---
Initial | MACs: 145,172,536, Params: 99,595
Final   | MACs: 145,167,712 (Reduction: 0.00%)
        | Params: 94,819 (Reduction: 4.80%)
Target  | MACs <= 72,586,268, Params <= 49,798

--- Fine-tuning (R

  result = _VF.lstm(


Epoch 1/60: Train Loss=0.009874, Time: 0.30s, Val Loss=0.007206 (Best model saved)
Epoch 2/60: Train Loss=0.009847, Time: 0.29s, Val Loss=0.007212
Epoch 3/60: Train Loss=0.009909, Time: 0.27s, Val Loss=0.007215
Epoch 4/60: Train Loss=0.009906, Time: 0.27s, Val Loss=0.007213
Epoch 5/60: Train Loss=0.009878, Time: 0.26s, Val Loss=0.007206
Epoch 6/60: Train Loss=0.009911, Time: 0.27s, Val Loss=0.007212
Epoch 7/60: Train Loss=0.009866, Time: 0.25s, Val Loss=0.007200 (Best model saved)
Epoch 8/60: Train Loss=0.009885, Time: 0.26s, Val Loss=0.007207
Epoch 9/60: Train Loss=0.009911, Time: 0.26s, Val Loss=0.007213
Epoch 10/60: Train Loss=0.009878, Time: 0.26s, Val Loss=0.007221
Epoch 11/60: Train Loss=0.009908, Time: 0.26s, Val Loss=0.007213
Epoch 12/60: Train Loss=0.009869, Time: 0.26s, Val Loss=0.007220
Epoch 13/60: Train Loss=0.009872, Time: 0.27s, Val Loss=0.007206
Epoch 14/60: Train Loss=0.009908, Time: 0.26s, Val Loss=0.007218
Epoch 15/60: Train Loss=0.009887, Time: 0.25s, Val Loss=0.007



Comparison plots saved to ./output/lstm_energy_7blocks

===== Processing Strategy: Taylor =====
Targeting for pruning: ['Linear with out_features=64', 'Linear with out_features=64', 'Linear with out_features=42', 'Linear with out_features=42', 'Linear with out_features=32', 'Linear with out_features=32', 'Linear with out_features=25']
--- Starting Pruning (TaylorImportance) ---
Target MACs: <= 72,586,268, Target Params: <= 49,798
Step Ratio: 0.15, Max Iter: 100
Targeting specific modules for pruning: ['Linear', 'Linear', 'Linear', 'Linear', 'Linear', 'Linear', 'Linear']
Initial State | MACs: 145,172,536, Params: 99,595

Error during pruner.step() (Iter 1): 'NoneType' object has no attribute 'data'. Stopping.
--- Finished Pruning (TaylorImportance) ---
Initial | MACs: 145,172,536, Params: 99,595
Final   | MACs: 145,172,536 (Reduction: 0.00%)
        | Params: 99,595 (Reduction: 0.00%)
Target  | MACs <= 72,586,268, Params <= 49,798

--- Fine-tuning (Taylor) ---
Starting training for 60 e

  result = _VF.lstm(


Epoch 1/60: Train Loss=0.009830, Time: 0.29s, Val Loss=0.007199 (Best model saved)
Epoch 2/60: Train Loss=0.009896, Time: 0.29s, Val Loss=0.007216
Epoch 3/60: Train Loss=0.009896, Time: 0.26s, Val Loss=0.007211
Epoch 4/60: Train Loss=0.009904, Time: 0.28s, Val Loss=0.007216
Epoch 5/60: Train Loss=0.009880, Time: 0.27s, Val Loss=0.007213
Epoch 6/60: Train Loss=0.009900, Time: 0.27s, Val Loss=0.007216
Epoch 7/60: Train Loss=0.009857, Time: 0.27s, Val Loss=0.007209
Epoch 8/60: Train Loss=0.009902, Time: 0.27s, Val Loss=0.007205
Epoch 9/60: Train Loss=0.009884, Time: 0.27s, Val Loss=0.007206
Epoch 10/60: Train Loss=0.009911, Time: 0.27s, Val Loss=0.007209
Epoch 11/60: Train Loss=0.009900, Time: 0.27s, Val Loss=0.007218
Epoch 12/60: Train Loss=0.009876, Time: 0.27s, Val Loss=0.007204
Epoch 13/60: Train Loss=0.009908, Time: 0.27s, Val Loss=0.007203
Epoch 14/60: Train Loss=0.009866, Time: 0.27s, Val Loss=0.007207
Epoch 15/60: Train Loss=0.009905, Time: 0.27s, Val Loss=0.007203
Epoch 16/60: Tr



Comparison plots saved to ./output/lstm_energy_7blocks

===== Processing Strategy: FPGM =====
Targeting for pruning: ['Linear with out_features=64', 'Linear with out_features=64', 'Linear with out_features=42', 'Linear with out_features=42', 'Linear with out_features=32', 'Linear with out_features=32', 'Linear with out_features=25']
--- Starting Pruning (FPGMImportance) ---
Target MACs: <= 72,586,268, Target Params: <= 49,798
Step Ratio: 0.15, Max Iter: 100
Targeting specific modules for pruning: ['Linear', 'Linear', 'Linear', 'Linear', 'Linear', 'Linear', 'Linear']
Initial State | MACs: 145,172,536, Params: 99,595
Iter   1/100 | MACs: 145,167,712 (  +0.0% R) | Params: 94,819 (  +4.8% R)
Iter 2: No more candidates found by pruner. Stopping.
--- Finished Pruning (FPGMImportance) ---
Initial | MACs: 145,172,536, Params: 99,595
Final   | MACs: 145,167,712 (Reduction: 0.00%)
        | Params: 94,819 (Reduction: 4.80%)
Target  | MACs <= 72,586,268, Params <= 49,798

--- Fine-tuning (FPGM) -

  result = _VF.lstm(


Epoch 1/60: Train Loss=0.009871, Time: 0.28s, Val Loss=0.007206 (Best model saved)
Epoch 2/60: Train Loss=0.009878, Time: 0.28s, Val Loss=0.007217
Epoch 3/60: Train Loss=0.009893, Time: 0.28s, Val Loss=0.007214
Epoch 4/60: Train Loss=0.009853, Time: 0.27s, Val Loss=0.007229
Epoch 5/60: Train Loss=0.009837, Time: 0.27s, Val Loss=0.007210
Epoch 6/60: Train Loss=0.009918, Time: 0.27s, Val Loss=0.007225
Epoch 7/60: Train Loss=0.009899, Time: 0.27s, Val Loss=0.007201 (Best model saved)
Epoch 8/60: Train Loss=0.009868, Time: 0.27s, Val Loss=0.007216
Epoch 9/60: Train Loss=0.009892, Time: 0.27s, Val Loss=0.007205
Epoch 10/60: Train Loss=0.009895, Time: 0.27s, Val Loss=0.007208
Epoch 11/60: Train Loss=0.009902, Time: 0.28s, Val Loss=0.007210
Epoch 12/60: Train Loss=0.009881, Time: 0.26s, Val Loss=0.007213
Epoch 13/60: Train Loss=0.009886, Time: 0.27s, Val Loss=0.007215
Epoch 14/60: Train Loss=0.009877, Time: 0.26s, Val Loss=0.007212
Epoch 15/60: Train Loss=0.009878, Time: 0.26s, Val Loss=0.007



Comparison plots saved to ./output/lstm_energy_7blocks

===== Final Comparison =====

=== Pruning Strategy Comparison (Metric: MSE) ===
Strategy        | MACs (M)   | Params (K) | Size (MB)  | MSE         
---------------------------------------------------------------------
initial         | 145.17     | 99.6       |       0.40 |    7496.7686
Magnitude_L1    | 145.17     | 94.8       |       0.38 |    7227.3730
Magnitude_L2    | 145.17     | 94.8       |       0.38 |    7039.9312
Random          | 145.17     | 94.8       |       0.38 |    7508.2358
Taylor          | 145.17     | 99.6       |       0.40 |    7426.9209
FPGM            | 145.17     | 94.8       |       0.38 |    7443.5874
Comparison plots saved to ./output/lstm_energy_7blocks

Workflow completed!


In [18]:
import pandas as pd

# Path is defined in your code
dataset_path = './data/energydata_complete.csv'
df = pd.read_csv(dataset_path)

# Print the head of the original dataframe
print("Original dataframe shape:", df.shape)
print(df.head())

# If you want to see the target column statistics
print("\nTarget column summary statistics:")
print(df['Appliances'].describe())

# To see the feature columns
print("\nFeatures in the dataset:")
print(df.columns.tolist())

Original dataframe shape: (19735, 29)
                  date  Appliances  lights     T1       RH_1    T2       RH_2  \
0  2016-01-11 17:00:00          60      30  19.89  47.596667  19.2  44.790000   
1  2016-01-11 17:10:00          60      30  19.89  46.693333  19.2  44.722500   
2  2016-01-11 17:20:00          50      30  19.89  46.300000  19.2  44.626667   
3  2016-01-11 17:30:00          50      40  19.89  46.066667  19.2  44.590000   
4  2016-01-11 17:40:00          60      40  19.89  46.333333  19.2  44.530000   

      T3       RH_3         T4  ...         T9   RH_9     T_out  Press_mm_hg  \
0  19.79  44.730000  19.000000  ...  17.033333  45.53  6.600000        733.5   
1  19.79  44.790000  19.000000  ...  17.066667  45.56  6.483333        733.6   
2  19.79  44.933333  18.926667  ...  17.000000  45.50  6.366667        733.7   
3  19.79  45.000000  18.890000  ...  17.000000  45.40  6.250000        733.8   
4  19.79  45.000000  18.890000  ...  17.000000  45.40  6.133333        733.

In [19]:
inputs, targets = next(iter(train_loader))

print(f"\nBatch shape - Inputs: {inputs.shape}, Targets: {targets.shape}")
print(f"Input features sample (first sequence):\n{inputs[0]}")
print(f"Target values sample:\n{targets[:5]}")


Batch shape - Inputs: torch.Size([64, 72, 25]), Targets: torch.Size([64, 1])
Input features sample (first sequence):
tensor([[0.1429, 0.6156, 0.3790,  ..., 0.5833, 0.6000, 0.4898],
        [0.2857, 0.6156, 0.3738,  ..., 0.5714, 0.6000, 0.4889],
        [0.1429, 0.6156, 0.3715,  ..., 0.5714, 0.6000, 0.4833],
        ...,
        [0.0000, 0.6156, 0.3727,  ..., 0.7143, 0.6821, 0.7389],
        [0.0000, 0.6074, 0.3727,  ..., 0.6786, 0.5923, 0.7306],
        [0.0000, 0.6033, 0.3769,  ..., 0.6429, 0.5026, 0.7222]])
Target values sample:
tensor([[0.0374],
        [0.0374],
        [0.0374],
        [0.0654],
        [0.0654]])
