# LPBF Optimizer Workflow

This notebook demonstrates the complete workflow of the LPBF (Laser Powder Bed Fusion) Optimizer, from synthetic data generation through surrogate-model training to multi-objective optimization.

## Setup

First, let's import the necessary modules and set up the environment.

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import h5py
import torch
import yaml
from pathlib import Path

# Make the project sources under ../src importable from this notebook
sys.path.append(os.path.abspath('../src'))

# Seed both NumPy and PyTorch so repeated runs produce the same numbers
np.random.seed(42)
torch.manual_seed(42)

# Locations of the configuration file and of the data sub-folders
config_path = '../data/params.yaml'
data_dir = Path('../data')
processed_dir, models_dir, results_dir = (
    data_dir / folder for folder in ('processed', 'models', 'optimized')
)

# Ensure every output folder exists (no-op when it is already there)
for output_dir in (processed_dir, models_dir, results_dir):
    output_dir.mkdir(parents=True, exist_ok=True)

## 1. Generate Synthetic Data

First, let's generate synthetic data that mimics FEA simulation results.

In [None]:
from generate_synthetic_data import SyntheticDataGenerator

# Demo-sized request: far fewer scan vectors and points than a real application
demo_generation_kwargs = {'n_scan_vectors': 20, 'n_points_per_vector': 100}

# Build the generator from the configuration path and produce the dataset;
# the value returned by generate() is used as the dataset file path
generator = SyntheticDataGenerator(config_path)
dataset_path = generator.generate(**demo_generation_kwargs)

print(f"Generated synthetic dataset at: {dataset_path}")

## 2. Explore the Generated Data

Let's examine the synthetic dataset to understand its structure.

In [None]:
def print_structure(name, obj):
    """Print name, shape and dtype of an HDF5 dataset; ignore any other object."""
    if not isinstance(obj, h5py.Dataset):
        return
    print(f"  {name}: shape={obj.shape}, dtype={obj.dtype}")


# Open the generated file read-only and pull out what the following cells need
with h5py.File(dataset_path, 'r') as h5_file:
    # Walk the whole file and list every dataset it contains
    print("Dataset structure:")
    h5_file.visititems(print_structure)

    # Arrays used for visualization
    train_outputs = h5_file['train/outputs'][:]
    train_scan_vectors = h5_file['train/scan_vectors'][:]

    # Parameter names are decoded from UTF-8 bytes into str
    param_names = [raw.decode('utf-8') for raw in h5_file['metadata/parameter_names'][:]]

    # Split sizes are read from the attributes of the 'metadata' group
    n_train, n_val, n_test = (
        h5_file['metadata'].attrs[key] for key in ('n_train', 'n_val', 'n_test')
    )

print(f"\nParameter names: {param_names}")
print(f"Dataset split: {n_train} training, {n_val} validation, {n_test} test samples")

Let's visualize some of the outputs from the synthetic data.

In [None]:
# Histograms of the three output columns over the first 100 training samples.
# Each entry is (panel title, x-axis label); the list position is the output column.
histogram_panels = [
    ('Residual Stress Distribution', 'Stress (MPa)'),
    ('Porosity Distribution', 'Porosity (%)'),
    ('Geometric Accuracy Distribution', 'Accuracy Ratio'),
]

plt.figure(figsize=(15, 5))
for column, (panel_title, x_label) in enumerate(histogram_panels):
    plt.subplot(1, 3, column + 1)
    plt.hist(train_outputs[:100, column], bins=20)
    plt.title(panel_title)
    plt.xlabel(x_label)
    plt.ylabel('Frequency')
plt.tight_layout()
plt.show()

# Scatter plots relating scan parameters to outputs (first 50 samples).
# Skipped unless at least two parameters are available (the code comments
# describe them as laser power and scan speed).
if len(param_names) >= 2:
    # Ratio of the first parameter to the second one
    energy_density = train_scan_vectors[:50, 0] / train_scan_vectors[:50, 1]

    # Each entry is (x values, title, x label, y label); the list position
    # is the output column plotted on the y-axis.
    scatter_panels = [
        (train_scan_vectors[:50, 0],
         f'{param_names[0]} vs. Residual Stress',
         f'{param_names[0]}',
         'Residual Stress (MPa)'),
        (train_scan_vectors[:50, 1],
         f'{param_names[1]} vs. Porosity',
         f'{param_names[1]}',
         'Porosity (%)'),
        (energy_density,
         'Energy Density vs. Accuracy',
         'Energy Density (P/v)',
         'Geometric Accuracy'),
    ]

    plt.figure(figsize=(15, 5))
    for column, (x_values, panel_title, x_label, y_label) in enumerate(scatter_panels):
        plt.subplot(1, 3, column + 1)
        plt.scatter(x_values, train_outputs[:50, column])
        plt.title(panel_title)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
    plt.tight_layout()
    plt.show()

## 3. Train the PINN Model

Now let's train a Physics-Informed Neural Network (PINN) on this synthetic data. For demonstration purposes we run a short, simplified training loop on a small subset of the samples.

In [None]:
from pinn.model import PINN
from pinn.physics import compute_physics_loss
import torch.nn as nn
import torch.optim as optim

# Load configuration
with open(config_path, 'r') as f:
    config = yaml.safe_load(f)

# Load a small portion of the data for demonstration. Everything is copied
# into tensors here, so the file can be closed before any further processing.
with h5py.File(dataset_path, 'r') as f:
    train_inputs = torch.tensor(f['train/inputs'][:200], dtype=torch.float32)
    train_outputs = torch.tensor(f['train/outputs'][:200], dtype=torch.float32)
    train_coords = torch.tensor(f['train/coordinates'][:200], dtype=torch.float32)
    train_time = torch.tensor(f['train/time'][:200], dtype=torch.float32)
    train_scan_vectors = torch.tensor(f['train/scan_vectors'][:], dtype=torch.float32)

# Reference scan vectors for the demo: the first five rows as stored.
# NOTE: despite the variable name, the rows are not de-duplicated.
unique_scan_vectors = train_scan_vectors[:5]
n_scan_params = unique_scan_vectors.shape[1]

# Keep the samples whose leading n_scan_params input columns exactly match
# one of the reference scan vectors, up to 100 samples for the demo.
selected_indices = []
for i in range(len(train_inputs)):
    input_sv = train_inputs[i, :n_scan_params]
    if any(torch.all(input_sv == sv) for sv in unique_scan_vectors):
        selected_indices.append(i)
    if len(selected_indices) >= 100:  # Limit to 100 samples for demo
        break

# Training on an empty batch would silently produce NaN losses and a saved
# model full of NaNs, so fail loudly instead.
if not selected_indices:
    raise ValueError(
        "No training samples match the selected scan vectors; "
        "cannot train on an empty batch."
    )

# Select the data
inputs = train_inputs[selected_indices]
outputs = train_outputs[selected_indices]
coords = train_coords[selected_indices]
time = train_time[selected_indices]

# Create the model from the 'model' section of the configuration
model_config = config['model']
model = PINN(
    in_dim=model_config['input_dim'],
    out_dim=model_config['output_dim'],
    width=model_config['hidden_width'],
    depth=model_config['hidden_depth']
)
# Be explicit about the mode: a later cell switches the model to eval()
model.train()

# Define loss and optimizer
mse_loss = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Simple full-batch training loop (for demonstration)
n_epochs = 20  # Just a few epochs for demonstration
losses = []

print("Starting demo training loop...")
for epoch in range(n_epochs):
    # Forward pass
    optimizer.zero_grad()
    predictions = model(inputs)

    # Data loss: mean squared error against the dataset outputs
    data_loss = mse_loss(predictions, outputs)

    # Physics loss (simplified for demo); weights come from config['training']
    physics_loss = compute_physics_loss(
        model,
        inputs[:, :n_scan_params],
        coords,
        time,
        config['material_properties'],
        lambda_heat=config['training']['lambda_heat'],
        lambda_stress=config['training']['lambda_stress']
    )

    # Total loss
    total_loss = data_loss + physics_loss

    # Backward and optimize
    total_loss.backward()
    optimizer.step()

    losses.append(total_loss.item())

    if (epoch + 1) % 5 == 0:
        print(f"Epoch [{epoch+1}/{n_epochs}], Loss: {total_loss.item():.4f}, "
              f"Data Loss: {data_loss.item():.4f}, Physics Loss: {physics_loss.item():.4f}")

# Save the model and optimizer state, plus the last epoch's loss, for later use
model_path = models_dir / 'demo_model.pt'
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': total_loss.item()
}, model_path)

print(f"Model saved to {model_path}")

# Plot loss curve
plt.figure(figsize=(10, 5))
plt.plot(losses)
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.show()

## 4. Make Predictions with the Trained Model

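Let's use our trained model to make some predictions. We sweep a 10×10 grid over the first two process parameters, hold any remaining parameters at the midpoint of their observed range, and evaluate the model at the origin at time t = 1.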

In [None]:
# Derive per-parameter [min, max] bounds from the training scan vectors.
# The dataset is read once instead of once per parameter.
with h5py.File(dataset_path, 'r') as f:
    param_names = [name.decode('utf-8') for name in f['metadata/parameter_names'][:]]
    scan_vector_values = f['train/scan_vectors'][:]

column_min = scan_vector_values.min(axis=0)
column_max = scan_vector_values.max(axis=0)
param_bounds = {
    param: [column_min[i], column_max[i]] for i, param in enumerate(param_names)
}

# Create a grid of test points for the first two parameters
if len(param_names) >= 2:
    param1 = param_names[0]  # e.g., Laser power
    param2 = param_names[1]  # e.g., Scan speed

    p1_range = np.linspace(param_bounds[param1][0], param_bounds[param1][1], 10)
    p2_range = np.linspace(param_bounds[param2][0], param_bounds[param2][1], 10)
    P1, P2 = np.meshgrid(p1_range, p2_range)

    # Any remaining parameters are held at the midpoint of their bounds
    fixed_columns = [
        np.full(P1.size, np.mean(param_bounds[name])) for name in param_names[2:]
    ]

    # One row per grid node, in row-major order so that reshaping the
    # predictions with P1.shape maps them back onto the grid
    test_points = np.column_stack([P1.ravel(), P2.ravel(), *fixed_columns])
    test_scan_vectors = torch.tensor(test_points, dtype=torch.float32)

    # Dummy coordinates and time for prediction. Distinct names are used so
    # the training cell's coords / time / predictions are not overwritten.
    test_coords = torch.zeros(len(test_scan_vectors), 3)  # Origin point (0,0,0)
    test_time = torch.ones(len(test_scan_vectors), 1)     # Final time (t=1)

    # Combine inputs for the model: scan parameters, then coordinates, then time
    test_inputs = torch.cat([test_scan_vectors, test_coords, test_time], dim=1)

    # Make predictions without tracking gradients
    model.eval()
    with torch.no_grad():
        test_predictions = model(test_inputs)

    # Convert to NumPy before plotting and reshape each output onto the grid
    prediction_grid = test_predictions.detach().cpu().numpy()
    stress = prediction_grid[:, 0].reshape(P1.shape)
    porosity = prediction_grid[:, 1].reshape(P1.shape)
    accuracy = prediction_grid[:, 2].reshape(P1.shape)

    # Visualize the results: one filled-contour panel per output
    contour_panels = [
        (stress, 'hot', 'Residual Stress'),
        (porosity, 'viridis', 'Porosity'),
        (accuracy, 'Blues', 'Geometric Accuracy'),
    ]

    plt.figure(figsize=(15, 5))
    for panel_index, (grid_values, colormap, panel_title) in enumerate(contour_panels, start=1):
        plt.subplot(1, 3, panel_index)
        contour = plt.contourf(P1, P2, grid_values, cmap=colormap)
        plt.colorbar(contour)
        plt.title(panel_title)
        plt.xlabel(param1)
        plt.ylabel(param2)

    plt.tight_layout()
    plt.show()

## 5. Multi-objective Optimization

Now let's use the NSGA-III algorithm to find optimal process parameters based on our trained surrogate model.

In [None]:
import pandas as pd
from pymoo.algorithms.moo.nsga3 import NSGA3
from pymoo.core.problem import Problem
# Operators are imported as classes: the string-based helpers in
# `pymoo.factory` (get_sampling / get_crossover / get_mutation) were removed
# in pymoo 0.6.
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.mutation.pm import PM
from pymoo.operators.sampling.rnd import FloatRandomSampling
from pymoo.optimize import minimize
from pymoo.util.ref_dirs import get_reference_directions


class SurrogateProblem(Problem):
    """
    Multi-objective optimization problem using PINN surrogate

    Decision variables are the process parameters named by the keys of
    ``param_bounds``. Objectives are the surrogate's outputs evaluated at the
    origin and at t = 1, with output column 2 negated so that every objective
    is minimized.

    Args:
        model: Callable surrogate taking a float32 tensor whose columns are
            the process parameters followed by 3 coordinates and 1 time value.
        param_bounds: Mapping from parameter name to ``[lower, upper]``.
        objectives: Sequence of objective names; its length sets ``n_obj``.
    """
    def __init__(self, model, param_bounds, objectives):
        # Extract parameter names and bounds (dict order defines variable order)
        param_names = list(param_bounds.keys())
        xl = np.array([param_bounds[p][0] for p in param_names], dtype=float)
        xu = np.array([param_bounds[p][1] for p in param_names], dtype=float)

        # Let the parent constructor set n_var / n_obj / xl / xu. The problem
        # is unconstrained, which is the parent's default; `n_constr` is not
        # assigned here because it is a read-only property in pymoo 0.6.
        super().__init__(
            n_var=len(param_names),
            n_obj=len(objectives),
            xl=xl,
            xu=xu
        )

        self.param_names = param_names
        self.objectives = objectives

        # Store the surrogate model
        self.model = model

    def _evaluate(self, x, out, *args, **kwargs):
        """Evaluate the surrogate for a population ``x`` and store the result in ``out["F"]``."""
        # Convert to tensor
        x_tensor = torch.tensor(x, dtype=torch.float32)

        # Add dummy spatial coordinates and time
        batch_size = x_tensor.shape[0]
        coords = torch.zeros(batch_size, 3)  # Origin point
        time = torch.ones(batch_size, 1)     # Final time step

        # Forward pass through the model
        with torch.no_grad():
            model_input = torch.cat([x_tensor, coords, time], dim=1)
            predictions = self.model(model_input)

        # Work on a float64 NumPy copy of the predictions
        objectives = predictions.detach().cpu().numpy().astype(float)

        # For minimization, we negate geometric accuracy (higher is better)
        objectives[:, 2] = -objectives[:, 2]  # Negate accuracy for minimization

        # Set the objective values for pymoo
        out["F"] = objectives


# Run a simple optimization
if len(param_names) >= 2:
    # Make sure the surrogate is in inference mode even if the prediction
    # cell was skipped or the training cell was re-run
    model.eval()

    # Define the problem
    problem = SurrogateProblem(
        model=model,
        param_bounds=param_bounds,
        objectives=['residual_stress', 'porosity', 'geometric_accuracy']
    )

    # Get reference directions for the three objectives
    ref_dirs = get_reference_directions("das-dennis", 3, n_partitions=4)

    # Create the algorithm
    algorithm = NSGA3(
        pop_size=40,
        ref_dirs=ref_dirs,
        sampling=FloatRandomSampling(),
        crossover=SBX(prob=0.9, eta=15),
        mutation=PM(eta=20),
        eliminate_duplicates=True
    )

    # Run the optimization
    print("Running NSGA-III optimization (with limited generations for demo)...")
    res = minimize(
        problem=problem,
        algorithm=algorithm,
        termination=('n_gen', 20),  # Limited generations for demo
        seed=1,
        verbose=True
    )

    # Get the Pareto optimal solutions
    X = res.X  # Optimal process parameters
    F = res.F  # Corresponding objective values

    # Save results: parameter columns followed by objective columns.
    # The accuracy column holds the negated value used during minimization.
    results_path = results_dir / 'demo_pareto_solutions.csv'
    param_df = pd.DataFrame(X, columns=problem.param_names)
    obj_df = pd.DataFrame(F, columns=['residual_stress', 'porosity', '-geometric_accuracy'])
    result_df = pd.concat([param_df, obj_df], axis=1)
    result_df.to_csv(results_path, index=False)

    print(f"Optimization complete! Results saved to {results_path}")
    print(f"Found {len(X)} Pareto-optimal solutions")

    # Visualize Pareto front
    if F.shape[1] == 3:
        fig = plt.figure(figsize=(10, 8))
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(F[:, 0], F[:, 1], F[:, 2], s=30)
        ax.set_xlabel('Residual Stress')
        ax.set_ylabel('Porosity')
        ax.set_zlabel('-Geometric Accuracy')
        ax.set_title('Pareto Front')
        plt.show()

## 6. Conclusion

This notebook has demonstrated the complete workflow for the LPBF Optimizer:

1. Generating synthetic data that mimics FEA simulations
2. Exploring and visualizing the data
3. Training a physics-informed neural network (PINN)
4. Making predictions using the trained model
5. Running multi-objective optimization to find optimal process parameters

In a real application, you would:
- Generate or use much larger datasets
- Train the model for many more epochs
- Run the optimization with a larger population size and more generations
- Validate the results with actual LPBF builds

For more information, see the documentation in the README.md file.