# FUSED Framework: Hyperparameter Optimization Example

This notebook demonstrates how to use the hyperparameter optimization utilities in the FUSED framework to automatically find the best configuration for your models.

## Setup

First, let's import the necessary libraries and set up our environment:

In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader

# Import FUSED utilities
from fused.utils.hyperparameter_optimization import HyperparameterTuner, OptunaOptimizer
from fused.models import SequentialEncoder, TemporalFusionModel
from fused.utils.experiment_tracking import ExperimentTracker

# Enable interactive plotting
%matplotlib inline

## Generate Synthetic Data

Let's create some synthetic multimodal time series data for our example:

In [None]:
def generate_synthetic_data(n_samples=1000, seq_length=50, n_features=10, n_modalities=2):
    """Generate synthetic multimodal time series data with binary labels.

    Every modality is a ``(n_samples, seq_length, n_features)`` tensor of
    standard-normal noise. A sine wave is added to feature 0 and a cosine wave
    to feature 1 of each modality; the wave frequency grows with the modality
    index so the modalities differ from one another.

    Args:
        n_samples: Number of sequences generated per modality.
        seq_length: Number of time steps in each sequence.
        n_features: Number of features per time step. Feature 1 of
            ``modality_1`` is read for the labels, so at least 2 features are
            needed when ``n_modalities > 1``.
        n_modalities: Number of modalities; they are named ``modality_0``,
            ``modality_1``, ... At least one is required because the labels
            read ``modality_0``.

    Returns:
        A tuple ``(modalities, y)``: ``modalities`` maps each modality name to
        its tensor, and ``y`` is a float tensor of shape ``(n_samples,)``
        holding 0.0 / 1.0 class labels.
    """
    # Time axis shaped (1, seq_length, 1) so it broadcasts across samples and
    # onto a single feature column. It does not depend on the modality, so it
    # is built once instead of on every loop iteration.
    t = torch.linspace(0, 1, seq_length).unsqueeze(0).unsqueeze(-1)

    modalities = {}
    for i in range(n_modalities):
        # Random noise as the base signal
        X = torch.randn(n_samples, seq_length, n_features)

        # Modality-specific frequency for the injected patterns
        freq = 0.1 * (i + 1)

        # Inject the patterns into the first two features (width-1 slices keep
        # the feature dimension so the broadcast lines up).
        X[:, :, 0:1] += torch.sin(2 * np.pi * freq * t) * 2.0
        X[:, :, 1:2] += torch.cos(2 * np.pi * freq * t) * 2.0

        modalities[f"modality_{i}"] = X

    # Labels: sign of (time-sum of the sine feature in modality_0) plus
    # (time-sum of the cosine feature in modality_1, when that modality exists).
    sin_sum = torch.sum(modalities["modality_0"][:, :, 0], dim=1)
    cos_sum = torch.sum(modalities["modality_1"][:, :, 1], dim=1) if n_modalities > 1 else 0

    y = (sin_sum + cos_sum > 0).float()

    return modalities, y

# Generate data
modalities, labels = generate_synthetic_data()

# Split into train and validation sets.
# A single permutation is drawn and then sliced, which guarantees the two
# sets are disjoint and together cover every sample. Drawing a separate
# permutation for each set would let samples appear in both (leaking training
# data into validation) or in neither.
train_size = int(0.8 * len(labels))
shuffled_indices = torch.randperm(len(labels))
train_indices = shuffled_indices[:train_size]
val_indices = shuffled_indices[train_size:]

# Apply the same index selection to every modality and to the labels so the
# samples stay aligned across modalities.
train_modalities = {k: v[train_indices] for k, v in modalities.items()}
train_labels = labels[train_indices]

val_modalities = {k: v[val_indices] for k, v in modalities.items()}
val_labels = labels[val_indices]

# Create datasets
def create_dataset(modalities, labels):
    """Bundle the modality tensors and their labels into one flat dictionary.

    The result holds every entry of ``modalities`` plus the labels under the
    ``"labels"`` key; the input mapping itself is left unmodified.
    """
    # Shallow-copy first so the caller's mapping is not mutated.
    bundled = dict(modalities)
    bundled["labels"] = labels
    return bundled

# Wrap each split in the flat dictionary format used by the rest of the notebook
train_dataset, val_dataset = (
    create_dataset(train_modalities, train_labels),
    create_dataset(val_modalities, val_labels),
)

print(f"Train dataset: {len(train_labels)} samples")
print(f"Validation dataset: {len(val_labels)} samples")

# Let's visualize a sample: the first sequence's sine-carrying feature from
# modality 0 next to its cosine-carrying feature from modality 1.
first_sine_trace = modalities["modality_0"][0, :, 0].numpy()
first_cosine_trace = modalities["modality_1"][0, :, 1].numpy()

plt.figure(figsize=(12, 6))

# Left panel
plt.subplot(1, 2, 1)
plt.plot(first_sine_trace)
plt.title(f"Modality 0, Feature 0 (Label: {labels[0].item()})")

# Right panel
plt.subplot(1, 2, 2)
plt.plot(first_cosine_trace)
plt.title(f"Modality 1, Feature 1 (Label: {labels[0].item()})")

plt.tight_layout()

## Define a FUSED Model

Now, let's define a simple FUSED model with configurable hyperparameters that we'll optimize:

In [None]:
class MultimodalClassifier(nn.Module):
    """A simple multimodal binary classifier using FUSED components.

    Each of the two modalities (``modality_0`` and ``modality_1``) is passed
    through its own ``SequentialEncoder``; the encoded outputs are combined by
    a ``TemporalFusionModel`` and a linear layer maps the fused representation
    to one logit per sample.

    Recognised ``config`` keys (all optional):
        hidden_dim: Width of encoders, fusion module and classifier input
            (default 64).
        num_layers: Number of encoder layers (default 2).
        dropout_rate: Dropout passed to encoders and fusion module
            (default 0.1).
        bidirectional: Whether the encoders are bidirectional (default True).
        fusion_type: Fusion strategy passed to ``TemporalFusionModel``
            (default "attention").
    """

    def __init__(self, config):
        """Build encoders, fusion module and output layer from ``config``."""
        super().__init__()
        self.config = config

        # Extract configuration parameters
        hidden_dim = config.get("hidden_dim", 64)
        num_layers = config.get("num_layers", 2)
        dropout_rate = config.get("dropout_rate", 0.1)
        bidirectional = config.get("bidirectional", True)
        fusion_type = config.get("fusion_type", "attention")

        # Create one encoder per modality. input_dim=10 matches the default
        # n_features of the synthetic data used in this notebook.
        self.encoders = nn.ModuleDict()
        for modality in ["modality_0", "modality_1"]:
            self.encoders[modality] = SequentialEncoder(
                input_dim=10,
                hidden_dim=hidden_dim,
                output_dim=hidden_dim,
                num_layers=num_layers,
                bidirectional=bidirectional,
                dropout=dropout_rate,
                encoder_type="lstm"
            )

        # Create fusion model
        self.fusion = TemporalFusionModel(
            input_dim=hidden_dim,
            hidden_dim=hidden_dim,
            output_dim=hidden_dim,
            num_modalities=2,
            fusion_type=fusion_type,
            dropout=dropout_rate
        )

        # Output layer: a single logit for binary classification
        self.classifier = nn.Linear(hidden_dim, 1)

    def forward(self, inputs):
        """Compute per-sample logits.

        Args:
            inputs: Mapping that contains a tensor for every encoder name
                (``modality_0``, ``modality_1``). Extra keys such as
                ``labels`` are ignored.
                NOTE(review): tensors are presumably
                (batch, seq_length, 10) — confirm against SequentialEncoder.

        Returns:
            ``{"logits": tensor}`` where the trailing size-1 dimension of the
            classifier output has been squeezed away.
        """
        # Encode each modality
        encoded = {}
        for modality, encoder in self.encoders.items():
            encoded[modality] = encoder(inputs[modality])

        # Fuse representations
        fused = self.fusion(encoded)

        # Classify
        logits = self.classifier(fused)

        return {"logits": logits.squeeze(-1)}

    @staticmethod
    def _iter_batches(data, batch_size, device):
        """Yield consecutive mini-batches of ``data`` with tensors moved to ``device``."""
        n_samples = len(data["labels"])
        for start in range(0, n_samples, batch_size):
            stop = min(start + batch_size, n_samples)
            yield {k: v[start:stop].to(device) for k, v in data.items()}

    def _evaluate(self, data, criterion, batch_size, device):
        """Return ``(mean_loss, accuracy)`` on ``data`` without computing gradients."""
        self.eval()
        total_loss = 0.0
        n_correct = 0

        with torch.no_grad():
            for batch in self._iter_batches(data, batch_size, device):
                logits = self(batch)["logits"]
                n_batch = len(batch["labels"])

                # Weight by batch size so a smaller final batch does not skew
                # the mean.
                total_loss += criterion(logits, batch["labels"]).item() * n_batch

                # A positive logit corresponds to predicting class 1.
                n_correct += ((logits > 0).float() == batch["labels"]).sum().item()

        n_samples = len(data["labels"])
        return total_loss / n_samples, n_correct / n_samples

    def fit(self, train_data, validation_data=None, epochs=10, lr=0.001, batch_size=32):
        """Train the model with Adam and binary cross-entropy on logits.

        Args:
            train_data: Dict of tensors containing the modality inputs and a
                float ``"labels"`` tensor; all are sliced along dimension 0.
            validation_data: Optional dict in the same format; when given, the
                model is evaluated on it after every epoch.
            epochs: Number of passes over ``train_data``.
            lr: Adam learning rate.
            batch_size: Number of samples per mini-batch. Batches are taken in
                order; the data is not reshuffled between epochs.

        Returns:
            Metrics of the final epoch: ``{"val_loss", "val_acc"}`` when
            ``validation_data`` is given, otherwise ``{"loss"}``. ``None`` if
            ``epochs`` is 0.
        """
        device = next(self.parameters()).device
        optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        criterion = nn.BCEWithLogitsLoss()
        n_train = len(train_data["labels"])

        # Metrics are returned only after ALL epochs have run; returning from
        # inside the loop would silently stop training after the first epoch.
        metrics = None

        for epoch in range(epochs):
            self.train()
            epoch_loss = 0.0

            for batch in self._iter_batches(train_data, batch_size, device):
                # Forward pass
                optimizer.zero_grad()
                outputs = self(batch)
                loss = criterion(outputs["logits"], batch["labels"])

                # Backward pass
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item() * len(batch["labels"])

            epoch_loss /= n_train

            if validation_data is not None:
                val_loss, val_acc = self._evaluate(validation_data, criterion, batch_size, device)
                print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f} - Val Loss: {val_loss:.4f} - Val Acc: {val_acc:.4f}")
                metrics = {"val_loss": val_loss, "val_acc": val_acc}
            else:
                print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f}")
                metrics = {"loss": epoch_loss}

        return metrics

# Test the model: build one instance from a fixed configuration and print its
# module structure as a quick sanity check before tuning.
test_config = dict(
    hidden_dim=64,
    num_layers=2,
    dropout_rate=0.1,
    bidirectional=True,
    fusion_type="attention",
)

model = MultimodalClassifier(config=test_config)
print(model)

## Define Search Space

Now, let's define the hyperparameter search space for our optimizer:

In [None]:
# Hyperparameter search space: one entry per key that MultimodalClassifier
# reads from its config dict. Each entry declares the parameter's "type" plus
# either numeric bounds ("low" / "high", optionally "step") or a list of
# "choices".
# NOTE(review): how the bounds are interpreted (e.g. whether "high" is
# inclusive) is decided by the tuner backend — confirm against its docs.
search_space = {
    # Width shared by the encoders, the fusion module and the classifier input.
    "hidden_dim": {
        "type": "int",
        "low": 32,
        "high": 128,
        "step": 16
    },
    # Number of layers in each modality encoder.
    "num_layers": {
        "type": "int",
        "low": 1,
        "high": 3
    },
    # Dropout passed to both the encoders and the fusion module.
    "dropout_rate": {
        "type": "float",
        "low": 0.0,
        "high": 0.5
    },
    # Whether the encoders are bidirectional.
    "bidirectional": {
        "type": "categorical",
        "choices": [True, False]
    },
    # Fusion strategy handed to TemporalFusionModel.
    # NOTE(review): presumably all three values are supported by
    # TemporalFusionModel — verify before running a long study.
    "fusion_type": {
        "type": "categorical",
        "choices": ["attention", "concat", "mean"]
    }
}

## Run Hyperparameter Optimization

Now, let's use the `HyperparameterTuner` to find the optimal configuration:

In [None]:
# Configure the optimizer
# NOTE(review): "direction" is given here AND again in tuner.tune() below;
# keep the two in sync (which one takes precedence is not visible here).
# NOTE(review): the study is stored in a local SQLite file under a fixed
# name, so re-running this cell presumably reuses or collides with the
# earlier study — confirm how the optimizer handles an existing study.
optimizer_config = {
    "direction": "minimize",   # Minimize validation loss
    "n_trials": 20,            # Number of trials to run
    "study_name": "multimodal_classifier",
    "storage": "sqlite:///study.db"  # Save results to a database
}

# Create tuner
# NOTE(review): cv_folds=3 is combined with an explicit validation_dataset in
# tune() below — confirm how the tuner reconciles cross-validation with a
# fixed validation set.
tuner = HyperparameterTuner(
    optimizer_type="optuna",
    optimizer_config=optimizer_config,
    cv_folds=3,  # Use 3-fold cross-validation
)

# Optional: Create an experiment tracker
tracker = ExperimentTracker(
    experiment_name="hyperparameter_optimization",
    tracking_uri=".",  # Local tracking
    experiment_tags={"task": "classification", "data": "synthetic"}
)

# Run hyperparameter optimization
# eval_metric="val_loss" matches a key of the dict that
# MultimodalClassifier.fit returns when validation data is supplied.
# Training runs on the GPU when one is available, otherwise on the CPU.
best_params, best_model = tuner.tune(
    model_class=MultimodalClassifier,
    dataset=train_dataset,
    validation_dataset=val_dataset,
    search_space=search_space,
    eval_metric="val_loss",
    direction="minimize",
    epochs=5,
    batch_size=32,
    lr=0.001,
    experiment_tracker=tracker,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

## Analyze Results

Let's analyze the optimization results and look at the best parameters:

In [None]:
# Report the winning configuration, one parameter per line
print("Best hyperparameters:")
for param, value in best_params.items():
    print(f"  {param}: {value}")

# If we used Optuna, we can visualize the study. The plots are only drawn
# when the backend is the Optuna optimizer and it actually holds a study.
optuna_backend = tuner.optimizer
if isinstance(optuna_backend, OptunaOptimizer) and optuna_backend.study is not None:
    # Imported lazily so the cell still runs when Optuna is not the backend
    import optuna.visualization as vis

    # In order: optimization history, parameter importances, and a slice of
    # the parameter space.
    for make_plot in (
        vis.plot_optimization_history,
        vis.plot_param_importances,
        vis.plot_slice,
    ):
        fig = make_plot(optuna_backend.study)
        fig.show()

## Test the Best Model

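Now, let's evaluate the performance of the best model on the validation set (keep in mind that this same set guided the hyperparameter selection, so the score is an optimistic estimate; a separate held-out test set would give an unbiased one):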

In [None]:
# Move to CPU for evaluation and switch to inference mode
best_model = best_model.cpu()
best_model.eval()

# Run the validation data through the model in fixed-size chunks
batch_size = 32
n_val = len(val_dataset["labels"])
all_preds = []
all_labels = []

with torch.no_grad():
    for start in range(0, n_val, batch_size):
        # Slicing clamps at the end of the tensor, so the last chunk may
        # simply be shorter.
        window = slice(start, start + batch_size)
        batch = {k: v[window] for k, v in val_dataset.items()}

        # A positive logit means the model predicts class 1
        logits = best_model(batch)["logits"]
        all_preds.append((logits > 0).float())
        all_labels.append(batch["labels"])

# Concatenate the per-chunk results into single tensors
all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

# Overall accuracy: fraction of predictions that equal the label
accuracy = (all_preds == all_labels).float().mean().item()
print(f"Validation Accuracy: {accuracy:.4f}")

# Confusion matrix and per-class report
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

cm = confusion_matrix(all_labels, all_preds)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

report = classification_report(all_labels, all_preds)
print("Classification Report:")
print(report)

## Save the Best Model

Finally, let's save the best model configuration and weights:

In [None]:
# Save model configuration and weights
from fused.utils.serving import ModelExporter

# Create exporter bound to the tuned model; artifacts are requested under
# ./saved_models.
# NOTE(review): whether the exporter creates the directory if it is missing
# is not visible here — confirm against ModelExporter.
exporter = ModelExporter(best_model, save_dir="./saved_models")

# Export model: one call for the PyTorch artifact, one for the configuration.
# Both return values are printed below as the saved locations.
torch_path = exporter.export_pytorch(filename="best_multimodal_model.pt")
config_path = exporter.export_config(filename="best_multimodal_config.json")

print(f"Model saved to {torch_path}")
print(f"Config saved to {config_path}")

## Conclusion

In this notebook, we've demonstrated how to use the hyperparameter optimization utilities in the FUSED framework to automatically find the best configuration for a multimodal time series model. We've covered:

1. Creating a synthetic multimodal dataset
2. Defining a model using FUSED components
3. Specifying a hyperparameter search space
4. Running the optimization process
5. Analyzing and visualizing the results
6. Evaluating and saving the best model

The `HyperparameterTuner` class makes it easy to integrate with different optimization backends like Optuna and Ray Tune, enabling efficient hyperparameter search for complex models.