# Belavkin Optimizer Example

This notebook demonstrates how to use the Belavkin optimizer on a simple task.

In [None]:
import sys
sys.path.append('..')

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
import numpy as np

from track1_optimizer import BelavkinOptimizer

## 1. Generate Synthetic Data

We'll create a simple binary classification task.

In [None]:
# Build a reproducible two-class problem: labels come from the sign of a noisy
# linear score, so the classes are almost (but not perfectly) linearly separable.
torch.manual_seed(42)
n_samples, n_features = 1000, 20

X = torch.randn(n_samples, n_features)
w_true = torch.randn(n_features, 1)
# Draw order (X, w_true, noise) is kept so the seeded values stay the same.
label_noise = 0.1 * torch.randn(n_samples, 1)
scores = X @ w_true + label_noise
y = (scores > 0).long().squeeze()

# Wrap the tensors so training can iterate over shuffled mini-batches of 64
dataset = TensorDataset(X, y)
train_loader = DataLoader(dataset, batch_size=64, shuffle=True)

print(f"Dataset: {n_samples} samples, {n_features} features")
print(f"Class distribution: {y.sum().item()}/{len(y)} positive")

## 2. Define Model

In [None]:
class SimpleClassifier(nn.Module):
    """Three-layer fully connected network emitting two class logits.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of both hidden layers (default 64).
    """

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        # Layers are created in forward order: Linear -> ReLU -> Linear -> ReLU -> Linear.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 2),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw (unnormalized) logits for the batch ``x``."""
        logits = self.network(x)
        return logits

# Instantiate the network for the `n_features`-dimensional inputs and show its layers.
# NOTE: train_model() in section 3 looks up this notebook-level name `model`, so
# whichever network is bound to `model` at call time is the one that gets evaluated.
model = SimpleClassifier(n_features)
print(model)

## 3. Train with Different Optimizers

In [None]:
def train_model(optimizer, n_epochs=100, model=None, loader=None):
    """Train a classifier with ``optimizer`` and return the per-epoch mean loss.

    Args:
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``. It must
            have been built from the parameters of the network being trained,
            otherwise the backward pass never reaches the parameters it updates.
        n_epochs: Number of full passes over the data.
        model: Network to train. When omitted, the notebook-level ``model`` is
            used, which is what this function always did before the parameter
            existed.
        loader: Iterable of ``(inputs, targets)`` batches that supports
            ``len()``. When omitted, the notebook-level ``train_loader`` is used.

    Returns:
        A list with ``n_epochs`` floats: the mean batch loss of each epoch.

    Warns:
        RuntimeWarning: if the optimizer exposes parameters but none of them
            belongs to the model being trained.
    """
    import warnings

    if model is None:
        # The parameter shadows the notebook-level name, so look it up explicitly.
        model = globals()["model"]
    if loader is None:
        loader = train_loader

    # Catch the easy-to-make mistake of pairing an optimizer with a different
    # network: the loss would be computed on one model while the optimizer
    # holds the (gradient-less) parameters of another.
    model_param_ids = {id(p) for p in model.parameters()}
    optimizer_params = [
        p
        for group in getattr(optimizer, "param_groups", [])
        for p in group["params"]
    ]
    if optimizer_params and not any(id(p) in model_param_ids for p in optimizer_params):
        warnings.warn(
            "train_model: the optimizer holds no parameter of the model being "
            "trained, so the model will not learn. Pass the matching network "
            "via `model=`.",
            RuntimeWarning,
            stacklevel=2,
        )

    criterion = nn.CrossEntropyLoss()
    losses = []

    for epoch in range(n_epochs):
        epoch_loss = 0.0
        for inputs, targets in loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        # Mean of the per-batch losses for this epoch.
        losses.append(epoch_loss / len(loader))

    return losses

### 3.1 Adam Baseline

In [None]:
# train_model() runs its forward pass through the notebook-level name `model`,
# so bind that name to the very network whose parameters this optimizer owns.
# Without this, the loss is computed on a different, never-updated network and
# the optimizer's parameters never receive gradients.
model = model_adam = SimpleClassifier(n_features)
optimizer_adam = torch.optim.Adam(model_adam.parameters(), lr=1e-3)
losses_adam = train_model(optimizer_adam, n_epochs=100)
print(f"Adam - Final loss: {losses_adam[-1]:.4f}")

### 3.2 Belavkin Optimizer

In [None]:
# train_model() runs its forward pass through the notebook-level name `model`,
# so bind that name to the very network whose parameters this optimizer owns.
# Without this, the loss is computed on a different, never-updated network and
# the optimizer's parameters never receive gradients.
model = model_belavkin = SimpleClassifier(n_features)
optimizer_belavkin = BelavkinOptimizer(
    model_belavkin.parameters(),
    lr=1e-3,
    gamma=1e-4,
    beta=1e-2,
    adaptive_gamma=True
)
losses_belavkin = train_model(optimizer_belavkin, n_epochs=100)
print(f"Belavkin - Final loss: {losses_belavkin[-1]:.4f}")

## 4. Visualize Results

In [None]:
# Overlay the per-epoch mean training loss of both optimizers on one axis.
fig, ax = plt.subplots(figsize=(10, 6))
for curve, name in ((losses_adam, 'Adam'), (losses_belavkin, 'Belavkin')):
    ax.plot(curve, label=name, linewidth=2)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Optimizer Comparison')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## 5. Hyperparameter Exploration

In [None]:
# Test different gamma values
# NOTE(review): `adaptive_gamma` is left at the optimizer's default here, unlike
# the run in section 3.2 which passes adaptive_gamma=True — confirm this is intended.
gamma_values = [1e-5, 1e-4, 1e-3]
results = {}

for gamma in gamma_values:
    # train_model() runs its forward pass through the notebook-level name
    # `model`, so rebind it to the fresh network this optimizer will update;
    # otherwise every gamma would be scored on the same, never-updated network.
    model = model_test = SimpleClassifier(n_features)
    optimizer_test = BelavkinOptimizer(
        model_test.parameters(),
        lr=1e-3,
        gamma=gamma,
        beta=1e-2
    )
    losses = train_model(optimizer_test, n_epochs=100)
    results[gamma] = losses
    print(f"Gamma={gamma:.0e}: Final loss={losses[-1]:.4f}")

In [None]:
# One loss curve per damping factor tried in the sweep above
fig, ax = plt.subplots(figsize=(10, 6))
for gamma, losses in results.items():
    ax.plot(losses, label=f'γ={gamma:.0e}', linewidth=2)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Belavkin Optimizer: Sensitivity to Damping Factor γ')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## Conclusion

This notebook demonstrated:
1. How to use the Belavkin optimizer
2. Comparison with Adam baseline
3. Hyperparameter sensitivity analysis

For more examples, see:
- `experiments/run_track1_experiments.py` for synthetic tasks
- `docs/USAGE.md` for detailed documentation