# In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple neural network with two layers
class ToyNN(nn.Module):
    """Two fully connected layers applied back to back, with no activation."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create ``layer1`` (input -> hidden) and ``layer2`` (hidden -> output)."""
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return ``layer2(layer1(x))``."""
        hidden = self.layer1(x)
        return self.layer2(hidden)

# Define the SPSA gradient approximation
def spsa_gradient(network, loss_fn, input_data, true_output, epsilon=0.01):
    """Estimate the loss gradient w.r.t. ``network.layer1.weight`` with SPSA.

    Simultaneous Perturbation Stochastic Approximation shifts every weight at
    once along a random +/-1 direction and turns the two resulting loss values
    (one per side) into a gradient estimate. Only two forward passes are
    needed regardless of how many weights there are.

    Args:
        network: Module with a ``layer1`` attribute exposing a ``weight``
            tensor; it is invoked as ``network(input_data)``.
        loss_fn: Callable ``loss_fn(prediction, true_output)`` returning a
            scalar loss.
        input_data: Input forwarded through ``network``.
        true_output: Target handed to ``loss_fn``.
        epsilon: Size of the perturbation applied to each weight; must be
            positive.

    Returns:
        A tensor with the shape of ``network.layer1.weight``. It carries no
        autograd history, so it can be assigned to ``.grad`` as is.

    Raises:
        ValueError: If ``epsilon`` is not positive.
    """
    if epsilon <= 0:
        raise ValueError("epsilon must be positive")

    weight = network.layer1.weight
    original_weight = weight.detach().clone()

    # SPSA needs +/-1 (Rademacher) directions. With them 1/d == d, so the
    # estimate can multiply by the direction instead of dividing. A uniform
    # draw in [-1, 1] would scale each entry by d**2 and shrink the estimate.
    directions = (torch.rand_like(original_weight) < 0.5).to(original_weight.dtype) * 2 - 1
    perturbation = directions * epsilon

    # The two probes are plain function evaluations: no graph is wanted.
    with torch.no_grad():
        try:
            # Write through .data so the parameter's autograd version counter
            # is untouched and a graph built before this call stays usable.
            weight.data.copy_(original_weight + perturbation)
            loss_positive = loss_fn(network(input_data), true_output)

            weight.data.copy_(original_weight - perturbation)
            loss_negative = loss_fn(network(input_data), true_output)
        finally:
            # Always put the weights back, even if a probe raised.
            weight.data.copy_(original_weight)

    # Central difference along the random direction, spread over all weights.
    slope = (loss_positive - loss_negative) / (2 * epsilon)
    return slope * directions

# Create the model, loss function, and optimizer
model = ToyNN(input_size=5, hidden_size=10, output_size=1)
loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Dummy data
input_data = torch.rand((1, 5))
true_output = torch.tensor([[0.5]])

# Start from empty gradients so nothing left over from an earlier run of this
# cell is accumulated into the update.
optimizer.zero_grad()

# Forward pass
output = model(input_data)
loss = loss_function(output, true_output)

# Backward pass. Backpropagation fills .grad for every parameter, including
# layer1.weight, so it has to run before the SPSA estimate is stored.
# Running it afterwards would add the true gradient on top of the estimate.
loss.backward()

# Overwrite the backprop gradient of layer1.weight with the SPSA estimate.
# layer1.bias and layer2 keep their backprop gradients.
layer1_gradient = spsa_gradient(model, loss_function, input_data, true_output)
# detach() keeps autograd history out of .grad in case the estimate has any.
model.layer1.weight.grad = layer1_gradient.detach()

# Update weights
optimizer.step()
