In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

## Simple neural network

In [2]:
class BC_Network(nn.Module):
    def __init__(self, input_size, output_size):
        super(BC_Network, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

In [3]:
# Example data: states (e.g., images or feature vectors) and expert actions
states = np.random.randn(1000, 10)  # 1000 states with 10 features
actions = np.random.randn(1000, 2)  # 1000 actions with 2 dimensions (e.g., 2D)

# Convert to PyTorch tensors
states = torch.tensor(states, dtype=torch.float32)
actions = torch.tensor(actions, dtype=torch.float32)

# Initialize model, loss function, and optimizer
model = BC_Network(input_size=10, output_size=2)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

## Training loop

In [4]:
for epoch in range(1000):
    optimizer.zero_grad()
    outputs = model(states)
    loss = criterion(outputs, actions)
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# Now, the model is trained to clone the behavior of the expert.

Epoch 0, Loss: 1.029486060142517
Epoch 100, Loss: 0.7919188737869263
Epoch 200, Loss: 0.4418388605117798
Epoch 300, Loss: 0.2389066368341446
Epoch 400, Loss: 0.14609679579734802
Epoch 500, Loss: 0.09871659427881241
Epoch 600, Loss: 0.07145064324140549
Epoch 700, Loss: 0.05374564975500107
Epoch 800, Loss: 0.042260266840457916
Epoch 900, Loss: 0.03374309092760086


---