# Train behavior cloning

Train a behavior cloning based robot controller. 
* Code for loading and pre-processing the training data, typically from a set of demonstrations as specified in an exp/run
* Train the behavior cloning controller. This notebook should be able to run different kinds of controllers, such as MLP, LSTM, LSTM+MDN, Transformer, etc. 
__To be done as of June 1, 2025__
* The trained controllers should be saved into the exp/run

In [None]:
# Notebook setup: imports, project configuration, random seed and device.

import sys
# Add the parent directory to the module search path; presumably this is
# where the project modules imported below (exp_run_config, bc_trainingdata,
# bc_LSTM) live -- TODO confirm.
sys.path.append("..")

from exp_run_config import Config
# Set the project name on the Config class before any Config() is created.
Config.PROJECTNAME = "BerryPicker"

import pathlib
from tqdm import tqdm
import pprint

import torch
import torch.nn as nn
import torch.optim as optim

# Fix the PyTorch random seed so that repeated runs start from the same
# random state.
torch.manual_seed(1)

from bc_trainingdata import create_bc_training_and_validation
from bc_LSTM import LSTMXYPredictor, LSTMResidualController

from tensorboardX import SummaryWriter

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

***ExpRun**: Loading pointer config file:
	C:\Users\lboloni\.config\BerryPicker\mainsettings.yaml
***ExpRun**: Loading machine-specific config file:
	G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\settings-LotziYoga.yaml
Using device: cuda


In [None]:
# Select the exp/run that describes the controller to be trained.
experiment = "behavior_cloning"
# Alternative run, kept for quick switching:
# run = "bc_mlp_00"
run = "lstm_00"
exp = Config().get_experiment(experiment, run)
pprint.pprint(exp)
# Load the second exp/run that this one points to through its
# "sp_experiment" and "sp_run" entries; its "latent_size" entry is read
# later when the model is created.
spexp = Config().get_experiment(exp["sp_experiment"], exp["sp_run"])

***ExpRun**: No system dependent experiment file
	 G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\experiment-config\LotziYoga\behavior_cloning\lstm_00_sysdep.yaml,
	 that is ok, proceeding.
***ExpRun**: Configuration for exp/run: behavior_cloning/lstm_00 successfully loaded
Experiment:
    control_size: 6
    controller: LSTMXYPredictor
    controller_file: controller.pth
    controller_hidden_size: 32
    controller_num_layers: 2
    data_dir: c:\Users\lboloni\Documents\Code\_TempData\BerryPicker-experiments\behavior_cloning\lstm_00
    epochs: 10
    exp_run_sys_indep_file: C:\Users\lboloni\Documents\Code\_Checkouts\BerryPicker\src\experiment_configs\behavior_cloning\lstm_00.yaml
    experiment_name: behavior_cloning
    run_name: lstm_00
    sequence_lenght: 10
    sp_experiment: sensorprocessing_conv_vae
    sp_run: sp_vae_256
    subrun_name: null
    time_started: '2025-06-02 18:26:10.292469'
    training_data:
    - - random-both-cameras
      - '2025_03_08__14

### Training an RNN model
Functions for training an RNN-type model. These models assume that the input is a sequence $[z_{t-k}, \ldots, z_{t}]$ while the output is the next action $a_{t+1}$.

In [1]:
def validate_bc_rnn(model, criterion, data):
    """Return the average validation loss of a sequence-based behavior cloning model.

    Every validation sequence is passed through the model on its own, as a
    batch of size one, and the model output is compared with the matching
    target using ``criterion``. This function does not pass any state from
    one sequence to the next; whether the model itself starts each call from
    a fresh hidden state depends on its ``forward`` implementation.

    CHECK: the target is presumably meant to be compared with the last output
    of the RNN, after the whole input sequence has been passed through it.

    Args:
        model: an LSTM or similar model that can consume a sequence of
            inputs, called as ``model(input_seq)``.
        criterion: any function that calculates the distance between the
            model output and the target, returning a scalar tensor.
        data: a mapping whose "z_validation" entry holds the input sequences
            and whose "a_validation" entry holds the targets, indexed in
            parallel along the first dimension.

    Returns:
        The mean of the per-sequence losses as a float, or NaN when there are
        no validation sequences.

    Side effects:
        The model is switched to evaluation mode and left in that mode.
    """
    inputs = data["z_validation"]
    targets = data["a_validation"]
    num_sequences = inputs.shape[0]
    model.eval()
    if num_sequences == 0:
        # There is nothing to average over; NaN reports "no value" without
        # raising ZeroDivisionError in the middle of a training run.
        return float("nan")
    total_loss = 0.0
    with torch.no_grad():  # no gradients are needed for validation
        for i in range(num_sequences):
            # Add a leading batch dimension of size one to input and target.
            input_seq = inputs[i].unsqueeze(0)
            target = targets[i].unsqueeze(0)
            output = model(input_seq)
            total_loss += criterion(output, target).item()
    return total_loss / num_sequences


In [2]:

def train_bc_rnn(model, optimizer, criterion, data, num_epochs, writer=None, print_every=2):
    """Train a sequence-based (LSTM-class) behavior cloning model in place.

    In every epoch each training sequence is passed through the model on its
    own, as a batch of size one, and followed by one optimizer step. After
    the epoch the average training loss and the average validation loss
    (computed by validate_bc_rnn) are logged and periodically printed.

    Args:
        model: an LSTM or similar model, called as ``model(input_seq)``.
        optimizer: the optimizer that updates the model parameters.
        criterion: loss function called as ``criterion(output, target)``.
        data: a mapping whose "z_train" / "a_train" entries hold the training
            input sequences and targets, indexed in parallel along the first
            dimension. It is also handed to validate_bc_rnn.
        num_epochs: number of passes over the training sequences.
        writer: optional summary writer; when given, the "TrainingLoss" and
            "ValidationLoss" scalars are written and flushed once per epoch.
        print_every: print the losses every this many epochs; 0 or None
            disables printing.

    Raises:
        ValueError: if data["z_train"] contains no sequences.
    """
    train_inputs = data["z_train"]
    train_targets = data["a_train"]
    num_sequences = train_inputs.shape[0]
    if num_sequences == 0:
        # Fail before any work is done instead of dividing by zero when the
        # first epoch average is computed.
        raise ValueError("train_bc_rnn: data['z_train'] contains no training sequences")

    for epoch in tqdm(range(num_epochs)):
        # validate_bc_rnn is called at the end of every epoch, so training
        # mode has to be selected again at the start of each one.
        model.train()

        training_loss = 0.0
        for i in range(num_sequences):
            # Add a leading batch dimension of size one to input and target.
            input_seq = train_inputs[i].unsqueeze(0)
            target = train_targets[i].unsqueeze(0)

            # Forward pass
            output = model(input_seq)
            loss = criterion(output, target)
            training_loss += loss.item()

            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        avg_training_loss = training_loss / num_sequences
        avg_validation_loss = validate_bc_rnn(model, criterion, data)
        if writer is not None:
            writer.add_scalar("TrainingLoss", avg_training_loss, epoch)
            writer.add_scalar("ValidationLoss", avg_validation_loss, epoch)
            writer.flush()
        if print_every and (epoch + 1) % print_every == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_training_loss:.4f} Validation Loss: {avg_validation_loss:.4f} ')


# Train the LSTMXYPredictor model 

Trains the baseline LSTM model LSTMXYPredictor. The number of layers and the hidden size are read from the exp/run configuration (`controller_num_layers`, `controller_hidden_size`). 

Training notes:
* On the proprioception experiments, this reaches the performance:
    Epoch [20/100], Training Loss: 0.0079 Validation Loss: 0.0080
* No further improvement is observed from there. 

In [None]:
# Train the LSTMXYPredictor controller described by the exp/run and save
# its weights into the exp/run data directory.

if exp["controller"] != "LSTMXYPredictor":
    raise Exception(f"The controller specified {exp['controller']} is not LSTMXYPredictor")

# Model dimensions: the input size comes from the sensor processing exp/run,
# everything else from the behavior cloning exp/run.
latent_size = spexp["latent_size"]
output_size = exp["control_size"]  # degrees of freedom in the robot
num_layers = exp["controller_num_layers"]
hidden_size = exp["controller_hidden_size"]

# Instantiate the model
model = LSTMXYPredictor(latent_size=latent_size, hidden_size=hidden_size, output_size=output_size, num_layers=num_layers)

exp.start_timer("data_preparation")
bc_data = create_bc_training_and_validation(exp, spexp, device)
exp.end_timer("data_preparation")

# train_bc_rnn and validate_bc_rnn read the entries "z_train", "a_train",
# "z_validation" and "a_validation" from a single mapping, so the prepared
# data has to be in that form.
# NOTE(review): this cell used to unpack four values here and then pass an
# undefined name `data` to train_bc_rnn. Both possible return forms are
# accepted until the return type of create_bc_training_and_validation is
# confirmed.
if isinstance(bc_data, dict):
    data = bc_data
else:
    inputs_training, targets_training, inputs_validation, targets_validation = bc_data
    data = {
        "z_train": inputs_training,
        "a_train": targets_training,
        "z_validation": inputs_validation,
        "a_validation": targets_validation,
    }

# The model and the tensors it consumes must live on the same device. This
# is done before the optimizer is created so that the optimizer holds the
# parameters that are actually trained.
model = model.to(data["z_train"].device)

# Loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = exp["epochs"]

# Keep the TensorBoard logs next to the other outputs of this exp/run
# instead of in a machine-specific absolute path.
log_dir = pathlib.Path(exp.data_dir(), "tensorboard")
print(f"TensorBoard logs: {log_dir}")
writer = SummaryWriter(logdir=str(log_dir))
exp.start_timer("train")
try:
    train_bc_rnn(
        model, optimizer, criterion, data=data,
        num_epochs=num_epochs, writer=writer)
finally:
    # Close the event file even when training is interrupted or fails.
    writer.close()
print("Training complete.")
exp.end_timer("train")

# Save the trained weights into the exp/run data directory.
controller_path = pathlib.Path(exp.data_dir(), exp["controller_file"])
torch.save(model.state_dict(), controller_path)


  0%|          | 0/10 [00:00<?, ?it/s]

 20%|██        | 2/10 [00:06<00:24,  3.06s/it]

Epoch [2/10], Training Loss: 2.2141 Validation Loss: 1.8117 


 40%|████      | 4/10 [00:20<00:34,  5.72s/it]

Epoch [4/10], Training Loss: 1.2378 Validation Loss: 1.1010 


 60%|██████    | 6/10 [00:33<00:25,  6.36s/it]

Epoch [6/10], Training Loss: 0.8218 Validation Loss: 0.8380 


 80%|████████  | 8/10 [00:49<00:14,  7.25s/it]

Epoch [8/10], Training Loss: 0.5207 Validation Loss: 0.6388 


100%|██████████| 10/10 [01:05<00:00,  6.57s/it]

Epoch [10/10], Training Loss: 0.4163 Validation Loss: 0.4760 
Training complete.



