# Train behavior cloning

Train a behavior cloning based robot controller. 
* Code for loading and pre-processing the training data, typically from a set of demonstrations as specified in an exp/run
* Train the behavior cloning controller. This notebook should be able to run different kind of controllers such as MLP, LSTM, LSTM+MDN, Transformer etc. 
__To be done as of June 1, 2025__
* The trained controllers should be saved into the exp/run

In [1]:
import sys
sys.path.append("..")

from exp_run_config import Config
Config.PROJECTNAME = "BerryPicker"

import pathlib
from tqdm import tqdm
import numpy as np
import pprint
#import matplotlib.pyplot as plt

import torch
import torch.nn as nn
#import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)
from sensorprocessing.sp_helper import get_transform_to_sp
from sensorprocessing.sp_factory import create_sp
from demonstration.demonstration import Demonstration

from bc_LSTM import LSTMXYPredictor, LSTMResidualController
from robot.al5d_position_controller import RobotPosition

from tensorboardX import SummaryWriter

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

***ExpRun**: Loading pointer config file:
	C:\Users\lboloni\.config\BerryPicker\mainsettings.yaml
***ExpRun**: Loading machine-specific config file:
	G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\settings-LotziYoga.yaml
Using device: cuda


In [2]:
experiment = "behavior_cloning"
# run = "bc_mlp_00"
run = "lstm_00"

exp = Config().get_experiment(experiment, run)
pprint.pprint(exp)

# Create the sp object described in the experiment
spexp = Config().get_experiment(exp["sp_experiment"], exp["sp_run"])
sp = create_sp(spexp, device)


***ExpRun**: No system dependent experiment file
	 G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\experiment-config\LotziYoga\behavior_cloning\lstm_00_sysdep.yaml,
	 that is ok, proceeding.
***ExpRun**: Configuration for exp/run: behavior_cloning/lstm_00 successfully loaded
Experiment:
    control_size: 6
    controller: LSTMXYPredictor
    controller_file: controller.pth
    controller_hidden_size: 32
    controller_num_layers: 2
    data_dir: c:\Users\lboloni\Documents\Code\_TempData\BerryPicker-experiments\behavior_cloning\lstm_00
    epochs: 10
    exp_run_sys_indep_file: C:\Users\lboloni\Documents\Code\_Checkouts\BerryPicker\src\experiment_configs\behavior_cloning\lstm_00.yaml
    experiment_name: behavior_cloning
    run_name: lstm_00
    sequence_lenght: 10
    sp_experiment: sensorprocessing_conv_vae
    sp_run: sp_vae_256
    subrun_name: null
    time_started: '2025-06-02 18:26:10.292469'
    training_data:
    - - random-both-cameras
      - '2025_03_08__14

### Creating training and validation data
Create training and validation data from all the demonstrations of a certain task.

In [3]:
def create_RNN_training_sequence_xy(x_seq, y_seq, sequence_length):
    """Create supervised training data for RNNs such as LSTM from two sequences. In this data, from a string of length sequence_length in x_seq we are predicting the next item in y_seq.
    x_seq and y_seq are tensors
    Returns the results as tensors
    """
    # Prepare training data
    total_length = x_seq.shape[0]
    #total_length = len(x_seq)
    #assert total_length == len(y_seq)
    inputs = []
    targets = []
    for i in range(total_length - sequence_length):
        # Input is a subsequence of length `sequence_length`
        input_seq = x_seq[i:i + sequence_length]
        # Shape: [sequence_length, latent_size]

        # Target is the next vector after the input sequence
        target = y_seq[i + sequence_length]
        # Shape: [output_size]

        # Append to lists
        inputs.append(torch.tensor(input_seq))
        targets.append(torch.tensor(target))

    # Convert lists to tensors for training
    inputs = torch.stack(inputs)   # Shape: [num_samples, sequence_length, latent_size]
    targets = torch.stack(targets) # Shape: [num_samples, latent_size]
    return inputs, targets

In [None]:
def create_bc_training_and_validation(exp, spexp, sp):
    """Creates training data for training and validation with the demonstrations specified in the exp/run. Caches the results into the input and target files specified in the exp/run. Remove those files to recalculate."""
    retval = {}
    input_path = pathlib.Path(exp.data_dir(), "training_input.pth")
    target_path = pathlib.Path(exp.data_dir(), "training_target.pth")

    if input_path.exists():
        inputs = torch.load(input_path, weights_only=True)
        targets = torch.load(target_path, weights_only=True)
    else:
        all_demos_inputs = []
        all_demos_targets = []
        transform = get_transform_to_sp(spexp)
        for val in exp["training_data"]: # for all demonstrations
            run, demo_name, camera = val
            exp_demo = Config().get_experiment("demonstration", run)
            demo = Demonstration(exp_demo, demo_name)
            # read the a and z 
            inputlist = []
            targetlist = []
            for i in range(demo.metadata["maxsteps"]-1): # -1 because of lookahead
                sensor_readings, _ = demo.get_image(i, device=device, transform=transform, camera=camera)                
                # inputlist.append(sensor_readings[0])
                z = sp.process(sensor_readings)
                inputlist.append(torch.from_numpy(z))
                # the action we are choosing, is the next one
                a = demo.get_action(i+1)
                rp = RobotPosition.from_vector(a)
                anorm = rp.to_normalized_vector()        
                targetlist.append(torch.from_numpy(anorm))
            inputlist_tensor = torch.stack(inputlist)
            targetlist_tensor = torch.stack(targetlist)
            inputs, targets = create_RNN_training_sequence_xy(inputlist_tensor, targetlist_tensor, sequence_length=exp["sequence_lenght"])
            all_demos_inputs.append(inputs)
            all_demos_targets.append(targets)
        inputs = torch.cat(all_demos_inputs)
        targets = torch.cat(all_demos_targets)
        torch.save(inputs, input_path)
        torch.save(targets, target_path)


    # Separate the training and validation data. 
    # We will be shuffling the demonstrations 
    # rows = torch.randperm(inputs.size(0)) 
    rows = torch.randperm(inputs.shape[0]) 
    shuffled_inputs = inputs[rows]
    shuffled_targets = targets[rows]

    training_size = int( inputs.shape[0] * 0.67 )
    inputs_training = shuffled_inputs[1:training_size]
    targets_training = shuffled_targets[1:training_size]

    inputs_validation = shuffled_inputs[training_size:]
    targets_validation = shuffled_targets[training_size:] 
    return inputs_training, targets_training, inputs_validation, targets_validation

In [5]:
def validate_bc_rnn(model, criterion, inputs_validation, targets_validation):
    """Calculates the average validation error for the behavior cloning model using an RNN with the specific criterion function.     
    The inputs and the targets a list of individual input and target. An input is a string of specific length. The target is a single value. 
    CHECK: I think that the target is supposed to be the last output of the RNN when the whole input string had been passed through it. 
    The model is reset before each of the strings (i.e. state is not transferred)
    model: an LSTM or similar model that can consume a sequence of inputs
    criterion: any function that calculates the distance between the targets
    """
    num_sequences = inputs_validation.shape[0]
    model.eval()
    val_loss = 0
    with torch.no_grad():  # Disable gradient computation
        for i in range(num_sequences):
            # Forward pass
            input_seq = inputs_validation[i]
            target = targets_validation[i]
            # Reshape for batch compatibility
            input_seq = input_seq.unsqueeze(0)  # Shape: [1, sequence_length, latent_size]
            target = target.unsqueeze(0)        # Shape: [1, latent_size]
            outputs = model(input_seq)
            loss = criterion(outputs, target)
            # Accumulate loss
            val_loss += loss.item()
    avg_loss = val_loss / num_sequences
    return avg_loss


In [6]:

def train_behavior_cloning(model, optimizer, criterion, inputs_training, targets_training, inputs_validation, targets_validation, num_epochs, writer = None):
    """Train a behavior cloning model of the LSTM class."""
    num_sequences = inputs_training.shape[0]

    for epoch in tqdm(range(num_epochs)):
        model.train()
        
        # Loop over each sequence in the batch
        training_loss = 0
        for i in range(num_sequences):
            # Prepare input and target
            input_seq = inputs_training[i]
            target = targets_training[i]

            # Reshape for batch compatibility
            input_seq = input_seq.unsqueeze(0)  # Shape: [1, sequence_length, latent_size]
            target = target.unsqueeze(0)        # Shape: [1, latent_size]

            # Forward pass
            output = model(input_seq)
            loss = criterion(output, target)
            training_loss += loss.item()
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        avg_training_loss = training_loss / num_sequences
        avg_validation_loss = validate_bc_rnn(model, criterion, inputs_validation=inputs_validation, targets_validation=targets_validation)
        if writer is not None:
            writer.add_scalar("TrainingLoss", avg_training_loss, epoch)
            writer.add_scalar("ValidationLoss", avg_validation_loss, epoch)
            writer.flush()
        if (epoch+1) % 2 == 0: # was 0
            print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_training_loss:.4f} Validation Loss: {avg_validation_loss:.4f} ')


# Train the LSTMXYPredictor model 

Trains the single layer LSTM model LSTMXYPredictor. This is a baseline LSTM model. 

Training notes:
* On the proprioception experiments, this reaches the performance:
    Epoch [20/100], Training Loss: 0.0079 Validation Loss: 0.0080
* No further improvement is observed from there. 

In [8]:
if exp["controller"] != "LSTMXYPredictor":
    raise Exception(f"The controller specified {exp['controller']} is not LSTMXYPredictor")

latent_size = spexp["latent_size"]
output_size = exp["control_size"]  # degrees of freedom in the robot
num_layers = exp["controller_num_layers"]
hidden_size = exp["controller_hidden_size"] 

# Instantiate model, loss function, and optimizer
model = LSTMXYPredictor(latent_size=latent_size, hidden_size=hidden_size, output_size = output_size, num_layers=num_layers)

exp.start_timer("data_preparation")
inputs_training, targets_training, inputs_validation, targets_validation = create_bc_training_and_validation(exp, spexp, sp)
exp.end_timer("data_preparation")

criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = exp["epochs"]

# Create a SummaryWriter instance
# where does the logdir go???
writer = SummaryWriter(logdir="/home/lboloni/runs/example")
exp.start_timer("train")
train_behavior_cloning(
    model, optimizer, criterion,
    inputs_training=inputs_training, 
    targets_training=targets_training, 
    inputs_validation=inputs_validation,
    targets_validation=targets_validation,
    num_epochs=num_epochs, writer=writer)
writer.close()
print("Training complete.")
exp.end_timer("train")
controller_path = pathlib.Path(exp.data_dir(), exp["controller_file"])
torch.save(model.state_dict(), controller_path)


UnboundLocalError: cannot access local variable 'inputs' where it is not associated with a value