# Train behavior cloning

Train a behavior cloning based robot controller. 
* Code for loading and pre-processing the training data, typically from a set of demonstrations as specified in an exp/run
* Train the controller. 
* The trained controllers should be saved into the exp/run

In [1]:
## NOTEPAD CHANGES OVERVIEW
# I changed the notebook to calculate the validation loss when training BC if "validation" is true
# I adjusted the data creation to include validation data
# Config file example: experiment_configs/behavior_cloning/bc_lstm_resid_00_validation.yaml

# Make the parent directory importable. The project modules imported below
# (exp_run_config, bc_trainingdata, ...) are presumably located there.
import sys
sys.path.append("..")

# The project name is set on the Config class before any Config() call.
# NOTE(review): it presumably selects which settings file is loaded - confirm.
from exp_run_config import Config
Config.PROJECTNAME = "BerryPicker"

import pathlib
from tqdm import tqdm
import pprint
import torch
import math
# Fixed seed for PyTorch's random number generator, for repeatable runs.
torch.manual_seed(1)

from bc_trainingdata import create_trainingdata_bc_dict
from bc_factory import create_bc_model
from bc_LSTM_MDN import mdn_loss

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Uncomment to force CPU execution:
# device = "cpu"
print(f"Using device: {device}")

/usr/lib/python3.12/pathlib.py
***ExpRun**: Loading pointer config file:
	/home/al5d/.config/BerryPicker/mainsettings.yaml
***ExpRun**: Loading machine-specific config file:
	~/WORK/BerryPicker/cfg/settings.yaml
Using device: cuda


### Exp/run initialization
Create the exp/runs that describe the parameters of the training. Some of the code here is structured so that the notebook can be automated with papermill.

In [2]:
# *** Initialize the variables with default values
# *** This cell should be tagged as parameters
# *** If papermill is used, some of the values will be overwritten

# Passed as creation_style to Config().get_experiment() when the exp/run is
# loaded. The message printed by the training cell names "discard-old" as the
# value to use for recomputing an existing controller.
# NOTE(review): the full set of accepted values is defined by Config - confirm.
creation_style = "exist-ok"

# If not None, path of an external experiment directory. It must exist; the
# experiments this notebook depends on are copied into it.
external_path = None
# If not None, path of the directory used for the experiment data. It must exist.
data_path = None

# The exp/run to train. Alternative runs are kept commented out for convenience.
experiment = "behavior_cloning"
# run = "bc_mlp_00"
# run = "bc_lstm_00"
# run = "bc_lstm_resid_00"
run = "bc_lstm_resid_00_validation"
# run = "bc_lstm_mdn_00"
# exp = Config().get_experiment(experiment, run)

In [3]:
# Optional override: keep the experiment configurations in an external
# directory and copy the experiments this notebook depends on into it.
if external_path:
    external_path = pathlib.Path(external_path)
    assert external_path.exists()
    Config().set_experiment_path(external_path)
    for dependency in (
        "sensorprocessing_conv_vae",
        "robot_al5d",
        "demonstration",
        "behavior_cloning",
    ):
        Config().copy_experiment(dependency)

# Optional override: store the experiment data in a different directory.
if data_path:
    data_path = pathlib.Path(data_path)
    assert data_path.exists()
    Config().set_experiment_data(data_path)

# Load the behavior cloning exp/run, then the sensor processing and robot
# exp/runs that its configuration refers to.
exp = Config().get_experiment(experiment, run, creation_style=creation_style)
pprint.pprint(exp)
exp_sp = Config().get_experiment(exp["exp_sp"], exp["run_sp"])
exp_robot = Config().get_experiment(exp["exp_robot"], exp["run_robot"])

***ExpRun**: Configuration for exp/run: behavior_cloning/bc_lstm_resid_00_validation successfully loaded
Experiment:
    batch_size: 64
    control_size: 6
    controller: bc_LSTM_Residual
    controller_file: controller.pth
    data_dir: /home/al5d/WORK/BerryPicker/data/behavior_cloning/bc_lstm_resid_00_validation
    epochs: 300
    exp_robot: robot_al5d
    exp_run_sys_indep_file: /home/al5d/WORK/BerryPicker/src/BerryPicker/src/experiment_configs/behavior_cloning/bc_lstm_resid_00_validation.yaml
    exp_sp: sensorprocessing_conv_vae
    experiment_name: behavior_cloning
    hidden_size: 32
    loss: MSELoss
    name: bc_lstm_resid_00_validation
    optimizer: Adam
    optimizer_lr: 0.001
    run_name: bc_lstm_resid_00_validation
    run_robot: position_controller_00
    run_sp: sp_vae_128_300epochs_validation
    sequence_length: 10
    shuffle: false
    subrun_name: null
    time_started: '2025-09-20 14:07:21.143551'
    training_data:
    - - random-both-cameras-video
      - '20

### Training an RNN model
Functions for training an RNN type model. These models assume that the input is a sequence $[z_{t-k},...z_{t}]$ while the output is the next action $a_{t+1}$

In [None]:
def validate_bc_rnn(model, validation_loss, data, device):
    """Return the average validation loss of a sequence-based BC model.

    Every validation sequence is passed through the model on its own, as a
    batch of size one, and the per-sequence losses are averaged. Gradient
    computation is disabled and the model is switched to evaluation mode; it
    is left in evaluation mode when the function returns.

    CHECK (from the original author): the target is believed to be the last
    output of the RNN after the whole input sequence has been passed through
    it, with no state carried over between sequences. Both depend on the
    model implementation - confirm.

    Args:
        model: Sequence model (e.g. an LSTM). It must provide ``eval()`` and a
            boolean ``stochastic`` attribute. When ``stochastic`` is true
            (e.g. an MDN), the prediction is obtained by sampling with
            ``model.forward_and_sample``; otherwise by calling the model.
        validation_loss: Criterion called as ``validation_loss(outputs,
            target)``; its result must support ``.item()``.
        data: Mapping with the entries ``"z_validation"`` (input sequences)
            and ``"a_validation"`` (targets), both indexed along their first
            dimension.
        device: Device each sequence and target is moved to before use.

    Returns:
        The mean of the per-sequence losses as a Python float.

    Raises:
        ValueError: If the validation set contains no sequences.
    """
    inputs = data["z_validation"]
    targets = data["a_validation"]
    num_sequences = inputs.shape[0]
    if num_sequences == 0:
        raise ValueError(
            "validate_bc_rnn: the validation set is empty, "
            "cannot compute an average loss"
        )

    model.eval()
    # Stochastic models (e.g. MDN) produce their prediction by sampling.
    predict = model.forward_and_sample if model.stochastic else model

    val_loss = 0
    with torch.no_grad():  # Disable gradient computation
        for i in range(num_sequences):
            # unsqueeze(0) adds the leading batch dimension of size 1 that
            # the model expects.
            input_seq = inputs[i].to(device).unsqueeze(0)
            target = targets[i].to(device).unsqueeze(0)
            outputs = predict(input_seq)
            # The loss is evaluated exactly once per sequence.
            val_loss += validation_loss(outputs, target).item()
    return val_loss / num_sequences


In [None]:
# CHANGES
# Changed function to accept a validation parameter to use validation loss when saving the model with early stopping

def train_bc_rnn(model, optimizer, criterion, data, num_epochs, batch_size=32, controller_path=None, validation=False):
    """Train a behavior cloning model that consumes sequences (e.g. an RNN).

    The training data is processed in consecutive mini-batches, in the order
    in which it is stored, with a tqdm progress bar over the epochs. Every
    fifth epoch the model is evaluated with ``validate_bc_rnn`` and the
    training and validation losses are printed.

    The function relies on names defined elsewhere in the notebook: ``exp``
    (used for the "train" timer), ``device``, ``mdn_loss`` and
    ``validate_bc_rnn``.

    Args:
        model: The model to train. It must expose a boolean ``stochastic``
            attribute; when it is true the training loss is computed with
            ``mdn_loss(target, *output)`` instead of ``criterion``.
        optimizer: Optimizer that updates the parameters of ``model``.
        criterion: Loss called as ``criterion(output, target)``. It is also
            passed to ``validate_bc_rnn`` as the validation loss.
        data: Mapping with the entries ``"z_train"`` (input sequences) and
            ``"a_train"`` (targets), plus the entries that
            ``validate_bc_rnn`` reads.
        num_epochs: Maximum number of epochs to train.
        batch_size: Number of sequences per mini-batch. Trailing sequences
            that do not fill a complete batch are not used.
        controller_path: Where the resulting state dict is written with
            ``torch.save``. If None, nothing is saved.
        validation: If true, a snapshot of the weights with the lowest
            validation loss is kept and saved, and training stops early once
            a validation check is worse than the best one and more than 10
            epochs have passed since the best one. If false, the weights at
            the end of training are saved.
    """
    # Local import: only the best-model snapshot below needs it.
    import copy

    # Variables to track validation progress
    best_val = math.inf
    best_model = None
    best_model_epoch = None
    early_stop_epoch = 10

    exp.start_timer("train")
    num_sequences = data["z_train"].shape[0]
    num_batches = num_sequences // batch_size

    for epoch in tqdm(range(num_epochs)):
        model.train()
        training_loss = 0
        for i in range(num_batches):
            # Slice out the i-th mini-batch of inputs and targets.
            start, stop = i * batch_size, (i + 1) * batch_size
            input_seq = data["z_train"][start:stop].to(device)
            target = data["a_train"][start:stop].to(device)
            # Forward pass
            output = model(input_seq)
            # The MDN model has its own loss, computed from the mixture
            # parameters returned by the model.
            if not model.stochastic:
                loss = criterion(output, target)
            else:
                loss = mdn_loss(target, *output)
            training_loss += loss.item()
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # One loss value is accumulated per batch, so the mean is taken over
        # the number of batches (guarded for the case of no complete batch).
        avg_training_loss = training_loss / max(num_batches, 1)

        # TODO: TensorBoard logging of the two losses is not wired up.
        if (epoch + 1) % 5 == 0:
            avg_validation_loss = validate_bc_rnn(model, criterion, data, device)

            if validation:
                if avg_validation_loss < best_val:
                    best_val = avg_validation_loss
                    # state_dict() returns references to the live parameters;
                    # a deep copy is needed so that later optimizer steps do
                    # not overwrite the snapshot of the best model.
                    best_model = copy.deepcopy(model.state_dict())
                    best_model_epoch = epoch
                if best_val < avg_validation_loss and epoch > best_model_epoch + early_stop_epoch:
                    print("Early stop.")
                    break
            print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_training_loss:.4f} Validation Loss: {avg_validation_loss:.4f} ')
    print("Training complete.")
    exp.end_timer("train")

    if best_model is None:
        # No snapshot was taken (validation disabled, or no validation check
        # recorded a best model): save the weights at the end of training.
        best_model = model.state_dict()

    # Save model in this function now for clarity
    if controller_path is not None:
        torch.save(best_model, controller_path)

In [None]:
# CHANGES
# Checks for validation in config file and saves model based on validation score
# Splits data into training and validation sets if config "validation" is True
# Performs shuffling on the training data unless config "shuffle" is False
#   FUNCTION: train_bc_rnn()
#   FUNCTION: create_trainingdata_bc()

# File in the exp/run data directory that holds the trained controller.
controller_path = pathlib.Path(exp.data_dir(), exp["controller_file"])

# The "validation" entry is optional in the config; when it is absent,
# validation-based model selection is turned off.
validation = exp['validation'] if 'validation' in exp else False

if not controller_path.exists():
    print("***Train_BehaviorCloning: Proceeding to train the controller.")
    # The second value returned by the factory is the loss function; it is
    # used both as the training criterion and for the validation loss.
    model, validation_loss, optimizer = create_bc_model(exp, exp_sp, device)
    print(model)

    # The training data is created on the CPU.
    data = create_trainingdata_bc_dict(
        exp, exp_sp, exp_robot, device="cpu", validation=validation)

    # Training hyperparameters come from the exp/run configuration.
    num_epochs = exp["epochs"]
    batch_size = exp["batch_size"]

    # TODO: TensorBoard logging (SummaryWriter) is not wired up; it is still
    # undecided where the log directory should go.
    train_bc_rnn(
        model,
        optimizer,
        validation_loss,
        data=data,
        num_epochs=num_epochs,
        batch_size=batch_size,
        controller_path=controller_path,
        validation=validation,
    )
else:
    print("***Train_BehaviorCloning: Controller exists. Re-run with creation-style=discard-old to recompute.")

### Train the model 

Creates and trains a behavior cloning model specified by the exp.