## Behavior Cloning Policy Training Example

In [1]:
import os
import sys

import numpy as np
import pandas as pd

import torch

from tqdm import tqdm

# Locate the directory this code runs from: the script's own folder when
# `__file__` is defined, otherwise (e.g. in a notebook kernel) the current
# working directory.
current_directory = os.path.dirname(os.path.abspath(__file__)) if "__file__" in locals() else os.getcwd()
# The project packages are resolved relative to the parent of that directory.
parent_directory = os.path.dirname(current_directory)
# Register the parent directory for imports only once, so re-running this
# cell does not keep appending duplicate entries to sys.path.
if parent_directory not in sys.path:
    sys.path.append(parent_directory)

from utils import common, constants
from utils.config import Config
from utils.dataset_loader import PolicyDatasetLoader

from models.policy_model import RobotPolicy

In [2]:
# Start from the kernel's working directory and climb two levels;
# os.path.split(p)[0] is the head of the path, i.e. its containing folder.
current_path = os.getcwd()
parent_path = os.path.split(current_path)[0]
grand_parent_path = os.path.split(parent_path)[0]

In [3]:
# "results" folder located directly under the grandparent directory;
# the model parameter directory is created beneath it further down
results_path = os.path.join(grand_parent_path, "results")

In [4]:
# root of the dataset tree, and the sub-folder inside it that holds the
# human demonstrations (both expressed from the grandparent directory)
dataset_path = os.path.join(grand_parent_path, "dataset")
demo_path = os.path.join(grand_parent_path, "dataset", "human_demonstrations")

In [5]:
# name of the collected dataset folder, i.e. the last component of
# dataset/human_demonstrations/<collection_date>
collection_date = "2024_01_23" # year_month_day

In [6]:
# folder of the demonstrations recorded on the selected collection date
dataset_folder = os.path.join(demo_path, collection_date)

In [7]:
# List the files inside the "jsons" sub-folder of the dataset folder.
# The path is built with os.path.join so it works on every OS (a hard-coded
# "\\" separator only resolves on Windows), and the names are sorted because
# os.listdir returns entries in arbitrary order while a later cell picks
# the first element.
json_files = sorted(os.listdir(os.path.join(dataset_folder, "jsons")))

In [8]:
# column names as defined in the project's constants module
# NOTE(review): not referenced again in the visible cells — confirm it is still needed
column_names = constants.COLUMN_NAMES

# Test

In [9]:
# create the configuration object; later cells read batch_size, data_shuffle,
# num_workers, state_size, hidden_size and action_size from it
configs = Config()
# call the parameters method to set the parameters
# NOTE(review): one of these two calls appears to print the "Current Time" line — confirm which
configs.parameters()

Current Time:  Jan_27_2024-15_52_49


In [10]:
# prefer the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Training Device: ", device)
# record the choice on the config so later cells can pass it along
configs.device = device

Training Device:  cpu


In [11]:
# directory under the results folder that is handed to the config as the
# root for saved policy network parameters
model_directory = os.path.join(results_path, "policy_network_params")

# Create the directory (and any missing parents) if it is absent.
# exist_ok=True replaces the separate exists() check, which avoids the
# check-then-create race and keeps the cell safe to re-run.
os.makedirs(model_directory, exist_ok=True)

In [12]:
# ask the config for the path under which checkpoints are written, rooted at model_directory
# NOTE(review): presumably this also creates the folder — confirm in Config.model_saving_path
saving_path = configs.model_saving_path(directory=model_directory)

In [13]:
# Full path of the first listed file inside the "jsons" sub-folder.
# All components go through os.path.join so the path is valid on every OS,
# not only where "\\" is the separator.
json_path = os.path.join(dataset_folder, "jsons", json_files[0])

In [14]:
# build the demonstration dataset; the loader takes a list of JSON paths,
# and only the single file selected above is passed here
training_data = PolicyDatasetLoader(demo_data_json_paths=[json_path])



Number of Trajectories:  1
Each Trajectory Length:  8
Full Demo Dataset Size:  8


In [15]:
# wrap the dataset in a mini-batch iterator; batch size, shuffling and
# worker count are all read from the config object
torch_loader = torch.utils.data.DataLoader(
    dataset=training_data,
    batch_size=configs.batch_size,
    shuffle=configs.data_shuffle,
    num_workers=configs.num_workers,
)
# echo the loader so the cell displays its repr
torch_loader

<torch.utils.data.dataloader.DataLoader at 0x1e134282530>

In [16]:
# instantiate the policy network; input, hidden and output sizes come from the config
# NOTE(review): presumably `device` decides where the parameters live — confirm in RobotPolicy
policy_network = RobotPolicy(state_size=configs.state_size,
                             hidden_size=configs.hidden_size,
                             out_size=configs.action_size,
                             device=configs.device)
# display the module structure as the cell output
policy_network

RobotPolicy(
  (policy): Sequential(
    (0): Linear(in_features=4, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=3, bias=True)
    (5): Tanh()
  )
)

In [17]:
# regression objective: squared error between predicted and demonstrated
# actions, averaged over all elements (the default reduction, spelled out)
criterion = torch.nn.MSELoss(reduction="mean")

In [18]:
# Adam over every parameter of the policy network, step size 1e-3
optimizer = torch.optim.Adam(params=policy_network.parameters(), lr=1e-3)

In [19]:
# number of full passes over the data loader in the training loop
num_epochs = 2

In [20]:
# Behavior-cloning training loop: fit the policy to the demonstrated actions
# with a supervised regression loss, and checkpoint the weights every epoch.
for epoch in range(num_epochs):

    # loop through each batch inside the dataset; every sample is a
    # (state, action) pair
    for state_input, action_output in tqdm(torch_loader):

        # forward pass: call the module itself rather than .forward() so that
        # any registered hooks run as well
        action_predictions = policy_network(x=state_input)

        # keep the targets on the same device as the predictions so the loss
        # can be evaluated; this is a no-op when they already match
        action_output = action_output.to(action_predictions.device)

        # compute mse loss
        loss = criterion(action_predictions, action_output)

        # backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # tqdm.write prints the message without breaking the progress bar
        tqdm.write(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}")

    # save the action prediction model after each epoch
    filename = f"policy_network_epoch_{epoch + 1}.pt"
    torch.save(obj=policy_network.state_dict(),
               f=os.path.join(saving_path, filename))

100%|███████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 403.18it/s]


Epoch 1/2, Loss: 0.09494465589523315
Epoch 1/2, Loss: 0.09170800447463989
Epoch 1/2, Loss: 0.08233990520238876
Epoch 1/2, Loss: 0.06857556849718094


100%|██████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 1997.53it/s]

Epoch 2/2, Loss: 0.05991328880190849
Epoch 2/2, Loss: 0.060775965452194214
Epoch 2/2, Loss: 0.05463934317231178
Epoch 2/2, Loss: 0.07716011255979538



