## Evaluate Trained Robot Policy Model and Visualize

In [1]:
import os
import sys

import torch

In [2]:
# Resolve the directory this code runs from: the script's own folder when
# __file__ is defined, otherwise (e.g. inside a notebook kernel) the working directory.
current_directory = os.path.dirname(os.path.abspath(__file__)) if "__file__" in locals() else os.getcwd()
# One level up from the current directory
# (presumably the project root that holds the local packages imported afterwards - confirm).
parent_directory = os.path.dirname(current_directory)
# Make the parent directory importable. Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if parent_directory not in sys.path:
    sys.path.append(parent_directory)

In [3]:
from utils import common, constants
from utils.config import Config
from utils.dataset_loader import PolicyDatasetLoader

from optimization.updater import Updater
from optimization.functions import setup_config, get_directories, load_policy

from models.policy_model import RobotPolicy

In [4]:
# choose the device used for evaluation: CUDA when torch can see a GPU, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Evaluating Device: ", device)

# build the hyperparameter / configuration object for that device
configs = setup_config(device=device)

# base paths derived from the parent directory:
# the demonstration JSON paths and the results folder
json_paths, results_path = get_directories(parent_directory=parent_directory)

Evaluating Device:  cpu
Current Time:  Jan_29_2024-20_28_41


In [5]:
# detect the evaluation device again and record it on the configuration object
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Evaluating Device: ", device)
configs.device = device

Evaluating Device:  cpu


In [6]:
# instantiate the policy network from the sizes and log-std settings stored in configs
policy_network = RobotPolicy(
    state_size=configs.state_size,
    hidden_size=configs.hidden_size,
    out_size=configs.action_size,
    log_std_min=configs.policy_log_std_min,
    log_std_max=configs.policy_log_std_max,
    log_std_init=configs.policy_log_std_init,
    device=configs.device,
)

In [7]:
# load demonstrations dataset
# (built from the JSON paths returned by get_directories; individual samples
# are later read from it by integer index)
evaluation_data = PolicyDatasetLoader(demo_data_json_paths=json_paths)



Number of Trajectories:  43
Each Trajectory Length:  20
Full Demo Dataset Size:  922


# Functions

# Test

In [8]:
# folder name where parameters are located ("results / policy_network_params / loading_folder_name")
# NOTE(review): looks like the timestamp of the training run that produced the parameters - confirm
loading_folder_name = "Jan_29_2024-18_35_10"

In [9]:
# file name of the saved policy parameters; joined onto the model folder path to form model_path
# NOTE(review): the name appears to encode the training epoch and loss value - confirm
params_name = "policy_network_epoch_100_loss_3_19916.pt"

In [10]:
# folder holding the trained parameters:
#   <results_path>/policy_network_params/<loading_folder_name>
# (the folder must already exist, i.e. a model has to have been trained beforehand)
model_folder_path = os.path.join(results_path, "policy_network_params", loading_folder_name)

In [11]:
# full path of the parameter file: the model folder joined with the parameter file name
model_path = os.path.join(model_folder_path, params_name)

In [12]:
# load the trained parameters stored at model_path into the policy network
policy_network = load_policy(
    policy_network=policy_network,
    model_path=model_path,
)

In [13]:
# updater built around the loaded policy; its gaussian_nll_loss is used for the evaluation loss
updater = Updater(
    configs=configs,
    policy_network=policy_network,
)

In [14]:
# index of the dataset sample to evaluate (used to index evaluation_data)
sample_index = 25

In [15]:
# fetch the selected sample; its elements are read by position (0..3) afterwards
evaluation_sample = evaluation_data[sample_index]

In [16]:
# unpack the sample by position:
#   [0] state, [1] action, [2] trajectory index, [3] state number
# the bookkeeping entries are taken as-is
trajectory_index = evaluation_sample[2]
state_number = evaluation_sample[3]
# state and action are converted to float32 and moved to the evaluation device in one step
input_state = evaluation_sample[0].to(device=configs.device, dtype=torch.float32)
output_action = evaluation_sample[1].to(device=configs.device, dtype=torch.float32)

In [17]:
# forward pass of the policy on the single sample; unsqueeze(0) adds the batch dimension.
# The two outputs are passed on as the mean (action_mu) and standard deviation
# (action_std) of the Gaussian action distribution.
# This is evaluation only, so gradient tracking is switched off: no autograd
# graph is recorded and the resulting tensors do not require grad.
with torch.no_grad():
    action_pred, action_std = policy_network.forward(x=input_state.unsqueeze(0))
    action_log_prob, action_dist = policy_network.calculate_distribution(action_mu=action_pred,
                                                                         action_std=action_std)

In [18]:
# join predicted mean and standard deviation along the last dimension
action_mu_and_std = torch.cat([action_pred, action_std], dim=-1)

In [19]:
# multivariate Gaussian negative log-likelihood of the demonstrated action
# under the predicted (mean, std) pair
# NOTE(review): y_pred carries the batch dimension added before the forward pass while
# y_true is passed without one - confirm gaussian_nll_loss handles this as intended
nll_loss = updater.gaussian_nll_loss(
    y_true=output_action,
    y_pred=action_mu_and_std,
)

In [20]:
# map the normalized input state back to real values using the dataset's state norms.
# .cpu() is required before .numpy(): the tensor was moved to configs.device, and
# .numpy() raises a TypeError for CUDA tensors (on CPU, .cpu() returns the same tensor).
real_state_input = common.denormalize_state(state_norm=input_state.cpu().numpy(),
                                            norm_value_list=evaluation_data.state_norms)

In [21]:
# map the normalized ground-truth action back to real values using the dataset's action norms;
# unsqueeze(0) adds a leading batch dimension before the conversion.
# .cpu() is required before .numpy(): the tensor was moved to configs.device, and
# .numpy() raises a TypeError for CUDA tensors (on CPU, .cpu() returns the same tensor).
real_action_output = common.denormalize_action(action_norm=output_action.unsqueeze(0).cpu().numpy(),
                                               norm_range_list=evaluation_data.action_norms)

In [22]:
# map the predicted (normalized) action back to real values using the dataset's action norms.
# detach() drops any autograd history; .cpu() is required before .numpy() because the
# prediction lives on configs.device and .numpy() raises a TypeError for CUDA tensors
# (on CPU, .cpu() returns the same tensor).
pred_action_output = common.denormalize_action(action_norm=action_pred.detach().cpu().numpy(),
                                               norm_range_list=evaluation_data.action_norms)