In [15]:
import numpy as np
from imitation.policies.serialize import load_policy
from imitation.util.util import make_vec_env
from imitation.data.wrappers import RolloutInfoWrapper
from imitation.data import rollout
from imitation.algorithms.adversarial.gail import GAIL
from imitation.rewards.reward_nets import BasicRewardNet
from imitation.util.networks import RunningNorm
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.evaluation import evaluate_policy
from imitation.data.types import Trajectory,DictObs,TrajectoryWithRew
import h5py
from GailNavigationNetwork.model import NaviNet
from GailNavigationNetwork.utilities import preprocess




Available device is cuda of name NVIDIA GeForce RTX 4060 Laptop GPU



In [2]:


SEED = 42

env = make_vec_env(
    "seals:seals/CartPole-v0",
    rng=np.random.default_rng(SEED),
    n_envs=8,
    post_wrappers=[
        lambda env, _: RolloutInfoWrapper(env)
    ],  # needed for computing rollouts later
)
expert = load_policy(
    "ppo-huggingface",
    organization="HumanCompatibleAI",
    env_name="seals/CartPole-v0",
    venv=env,
)

In [3]:
rollouts = rollout.rollout(
    expert,
    env,
    rollout.make_sample_until(min_timesteps=None, min_episodes=60),
    rng=np.random.default_rng(SEED),
)

In [7]:
print(rollouts[0])

TrajectoryWithRew(obs=array([[-0.00650524,  0.04741862,  0.03976776,  0.03442311],
       [-0.00555687,  0.2419484 ,  0.04045622, -0.24545221],
       [-0.0007179 ,  0.04627267,  0.03554718,  0.05971209],
       ...,
       [ 0.14421803,  0.05748524,  0.01440079, -0.1876876 ],
       [ 0.14536773, -0.13783975,  0.01064704,  0.10950319],
       [ 0.14261094,  0.05712802,  0.0128371 , -0.17980173]],
      dtype=float32), acts=array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
       1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 

In [4]:

learner = PPO(
    env=env,
    policy=MlpPolicy,
    batch_size=64,
    ent_coef=0.0,
    learning_rate=0.0004,
    gamma=0.95,
    n_epochs=5,
    seed=SEED,
)
reward_net = BasicRewardNet(
    observation_space=env.observation_space,
    action_space=env.action_space,
    normalize_input_layer=RunningNorm,
)
gail_trainer = GAIL(
    demonstrations=rollouts,
    demo_batch_size=1024,
    gen_replay_buffer_capacity=512,
    n_disc_updates_per_round=8,
    venv=env,
    gen_algo=learner,
    reward_net=reward_net,
)

In [19]:
def create_demos(file_path,DEVICE="cuda"):
    '''
    Creates a gymnasium transition from the given file path
    of hdf5 file of known structure

    Args:
    file_path: str  
    Path to the hdf5 file   

    Returns:
    rollouts: gymnasium.Transition

    '''    
    read_file= h5py.File(file_path, "r")
    model= NaviNet().to(DEVICE)
    model.eval()
    len= read_file['kris_dynamics']['odom_data']['target_vector'].shape[0]
    rgbs=[]
    depths=[]
    targets=[]  
    acts=[]
    for i in range(len):
        target=read_file['kris_dynamics']['odom_data']['target_vector'][i]
        rgb=read_file['images']['rgb_data'][i]
        depth=read_file['images']['depth_data'][i]
        act=read_file['kris_dynamics']['odom_data']['odom_data_wheel'][i]
        # print(f"depth shape in rollout {depth.shape}")
        rgb=preprocess(rgb)
        depth=preprocess(depth)
        # rgb,depth=preprocess(rgb,depth)
        (rgb, depth) = (rgb.to(DEVICE), depth.to(DEVICE))
        rgb_features, depth_features = model(rgb,depth)
        rgb_features=rgb_features.detach().cpu().numpy()
        depth_features=depth_features.detach().cpu().numpy()
        print(f"targets feature in rollout {target.shape}")
        rgbs.append(rgb_features)
        depths.append(depth_features)
        targets.append(target) 
        acts.append(act)
        

    acts=np.array(acts[:-1])
    dones=[False for i in range(len)]
    dones[-1]=True
    infos= [{} for i in range(len-1)]
    rgbs=np.array(rgbs)
    depths=np.array(depths)
    targets=np.array(targets)
    rews=np.zeros(shape=(24,))
    # print(f"[rl_train] Creating rollouts {rgbs.shape} {depths.shape} , targets {targets.shape} acts {acts.shape}")
    obs_dict=DictObs( {'target_vector': targets,
            'rgb_features':rgbs,
            'depth_features': depths})

    traj = Trajectory(obs=obs_dict, acts=acts,infos=infos,terminal=dones)
    trajrw=TrajectoryWithRew(obs=obs_dict, acts=acts,infos=infos,terminal=dones,rews=rews)
    rollouts= rollout.flatten_trajectories([trajrw])


    return rollout.flatten_trajectories([traj])


In [20]:
 file_path="/home/foxy_user/foxy_ws/src/gail_navigation/GailNavigationNetwork/data/traj2.hdf5"
demonstrations=create_demos(file_path)

targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)
targets feature in rollout (7,)


TypeError: 'int' object is not callable