In [None]:
from pathlib import Path

# The project directory is the parent of the current working directory;
# all exported artifacts for this run live under export_logs/ppo/A1GymEnv-v0_2.
project_dir = Path.cwd().parent
model_dir = project_dir.joinpath('export_logs', 'ppo', 'A1GymEnv-v0_2')

In [None]:
import torch.nn as nn
from utils import ALGOS

# Load the saved "ppo" agent from the zip archive inside the export directory.
model_path = model_dir.joinpath("A1GymEnv-v0.zip")
model = ALGOS["ppo"].load(model_path)

def extract_policy_layers(model):
    """Flatten the actor branch of ``model.policy`` into one ``nn.Sequential``.

    Every module reachable from ``model.policy.mlp_extractor.policy_net``
    (the container itself included) is visited; anything that is not an
    ``nn.Sequential`` is kept, in traversal order. ``model.policy.action_net``
    is appended as the final layer.

    Args:
        model: Object exposing ``policy.mlp_extractor.policy_net`` and
            ``policy.action_net`` as torch modules.

    Returns:
        An ``nn.Sequential`` wrapping the same module objects (no copies).
    """
    policy = model.policy
    body = [
        module
        for module in policy.mlp_extractor.policy_net.modules()
        if not isinstance(module, nn.Sequential)
    ]
    return nn.Sequential(*body, policy.action_net)

# Build the standalone actor network from the loaded model and switch it to
# evaluation mode. As the cell's last expression, eval() returns the module,
# so the notebook displays its layer structure.
policy_net = extract_policy_layers(model)
policy_net.eval()

In [None]:
import torch
import numpy as np

# Export model weights as csv
# Destination for the exported CSV files; creating it is a no-op when the
# directory already exists, so this cell can be re-run.
params_dir = model_dir.joinpath('parameters')
params_dir.mkdir(parents=True, exist_ok=True)

def save_tensor_as_csv(path, t: torch.Tensor):
    """Write tensor ``t`` to ``path`` as comma-separated text.

    The tensor is detached from autograd, moved to the CPU and converted to a
    NumPy array before being handed to ``np.savetxt``.

    Args:
        path: Target file (anything ``np.savetxt`` accepts).
        t: Tensor to export.
    """
    np.savetxt(path, t.detach().cpu().numpy(), delimiter=',')

# Write every parameter of the policy network to its own CSV file.
# 1-D tensors get a trailing axis so they are stored as a single column, and
# dots in the parameter name become underscores in the file name.
for param_name, tensor in policy_net.named_parameters():
    as_matrix = tensor.unsqueeze(-1) if tensor.dim() == 1 else tensor
    file_stem = param_name.replace('.', '_')
    print(file_stem, as_matrix.size())
    save_tensor_as_csv(params_dir / f'{file_stem}.csv', as_matrix)

In [None]:
# Export normalizer parameters
import pickle
# Load the observation normalizer that was pickled next to the trained model.
# NOTE(review): pickle.load can execute arbitrary code from the file; only run
# this on trusted training artifacts.
normalizer_path = model_dir / "A1GymEnv-v0" / "vecnormalize.pkl"
with open(normalizer_path, "rb") as pkl:
    normalizer = pickle.load(pkl)

# Export the running mean and the epsilon-stabilised standard deviation as
# 1 x N row vectors. Assuming a Stable-Baselines3 VecNormalize, observations
# are scaled as (obs - mean) / sqrt(var + epsilon).
# Fix: the two quantities were previously swapped and obs_std.csv was written
# from the obs_mean variable, so the mean was never exported.
obs_mean = normalizer.obs_rms.mean.reshape(1, -1)
obs_std = np.sqrt(normalizer.obs_rms.var + normalizer.epsilon).reshape(1, -1)
print(obs_mean.shape, obs_std.shape)

np.savetxt(params_dir / 'obs_mean.csv', obs_mean, delimiter = ',')
np.savetxt(params_dir / 'obs_std.csv', obs_std, delimiter = ',')

In [None]:
# Export default pose and motor polarity
from blind_walking.envs.env_wrappers import simple_openloop

# Pose stored on a freshly constructed LaikagoPoseOffsetGenerator, flattened
# to a single row, written to CSV and echoed for inspection.
pose_generator = simple_openloop.LaikagoPoseOffsetGenerator()
pose_offset = pose_generator._pose.reshape(1, -1)
np.savetxt(params_dir / 'pose_offset.csv', pose_offset, delimiter = ',')
print(pose_offset)

# Sign pattern (1, -1, -1) repeated four times, stored as one row of 12 values.
motor_polarity = np.tile([1, -1, -1], 4).reshape(1, -1)
np.savetxt(params_dir / 'motor_signs.csv', motor_polarity, delimiter = ',')

In [None]:
# Export sample in-out pairs
import json

# Evaluate the policy network on fixed inputs (all zeros, all ones) on the CPU
# and store each input/output pair as CSV, plus an index file that maps every
# input file name to its output file name.
policy_net.eval()
policy_net = policy_net.to(torch.device('cpu'))
sample_output_dir = model_dir / 'sample_inp_oup'
sample_output_dir.mkdir(exist_ok=True, parents=True)

# Each sample is a single row of 46 values.
sample_inputs = dict(zeros=torch.zeros(1, 46), ones=torch.ones(1, 46))
inp_oup_names = {}
for name, inp_value in sample_inputs.items():
    inp_name, oup_name = f'{name}_in.csv', f'{name}_out.csv'
    inp_oup_names[inp_name] = oup_name
    oup_value = policy_net(inp_value)
    save_tensor_as_csv(sample_output_dir / inp_name, inp_value)
    save_tensor_as_csv(sample_output_dir / oup_name, oup_value)

# One "input,output" line per sample, in insertion order.
with open(sample_output_dir / 'inp_oup_name_pairs.txt', 'w') as file:
    file.writelines(
        f'{inp_name},{oup_name}\n' for inp_name, oup_name in inp_oup_names.items()
    )

In [3]:
# Export butterworth filter coefficients and history
import gym 
import utils.import_envs
# Create and reset the environment, then inspect the robot's action filter
# coefficients and the motor angles reported right after the reset.
env = gym.make("A1GymEnv-v0")
env.reset()

print(env.env_step_counter)
# NOTE(review): `filter` shadows the Python builtin; the name is kept because
# the following cell reads it.
filter = env.robot._action_filter
for coeffs in (filter.a, filter.b):
    print(coeffs.shape)
default_action = env.robot.GetMotorAngles()
print(default_action)


argv[0]=
0
(12, 2)
(12, 2)
[ 0.   0.9 -1.8  0.   0.9 -1.8  0.   0.9 -1.8  0.   0.9 -1.8]




In [2]:
# Step the environment once with a randomly sampled action, then report the
# shapes of the filter coefficients and of its output/input histories.
num_steps = 1
for i in range(num_steps):
    env.step(env.action_space.sample())
for coeffs in (filter.a, filter.b):
    print(coeffs.shape)
for history in (filter.yhist, filter.xhist):
    print(len(history), history[0].shape)

[ 0.   0.9 -1.8  0.   0.9 -1.8  0.   0.9 -1.8  0.   0.9 -1.8]
[[ 0. ]
 [ 0.9]
 [-1.8]
 [ 0. ]
 [ 0.9]
 [-1.8]
 [ 0. ]
 [ 0.9]
 [-1.8]
 [ 0. ]
 [ 0.9]
 [-1.8]]
Raw action:  [ 0.44601613  0.30490421 -1.11204529 -0.3697387   0.86836436 -1.41128629
 -0.15483707  0.77482317 -1.3015657   0.4584184   0.99935891 -0.87943053]
Filtered action:  [ 0.07145194  0.80466544 -1.68978941 -0.05923227  0.89493196 -1.73772793
 -0.02480495  0.87994663 -1.72015065  0.07343879  0.91591733 -1.65252445]
Step i: Processed action [ 0.07145194  0.80466544 -1.68978941 -0.05923227  0.89493196 -1.73772793
 -0.02480495  0.87994663 -1.72015065  0.07343879  0.91591733 -1.65252445]
Step i: Processed action [ 0.07145194  0.80466544 -1.68978941 -0.05923227  0.89493196 -1.73772793
 -0.02480495  0.87994663 -1.72015065  0.07343879  0.91591733 -1.65252445]
Step i: Processed action [ 0.07145194  0.80466544 -1.68978941 -0.05923227  0.89493196 -1.73772793
 -0.02480495  0.87994663 -1.72015065  0.07343879  0.91591733 -1.65252445]
