In [6]:
from pathlib import Path

# The project root is the parent of the notebook's working directory; the
# exported PPO run lives under export_logs/ppo/A1GymEnv-v0_2 inside it.
project_dir = Path.cwd().parent
model_dir = project_dir.joinpath('export_logs', 'ppo', 'A1GymEnv-v0_2')

In [7]:
import torch.nn as nn
from utils import ALGOS

# Load the saved model archive through the "ppo" entry of the ALGOS registry.
# `model` is a notebook-level name that the following statements read.
model_path = model_dir / "A1GymEnv-v0.zip"
model = ALGOS["ppo"].load(model_path)

def extract_policy_layers(model):
    """Assemble the policy branch of ``model`` into one ``nn.Sequential``.

    Every module yielded by ``model.policy.mlp_extractor.policy_net.modules()``
    that is not itself an ``nn.Sequential`` is collected in traversal order,
    and ``model.policy.action_net`` is appended as the final stage.

    Args:
        model: Object exposing ``policy.mlp_extractor.policy_net`` and
            ``policy.action_net`` as torch modules.

    Returns:
        A new ``nn.Sequential`` wrapping the collected module objects. The
        modules are reused, not copied, so parameters stay shared with
        ``model``.
    """
    policy = model.policy
    # modules() also yields the container itself, so Sequential wrappers are
    # filtered out and only the remaining modules are kept.
    stages = [
        module
        for module in policy.mlp_extractor.policy_net.modules()
        if not isinstance(module, nn.Sequential)
    ]
    stages.append(policy.action_net)
    return nn.Sequential(*stages)

# Build the flattened policy network and switch it to evaluation mode.
# The bare `.eval()` call is the cell's last expression, so the notebook
# displays the returned module's repr as the cell output.
policy_net = extract_policy_layers(model)
policy_net.eval()

Sequential(
  (0): Linear(in_features=46, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): ReLU()
  (4): Linear(in_features=256, out_features=12, bias=True)
)

In [8]:
import torch
import numpy as np

# Export model weights as CSV files under <model_dir>/parameters.
params_dir = model_dir / 'parameters'
# exist_ok/parents: re-running the cell is harmless and missing parent
# directories are created as needed.
params_dir.mkdir(exist_ok=True, parents=True)

def save_tensor_as_csv(path, t: torch.Tensor):
    """Write tensor ``t`` to ``path`` as comma-separated text.

    The tensor is detached from autograd and moved to the CPU before it is
    converted to a NumPy array and handed to ``np.savetxt``.

    Args:
        path: Destination file passed straight to ``np.savetxt``.
        t: Tensor to serialise.
    """
    np.savetxt(path, t.detach().cpu().numpy(), delimiter=',')

# Write one CSV per parameter of policy_net, named after the parameter.
for name, param in policy_net.named_parameters():
    # Dots in the parameter name are replaced so the file stem is "<index>_<kind>".
    name = name.replace('.', '_')
    # 1-D parameters get a trailing axis so they are saved as a single column.
    if param.ndim == 1:
        param = torch.unsqueeze(param, -1)
    print(name, param.size())
    save_tensor_as_csv(params_dir.joinpath(name + '.csv'), param)

0_weight torch.Size([256, 46])
0_bias torch.Size([256, 1])
2_weight torch.Size([256, 256])
2_bias torch.Size([256, 1])
4_weight torch.Size([12, 256])
4_bias torch.Size([12, 1])


In [9]:
# Export normalizer parameters
import pickle
normalizer_path = model_dir / "A1GymEnv-v0" / "vecnormalize.pkl"
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# normalizer pickles produced by a trusted training run.
with open(normalizer_path, "rb") as pkl:
    normalizer = pickle.load(pkl)

# The running mean is exported as-is; the standard deviation is derived from
# the running variance, with epsilon added before the square root.
# (Previously the two names were swapped and obs_std.csv was written from
# obs_mean, so the true mean was never exported.)
obs_mean = normalizer.obs_rms.mean
obs_std = np.sqrt(normalizer.obs_rms.var + normalizer.epsilon)
# Store both as single-row matrices so each CSV contains one line.
obs_mean = obs_mean.reshape(1,-1)
obs_std = obs_std.reshape(1,-1)
print(obs_mean.shape, obs_std.shape)

np.savetxt(params_dir / 'obs_mean.csv', obs_mean, delimiter = ',')
np.savetxt(params_dir / 'obs_std.csv', obs_std, delimiter = ',')

(1, 46) (1, 46)


In [10]:
# Export default pose and motor polarity
from blind_walking.envs.env_wrappers import simple_openloop

# The generator's private `_pose` array is flattened into a single CSV row.
pose_offset = simple_openloop.LaikagoPoseOffsetGenerator()._pose.reshape(1, -1)
np.savetxt(params_dir.joinpath('pose_offset.csv'), pose_offset, delimiter=',')
print(pose_offset)

# The sign pattern (1, -1, -1) is repeated four times, giving twelve entries
# in one row.
motor_polarity = np.tile([1, -1, -1], 4).reshape(1, -1)
np.savetxt(params_dir.joinpath('motor_signs.csv'), motor_polarity, delimiter=',')

[[ 0.    0.67 -1.25  0.    0.67 -1.25  0.    0.67 -1.25  0.    0.67 -1.25]]


In [11]:
# Export sample in-out pairs
import json

# Evaluate policy_net on an all-zeros and an all-ones (1, 46) input and save
# every input and its output as CSV under <model_dir>/sample_inp_oup.
policy_net.eval()
policy_net = policy_net.to(torch.device('cpu'))
sample_output_dir = model_dir.joinpath('sample_inp_oup')
sample_output_dir.mkdir(parents=True, exist_ok=True)

sample_inputs = dict(
    zeros=torch.zeros(1, 46),
    ones=torch.ones(1, 46),
)
# Maps each input file name to the file name of its matching output.
inp_oup_names = {}
for name, inp_value in sample_inputs.items():
    inp_name = f'{name}_in.csv'
    oup_name = f'{name}_out.csv'
    inp_oup_names[inp_name] = oup_name
    oup_value = policy_net(inp_value)
    save_tensor_as_csv(sample_output_dir.joinpath(inp_name), inp_value)
    save_tensor_as_csv(sample_output_dir.joinpath(oup_name), oup_value)

# Index file: one "input_file,output_file" pair per line.
with open(sample_output_dir.joinpath('inp_oup_name_pairs.txt'), 'w') as file:
    for inp_name, oup_name in inp_oup_names.items():
        line = f"{inp_name},{oup_name}\n"
        file.write(line)

In [14]:
# Export butterworth filter coefficients and history
import gym 
import numpy as np
import utils.import_envs
# Create the environment and reset it before reading the robot's action filter.
env = gym.make("A1GymEnv-v0")
env.reset()

# NOTE(review): `filter` shadows the Python builtin of the same name; it is
# kept because the next cell reads `filter` as well.
filter = env.robot._action_filter
# Transposed copies of the filter's `a` and `b` coefficient arrays; the
# recorded output shows shape (2, 12) for both after the transpose.
a = filter.a.T.copy()
b = filter.b.T.copy()
print(a.shape, b.shape)
print(a)

# Each coefficient array goes to its own CSV in the parameters directory.
np.savetxt(params_dir / 'filter_a_coeff.csv', a, delimiter =',')
np.savetxt(params_dir / 'filter_b_coeff.csv', b, delimiter =',')


argv[0]=
(2, 12) (2, 12)


In [13]:
# Advance the environment by one step using a randomly sampled action, then
# print the shapes of the action filter's coefficient arrays and the length
# and element shape of its y/x history buffers.
# NOTE(review): the output recorded below this cell shows array values and
# action logs rather than these shape prints, and this cell's execution count
# (13) precedes that of the filter-export cell above it (14). The saved output
# looks stale -- re-run with a fresh kernel to confirm.
for i in range(1):
    env.step(env.action_space.sample())
print(filter.a.shape)
print(filter.b.shape)
print(len(filter.yhist), filter.yhist[0].shape)
print(len(filter.xhist), filter.xhist[0].shape)

[ 0.   0.9 -1.8  0.   0.9 -1.8  0.   0.9 -1.8  0.   0.9 -1.8]
[[ 0. ]
 [ 0.9]
 [-1.8]
 [ 0. ]
 [ 0.9]
 [-1.8]
 [ 0. ]
 [ 0.9]
 [-1.8]
 [ 0. ]
 [ 0.9]
 [-1.8]]
Raw action:  [-0.49561477  1.16606323 -1.03982396 -0.19126183  0.52372795 -0.77732503
  0.33878744  0.87886262 -0.77325642  0.36159971  0.71640352 -1.56371608]
Filtered action:  [-0.07939766  0.94262342 -1.67821953 -0.03064021  0.83972109 -1.63616711
  0.05427387  0.89661378 -1.63551532  0.0579284   0.87058778 -1.76214723]
Step i: Processed action [-0.07939766  0.94262342 -1.67821953 -0.03064021  0.83972109 -1.63616711
  0.05427387  0.89661378 -1.63551532  0.0579284   0.87058778 -1.76214723]
Step i: Processed action [-0.07939766  0.94262342 -1.67821953 -0.03064021  0.83972109 -1.63616711
  0.05427387  0.89661378 -1.63551532  0.0579284   0.87058778 -1.76214723]
Step i: Processed action [-0.07939766  0.94262342 -1.67821953 -0.03064021  0.83972109 -1.63616711
  0.05427387  0.89661378 -1.63551532  0.0579284   0.87058778 -1.76214723]
