## Load Tree weights

In [1]:
import numpy as np
import sys
import json
import torch
import gym
from rl import PPO

def softmax(x, axis=None):
    """Compute softmax probabilities for the scores in ``x``.

    Args:
        x: Array-like of scores (logits).
        axis: Axis along which each set of scores is normalised. The default
            ``None`` normalises over the whole array, which is the usual
            softmax for a 1-D vector. Pass e.g. ``axis=1`` to get one
            distribution per row of a 2-D array.

    Returns:
        ``numpy.ndarray`` with the same shape as ``x``; entries are
        non-negative and sum to 1 along ``axis`` (over all entries when
        ``axis`` is ``None``).
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    # keepdims=True lets the normaliser broadcast back against e_x for any axis.
    return e_x / e_x.sum(axis=axis, keepdims=True)


### RL Data Norm

In [6]:
# Load a saved PPO model for MountainCar and inspect its weights.
EnvName = 'MountainCar-v0'
m = 'cdt'

# The environment is only queried for its observation and action sizes.
env = gym.make(EnvName).unwrapped
state_dim, action_dim = env.observation_space.shape[0], env.action_space.n  # discrete actions

# Read the RL training configuration for the selected method.
conf_path = '../src/' + m + '/' + m + '_rl_train.json'
with open(conf_path, "r") as read_file:
    rl_confs = json.load(read_file)

# Build the model from the per-environment settings and move it to the configured device.
env_confs = rl_confs[EnvName]
model = PPO(
    state_dim,
    action_dim,
    rl_confs["General"]["policy_approx"],
    env_confs["learner_args"],
    **env_confs["alg_confs"]
).to(torch.device(env_confs["learner_args"]["device"]))

# The checkpoint path is the configured model path followed by the index `i`.
i = 0
model_path = env_confs["train_confs"]["model_path"] + str(i)
print(model_path)
model.load_model(model_path)
print(model.state_dict())

# Print the softmax of every row of `policy.dc_leaves`.
leaf_logits = model.state_dict()['policy.dc_leaves'].detach().cpu().numpy()
for w in leaf_logits:
    print(softmax(w))

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
CDT parameters:  {'num_intermediate_variables': 1, 'feature_learning_depth': 2, 'decision_depth': 2, 'input_dim': 2, 'output_dim': 3, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 128, 'exp_scheduler_gamma': 1.0, 'device': 'cuda', 'episodes': 40, 'log_interval': 100, 'greatest_path_probability': 1, 'beta_fl': 0, 'beta_dc': 0}
../data/cdt/model/mountaincar/rl_ppo0
OrderedDict([('policy.fl_leaf_weights', tensor([[ 1.2682, -8.0616],
        [ 4.4988,  1.1245],
        [ 0.7746, -8.9235],
        [-1.1001, -2.5657]], device='cuda:0')), ('policy.dc_leaves', tensor([[-1.5820, -1.6395,  2.2960],
        [-0.4989,  2.3197, -0.8824],
        [ 1.1209, -0.5438, -2.6799],
        [ 1.1128,  2.1277,  0.7750]], device='cuda:0')), ('policy.fl_inner_nodes.weight', tensor([[  1.6196,   3.5666,  10.0695],
        [ -2.9400,  -4.6343, -11.3407],
        [  0.9527,   1.9745,   4.4633]], device='c

In [15]:
# Load a saved PPO model for CartPole and inspect its weights.
EnvName = 'CartPole-v1'
m = 'cdt'

# The environment is only queried for its observation and action sizes.
env = gym.make(EnvName).unwrapped
state_dim, action_dim = env.observation_space.shape[0], env.action_space.n  # discrete actions

# Read the RL training configuration used for the comparison runs.
conf_path = '../src/' + m + '/' + m + '_rl_train_compare.json'
with open(conf_path, "r") as read_file:
    rl_confs = json.load(read_file)

# Override the tree depths before the model is constructed.
# NOTE(review): the checkpoint suffix below presumably encodes these depths - confirm.
env_confs = rl_confs[EnvName]
env_confs['learner_args'].update(feature_learning_depth=1, decision_depth=2)

# Build the model from the per-environment settings and move it to the configured device.
model = PPO(
    state_dim,
    action_dim,
    rl_confs["General"]["policy_approx"],
    env_confs["learner_args"],
    **env_confs["alg_confs"]
).to(torch.device(env_confs["learner_args"]["device"]))

# The checkpoint path is the configured model path followed by the suffix `i`.
i = '_12_1'
model_path = env_confs["train_confs"]["model_path"] + i
print(model_path)
model.load_model(model_path)

# Print the softmax of every row of `policy.dc_leaves`, then the full state dict.
leaf_logits = model.state_dict()['policy.dc_leaves'].detach().cpu().numpy()
for w in leaf_logits:
    print(softmax(w))
print(model.state_dict())

CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 1, 'decision_depth': 2, 'input_dim': 4, 'output_dim': 2, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'exp_scheduler_gamma': 1.0, 'device': 'cuda', 'episodes': 40, 'log_interval': 100, 'greatest_path_probability': 1, 'beta_fl': 0, 'beta_dc': 0}
../data/cdt_compare_depth/model/cartpole/rl_ppo_12_1
[0.00731772 0.9926823 ]
[0.7414794  0.25852057]
[0.9659206  0.03407942]
[0.7439119  0.25608805]
OrderedDict([('policy.fl_leaf_weights', tensor([[ 0.5395,  0.1492,  2.6079,  2.6178],
        [ 0.4740,  0.0534,  3.5718,  0.7234],
        [-0.2066,  0.5147,  3.6235,  3.6063],
        [ 0.2708,  2.5149,  3.1295,  1.0171]], device='cuda:0')), ('policy.dc_leaves', tensor([[-2.7513,  2.1589],
        [ 0.1815, -0.8722],
        [ 1.3746, -1.9698],
        [ 1.3319,  0.2655]], device='cuda:0')), ('policy.fl_inner_nodes.weight', tensor([[-1.2752,  0.2580,  0.0931,  0.0452,  0.3635]], device='cuda:0')), ('policy.dc_inn

In [2]:
# Load a saved PPO model for LunarLander and inspect its weights.
EnvName = 'LunarLander-v2'
m = 'cdt'

# The environment is only queried for its observation and action sizes.
env = gym.make(EnvName).unwrapped
state_dim, action_dim = env.observation_space.shape[0], env.action_space.n  # discrete actions

# Read the RL training configuration used for the comparison runs.
conf_path = '../src/' + m + '/' + m + '_rl_train_compare.json'
with open(conf_path, "r") as read_file:
    rl_confs = json.load(read_file)

# Override the tree depths before the model is constructed; load_model raises a
# size-mismatch RuntimeError when the model shapes differ from the checkpoint.
# NOTE(review): the checkpoint suffix below presumably encodes these depths - confirm.
env_confs = rl_confs[EnvName]
env_confs['learner_args'].update(feature_learning_depth=2, decision_depth=2)

# Build the model from the per-environment settings and move it to the configured device.
model = PPO(
    state_dim,
    action_dim,
    rl_confs["General"]["policy_approx"],
    env_confs["learner_args"],
    **env_confs["alg_confs"]
).to(torch.device(env_confs["learner_args"]["device"]))

# The checkpoint path is the configured model path followed by the suffix `i`.
i = '_22_1'
model_path = env_confs["train_confs"]["model_path"] + i
print(model_path)
model.load_model(model_path)

# Print the softmax of every row of `policy.dc_leaves`, then the full state dict.
leaf_logits = model.state_dict()['policy.dc_leaves'].detach().cpu().numpy()
for w in leaf_logits:
    print(softmax(w))
print(model.state_dict())

  result = entry_point.load(False)


CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 1, 'decision_depth': 2, 'input_dim': 8, 'output_dim': 4, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'exp_scheduler_gamma': 1.0, 'device': 'cuda', 'episodes': 40, 'log_interval': 100, 'greatest_path_probability': 1, 'beta_fl': 0, 'beta_dc': 0}
../data/cdt_compare_depth/model/lunarlander/rl_ppo_22_1


RuntimeError: Error(s) in loading state_dict for PPO:
	size mismatch for policy.fl_leaf_weights: copying a param with shape torch.Size([8, 8]) from checkpoint, the shape in current model is torch.Size([4, 8]).
	size mismatch for policy.fl_inner_nodes.weight: copying a param with shape torch.Size([3, 9]) from checkpoint, the shape in current model is torch.Size([1, 9]).