# Tree Stability Analysis

This analysis covers trees trained with imitation learning (IL) only.

In [2]:
import numpy as np
import sys
sys.path.append("..")
from src.cdt import CDT 
from src.sdt import SDT

def normalize(list_v):
    """Scale every vector in *list_v* to unit L1 norm.

    Vectors whose absolute values sum to zero cannot be scaled and are
    dropped, so the result may be shorter than the input.

    Args:
        list_v: iterable of numeric vectors (lists or arrays).

    Returns:
        A list of numpy arrays, each divided by the sum of its absolute values.
    """
    scaled = []
    for vec in list_v:
        l1_mass = np.sum(np.abs(vec))
        if l1_mass != 0:
            scaled.append(np.array(vec) / l1_mass)
    return scaled

def l1_norm(a,b):
    """Return the L1 (sum of absolute differences) distance between *a* and *b*.

    Both arguments are converted to numpy arrays before subtracting.
    """
    delta = np.array(a) - np.array(b)
    return np.linalg.norm(delta, ord=1)

def difference_metric(list1, list2, norm=True, symmetric=True):
    """Average nearest-neighbour L1 distance between two collections of vectors.

    For every vector in one collection, the closest vector of the other
    collection is looked up, where a vector and its negation count as the
    same candidate (the smaller of the two L1 distances is used). These
    per-vector minima are averaged. A lower value means the two collections
    are more alike.

    Args:
        list1, list2: iterables of equally sized numeric vectors. Both are
            expected to be non-empty after normalisation.
        norm: when true, each vector is first scaled to unit L1 norm via
            ``normalize`` (all-zero vectors are dropped by that step).
        symmetric: when true, return the mean of the list1->list2 and
            list2->list1 scores; otherwise only the list1->list2 score.

    Returns:
        The resulting score as a numpy float.
    """
    if norm:
        list1, list2 = normalize(list1), normalize(list2)

    def similarity_measure(l1, l2):
        # Mean over l1 of the distance to the closest (up to sign) vector in l2.
        nearest = []
        for v1 in l1:
            candidates = [
                np.min([l1_norm(v1, v2), l1_norm(v1, -1.*np.array(v2))])
                for v2 in l2
            ]
            nearest.append(np.min(candidates))
        return np.mean(nearest)

    forward = similarity_measure(list1, list2)
    if not symmetric:
        return forward
    return 0.5*forward + 0.5*similarity_measure(list2, list1)

# test
# a=[[1,2], [1.25, 2.25]]
# b=[[1.5,2.5], [4, 6]]
# difference_metric(a,b)

# SDT

### CartPole-v1

In [23]:
import os
import json
import numpy as np
import torch

EnvName = 'CartPole-v1'
m = 'sdt'
n = 5  # number of trained runs to compare

# Hyperparameters used for imitation-learning (IL) training of this model type.
conf_path = '../src/{0}/{0}_il_train.json'.format(m)
with open(conf_path, "r") as conf_file:
    il_confs = json.load(conf_file)

learner_args = il_confs[EnvName]["learner_args"]
device = torch.device(learner_args["device"])

# Load every saved run (checkpoint paths carry the suffix 1..n) and keep
# the node weights, bias included, of each tree.
weights_list = []
for run_id in range(1, n + 1):
    model_path = learner_args["model_path"] + str(run_id)
    tree = SDT(learner_args).to(device)
    tree.load_model(model_path)
    weights_list.append(tree.get_tree_weights(Bias=True))

# Score every unordered pair of runs exactly once.
similarity_score = [
    difference_metric(weights_list[a], weights_list[b])
    for a in range(n - 1)
    for b in range(a + 1, n)
]

print(similarity_score)
print(np.mean(similarity_score))

[1 2 3 4 5]
SDT parameters:  {'input_dim': 4, 'output_dim': 2, 'depth': 3, 'lamda': 0.001, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'epochs': 80, 'device': 'cuda', 'log_interval': 100, 'exp_scheduler_gamma': 1.0, 'beta': 0, 'l1_regularization': 0, 'greatest_path_probability': 1, 'model_path': '../data/sdt/model/cartpole/il_model', 'log_path': '../data/sdt/log/cartpole/il_log'}
SDT parameters:  {'input_dim': 4, 'output_dim': 2, 'depth': 3, 'lamda': 0.001, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'epochs': 80, 'device': 'cuda', 'log_interval': 100, 'exp_scheduler_gamma': 1.0, 'beta': 0, 'l1_regularization': 0, 'greatest_path_probability': 1, 'model_path': '../data/sdt/model/cartpole/il_model', 'log_path': '../data/sdt/log/cartpole/il_log'}
SDT parameters:  {'input_dim': 4, 'output_dim': 2, 'depth': 3, 'lamda': 0.001, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'epochs': 80, 'device': 'cuda', 'log_interval': 100, 'exp_scheduler_gamma': 1.0, 'beta': 0, '

### LunarLander-v2

In [4]:
import os
import json
import numpy as np
import torch

EnvName = 'LunarLander-v2'
m = 'sdt'
n = 5  # number of trained runs to compare

# Hyperparameters used for imitation-learning (IL) training of this model type.
conf_path = '../src/{0}/{0}_il_train.json'.format(m)
with open(conf_path, "r") as conf_file:
    il_confs = json.load(conf_file)

learner_args = il_confs[EnvName]["learner_args"]
device = torch.device(learner_args["device"])

# Load every saved run (checkpoint paths carry the suffix 1..n) and keep
# the node weights, bias included, of each tree.
weights_list = []
for run_id in range(1, n + 1):
    model_path = learner_args["model_path"] + str(run_id)
    tree = SDT(learner_args).to(device)
    tree.load_model(model_path)
    weights_list.append(tree.get_tree_weights(Bias=True))

# Baseline: a freshly constructed tree with no checkpoint loaded.
tree = SDT(learner_args).to(device)
random_weights = tree.get_tree_weights(Bias=True)

# Run-vs-run score for every unordered pair of runs.
similarity_score = [
    difference_metric(weights_list[a], weights_list[b])
    for a in range(n - 1)
    for b in range(a + 1, n)
]

# Run-vs-baseline score for every run.
random_similarity_score = [
    difference_metric(run_weights, random_weights)
    for run_weights in weights_list
]

print(np.mean(similarity_score), np.mean(random_similarity_score))

SDT parameters:  {'input_dim': 8, 'output_dim': 4, 'depth': 4, 'lamda': 0.001, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'epochs': 80, 'device': 'cuda', 'log_interval': 100, 'exp_scheduler_gamma': 1.0, 'beta': 0, 'l1_regularization': 0, 'greatest_path_probability': 1, 'model_path': '../data/sdt/model/lunarlander/il_model', 'log_path': '../data/sdt/log/lunarlander/il_log'}
SDT parameters:  {'input_dim': 8, 'output_dim': 4, 'depth': 4, 'lamda': 0.001, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'epochs': 80, 'device': 'cuda', 'log_interval': 100, 'exp_scheduler_gamma': 1.0, 'beta': 0, 'l1_regularization': 0, 'greatest_path_probability': 1, 'model_path': '../data/sdt/model/lunarlander/il_model', 'log_path': '../data/sdt/log/lunarlander/il_log'}
SDT parameters:  {'input_dim': 8, 'output_dim': 4, 'depth': 4, 'lamda': 0.001, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'epochs': 80, 'device': 'cuda', 'log_interval': 100, 'exp_scheduler_gamma': 1.0, 'beta': 0, '

In [7]:
# LunarLander Heuristic Decision Tree Agent Nodes
# Hand-written reference set of node vectors that the learned tree weights
# are compared against with difference_metric. Every row has 9 entries: the
# first entry is the bias and the remaining 8 are the weights.
# NOTE(review): the 8 weights presumably line up with the 8-dimensional
# LunarLander observation (input_dim is 8 in the printed configs) - confirm.
nodes_in_heuristic_tree = [  # first dim is bias, the rest are weights
    [0, 0,0,0,0,0,0,1,1],

    [-0.4, 0.5, 0,1,0,0,0,0,0],
    [-0.4, -0.5, 0,-1,0,0,0,0,0],
    [0, 1,0,0,0,0,0,0,0],

    # at
    # NOTE(review): "at" presumably abbreviates the angle-target terms of the
    # heuristic controller - confirm against the heuristic agent's source.
    [0.2, 0,0,0,0,-0.5,-1,0,0],
    [0.15, 0,0,0,0,-0.5,-1,0,0],
    [-0.25, 0,0,0,0,0.5,1,0,0],

    [-0.2, 0,0,0,0,-0.5,-1,0,0],
    [-0.25, 0,0,0,0,-0.5,-1,0,0],
    [0.15, 0,0,0,0,0.5,1,0,0],


    [0, 0.25, 0, 0.5, 0, -0.5, -1, 0, 0 ],
    [-0.05, 0.25, 0, 0.5, 0, -0.5, -1, 0, 0 ],
    [-0.05, -0.25, 0, -0.5, 0, 0.5, 1, 0, 0 ],


    # ht
    # NOTE(review): "ht" presumably abbreviates the hover-target terms of the
    # heuristic controller - confirm against the heuristic agent's source.
    [-0.05, 0.275, -0.5, 0, -0.5, 0,0,0,0],

    [-0.05, -0.275, -0.5, 0, -0.5, 0,0,0,0],

    [-0.05, 0, 0, 0, -0.5, 0, 0, 0, 0],

    # at, ht
    # Rows below have non-zero weights in both the "at" and the "ht" columns.
    [-0.2, 0.275, -0.5, 0,-0.5, 0.5,1,0,0],
    [0.2, 0.275, -0.5, 0,-0.5, -0.5, -1, 0,0],

    [-0.2, -0.275, -0.5, 0,-0.5, 0.5,1,0,0],
    [0.2, -0.275, -0.5, 0,-0.5, -0.5, -1, 0,0],

    [0.2, 0.275, -0.5, 0,-0.5, 0.5,1,0,0],
    [-0.2, 0.275, -0.5, 0,-0.5, -0.5, -1, 0,0],

    [0.2, -0.275, -0.5, 0,-0.5, 0.5,1,0,0],
    [-0.2, -0.275, -0.5, 0,-0.5, -0.5, -1, 0,0],

    [0, 0.025, -0.5, -0.5, -0.5, 0.5, 1, 0, 0],
    [0, 0.525, -0.5, 0.5, -0.5, -0.5, -1, 0, 0],

    [0, -0.525, -0.5, -0.5, -0.5, 0.5, 1, 0, 0],
    [0, -0.025, -0.5, 0.5, -0.5, -0.5, -1, 0, 0],

]


In [34]:
import os
import json
import numpy as np
import torch

EnvName = 'LunarLander-v2'
m = 'sdt'
n = 5  # number of trained runs to compare

# Hyperparameters used for imitation-learning (IL) training of this model type.
conf_path = '../src/{0}/{0}_il_train.json'.format(m)
with open(conf_path, "r") as conf_file:
    il_confs = json.load(conf_file)

learner_args = il_confs[EnvName]["learner_args"]
device = torch.device(learner_args["device"])

# Load every saved run (checkpoint paths carry the suffix 1..n) and keep
# the node weights, bias included, of each tree.
weights_list = []
for run_id in range(1, n + 1):
    model_path = learner_args["model_path"] + str(run_id)
    tree = SDT(learner_args).to(device)
    tree.load_model(model_path)
    weights_list.append(tree.get_tree_weights(Bias=True))

# Reset for parity with the other cells; not filled in this cell.
similarity_score = []

# Score each learned tree against the hand-written heuristic node set
# (nodes_in_heuristic_tree is defined in an earlier cell).
heuristic_similarity_score = [
    difference_metric(run_weights, nodes_in_heuristic_tree)
    for run_weights in weights_list
]

print(np.mean(heuristic_similarity_score))

[1 2 3 4 5]
SDT parameters:  {'input_dim': 8, 'output_dim': 4, 'depth': 4, 'lamda': 0.001, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'epochs': 80, 'device': 'cuda', 'log_interval': 100, 'exp_scheduler_gamma': 1.0, 'beta': 0, 'l1_regularization': 0, 'greatest_path_probability': 1, 'model_path': '../data/sdt/model/lunarlander/il_model', 'log_path': '../data/sdt/log/lunarlander/il_log'}
SDT parameters:  {'input_dim': 8, 'output_dim': 4, 'depth': 4, 'lamda': 0.001, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'epochs': 80, 'device': 'cuda', 'log_interval': 100, 'exp_scheduler_gamma': 1.0, 'beta': 0, 'l1_regularization': 0, 'greatest_path_probability': 1, 'model_path': '../data/sdt/model/lunarlander/il_model', 'log_path': '../data/sdt/log/lunarlander/il_log'}
SDT parameters:  {'input_dim': 8, 'output_dim': 4, 'depth': 4, 'lamda': 0.001, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'epochs': 80, 'device': 'cuda', 'log_interval': 100, 'exp_scheduler_gamma': 1.0, 

# CDT

### CartPole-v1

In [6]:
import os
import json
import numpy as np
import torch

EnvName = 'CartPole-v1'
m = 'cdt'
n = 5  # number of trained runs to compare

# Hyperparameters used for imitation-learning (IL) training of this model type.
conf_path = '../src/{0}/{0}_il_train.json'.format(m)
with open(conf_path, "r") as conf_file:
    il_confs = json.load(conf_file)

learner_args = il_confs[EnvName]["learner_args"]
device = torch.device(learner_args["device"])

# A CDT hands back two weight sets per tree ("fl" and "dm"); they are
# collected and scored separately.
fl_weights_list, dm_weights_list = [], []
for run_id in range(1, n + 1):
    model_path = learner_args["model_path"] + str(run_id)
    tree = CDT(learner_args).to(device)
    tree.load_model(model_path)
    fl_weights, dm_weights = tree.get_tree_weights(Bias=True)
    fl_weights_list.append(fl_weights)
    dm_weights_list.append(dm_weights)

# Baseline: a freshly constructed tree with no checkpoint loaded.
tree = CDT(learner_args).to(device)
random_fl_weights, random_dm_weights = tree.get_tree_weights(Bias=True)

# Every unordered pair of runs, as 0-based indices.
run_pairs = [(a, b) for a in range(n - 1) for b in range(a + 1, n)]

fl_similarity_score = [
    difference_metric(fl_weights_list[a], fl_weights_list[b])
    for a, b in run_pairs
]
dm_similarity_score = [
    difference_metric(dm_weights_list[a], dm_weights_list[b])
    for a, b in run_pairs
]
random_fl_similarity_score = [
    difference_metric(run_fl, random_fl_weights) for run_fl in fl_weights_list
]
random_dm_similarity_score = [
    difference_metric(run_dm, random_dm_weights) for run_dm in dm_weights_list
]

print(np.mean(fl_similarity_score), np.mean(dm_similarity_score))
print(np.mean(random_fl_similarity_score), np.mean(random_dm_similarity_score))

# Pooled means: the "+" concatenates the two score lists before averaging.
print(np.mean(fl_similarity_score+dm_similarity_score))
print(np.mean(random_fl_similarity_score+random_dm_similarity_score))

CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 2, 'decision_depth': 2, 'input_dim': 4, 'output_dim': 2, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'exp_scheduler_gamma': 1.0, 'device': 'cuda', 'epochs': 80, 'log_interval': 100, 'greatest_path_probability': 1, 'beta_fl': 0, 'beta_dc': 0, 'model_path': '../data/cdt/model/cartpole/il_model', 'log_path': '../data/cdt/log/cartpole/il_log'}
CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 2, 'decision_depth': 2, 'input_dim': 4, 'output_dim': 2, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'exp_scheduler_gamma': 1.0, 'device': 'cuda', 'epochs': 80, 'log_interval': 100, 'greatest_path_probability': 1, 'beta_fl': 0, 'beta_dc': 0, 'model_path': '../data/cdt/model/cartpole/il_model', 'log_path': '../data/cdt/log/cartpole/il_log'}
CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 2, 'decision_depth': 2, 'input_dim': 4, 'output_dim': 2, 'lr': 0.

### LunarLander-v2

In [30]:
import os
import json
import numpy as np
import torch

EnvName = 'LunarLander-v2'
m = 'cdt'
n = 5  # number of trained runs to compare

# Hyperparameters used for imitation-learning (IL) training of this model type.
conf_path = '../src/{0}/{0}_il_train.json'.format(m)
with open(conf_path, "r") as conf_file:
    il_confs = json.load(conf_file)

learner_args = il_confs[EnvName]["learner_args"]
device = torch.device(learner_args["device"])

# A CDT hands back two weight sets per tree ("fl" and "dm"); they are
# collected and scored separately.
fl_weights_list, dm_weights_list = [], []
for run_id in range(1, n + 1):
    model_path = learner_args["model_path"] + str(run_id)
    tree = CDT(learner_args).to(device)
    tree.load_model(model_path)
    fl_weights, dm_weights = tree.get_tree_weights(Bias=True)
    fl_weights_list.append(fl_weights)
    dm_weights_list.append(dm_weights)

# Baseline: a freshly constructed tree with no checkpoint loaded.
tree = CDT(learner_args).to(device)
random_fl_weights, random_dm_weights = tree.get_tree_weights(Bias=True)

# Every unordered pair of runs, as 0-based indices.
run_pairs = [(a, b) for a in range(n - 1) for b in range(a + 1, n)]

fl_similarity_score = [
    difference_metric(fl_weights_list[a], fl_weights_list[b])
    for a, b in run_pairs
]
dm_similarity_score = [
    difference_metric(dm_weights_list[a], dm_weights_list[b])
    for a, b in run_pairs
]
random_fl_similarity_score = [
    difference_metric(run_fl, random_fl_weights) for run_fl in fl_weights_list
]
random_dm_similarity_score = [
    difference_metric(run_dm, random_dm_weights) for run_dm in dm_weights_list
]

print(np.mean(fl_similarity_score), np.mean(dm_similarity_score))
print(np.mean(random_fl_similarity_score), np.mean(random_dm_similarity_score))

# Pooled means: the "+" concatenates the two score lists before averaging.
print(np.mean(fl_similarity_score+dm_similarity_score))
print(np.mean(random_fl_similarity_score+random_dm_similarity_score))

CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 2, 'decision_depth': 2, 'input_dim': 8, 'output_dim': 4, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'exp_scheduler_gamma': 1.0, 'device': 'cuda', 'epochs': 80, 'log_interval': 100, 'greatest_path_probability': 1, 'beta_fl': 0, 'beta_dc': 0, 'model_path': '../data/cdt/model/lunarlander/il_model', 'log_path': '../data/cdt/log/lunarlander/il_log'}
CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 2, 'decision_depth': 2, 'input_dim': 8, 'output_dim': 4, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'exp_scheduler_gamma': 1.0, 'device': 'cuda', 'epochs': 80, 'log_interval': 100, 'greatest_path_probability': 1, 'beta_fl': 0, 'beta_dc': 0, 'model_path': '../data/cdt/model/lunarlander/il_model', 'log_path': '../data/cdt/log/lunarlander/il_log'}
CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 2, 'decision_depth': 2, 'input_dim': 8, 'output_dim':

## Another way of calculating DM weights (based on raw input $x$)
**Adopted in the paper**.

Since the DM weights are represented in $\{f\}$-space (the features), we want to transform them back to $\{x\}$-space (the raw inputs), so that they can be compared with a normal SDT.

Parameters:

* R: raw feature dimension
* K: intermediate feature dimension
* O: output dimension
* N: number of leaf nodes in the feature learning tree
* M: number of inner nodes in the decision making tree

$\tilde{W}_{N\times K\times R}\cdot W'_{M\times K} \rightarrow W^{raw}_{N\times M \times R}$

### CartPole-v1

In [26]:
import os
import json
import numpy as np
import torch

EnvName = 'CartPole-v1'
m = 'cdt'
n = 5  # number of trained runs to compare

# Hyperparameters used for imitation-learning (IL) training of this model type.
conf_path = '../src/{0}/{0}_il_train.json'.format(m)
with open(conf_path, "r") as conf_file:
    il_confs = json.load(conf_file)

# Receives one stacked weight matrix per run further down in this cell.
weights_list = []

def get_all_weights(tree):
    """Stack a CDT's node weights into one matrix expressed over the raw inputs.

    The decision-making (DM) weights act on the intermediate features, so
    they are multiplied through each feature-learning leaf's feature weights
    to obtain weights over the raw inputs. The DM bias is prepended
    unchanged as the first column.

    Shape legend (N leaves, K intermediate features, R raw inputs, M DM nodes):
        tree.get_feature_weights()   -> (N, K, R)
        dm_weights                   -> (M, 1 + K), bias in column 0
        fl_weights                   -> (rows, 1 + R), NOTE(review): inferred
                                        from the final concatenation - confirm.

    Args:
        tree: object providing ``get_feature_weights()``,
            ``get_tree_weights(Bias=True)`` and ``num_fl_leaves``.

    Returns:
        numpy array: the ``fl_weights`` rows followed by M*N rows of
        ``[bias, raw-input weights]``, ordered by DM node, then by leaf.
    """
    feature_w = np.transpose(tree.get_feature_weights(), (0, 2, 1))  # (N, K, R) -> (N, R, K)
    fl_weights, dm_weights = tree.get_tree_weights(Bias=True)
    n_leaves = tree.num_fl_leaves

    # Project the DM weights (bias column removed) through the feature weights.
    dm_feature_w = np.transpose(dm_weights[:, 1:])  # (M, K) -> (K, M)
    projected = np.matmul(feature_w, dm_feature_w)  # (N, R, K) @ (K, M) -> (N, R, M)
    projected = np.transpose(projected, (2, 0, 1))  # (N, R, M) -> (M, N, R)

    # Every DM node's bias is reused for each of the N leaves.
    bias = np.repeat(dm_weights[:, :1], n_leaves, axis=1)[:, :, np.newaxis]  # (M, 1) -> (M, N, 1)

    with_bias = np.concatenate((bias, projected), axis=-1)  # (M, N, R+1)
    dm_rows = with_bias.reshape(-1, with_bias.shape[-1])  # (M*N, R+1)
    return np.concatenate((fl_weights, dm_rows), axis=0)
    
learner_args = il_confs[EnvName]["learner_args"]
device = torch.device(learner_args["device"])

# Load every saved run (checkpoint paths carry the suffix 1..n) and keep
# its stacked weight matrix.
for run_id in range(1, n + 1):
    model_path = learner_args["model_path"] + str(run_id)
    tree = CDT(learner_args).to(device)
    tree.load_model(model_path)
    all_weights = get_all_weights(tree)
    print(all_weights.shape)
    weights_list.append(all_weights)

# Baseline: a freshly constructed tree with no checkpoint loaded.
tree = CDT(learner_args).to(device)
random_weights = get_all_weights(tree)

# Run-vs-run score for every unordered pair of runs.
similarity_score = [
    difference_metric(weights_list[a], weights_list[b])
    for a in range(n - 1)
    for b in range(a + 1, n)
]

# Run-vs-baseline score for every run.
random_similarity_score = [
    difference_metric(run_weights, random_weights)
    for run_weights in weights_list
]

print(np.mean(similarity_score))
print(np.mean(random_similarity_score))

CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 1, 'decision_depth': 2, 'input_dim': 4, 'output_dim': 2, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'exp_scheduler_gamma': 1.0, 'device': 'cuda', 'epochs': 80, 'log_interval': 100, 'greatest_path_probability': 1, 'beta_fl': 0, 'beta_dc': 0, 'model_path': '../data/cdt/model/cartpole/il_model', 'log_path': '../data/cdt/log/cartpole/il_log'}
(7, 5)
CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 1, 'decision_depth': 2, 'input_dim': 4, 'output_dim': 2, 'lr': 0.001, 'weight_decay': 0.0, 'batch_size': 1280, 'exp_scheduler_gamma': 1.0, 'device': 'cuda', 'epochs': 80, 'log_interval': 100, 'greatest_path_probability': 1, 'beta_fl': 0, 'beta_dc': 0, 'model_path': '../data/cdt/model/cartpole/il_model', 'log_path': '../data/cdt/log/cartpole/il_log'}
(7, 5)
CDT parameters:  {'num_intermediate_variables': 2, 'feature_learning_depth': 1, 'decision_depth': 2, 'input_dim': 4, 'output_dim

### LunarLander-v2

In [None]:
import os
import json
import numpy as np
import torch

EnvName = 'LunarLander-v2'
m = 'cdt'
n = 5  # number of trained runs to compare

# Hyperparameters used for imitation-learning (IL) training of this model type.
conf_path = '../src/{0}/{0}_il_train.json'.format(m)
with open(conf_path, "r") as conf_file:
    il_confs = json.load(conf_file)

# Receives one stacked weight matrix per run further down in this cell.
weights_list = []

def get_all_weights(tree):
    """Stack a CDT's node weights into one matrix expressed over the raw inputs.

    The decision-making (DM) weights act on the intermediate features, so
    they are multiplied through each feature-learning leaf's feature weights
    to obtain weights over the raw inputs. The DM bias is prepended
    unchanged as the first column.

    Shape legend (N leaves, K intermediate features, R raw inputs, M DM nodes):
        tree.get_feature_weights()   -> (N, K, R)
        dm_weights                   -> (M, 1 + K), bias in column 0
        fl_weights                   -> (rows, 1 + R), NOTE(review): inferred
                                        from the final concatenation - confirm.

    Args:
        tree: object providing ``get_feature_weights()``,
            ``get_tree_weights(Bias=True)`` and ``num_fl_leaves``.

    Returns:
        numpy array: the ``fl_weights`` rows followed by M*N rows of
        ``[bias, raw-input weights]``, ordered by DM node, then by leaf.
    """
    feature_w = np.transpose(tree.get_feature_weights(), (0, 2, 1))  # (N, K, R) -> (N, R, K)
    fl_weights, dm_weights = tree.get_tree_weights(Bias=True)
    n_leaves = tree.num_fl_leaves

    # Project the DM weights (bias column removed) through the feature weights.
    dm_feature_w = np.transpose(dm_weights[:, 1:])  # (M, K) -> (K, M)
    projected = np.matmul(feature_w, dm_feature_w)  # (N, R, K) @ (K, M) -> (N, R, M)
    projected = np.transpose(projected, (2, 0, 1))  # (N, R, M) -> (M, N, R)

    # Every DM node's bias is reused for each of the N leaves.
    bias = np.repeat(dm_weights[:, :1], n_leaves, axis=1)[:, :, np.newaxis]  # (M, 1) -> (M, N, 1)

    with_bias = np.concatenate((bias, projected), axis=-1)  # (M, N, R+1)
    dm_rows = with_bias.reshape(-1, with_bias.shape[-1])  # (M*N, R+1)
    return np.concatenate((fl_weights, dm_rows), axis=0)
    
learner_args = il_confs[EnvName]["learner_args"]
device = torch.device(learner_args["device"])

# Load every saved run (checkpoint paths carry the suffix 1..n) and keep
# its stacked weight matrix.
for run_id in range(1, n + 1):
    model_path = learner_args["model_path"] + str(run_id)
    tree = CDT(learner_args).to(device)
    tree.load_model(model_path)
    all_weights = get_all_weights(tree)
    print(all_weights.shape)
    weights_list.append(all_weights)

# Baseline: a freshly constructed tree with no checkpoint loaded.
tree = CDT(learner_args).to(device)
random_weights = get_all_weights(tree)

# Run-vs-run score for every unordered pair of runs.
similarity_score = [
    difference_metric(weights_list[a], weights_list[b])
    for a in range(n - 1)
    for b in range(a + 1, n)
]

# Run-vs-baseline score for every run.
random_similarity_score = [
    difference_metric(run_weights, random_weights)
    for run_weights in weights_list
]

# Run-vs-heuristic score for every run (nodes_in_heuristic_tree is defined
# in an earlier cell).
heuristic_similarity_score = [
    difference_metric(run_weights, nodes_in_heuristic_tree)
    for run_weights in weights_list
]

print(np.mean(similarity_score))
print(np.mean(random_similarity_score))
print(np.mean(heuristic_similarity_score))