# Imports

In [2]:
config = {
    'dhdt': {
        'depth': 3,
        'learning_rate': 0.01,#1e-3,
        
        'initializer': 'he_normal', #GlorotUniform
        'initializer_index': 'ones', #GlorotUniform
        
        
        'loss': 'binary_crossentropy',#'mae',
        'optimizer': 'adam',        
        
        'beta_1': 10,
        'beta_2': 50,
        
        'activation': 'tanh', #sigmoid
        'squeeze_factor': 1,
        
        'batch_size': 512,
        'epochs': 1_000,
        'early_stopping_epochs': 50,
    },
    
    
    
    'make_classification': {
        'number_of_variables': 10,
        'n_samples': 5_000,
        'num_eval': 30,
    },

    'computation': {
        'random_seed': 42,
        'trials': 5,
        'n_jobs': 30,
        'verbosity': 0,
    },
}



In [3]:
import numpy as np

import sklearn
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, LabelEncoder, OrdinalEncoder

from livelossplot import PlotLosses

import os
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

from IPython.display import Image
from IPython.display import display, clear_output

import pandas as pd

os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = '' #'true'

import warnings
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import logging

import tensorflow as tf
import tensorflow_addons as tfa

tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(3)

np.seterr(all="ignore")

from keras import backend as K
from keras.utils.generic_utils import get_custom_objects


import seaborn as sns
sns.set_style("darkgrid")

import time
import random

from utilities.utilities import *
from utilities.DHDT import *

from joblib import Parallel, delayed

from itertools import product
from collections.abc import Iterable

from copy import deepcopy

tf.random.set_seed(config['computation']['random_seed'])
np.random.seed(config['computation']['random_seed'])
random.seed(config['computation']['random_seed'])

# Evaluation

## make_classification

In [5]:
split_values = tf.Variable(tf.keras.initializers.get({'class_name': 'RandomNormal', 'config': {'seed': 42}})(shape=(7, 5)),
                          trainable=True,
                          name='split_values')
split_values

<tf.Variable 'split_values:0' shape=(7, 5) dtype=float32, numpy=
array([[ 0.10033574,  0.04636555,  0.05786117, -0.05099785, -0.02391306],
       [-0.08597916, -0.01263373, -0.00267067,  0.0417807 ,  0.07248369],
       [ 0.08227255, -0.0294264 ,  0.03235679,  0.03987736, -0.0173113 ],
       [-0.07950286,  0.05116254, -0.08028042,  0.0457987 ,  0.02673771],
       [ 0.03928551,  0.04569229, -0.00315807,  0.08293498, -0.04713498],
       [-0.00194597,  0.00211207, -0.02013346, -0.13587832,  0.02131949],
       [-0.0222547 ,  0.03230785,  0.13138242, -0.02288633, -0.06248237]],
      dtype=float32)>

In [14]:
split_index = tf.Variable(tf.keras.initializers.get({'class_name': 'RandomNormal', 'config': {'seed': 42}})(shape=(7, 5)),
                          trainable=True,
                          name='split_values')
split_index = tfa.seq2seq.hardmax(split_index)
split_index

<tf.Tensor: shape=(7, 5), dtype=float32, numpy=
array([[0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.]], dtype=float32)>

In [25]:
entry = tf.constant([[0.1,0.2,0.3,0.4,0.5], [0.9,0.2,0.9,0.4,0.5]])

In [26]:
split_values*split_index

<tf.Tensor: shape=(7, 5), dtype=float32, numpy=
array([[ 0.        ,  0.        ,  0.        , -0.05099785, -0.        ],
       [-0.        , -0.        , -0.00267067,  0.        ,  0.        ],
       [ 0.08227255, -0.        ,  0.        ,  0.        , -0.        ],
       [-0.        ,  0.05116254, -0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        , -0.        ,  0.        , -0.04713498],
       [-0.00194597,  0.        , -0.        , -0.        ,  0.        ],
       [-0.        ,  0.03230785,  0.        , -0.        , -0.        ]],
      dtype=float32)>

In [27]:
tf.reduce_sum(split_values*split_index, axis=1)

<tf.Tensor: shape=(7,), dtype=float32, numpy=
array([-0.05099785, -0.00267067,  0.08227255,  0.05116254, -0.04713498,
       -0.00194597,  0.03230785], dtype=float32)>

In [31]:
entry

<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[0.1, 0.2, 0.3, 0.4, 0.5],
       [0.9, 0.2, 0.9, 0.4, 0.5]], dtype=float32)>

In [32]:
split_index

<tf.Tensor: shape=(7, 5), dtype=float32, numpy=
array([[0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.]], dtype=float32)>

In [40]:
tf.expand_dims(split_index, 1)

<tf.Tensor: shape=(7, 2, 5), dtype=float32, numpy=
array([[[0. , 0. , 0. , 0.4, 0. ],
        [0. , 0. , 0. , 0.4, 0. ]],

       [[0. , 0. , 0.3, 0. , 0. ],
        [0. , 0. , 0.9, 0. , 0. ]],

       [[0.1, 0. , 0. , 0. , 0. ],
        [0.9, 0. , 0. , 0. , 0. ]],

       [[0. , 0.2, 0. , 0. , 0. ],
        [0. , 0.2, 0. , 0. , 0. ]],

       [[0. , 0. , 0. , 0. , 0.5],
        [0. , 0. , 0. , 0. , 0.5]],

       [[0.1, 0. , 0. , 0. , 0. ],
        [0.9, 0. , 0. , 0. , 0. ]],

       [[0. , 0.2, 0. , 0. , 0. ],
        [0. , 0.2, 0. , 0. , 0. ]]], dtype=float32)>

In [105]:
tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2)

<tf.Tensor: shape=(7, 2), dtype=float32, numpy=
array([[0.4, 0.4],
       [0.3, 0.9],
       [0.1, 0.9],
       [0.2, 0.2],
       [0.5, 0.5],
       [0.1, 0.9],
       [0.2, 0.2]], dtype=float32)>

In [106]:
tf.reduce_sum(split_index*tf.expand_dims(entry, 1), axis=2)

<tf.Tensor: shape=(2, 7), dtype=float32, numpy=
array([[0.4, 0.3, 0.1, 0.2, 0.5, 0.1, 0.2],
       [0.4, 0.9, 0.9, 0.2, 0.5, 0.9, 0.2]], dtype=float32)>

In [108]:
tf.reduce_sum(split_values*split_index, axis=1)

<tf.Tensor: shape=(7,), dtype=float32, numpy=
array([-0.05099785, -0.00267067,  0.08227255,  0.05116254, -0.04713498,
       -0.00194597,  0.03230785], dtype=float32)>

In [113]:
tf.reduce_sum(split_index*tf.expand_dims(entry, 1), axis=2) - tf.reduce_sum(split_values*split_index, axis=1)

<tf.Tensor: shape=(2, 7), dtype=float32, numpy=
array([[0.45099786, 0.3026707 , 0.01772745, 0.14883746, 0.547135  ,
        0.10194597, 0.16769215],
       [0.45099786, 0.9026706 , 0.81772745, 0.14883746, 0.547135  ,
        0.90194595, 0.16769215]], dtype=float32)>

In [114]:
#WORKING OTHER SHAPE (NEW!!#
tf.round(tf.sigmoid(tf.reduce_sum(split_index*tf.expand_dims(entry, 1), axis=2) - tf.reduce_sum(split_values*split_index, axis=1) - 0.5))


<tf.Tensor: shape=(2, 7), dtype=float32, numpy=
array([[0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 1., 0., 1., 1., 0.]], dtype=float32)>

In [111]:
#WORKING#
tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5))


<tf.Tensor: shape=(7, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 1.],
       [0., 1.],
       [0., 0.],
       [1., 1.],
       [0., 1.],
       [0., 0.]], dtype=float32)>

In [54]:
path_result_complete_numpy = path_result_complete.numpy()
path_result_complete_numpy

array([[0., 0.],
       [0., 1.],
       [0., 1.],
       [0., 0.],
       [1., 1.],
       [0., 1.],
       [0., 0.]], dtype=float32)

In [57]:
path_result_complete_numpy[[1,3]]

array([[0., 1.],
       [0., 0.]], dtype=float32)

In [77]:
internal_node_result_complete = tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5))
display(internal_node_result_complete)

paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

for path in paths:
    path_results_complete = tf.gather_nd(path_result_complete, [[node] for node in path])
    display(path_results_complete)

<tf.Tensor: shape=(7, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 1.],
       [0., 1.],
       [0., 0.],
       [1., 1.],
       [0., 1.],
       [0., 0.]], dtype=float32)>

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 1.],
       [0., 0.]], dtype=float32)>

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 1.],
       [1., 1.]], dtype=float32)>

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 1.],
       [0., 1.]], dtype=float32)>

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 1.],
       [0., 0.]], dtype=float32)>

In [69]:
internal_node_result_complete

<tf.Tensor: shape=(7, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 1.],
       [0., 1.],
       [0., 0.],
       [1., 1.],
       [0., 1.],
       [0., 0.]], dtype=float32)>

In [70]:
begin_idx

7

In [71]:
end_idx

15

In [76]:
begin_idx = 1
end_idx = 3


<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0., 1.],
       [0., 1.]], dtype=float32)>

In [121]:
internal_node_result_complete

<tf.Tensor: shape=(2, 7), dtype=float32, numpy=
array([[0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 1., 0., 1., 1., 0.]], dtype=float32)>

In [123]:
tf.cast(1-internal_node_result_complete, tf.int32)

<tf.Tensor: shape=(2, 7), dtype=int32, numpy=
array([[1, 1, 1, 1, 0, 1, 1],
       [1, 0, 0, 1, 0, 0, 1]], dtype=int32)>

In [132]:
internal_node_result_complete = tf.round(tf.sigmoid(tf.reduce_sum(split_index*tf.expand_dims(entry, 1), axis=2) - tf.reduce_sum(split_values*split_index, axis=1) - 0.5))
tf.concat([tf.expand_dims(internal_node_result_complete, 2), tf.expand_dims(1-internal_node_result_complete, 2)], axis=2) 

<tf.Tensor: shape=(2, 7, 2), dtype=float32, numpy=
array([[[0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.]],

       [[0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.]]], dtype=float32)>

In [140]:
path_results_complete[0]

<tf.Tensor: shape=(2, 4, 2), dtype=float32, numpy=
array([[[0., 1.],
        [0., 0.],
        [0., 1.],
        [0., 1.]],

       [[0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 1.]]], dtype=float32)>

In [141]:
path_results_complete[1]

<tf.Tensor: shape=(2, 4, 2), dtype=float32, numpy=
array([[[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]]], dtype=float32)>

In [148]:
path_results_complete_layer

<tf.Tensor: shape=(2, 4, 2), dtype=float32, numpy=
array([[[0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.]],

       [[0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.]]], dtype=float32)>

In [147]:
tf.split(path_results_complete_layer, 2)

[<tf.Tensor: shape=(1, 4, 2), dtype=float32, numpy=
 array([[[0., 1.],
         [1., 0.],
         [0., 1.],
         [0., 1.]]], dtype=float32)>,
 <tf.Tensor: shape=(1, 4, 2), dtype=float32, numpy=
 array([[[0., 1.],
         [1., 0.],
         [1., 0.],
         [0., 1.]]], dtype=float32)>]

In [151]:
internal_node_result_complete

<tf.Tensor: shape=(2, 7), dtype=float32, numpy=
array([[0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 1., 0., 1., 1., 0.]], dtype=float32)>

In [161]:
path_results_complete

<tf.Tensor: shape=(2, 2, 2, 3), dtype=float32, numpy=
array([[[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 1.],
         [0., 1., 1.]]],


       [[[0., 1., 1.],
         [0., 0., 1.]],

        [[0., 0., 0.],
         [0., 0., 0.]]]], dtype=float32)>

In [159]:
entry = tf.constant([[0.1,0.2,0.3,0.4,0.5], [0.9,0.2,0.9,0.4,0.5], [0.9,0.9,0.5,0.4,0.5]])b

In [171]:
entry = tf.constant([[0.1,0.2,0.3,0.4,0.5]])

In [170]:
print('B')

B


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.]], dtype=float32)>

In [186]:
path_results_complete_layer

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.]], dtype=float32)>

In [191]:
path_results_complete_layer

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.],
       [0.]], dtype=float32)>

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.],
       [0.]], dtype=float32)>

In [202]:
path_results_complete_layer[1::2]

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[1.],
       [0.]], dtype=float32)>

In [204]:
tf.constant([0,1]*2)

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 1, 0, 1], dtype=int32)>

In [205]:
path_results_complete

<tf.Tensor: shape=(2, 2, 2, 1), dtype=float32, numpy=
array([[[[0.],
         [0.]],

        [[0.],
         [0.]]],


       [[[1.],
         [0.]],

        [[1.],
         [0.]]]], dtype=float32)>

In [230]:
tf.reshape(tf.constant([[1,2],[2,3]]), [4])

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([1, 2, 2, 3], dtype=int32)>

In [227]:
tf.keras.layers.Flatten()()

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1, 2],
       [2, 3]], dtype=int32)>

In [234]:
2 ** (layer_idx + 1)

2

In [245]:
r1 = tf.multiply([[0], [1]], [4,5,6,7])
r1

<tf.Tensor: shape=(2, 4), dtype=int32, numpy=
array([[0, 0, 0, 0],
       [4, 5, 6, 7]], dtype=int32)>

In [247]:
r2 = tf.multiply([[[0], [1]],[[1],[0]]], r1)
r2

<tf.Tensor: shape=(2, 2, 4), dtype=int32, numpy=
array([[[0, 0, 0, 0],
        [4, 5, 6, 7]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0]]], dtype=int32)>

In [None]:
tf.multiply(

In [251]:
tf.squeeze(tf.stack([internal_node_result_complete[0:1,:], 1-internal_node_result_complete[0:1,:]]), 1)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.],
       [1.]], dtype=float32)>

In [259]:
tf.reshape(tf.stack([path_results_complete, 1-path_results_complete]), (,-1,-1))

InvalidArgumentError: Only one input size may be -1, not both 0 and 1 [Op:Reshape]

In [262]:
tf.stack([path_results_complete, 1-path_results_complete])

<tf.Tensor: shape=(2, 2, 2, 1), dtype=float32, numpy=
array([[[[0.],
         [0.]],

        [[0.],
         [1.]]],


       [[[1.],
         [1.]],

        [[1.],
         [0.]]]], dtype=float32)>

In [275]:
path_results_complete

<tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
array([[[0.],
        [0.]],

       [[1.],
        [1.]]], dtype=float32)>

In [280]:
path_results_complete_layer

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[0.],
       [1.],
       [0.],
       [0.]], dtype=float32)>

In [282]:
tf.concat([path_results_complete_layer, 1-path_results_complete_layer], 1)

<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
array([[0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.]], dtype=float32)>

In [279]:
tf.concat([path_results_complete, 1-path_results_complete], 2)

<tf.Tensor: shape=(2, 2, 2), dtype=float32, numpy=
array([[[0., 1.],
        [0., 1.]],

       [[1., 0.],
        [1., 0.]]], dtype=float32)>

In [286]:
path_results_complete

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.]], dtype=float32)>

In [300]:
path_results_complete = internal_node_result_complete[0:1,:]
path_results_complete

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 0., 0.]], dtype=float32)>

In [298]:
path_results_complete

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

In [303]:
tf.transpose(tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5)))


<tf.Tensor: shape=(3, 7), dtype=float32, numpy=
array([[0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 1., 0., 1., 1., 0.],
       [0., 1., 1., 1., 1., 1., 1.]], dtype=float32)>

In [308]:
path_results_complete_extended

<tf.Tensor: shape=(7, 1, 2), dtype=float32, numpy=
array([[[0., 1.]],

       [[0., 1.]],

       [[0., 1.]],

       [[0., 1.]],

       [[1., 0.]],

       [[0., 1.]],

       [[0., 1.]]], dtype=float32)>

In [309]:
path_results_complete_layer_extended

<tf.Tensor: shape=(7, 4), dtype=float32, numpy=
array([[0., 0., 1., 1.],
       [1., 1., 0., 0.],
       [1., 1., 0., 0.],
       [0., 1., 1., 0.],
       [1., 1., 0., 0.],
       [1., 1., 0., 0.],
       [0., 1., 1., 0.]], dtype=float32)>

In [313]:
tf.multiply(path_results_complete_extended, tf.expand_dims(path_results_complete_layer_extended, 2))
            

<tf.Tensor: shape=(7, 4, 2), dtype=float32, numpy=
array([[[0., 0.],
        [0., 0.],
        [0., 1.],
        [0., 1.]],

       [[0., 1.],
        [0., 1.],
        [0., 0.],
        [0., 0.]],

       [[0., 1.],
        [0., 1.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 1.],
        [0., 1.],
        [0., 0.]],

       [[1., 0.],
        [1., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 1.],
        [0., 1.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 1.],
        [0., 1.],
        [0., 0.]]], dtype=float32)>

In [315]:
path_results_complete_layer_extended

<tf.Tensor: shape=(7, 0), dtype=float32, numpy=array([], shape=(7, 0), dtype=float32)>

In [316]:
path_results_complete

<tf.Tensor: shape=(7, 4, 2), dtype=float32, numpy=
array([[[0., 0.],
        [0., 0.],
        [0., 1.],
        [0., 1.]],

       [[0., 1.],
        [0., 1.],
        [0., 0.],
        [0., 0.]],

       [[0., 1.],
        [0., 1.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 1.],
        [0., 1.],
        [0., 0.]],

       [[1., 0.],
        [1., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 1.],
        [0., 1.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 1.],
        [0., 1.],
        [0., 0.]]], dtype=float32)>

In [320]:
internal_node_result_complete

<tf.Tensor: shape=(7, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

In [325]:
internal_node_result_complete[0:1,:]

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 0., 0.]], dtype=float32)>

In [337]:
internal_node_result_complete[:,begin_idx:end_idx]

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[0., 1., 0., 0.],
       [0., 1., 1., 0.],
       [1., 1., 1., 1.]], dtype=float32)>

In [334]:
path_results_complete_layer_extended

<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
array([[0., 1., 0., 0., 1., 0., 1., 1.],
       [0., 1., 1., 0., 1., 0., 0., 1.],
       [1., 1., 1., 1., 0., 0., 0., 0.]], dtype=float32)>

In [342]:
path_results_complete_layer

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[0., 1., 0., 0.],
       [0., 1., 1., 0.],
       [1., 1., 1., 1.]], dtype=float32)>

In [341]:
tf.concat([path_results_complete_layer, 1-path_results_complete_layer], 1)

<tf.Tensor: shape=(3, 8), dtype=float32, numpy=
array([[0., 1., 0., 0., 1., 0., 1., 1.],
       [0., 1., 1., 0., 1., 0., 0., 1.],
       [1., 1., 1., 1., 0., 0., 0., 0.]], dtype=float32)>

In [352]:
internal_node_result_complete

<tf.Tensor: shape=(3, 7), dtype=float32, numpy=
array([[0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 1., 0., 1., 1., 0.],
       [0., 1., 1., 1., 1., 1., 1.]], dtype=float32)>

In [359]:
internal_node_result_complete[:,0:1]

<tf.Tensor: shape=(3, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.]], dtype=float32)>

In [360]:
internal_node_result_complete[:,1:3]

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0., 0.],
       [1., 1.],
       [1., 1.]], dtype=float32)>

In [350]:
l1 = internal_node_result_complete[:,0:1]*internal_node_result_complete[:,1:3]
l1

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 0.],
       [0., 0.]], dtype=float32)>

In [351]:
r1 = (1-internal_node_result_complete[:,0:1])*internal_node_result_complete[:,1:3]
r1

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0., 0.],
       [1., 1.],
       [1., 1.]], dtype=float32)>

In [363]:
internal_node_result_complete[:,1:3]

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0., 0.],
       [1., 1.],
       [1., 1.]], dtype=float32)>

In [364]:
internal_node_result_complete = tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5))
internal_node_result_complete

<tf.Tensor: shape=(7, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

In [369]:
l1 = internal_node_result_complete[1:3,:][::2]
display(l1)
r1 = internal_node_result_complete[1:3,:][1::2]
display(r1)

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 1., 1.]], dtype=float32)>

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 1., 1.]], dtype=float32)>

In [378]:
l0 = internal_node_result_complete[0:1,:] 
l0

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 0., 0.]], dtype=float32)>

In [377]:
r0 = 1-internal_node_result_complete[0:1,:]
r0

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 1., 1.]], dtype=float32)>

In [379]:
l_0_1 = l0 * l1
l_0_1

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 0., 0.]], dtype=float32)>

In [380]:
r_0_1 = r0 * r1
r_0_1

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 1., 1.]], dtype=float32)>

In [382]:
l2 = internal_node_result_complete[3:7,:][::2]
display(l1)
r2 = internal_node_result_complete[3:7,:][1::2]
display(r1)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [0., 1., 1.]], dtype=float32)>

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

In [383]:
r_0_1_2 = r_0_1 * r2
r_0_1_2

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

In [384]:
l_0_1_2 = l_0_1 * l2
l_0_1_2

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 0., 0.]], dtype=float32)>

In [387]:
d0 = internal_node_result_complete[0:1,:]
display(d0)
d1 = internal_node_result_complete[1:3,:]
display(d1)
d2 = internal_node_result_complete[3:7,:]
display(d2)


<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 0., 0.]], dtype=float32)>

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 1., 1.],
       [0., 1., 1.]], dtype=float32)>

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

In [404]:
d0l_d1 = d0*tf.split(d1,2)[0]
display(d0l_d1)
d0r_d1 = (1-d0)*tf.split(d1,2)[1]
display(d0r_d1)

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 0., 0.]], dtype=float32)>

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0., 1., 1.]], dtype=float32)>

In [None]:
d1l_d2 = d0*tf.split(d1,2)[0]
display(d0l_d1)
d1r_d2 = (1-d0)*tf.split(d1,2)[1]
display(d0r_d1)

In [406]:
d2

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

In [413]:
(1-d2)

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[1., 1., 0.],
       [0., 0., 0.],
       [1., 0., 0.],
       [1., 1., 0.]], dtype=float32)>

In [429]:
tf.stack([d2, (1-d2)], axis=1)

<tf.Tensor: shape=(4, 2, 3), dtype=float32, numpy=
array([[[0., 0., 1.],
        [1., 1., 0.]],

       [[1., 1., 1.],
        [0., 0., 0.]],

       [[0., 1., 1.],
        [1., 0., 0.]],

       [[0., 0., 1.],
        [1., 1., 0.]]], dtype=float32)>

In [437]:
depth = 3
samples = 3

In [443]:
d1_combined = tf.reshape(tf.stack([d1, (1-d1)], axis=1), [2**(3-1),samples])
d1_combined

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[0., 1., 1.],
       [1., 0., 0.],
       [0., 1., 1.],
       [1., 0., 0.]], dtype=float32)>

In [442]:
d2_combined = tf.reshape(tf.stack([d2, (1-d2)], axis=1), [2**3,samples])
d2_combined

<tf.Tensor: shape=(8, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [1., 1., 0.],
       [1., 1., 1.],
       [0., 0., 0.],
       [0., 1., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 1., 0.]], dtype=float32)>

In [480]:
d1_combined

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[0., 1., 1.],
       [1., 0., 0.],
       [0., 1., 1.],
       [1., 0., 0.]], dtype=float32)>

In [494]:
d2_d1 = tf.reshape(tf.multiply(tf.split(d1_combined, 4), tf.split(d2_combined, 4)),  [2**3,samples])
d2_d1

<tf.Tensor: shape=(8, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 0.],
       [0., 1., 1.],
       [0., 0., 0.],
       [0., 0., 0.],
       [1., 0., 0.]], dtype=float32)>

In [495]:
#d2_d1 = tf.reshape(d1_combined * tf.split(d2_combined, 2),  [2**3,samples])
#d2_d1

In [496]:
d0_combined = tf.reshape(tf.stack([d0, (1-d0)], axis=1), [2**(3-2),samples])
d0_combined

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [1., 1., 1.]], dtype=float32)>

In [497]:
d2_d1_d0 = tf.reshape(tf.multiply(tf.split(d0_combined, 2), tf.split(d2_d1, 2)),  [2**3,samples])
d2_d1_d0


<tf.Tensor: shape=(8, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 1., 1.],
       [0., 0., 0.],
       [0., 0., 0.],
       [1., 0., 0.]], dtype=float32)>

In [473]:
d2_d1_d0 = tf.reshape(d0_combined * tf.split(d2_d1, 4),  [2**3,samples])
d2_d1_d0


<tf.Tensor: shape=(8, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [1., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [1., 0., 0.],
       [0., 0., 0.],
       [1., 0., 0.]], dtype=float32)>

In [450]:
internal_node_result_complete

<tf.Tensor: shape=(7, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

In [452]:
[2**layer_idx,samples]

[2, 3]

In [453]:
layer_result

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 1., 1.],
       [0., 1., 1.]], dtype=float32)>

In [454]:
tf.stack([layer_result, (1-layer_result)], axis=1)

<tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
array([[[0., 1., 1.],
        [1., 0., 0.]],

       [[0., 1., 1.],
        [1., 0., 0.]]], dtype=float32)>

In [456]:
layer_result_combined

<tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
array([[[0., 1., 1.],
        [1., 0., 0.]],

       [[0., 1., 1.],
        [1., 0., 0.]]], dtype=float32)>

In [462]:
internal_node_result_complete[0:end_idx,:]

<tf.Tensor: shape=(0, 3), dtype=float32, numpy=array([], shape=(0, 3), dtype=float32)>

In [463]:
tf.reshape(tf.stack([internal_node_result_complete[0:1,:], (1-internal_node_result_complete[0:1,:])], axis=1), [2**1, samples])


<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [1., 1., 1.]], dtype=float32)>

# WORKING HERE

In [None]:
split_values = tf.Variable(tf.keras.initializers.get({'class_name': 'RandomNormal', 'config': {'seed': 42}})(shape=(7, 5)),
                          trainable=True,
                          name='split_values')
split_values

split_index = tf.Variable(tf.keras.initializers.get({'class_name': 'RandomNormal', 'config': {'seed': 42}})(shape=(7, 5)),
                          trainable=True,
                          name='split_values')
split_index = tfa.seq2seq.hardmax(split_index)
split_index

In [None]:
entry = tf.constant([[0.1,0.2,0.3,0.4,0.5], [0.9,0.2,0.9,0.4,0.5], [0.9,0.9,0.5,0.4,0.5]])

In [507]:
#internal_node_result_complete = tf.transpose(tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5)))
internal_node_result_complete = tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5))


display(internal_node_result_complete)

paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

begin_idx = 0
end_idx = 1

#path_results_complete_layer_laeft = 1
#path_results_complete_layer_right = 1

layer_result = internal_node_result_complete[begin_idx:end_idx,:]

layer_result_combined = tf.reshape(tf.stack([layer_result, (1-layer_result)], axis=1), [2**1, samples])

path_results_complete = layer_result_combined

begin_idx = end_idx
end_idx = begin_idx + 2 ** (0 + 1)

print('___________________')
print(path_results_complete)
print('___________________')
for layer_idx in range(1, 3):
    print('layer_idx', layer_idx)
    layer_result = internal_node_result_complete[begin_idx:end_idx,:]
    
    layer_result_combined = tf.stack([layer_result, (1-layer_result)], axis=1)
    layer_result_combined = tf.reshape(layer_result_combined, [2**(layer_idx+1),samples])
    
    path_results_complete = tf.reshape(tf.multiply(tf.split(path_results_complete, 2**(layer_idx)), tf.split(layer_result_combined, 2**(layer_idx))),  [2**(layer_idx+1),samples])
    
    begin_idx = end_idx
    end_idx = begin_idx + 2 ** (layer_idx + 1)
    
    print('path_results_complete', path_results_complete)
    print('___________________')
    

<tf.Tensor: shape=(7, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

___________________
tf.Tensor(
[[0. 0. 0.]
 [1. 1. 1.]], shape=(2, 3), dtype=float32)
___________________
layer_idx 1
path_results_complete tf.Tensor(
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 1. 1.]
 [1. 0. 0.]], shape=(4, 3), dtype=float32)
___________________
layer_idx 2
path_results_complete tf.Tensor(
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 1. 1.]
 [0. 0. 0.]
 [0. 0. 0.]
 [1. 0. 0.]], shape=(8, 3), dtype=float32)
___________________


In [510]:
leaf_classes_array = tf.Variable(tf.keras.initializers.get({'class_name': 'RandomUniform', 'config': {'seed': 42}})(shape=(8,)),
                                      trainable=True,
                                      name='leaf_classes_array')
leaf_classes_array

<tf.Variable 'leaf_classes_array:0' shape=(8,) dtype=float32, numpy=
array([ 0.03001893, -0.01201599, -0.0346938 , -0.04343851,  0.01601556,
        0.02921749,  0.03235624, -0.00659348], dtype=float32)>

In [513]:
path_results_complete

<tf.Tensor: shape=(8, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 1., 1.],
       [0., 0., 0.],
       [0., 0., 0.],
       [1., 0., 0.]], dtype=float32)>

In [521]:
tf.reduce_sum(tf.transpose(path_results_complete)*leaf_classes_array, axis=1)

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([-0.00659348,  0.01601556,  0.01601556], dtype=float32)>

In [519]:
tf.reduce_sum(path_results_complete*tf.expand_dims(leaf_classes_array, 1), axis=0)

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([-0.00659348,  0.01601556,  0.01601556], dtype=float32)>

In [418]:
tf.multiply(tf.expand_dims(d2, 1), (1-d2))

<tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy=
array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[1., 1., 0.],
        [0., 0., 0.],
        [1., 0., 0.],
        [1., 1., 0.]],

       [[0., 1., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 1., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]], dtype=float32)>

In [395]:
internal_node_result_complete

<tf.Tensor: shape=(7, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

In [400]:
range(0, 3)

TypeError: range() takes no keyword arguments

In [394]:
#internal_node_result_complete = tf.transpose(tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5)))
internal_node_result_complete = tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5))


display(internal_node_result_complete)

paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

begin_idx = 0
end_idx = 1

#path_results_complete_layer_laeft = 1
#path_results_complete_layer_right = 1

print('___________________')
print(path_results_complete)
print('___________________')
for layer_idx in range(0, 3):
    
    internal_node_result_complete[begin_idx:end_idx,:]
    
    
    begin_idx = end_idx
    end_idx = begin_idx + 2 ** (layer_idx + 1)
    
    print('path_results_complete', path_results_complete)
    print('___________________')
    

<tf.Tensor: shape=(7, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.]], dtype=float32)>

___________________
tf.Tensor(
[[0.]
 [0.]
 [0.]], shape=(3, 1), dtype=float32)
___________________
path_results_complete tf.Tensor(
[[0.]
 [0.]
 [0.]], shape=(3, 1), dtype=float32)
___________________
path_results_complete tf.Tensor(
[[0.]
 [0.]
 [0.]], shape=(3, 1), dtype=float32)
___________________
path_results_complete tf.Tensor(
[[0.]
 [0.]
 [0.]], shape=(3, 1), dtype=float32)
___________________


In [None]:
d0 

In [None]:
 =aaa

In [358]:
internal_node_result_complete[:,3:7]

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[0., 1., 0., 0.],
       [0., 1., 1., 0.],
       [1., 1., 1., 1.]], dtype=float32)>

In [357]:
l1*internal_node_result_complete[:,3:7]

InvalidArgumentError: Incompatible shapes: [3,2] vs. [3,4] [Op:Mul]

In [344]:
#internal_node_result_complete = tf.round(tf.sigmoid(tf.reduce_sum(split_index*tf.expand_dims(entry, 1), axis=2) - tf.reduce_sum(split_values*split_index, axis=1) - 0.5))
#internal_node_result_complete_expanded = tf.concat([tf.expand_dims(internal_node_result_complete, 2), tf.expand_dims(1-internal_node_result_complete, 2)], axis=2) 
internal_node_result_complete = tf.transpose(tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5)))


display(internal_node_result_complete)

paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

begin_idx = 0
end_idx = 1

#path_results_complete_layer_laeft = 1
#path_results_complete_layer_right = 1

path_results_complete = internal_node_result_complete[:,0:1]
print('___________________')
print(path_results_complete)
print('___________________')
for layer_idx in range(0, 2):
    begin_idx = end_idx
    end_idx = begin_idx + 2 ** (layer_idx + 1)
    
    print('path_results_complete', path_results_complete)
    print('___________________')
    

<tf.Tensor: shape=(3, 7), dtype=float32, numpy=
array([[0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 1., 0., 1., 1., 0.],
       [0., 1., 1., 1., 1., 1., 1.]], dtype=float32)>

___________________
tf.Tensor(
[[0.]
 [0.]
 [0.]], shape=(3, 1), dtype=float32)
___________________
path_results_complete tf.Tensor(
[[0.]
 [0.]
 [0.]], shape=(3, 1), dtype=float32)
___________________
path_results_complete tf.Tensor(
[[0.]
 [0.]
 [0.]], shape=(3, 1), dtype=float32)
___________________


In [331]:
#internal_node_result_complete = tf.round(tf.sigmoid(tf.reduce_sum(split_index*tf.expand_dims(entry, 1), axis=2) - tf.reduce_sum(split_values*split_index, axis=1) - 0.5))
#internal_node_result_complete_expanded = tf.concat([tf.expand_dims(internal_node_result_complete, 2), tf.expand_dims(1-internal_node_result_complete, 2)], axis=2) 
internal_node_result_complete = tf.transpose(tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5)))


display(internal_node_result_complete)

paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

begin_idx = 0
end_idx = 1

#path_results_complete_layer_laeft = 1
#path_results_complete_layer_right = 1

path_results_complete = internal_node_result_complete[:,0:1]
print('___________________')
print(path_results_complete)
print('___________________')
for layer_idx in range(0, 2):
    begin_idx = end_idx
    end_idx = begin_idx + 2 ** (layer_idx + 1)
    
    print(begin_idx, end_idx, layer_idx)
    #path_results_complete_layer = tf.transpose(internal_node_result_complete[begin_idx:end_idx,:])
    path_results_complete_layer = internal_node_result_complete[:,begin_idx:end_idx]
    path_results_complete_layer_extended = tf.concat([path_results_complete_layer, 1-path_results_complete_layer], 1)
    print('path_results_complete_layer_extended', path_results_complete_layer_extended)
    
    path_results_complete_extended = tf.expand_dims(tf.concat([path_results_complete, 1-path_results_complete], 1), 1)
    print('path_results_complete_extended', path_results_complete_extended)
    #path_results_complete_layer_left, path_results_complete_layer_right = tf.split(path_results_complete_layer, 2)
    #path_results_complete_layer_left = path_results_complete_layer[0::2] 
    #path_results_complete_layer_right = path_results_complete_layer[1::2]
        
    #print(path_results_complete_layer_left)
    #print(path_results_complete_layer_right)    
    #path_results_complete = tf.stack([path_results_complete*path_results_complete_layer_left, (1-path_results_complete)*path_results_complete_layer_right])
   # print('path_results_complete_layer', path_results_complete_layer)

    path_results_complete = tf.multiply(path_results_complete_extended, tf.expand_dims(path_results_complete_layer_extended, 2))

    #tf.multiply(path_results_complete_extended, path_results_complete_layer_extended)
    
    #print('tf.squeeze(tf.stack([path_results_complete, (1-path_results_complete)])', tf.reshape(tf.stack([path_results_complete, (1-path_results_complete)]), [2 ** (layer_idx + 1)]))
    #path_results_complete = path_results_complete_layer * tf.reshape(tf.stack([path_results_complete, (1-path_results_complete)]), [2 ** (layer_idx + 1)])
    
    print('path_results_complete', path_results_complete)
    print('___________________')
    

<tf.Tensor: shape=(3, 7), dtype=float32, numpy=
array([[0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 1., 0., 1., 1., 0.],
       [0., 1., 1., 1., 1., 1., 1.]], dtype=float32)>

___________________
tf.Tensor(
[[0.]
 [0.]
 [0.]], shape=(3, 1), dtype=float32)
___________________
1 3 0
path_results_complete_layer_extended tf.Tensor(
[[0. 0. 1. 1.]
 [1. 1. 0. 0.]
 [1. 1. 0. 0.]], shape=(3, 4), dtype=float32)
path_results_complete_extended tf.Tensor(
[[[0. 1.]]

 [[0. 1.]]

 [[0. 1.]]], shape=(3, 1, 2), dtype=float32)
path_results_complete tf.Tensor(
[[[0. 0.]
  [0. 0.]
  [0. 1.]
  [0. 1.]]

 [[0. 1.]
  [0. 1.]
  [0. 0.]
  [0. 0.]]

 [[0. 1.]
  [0. 1.]
  [0. 0.]
  [0. 0.]]], shape=(3, 4, 2), dtype=float32)
___________________
3 7 1
path_results_complete_layer_extended tf.Tensor(
[[0. 1. 0. 0. 1. 0. 1. 1.]
 [0. 1. 1. 0. 1. 0. 0. 1.]
 [1. 1. 1. 1. 0. 0. 0. 0.]], shape=(3, 8), dtype=float32)
path_results_complete_extended tf.Tensor(
[[[[0. 0.]
   [0. 0.]
   [0. 1.]
   [0. 1.]
   [1. 1.]
   [1. 1.]
   [1. 0.]
   [1. 0.]]]


 [[[0. 1.]
   [0. 1.]
   [0. 0.]
   [0. 0.]
   [1. 0.]
   [1. 0.]
   [1. 1.]
   [1. 1.]]]


 [[[0. 1.]
   [0. 1.]
   [0. 0.]
   [0. 0.]
   [1. 0.]


In [174]:
#internal_node_result_complete = tf.round(tf.sigmoid(tf.reduce_sum(split_index*tf.expand_dims(entry, 1), axis=2) - tf.reduce_sum(split_values*split_index, axis=1) - 0.5))
#internal_node_result_complete_expanded = tf.concat([tf.expand_dims(internal_node_result_complete, 2), tf.expand_dims(1-internal_node_result_complete, 2)], axis=2) 
internal_node_result_complete = tf.round(tf.sigmoid(tf.reduce_sum(tf.expand_dims(split_index, 1)*entry, axis=2) - tf.expand_dims(tf.reduce_sum(split_values*split_index, axis=1), 1) - 0.5))


display(internal_node_result_complete)

paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

begin_idx = 0
end_idx = 1

path_results_complete = 1
print('___________________')
print('___________________')
for layer_idx in range(3):
    #path_results_complete_layer = tf.transpose(internal_node_result_complete[begin_idx:end_idx,:])
    path_results_complete_layer = internal_node_result_complete[begin_idx:end_idx,:]
    
    if layer_idx == 0:
        path_results_complete = path_results_complete * path_results_complete_layer
        print('path_results_complete', path_results_complete)
        print('___________________')
    else:    
        path_results_complete_layer_left, path_results_complete_layer_right = tf.split(path_results_complete_layer, 2)
        print('path_results_complete_layer', path_results_complete_layer)
        print('path_results_complete_layer_left', path_results_complete_layer_left)
        print('path_results_complete_layer_right', path_results_complete_layer_right)
        
        print('left', path_results_complete*path_results_complete_layer_left)
        print('right', (1-path_results_complete)*path_results_complete_layer_right)
        path_results_complete = tf.stack([path_results_complete*path_results_complete_layer_left, (1-path_results_complete)*path_results_complete_layer_right])
        print('path_results_complete', path_results_complete)
        print('___________________')
    begin_idx = end_idx
    end_idx = begin_idx + 2 ** (layer_idx + 1)    

<tf.Tensor: shape=(7, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.]], dtype=float32)>

___________________
___________________
path_results_complete tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
___________________
path_results_complete_layer tf.Tensor(
[[0.]
 [0.]], shape=(2, 1), dtype=float32)
path_results_complete_layer_left tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
path_results_complete_layer_right tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
left tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
right tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
path_results_complete tf.Tensor(
[[[0.]]

 [[0.]]], shape=(2, 1, 1), dtype=float32)
___________________
path_results_complete_layer tf.Tensor(
[[0.]
 [1.]
 [0.]
 [0.]], shape=(4, 1), dtype=float32)
path_results_complete_layer_left tf.Tensor(
[[0.]
 [1.]], shape=(2, 1), dtype=float32)
path_results_complete_layer_right tf.Tensor(
[[0.]
 [0.]], shape=(2, 1), dtype=float32)
left tf.Tensor(
[[[0.]
  [0.]]

 [[0.]
  [0.]]], shape=(2, 2, 1), dtype=float32)
right tf.Tensor(
[[[0.]
  [0.]]

 [[0.]
  [0.]]], shape=(2, 2, 1), dt

In [146]:
internal_node_result_complete = tf.round(tf.sigmoid(tf.reduce_sum(split_index*tf.expand_dims(entry, 1), axis=2) - tf.reduce_sum(split_values*split_index, axis=1) - 0.5))
internal_node_result_complete_expanded = tf.concat([tf.expand_dims(internal_node_result_complete, 2), tf.expand_dims(1-internal_node_result_complete, 2)], axis=2) 

display(internal_node_result_complete_expanded)

paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

begin_idx = 0
end_idx = 1

path_results_complete = 1
for layer_idx in range(3):
    #path_results_complete_layer = tf.transpose(internal_node_result_complete[begin_idx:end_idx,:])
    path_results_complete_layer = internal_node_result_complete_expanded[:,begin_idx:end_idx,:]
    
    #path_results_complete = tf.gather_nd(path_result_complete, [[node] for node in path])
    print(layer_idx, 'path_results_complete', path_results_complete)
    print(layer_idx, 'path_results_complete_layer', path_results_complete_layer)
    if layer_idx != 2:   
        path_results_complete = path_results_complete * path_results_complete_layer
    else:
        path_results_complete = tf.expand_dims(path_results_complete, 2) * path_results_complete_layer

    print(layer_idx, 'path_results_complete', path_results_complete)
    
    begin_idx = end_idx
    end_idx = begin_idx + 2 ** (layer_idx + 1)    

<tf.Tensor: shape=(2, 7, 2), dtype=float32, numpy=
array([[[0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.]],

       [[0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.]]], dtype=float32)>

0 path_results_complete 1
0 path_results_complete_layer tf.Tensor(
[[[0. 1.]]

 [[0. 1.]]], shape=(2, 1, 2), dtype=float32)
0 path_results_complete tf.Tensor(
[[[0. 1.]]

 [[0. 1.]]], shape=(2, 1, 2), dtype=float32)
1 path_results_complete tf.Tensor(
[[[0. 1.]]

 [[0. 1.]]], shape=(2, 1, 2), dtype=float32)
1 path_results_complete_layer tf.Tensor(
[[[0. 1.]
  [0. 1.]]

 [[1. 0.]
  [1. 0.]]], shape=(2, 2, 2), dtype=float32)
1 path_results_complete tf.Tensor(
[[[0. 1.]
  [0. 1.]]

 [[0. 0.]
  [0. 0.]]], shape=(2, 2, 2), dtype=float32)
2 path_results_complete tf.Tensor(
[[[0. 1.]
  [0. 1.]]

 [[0. 0.]
  [0. 0.]]], shape=(2, 2, 2), dtype=float32)
2 path_results_complete_layer tf.Tensor(
[[[0. 1.]
  [1. 0.]
  [0. 1.]
  [0. 1.]]

 [[0. 1.]
  [1. 0.]
  [1. 0.]
  [0. 1.]]], shape=(2, 4, 2), dtype=float32)
2 path_results_complete tf.Tensor(
[[[[0. 1.]
   [0. 0.]
   [0. 1.]
   [0. 1.]]

  [[0. 1.]
   [0. 0.]
   [0. 0.]
   [0. 1.]]]


 [[[0. 0.]
   [0. 0.]
   [0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0.

In [62]:
path = [0,1,3]

In [63]:
tf.gather_nd(path_result_complete, [[node] for node in path])

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 1.],
       [0., 0.]], dtype=float32)>

In [24]:
tf.round(tf.sigmoid(tf.reduce_sum(entry*split_index, axis=1) - tf.reduce_sum(split_values*split_index, axis=1) - 0.5))

<tf.Tensor: shape=(7,), dtype=float32, numpy=array([0., 0., 0., 0., 1., 0., 0.], dtype=float32)>

In [None]:
tf.round(tf.sigmoid(respective_input_value - split_values*split_index - 0.5))

<tf.Tensor: shape=(7, 5), dtype=float32, numpy=
array([[0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.]], dtype=float32)>

In [9]:
entry*

<tf.Tensor: shape=(7, 5), dtype=float32, numpy=
array([[ 0.10033574,  0.0927311 ,  0.17358351, -0.2039914 , -0.11956531],
       [-0.08597916, -0.02526746, -0.00801202,  0.1671228 ,  0.36241844],
       [ 0.08227255, -0.0588528 ,  0.09707038,  0.15950945, -0.08655648],
       [-0.07950286,  0.10232508, -0.24084124,  0.18319482,  0.13368854],
       [ 0.03928551,  0.09138457, -0.00947421,  0.3317399 , -0.23567489],
       [-0.00194597,  0.00422414, -0.06040039, -0.5435133 ,  0.10659747],
       [-0.0222547 ,  0.0646157 ,  0.39414728, -0.09154531, -0.31241184]],
      dtype=float32)>

In [3]:
if False:
    tf.random.set_seed(config['computation']['random_seed'])
    np.random.seed(config['computation']['random_seed'])
    random.seed(config['computation']['random_seed'])  
    
    metrics = ['accuracy', 'f1']
    
    config_test = deepcopy(config)
    #config_test['make_classification']['n_samples'] = 10_000
    #config_test['dhdt']['epochs'] = 500
    config_test['dhdt']['initializer_index'] = 'he_normal'#'ones', #GlorotUniform
    config_test['dhdt']['activation'] = 'sigmoid'

    dataset_dict = {}
    model_dict = {}

    scores_dict = {'sklearn': {},
                   'DHDT': {}}

    dataset_dict = get_preprocessed_dataset('make_classification',
                                            random_seed=config_test['computation']['random_seed'],
                                            config=config_test['make_classification'],
                                            verbosity=1)

    model_dict['sklearn'] = DecisionTreeClassifier(max_depth=3, 
                                                   random_state=config_test['computation']['random_seed'])

    model_dict['sklearn'].fit(dataset_dict['X_train'], 
                              dataset_dict['y_train'])



    model_dict['DHDT'] = DHDT(dataset_dict['X_train'].shape[1],

                                depth = config_test['dhdt']['depth'],

                                learning_rate = config_test['dhdt']['learning_rate'],
                                optimizer = config_test['dhdt']['optimizer'],

                                initializer = config_test['dhdt']['initializer'],
                                initializer_index = config_test['dhdt']['initializer_index'],                              
                              
                                beta_1 = config_test['dhdt']['beta_1'],
                                beta_2 = config_test['dhdt']['beta_2'],

                                activation = config_test['dhdt']['activation'],
                                squeeze_factor = config_test['dhdt']['squeeze_factor'],

                                loss = config_test['dhdt']['loss'],#'mae',

                                random_seed = config_test['computation']['random_seed'],
                                verbosity = 1)        


    scores_dict['history'] = model_dict['DHDT'].fit(dataset_dict['X_train'], 
                                                  dataset_dict['y_train'], 
                                                  batch_size=config_test['dhdt']['batch_size'], 
                                                  epochs=config_test['dhdt']['epochs'], 
                                                  early_stopping_epochs=config_test['dhdt']['early_stopping_epochs'], 
                                                  valid_data=(dataset_dict['X_valid'], dataset_dict['y_valid']))



    dataset_dict['y_test_dhdt'] = model_dict['DHDT'].predict(dataset_dict['X_test'])
    dataset_dict['y_valid_dhdt'] = model_dict['DHDT'].predict(dataset_dict['X_valid'])

    dataset_dict['y_test_sklearn'] = model_dict['sklearn'].predict(dataset_dict['X_test'])
    dataset_dict['y_valid_sklearn'] = model_dict['sklearn'].predict(dataset_dict['X_valid'])     
    
    for metric in metrics:
        
        if metric in ['accuracy', 'f1']:
            y_test_dhdt = np.round(dataset_dict['y_test_dhdt'])
            y_valid_dhdt = np.round(dataset_dict['y_valid_dhdt'])
            y_test_sklearn = np.round(dataset_dict['y_test_sklearn'])
            y_valid_sklearn = np.round(dataset_dict['y_valid_sklearn'])         
        else:
            y_test_dhdt = dataset_dict['y_test_dhdt']
            y_valid_dhdt = dataset_dict['y_valid_dhdt']
            y_test_sklearn = dataset_dict['y_test_sklearn']
            y_valid_sklearn =    dataset_dict['y_valid_sklearn']                
        
        scores_dict['sklearn'][metric + '_test'] = sklearn.metrics.get_scorer(metric)._score_func(dataset_dict['y_test'], y_test_sklearn)
        scores_dict['DHDT'][metric + '_test'] = sklearn.metrics.get_scorer(metric)._score_func(dataset_dict['y_test'], y_test_dhdt)

        scores_dict['sklearn'][metric + '_valid'] = sklearn.metrics.get_scorer(metric)._score_func(dataset_dict['y_valid'], y_valid_sklearn)   
        scores_dict['DHDT'][metric + '_valid'] = sklearn.metrics.get_scorer(metric)._score_func(dataset_dict['y_valid'], y_valid_dhdt)


In [None]:
if True:
    parallel_eval_synthetic = Parallel(n_jobs=config['computation']['n_jobs'], verbose=3, backend='loky') #loky #sequential multiprocessing
    evaluation_results_synthetic = parallel_eval_synthetic(delayed(evaluate_synthetic_parallel)(index = index,
                                                                                                random_seed_data = config['computation']['random_seed']+index,
                                                                                                random_seed_model = config['computation']['random_seed'],#+random_seed_model,
                                                                                                config = config,
                                                                                                verbosity = -1) for index in range(config['make_classification']['num_eval']))

    for i, synthetic_result in enumerate(evaluation_results_synthetic):
        if i == 0:
            model_dict_synthetic = synthetic_result[0]
            scores_dict_synthetic = synthetic_result[1]
            dataset_dict_synthetic = synthetic_result[2]
        else: 
            model_dict_synthetic = mergeDict(model_dict_synthetic, synthetic_result[0])
            scores_dict_synthetic = mergeDict(scores_dict_synthetic, synthetic_result[1])
            dataset_dict_synthetic = mergeDict(dataset_dict_synthetic, synthetic_result[2])        
    
    metric_identifer = '_test'
    metrics = ['accuracy', 'f1']
    index = [i for i in range(config['make_classification']['num_eval'])]
    columns = flatten_list([[[approach + ' ' + metric + '_mean', approach + ' ' + metric + '_max', approach + ' ' + metric + '_std'] for metric in metrics] for approach in ['DHDT', 'sklearn']])


    results_DHDT = None
    results_sklearn = None
    for metric in metrics:
        scores_DHDT = [scores_dict_synthetic[i]['DHDT'][metric + metric_identifer] for i in range(config['make_classification']['num_eval'])]

        scores_sklearn = [scores_dict_synthetic[i]['sklearn'][metric + metric_identifer] for i in range(config['make_classification']['num_eval'])]

        scores_DHDT_mean = np.mean(scores_DHDT, axis=1) if config['computation']['trials'] > 1 else scores_DHDT
        scores_sklearn_mean = np.mean(scores_sklearn, axis=1) if config['computation']['trials'] > 1 else scores_sklearn

        scores_DHDT_max = np.max(scores_DHDT, axis=1) if config['computation']['trials'] > 1 else scores_DHDT
        scores_sklearn_max = np.max(scores_sklearn, axis=1) if config['computation']['trials'] > 1 else scores_sklearn

        scores_DHDT_std = np.std(scores_DHDT, axis=1) if config['computation']['trials'] > 1 else np.array([0.0] * config['computation']['trials'])
        scores_sklearn_std = np.std(scores_sklearn, axis=1) if config['computation']['trials'] > 1 else np.array([0.0] * config['computation']['trials'])

        results_DHDT_by_metric = np.vstack([scores_DHDT_mean, scores_DHDT_max, scores_DHDT_std])
        results_sklearn_by_metric = np.vstack([scores_sklearn_mean, scores_sklearn_max, scores_sklearn_std])

        if results_DHDT is None and results_sklearn is None:
            results_DHDT = results_DHDT_by_metric
            results_sklearn = results_sklearn_by_metric
        else:
            results_DHDT = np.vstack([results_DHDT, results_DHDT_by_metric])
            results_sklearn = np.vstack([results_sklearn, results_sklearn_by_metric])

    scores_dataframe_synthetic = pd.DataFrame(data=np.vstack([results_DHDT, results_sklearn]).T, index = index, columns = columns)    
        
    index = [index_name.split(' ')[1] for index_name in scores_dataframe_synthetic.mean()[scores_dataframe_synthetic.shape[1]//2:].index]
    mean_result_dataframe_synthetic = np.round(pd.DataFrame(data=np.vstack([scores_dataframe_synthetic.mean()[:scores_dataframe_synthetic.shape[1]//2], scores_dataframe_synthetic.mean()[scores_dataframe_synthetic.shape[1]//2:]]).T, index=index, columns=['DHDT', 'sklearn']), 3)

        
    display(scores_dataframe_synthetic.head(5))
    display(scores_dataframe_synthetic[scores_dataframe_synthetic.columns[0::3]].iloc[:,[0,2,1,3]].head(5))
    display(scores_dataframe_synthetic[scores_dataframe_synthetic.columns[1::3]].iloc[:,[0,2,1,3]].head(5))
    display(mean_result_dataframe_synthetic)
    

[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done   4 out of  30 | elapsed:  4.2min remaining: 27.0min
[Parallel(n_jobs=30)]: Done  15 out of  30 | elapsed:  4.5min remaining:  4.5min


## Real-World Eval

In [None]:
if False:

    identifier_list = [
                        'Adult',#: 32,
                        'Bank Marketing',#: 32,
                        'Loan Credit',#: 32,

                        'Credit Card',#: 23, 
                        'Car',#: 21,


                        'Absenteeism',#: 15,
                        'Loan House',#: 15,
                        'Cervical Cancer',#: 15,

                        'Heart Disease',#: 13,           

                        'Titanic',#: 10,
                        'Medical Insurance',#: 10,
                        'Wisconsin Breast Cancer Original',#: 10,
                        'Wisconsin Diagnostic Breast Cancer',#: 10,
                        'Wisconsin Prognostic Breast Cancer',#: 10,
                        'Abalone',#: 10,

                        'Habermans Survival',#: 3, 
                      ]
    
    parallel_eval_real_world = Parallel(n_jobs=config['computation']['n_jobs'], verbose=3, backend='loky') #loky #sequential multiprocessing
    evaluation_results_real_world = parallel_eval_real_world(delayed(evaluate_real_world_parallel)(identifier_list=identifier_list, 
                                                                                                   random_seed_model=config['computation']['random_seed']+i,
                                                                                                   config = config,
                                                                                                   verbosity = -1) for i in range(config['computation']['trials']))


    for i, real_world_result in enumerate(evaluation_results_real_world):
        if i == 0:
            model_dict_real_world = real_world_result[0]
            scores_dict_real_world = real_world_result[1]
            dataset_dict_real_world = real_world_result[2]
        else: 
            model_dict_real_world = mergeDict(model_dict_real_world, real_world_result[0])
            scores_dict_real_world = mergeDict(scores_dict_real_world, real_world_result[1])
            dataset_dict_real_world = mergeDict(dataset_dict_real_world, real_world_result[2])    

    metric_identifer = '_test'
    metrics = ['accuracy', 'f1']
    index = identifier_list
    columns = flatten_list([[[approach + ' ' + metric + '_mean', approach + ' ' + metric + '_max', approach + ' ' + metric + '_std'] for metric in metrics] for approach in ['DHDT', 'sklearn']])


    results_DHDT = None
    results_sklearn = None
    for metric in metrics:
        scores_DHDT = [scores_dict_real_world[identifier]['DHDT'][metric + metric_identifer] for identifier in identifier_list]

        scores_sklearn = [scores_dict_real_world[identifier]['sklearn'][metric + metric_identifer] for identifier in identifier_list]    

        scores_DHDT_mean = np.mean(scores_DHDT, axis=1) if config['computation']['trials'] > 1 else scores_DHDT
        scores_sklearn_mean = np.mean(scores_sklearn, axis=1) if config['computation']['trials'] > 1 else scores_sklearn

        scores_DHDT_max = np.max(scores_DHDT, axis=1) if config['computation']['trials'] > 1 else scores_DHDT
        scores_sklearn_max = np.max(scores_sklearn, axis=1) if config['computation']['trials'] > 1 else scores_sklearn

        scores_DHDT_std = np.std(scores_DHDT, axis=1) if config['computation']['trials'] > 1 else np.array([0.0] * config['computation']['trials'])
        scores_sklearn_std = np.std(scores_sklearn, axis=1) if config['computation']['trials'] > 1 else np.array([0.0] * config['computation']['trials'])

        results_DHDT_by_metric = np.vstack([scores_DHDT_mean, scores_DHDT_max, scores_DHDT_std])
        results_sklearn_by_metric = np.vstack([scores_sklearn_mean, scores_sklearn_max, scores_sklearn_std])

        if results_DHDT is None and results_sklearn is None:
            results_DHDT = results_DHDT_by_metric
            results_sklearn = results_sklearn_by_metric
        else:
            results_DHDT = np.vstack([results_DHDT, results_DHDT_by_metric])
            results_sklearn = np.vstack([results_sklearn, results_sklearn_by_metric])
            
    scores_dataframe_real_world = pd.DataFrame(data=np.vstack([results_DHDT, results_sklearn]).T, index = index, columns = columns)
            
    index = [index_name.split(' ')[1] for index_name in scores_dataframe_real_world.mean()[scores_dataframe_real_world.shape[1]//2:].index]
    mean_result_dataframe_real_world = np.round(pd.DataFrame(data=np.vstack([scores_dataframe_real_world.mean()[:scores_dataframe_real_world.shape[1]//2], scores_dataframe_real_world.mean()[scores_dataframe_real_world.shape[1]//2:]]).T, index=index, columns=['DHDT', 'sklearn']), 3)
                
    display(scores_dataframe_real_world)
    display(scores_dataframe_real_world[scores_dataframe_real_world.columns[0::3]].iloc[:,[0,2,1,3]])    
    display(scores_dataframe_real_world[scores_dataframe_real_world.columns[1::3]].iloc[:,[0,2,1,3]])    

    display(mean_result_dataframe_real_world)
    


In [None]:
if False:
    plot_index = 0
    
    identifier = identifier_list[0]#"Absenteeism"
    plt.figure(figsize=(15,8))
    image = model_dict_real_world[identifier]['DHDT'][plot_index].plot(normalizer_list=dataset_dict_real_world[identifier]['normalizer_list'])
    display(image)

    plt.figure(figsize=(15,8))
    plot_tree(model_dict_real_world[identifier]['sklearn'][plot_index], fontsize=10) 
    plt.show()

# Hyperparameter Optimization

In [None]:
parameter_dict = {
        'depth': [3],
        'learning_rate': [0.05, 0.01, 0.005, 0.001, 0.0005], #[0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001],
        
        'loss': ['binary_crossentropy'], #['binary_crossentropy', 'rmse'], #'mae',
        'optimizer': ['adam', 'sgd'], #['adam', 'sgd'],        
        
        'initializer': ['GlorotUniform', 'GlorotNormal', 'HeUniform', 'HeNormal', 'LecunUniform', 'LecunNormal', 'RandomNormal', 'RandomUniform'], #RandomNormal, RandomUniform
        'initializer_index': ['GlorotUniform', 'GlorotNormal', 'HeUniform', 'HeNormal', 'LecunUniform', 'LecunNormal', 'RandomNormal', 'RandomUniform', 'zeros', 'ones'], #RandomNormal, RandomUniform

        'beta_1': [10, 50], #[10, 50, 100],
        'beta_2': [10, 50], #[10, 50, 100],
        
        'activation': ['sigmoid', 'tanh'],
        'squeeze_factor': [0.2, 0.5, 1, 2, 5], #[0.2, 0.5, 1, 2, 5],    
}

parameter_grid = ParameterGrid(parameter_dict)

In [None]:
if False:
    parallel_hpo_synthetic = Parallel(n_jobs=config['computation']['n_jobs'], verbose=3, backend='loky') #loky #sequential multiprocessing
    evaluation_results_hpo_synthetic = parallel_hpo_synthetic(delayed(evaluate_parameter_setting_synthetic)(parameter_setting, config, metrics= ['accuracy', 'f1']) for parameter_setting in parameter_grid)

    comparator_metric = 'f1'

    dhdt_mean_list = [np.mean(evaluation_results_hpo_synthetic[i][0]['DHDT ' + comparator_metric + '_mean']) for i in range(len(parameter_grid))]
    dhdt_mean_list = sorted(dhdt_mean_list)

    dhdt_max_mean_list = [np.mean(evaluation_results_hpo_synthetic[i][0]['DHDT ' + comparator_metric + '_max']) for i in range(len(parameter_grid))]
    dhdt_max_mean_list = [x for _, x in sorted(zip(dhdt_mean_list, dhdt_max_mean_list))]

    sklearn_mean_list = [np.mean(evaluation_results_hpo_synthetic[i][0]['sklearn ' + comparator_metric + '_mean']) for i in range(len(parameter_grid))]
    sklearn_mean_list = [x for _, x in sorted(zip(dhdt_mean_list, sklearn_mean_list))]

    parameter_setting_list = [evaluation_results_hpo_synthetic[i][1] for i in range(len(parameter_grid))]
    parameter_setting_list = [x for _, x in sorted(zip(dhdt_mean_list, parameter_setting_list))]

    hpo_results_synthetic = []
    for i, (dhdt_mean, dhdt_max_mean, sklearn_mean, parameter_setting) in enumerate(zip(dhdt_mean_list, dhdt_max_mean_list, sklearn_mean_list, parameter_setting_list)):
        result_dict = {
                             'DHDT mean (mean)': dhdt_mean,
                             'DHDT max (mean)': dhdt_max_mean,
                             'sklearn mean': sklearn_mean,
                             'parameters': parameter_setting
                            }

        hpo_results_synthetic.append(result_dict)

    display(hpo_results_synthetic[:5])
    
    
    

In [None]:
if False:
    identifier_list = [
                        'Adult',#: 32,
                        'Bank Marketing',#: 32,
                        'Loan Credit',#: 32,

                        'Credit Card',#: 23, 
                        'Car',#: 21,


                        'Absenteeism',#: 15,
                        'Loan House',#: 15,
                        'Cervical Cancer',#: 15,

                        'Heart Disease',#: 13,           

                        'Titanic',#: 10,
                        'Medical Insurance',#: 10,
                        'Wisconsin Breast Cancer Original',#: 10,
                        'Wisconsin Diagnostic Breast Cancer',#: 10,
                        'Wisconsin Prognostic Breast Cancer',#: 10,
                        'Abalone',#: 10,

                        'Habermans Survival',#: 3, 
                      ]

    hpo_results_real_world = {}

    for identifier in identifier_list:
        parallel_hpo_real = Parallel(n_jobs=config['computation']['n_jobs'], verbose=3, backend='loky') #loky #sequential multiprocessing
        evaluation_results_hpo_real = parallel_hpo_real(delayed(evaluate_parameter_setting_real_world)(parameter_setting, identifier, config, metrics= ['accuracy', 'f1']) for parameter_setting in parameter_grid)

        comparator_metric = 'f1'

        dhdt_mean_list = [np.mean(evaluation_results_hpo_real[i][0]['DHDT ' + comparator_metric + '_mean']) for i in range(len(parameter_grid))]
        dhdt_mean_list = sorted(dhdt_mean_list)

        dhdt_max_mean_list = [np.mean(evaluation_results_hpo_real[i][0]['DHDT ' + comparator_metric + '_max']) for i in range(len(parameter_grid))]
        dhdt_max_mean_list = [x for _, x in sorted(zip(dhdt_mean_list, dhdt_max_mean_list))]

        sklearn_mean_list = [np.mean(evaluation_results_hpo_real[i][0]['sklearn ' + comparator_metric + '_mean']) for i in range(len(parameter_grid))]
        sklearn_mean_list = [x for _, x in sorted(zip(dhdt_mean_list, sklearn_mean_list))]

        parameter_setting_list = [evaluation_results_hpo_real[i][1] for i in range(len(parameter_grid))]
        parameter_setting_list = [x for _, x in sorted(zip(dhdt_mean_list, parameter_setting_list))]

        hpo_results_real_world_by_identifer = []
        for i, (dhdt_mean, dhdt_max_mean, sklearn_mean, parameter_setting) in enumerate(zip(dhdt_mean_list, dhdt_max_mean_list, sklearn_mean_list, parameter_setting_list)):
            result_dict = {
                                 'DHDT mean (mean)': dhdt_mean,
                                 'DHDT max (mean)': dhdt_max_mean,
                                 'sklearn mean': sklearn_mean,
                                 'parameters': parameter_setting
                                }

            hpo_results_real_world_by_identifer.append(result_dict)

        hpo_results_real_world[identifier] = hpo_results_real_world_by_identifer

        display(hpo_results_real_world_by_identifer[:5])
        