In [1]:
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler


import os
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

from IPython.display import Image
from IPython.display import display, clear_output

import pandas as pd

os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = '' #'true'

#######################################################################################################################################
import warnings
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import logging

tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(3)

from keras import backend as K
from keras.utils.generic_utils import get_custom_objects
def sigmoid_squeeze(x):
    x = 1/(1+K.exp(-3*x))
    return x  


In [2]:
def make_batch(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]    

In [3]:
def normalize_real_world_data(X_data):
    normalizer_list = []
    if isinstance(X_data, pd.DataFrame):
        for column_name in X_data:
            scaler = MinMaxScaler()
            scaler.fit(X_data[column_name].values.reshape(-1, 1))
            X_data[column_name] = scaler.transform(X_data[column_name].values.reshape(-1, 1)).ravel()
            normalizer_list.append(scaler)
    else:
        for i, column in enumerate(X_data.T):
            scaler = MinMaxScaler()
            scaler.fit(column.reshape(-1, 1))
            X_data[:,i] = scaler.transform(column.reshape(-1, 1)).ravel()
            normalizer_list.append(scaler)
        
    return X_data, normalizer_list

In [4]:
tfa.activations.sparsemax([1.,3.,5.])

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 0., 1.], dtype=float32)>

In [5]:
tfa.seq2seq.hardmax([1.,3.,5.])

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 0., 1.], dtype=float32)>

In [6]:
tf.keras.activations.hard_sigmoid(tf.constant(2.))

<tf.Tensor: shape=(), dtype=float32, numpy=0.9>

In [7]:
tf.sigmoid(1000*0.1)

<tf.Tensor: shape=(), dtype=float32, numpy=1.0>

In [8]:
tfa.activations.sparsemax([1., 9, 6])

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 1., 0.], dtype=float32)>

In [9]:
class DHDT(tf.Module):
    
    def __init__(
            self,
            depth=3,
            function_representation_type = 3,
            number_of_variables = 5,
            learning_rate=1e-3,
            loss='binary_crossentropy',#'mae',
            optimizer = 'adam',
            random_seed=42,
            verbosity=1):    
        
        
        self.depth = depth
        self.learning_rate = learning_rate
        self.loss = tf.keras.losses.get(loss)
        self.seed = random_seed
        self.verbosity = verbosity
        self.function_representation_type = function_representation_type
        self.number_of_variables = number_of_variables
        
        self.internal_node_num_ = 2 ** self.depth - 1 
        self.leaf_node_num_ = 2 ** self.depth
        
        tf.random.set_seed(self.seed)
        
        function_representation_length = ( 
          (2 ** self.depth - 1) * 2 + (2 ** self.depth)  if self.function_representation_type == 1 
          else (2 ** self.depth - 1) + ((2 ** self.depth - 1) * self.number_of_variables) + (2 ** self.depth) if self.function_representation_type == 2 
          else ((2 ** self.depth - 1) * self.number_of_variables * 2) + (2 ** self.depth)  if self.function_representation_type >= 3 
          else None
                                      )        
        
        self.dt_params =  tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(function_representation_length,)),
                                      trainable=True,
                                      name='dt_params')
        
        tf.print(self.dt_params)
        
        maximum_depth = self.depth
        leaf_node_num_ = 2 ** maximum_depth
        internal_node_num_ = 2 ** maximum_depth - 1
        
        #dt_params_activation = self.dt_params#self.apply_activation(self.dt_params)
        
        #internal_nodes, leaf_nodes = self.get_shaped_parameters_for_decision_tree(dt_params_activation)

        internal_node_num_ = self.internal_node_num_
        leaf_node_num_ = self.leaf_node_num_

        split_values_num_params = self.number_of_variables * internal_node_num_
        split_index_num_params = self.number_of_variables * internal_node_num_
        leaf_classes_num_params = self.leaf_node_num_         
        
        self.split_values = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(split_values_num_params,)),
                                      trainable=True,
                                      name='dt_params')
        #tf.sigmoid(self.dt_params[:split_values_num_params])
        self.split_index_array = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(split_index_num_params,)),
                                      trainable=True,
                                      name='dt_params')
        #self.dt_params[split_values_num_params:split_values_num_params+split_index_num_params]    
        self.leaf_classes_array = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(leaf_classes_num_params,)),
                                      trainable=True,
                                      name='dt_params')
        #tf.sigmoid(self.dt_params[split_values_num_params+split_index_num_params:])        
        
        self.optimizer = tf.keras.optimizers.get(optimizer)
        self.optimizer.learning_rate = self.learning_rate
        
    def fit(self, X, y, batch_size=32, epochs=100, early_stopping_epochs=5):
        
        minimum_loss_epoch = np.inf
        epochs_without_improvement = 0        
        
        for current_epoch in tqdm(range(epochs)):
            tf.random.set_seed(self.seed + current_epoch)
            X = tf.random.shuffle(X, seed=self.seed + current_epoch)
            tf.random.set_seed(self.seed + current_epoch)
            y = tf.random.shuffle(y, seed=self.seed + current_epoch)
            
            loss_list = []
            for index, (X_batch, y_batch) in enumerate(zip(make_batch(X, batch_size), make_batch(y, batch_size))):
                current_loss = self.backward(X_batch, y_batch)
                loss_list.append(float(current_loss))
                
                if self.verbosity >= 2:
                    batch_idx = (index+1)*batch_size
                    msg = "Epoch: {:02d} | Batch: {:03d} | Loss: {:.5f} |"
                    print(msg.format(current_epoch, batch_idx, current_loss))                   
                  
            if self.verbosity > 0:    
                msg = "Epoch: {:02d} | Loss: {:.5f} |"
                print(msg.format(current_epoch, np.mean(loss_list)))              
            
            current_loss_epoch = np.mean(loss_list)

            if current_loss_epoch < minimum_loss_epoch:
                minimum_loss_epoch = current_loss_epoch
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                
            if epochs_without_improvement >= early_stopping_epochs:
                break
    
    
    
    @tf.function(jit_compile=True)                    
    def forward(self, X):
        X = tf.dtypes.cast(tf.convert_to_tensor(X), tf.float32)       

        if True:
            internal_node_num_ = self.internal_node_num_
            leaf_node_num_ = self.leaf_node_num_

            split_values_num_params = self.number_of_variables * internal_node_num_
            split_index_num_params = self.number_of_variables * internal_node_num_
            leaf_classes_num_params = self.leaf_node_num_             
            
            paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

            #split_index_array = tfa.seq2seq.hardmax(tf.reshape(split_index_array, (internal_node_num_, -1)))
            function_values_dhdt = []
            for entry in X:
                
                result = 0
                for leaf_index, path in enumerate(paths):
                    path_result_left = 1
                    path_result_right = 1
                    for internal_node_index in path: 
                        #tf.print(path, internal_node_index)
                        #split_index = tfa.seq2seq.hardmax(self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])
                        split_index = tfa.activations.sparsemax(10 * self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])                        
                        split_values = self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]

                        
                        internal_node_split_value = tf.reduce_sum(split_index*split_values)
                        respective_input_value = tf.reduce_sum(split_index*entry)

                                                
                        #tf.print('internal_node_split_value', internal_node_split_value)
                        #tf.print('respective_input_value', respective_input_value)
                        
                        #split_decision = tf.keras.activations.relu(tf.math.sign(respective_input_value - internal_node_split_value - 0.5))
                        split_decision = tf.sigmoid(1000 * (respective_input_value - internal_node_split_value - 0.5))
                        
                        #tf.print('split_decision', split_decision)


                        path_result_left *= split_decision
                        path_result_right *= (1 - split_decision)

                        #tf.print('path_result_left', path_result_left)
                        #tf.print('path_result_right', path_result_right)

                    
                    result += self.leaf_classes_array[leaf_index*2] * path_result_left + self.leaf_classes_array[leaf_index*2+1] * path_result_right
                #tf.print('RESULT', result)
                    
                function_values_dhdt.append(result)
            function_values_dhdt = tf.stack(function_values_dhdt)
            #tf.print('function_values_dhdt', function_values_dhdt)
            
        elif False:

            internal_node_num_ = self.internal_node_num_
            leaf_node_num_ = self.leaf_node_num_

            split_values_num_params = self.number_of_variables * internal_node_num_
            split_index_num_params = self.number_of_variables * internal_node_num_
            leaf_classes_num_params = self.leaf_node_num_ 

            #split_values = tf.sigmoid(self.dt_params[:split_values_num_params])
            #split_index_array = self.dt_params[split_values_num_params:split_values_num_params+split_index_num_params]    
            #leaf_classes_array = tf.sigmoid(self.dt_params[split_values_num_params+split_index_num_params:])

            split_values = tf.sigmoid(self.split_values)
            split_index_array = self.split_index_array   
            leaf_classes_array = tf.sigmoid(self.leaf_classes_array)

            tf.print('split_values', split_values, summarize=-1)
            tf.print('split_index_array', split_index_array, summarize=-1)
            tf.print('leaf_classes_array', leaf_classes_array, summarize=-1)
        
            split_index_array = tfa.seq2seq.hardmax(tf.reshape(split_index_array, (internal_node_num_, -1)))
            #split_index_array = tf.reshape(split_index_array, (internal_node_num_, -1))#tfa.activations.sparsemax(tf.reshape(split_index_array, (internal_node_num_, -1)))
            split_values_selected = tf.reduce_sum(split_index_array * tf.reshape(split_values, (internal_node_num_, -1)), axis=1)

            X_extended = []
            for entry in X:
                X_extended.append([entry]*internal_node_num_)
            X_extended = tf.stack(X_extended)
            X_extended_reduced = tf.reduce_sum(split_index_array * X_extended, axis=2)

            split_results = tf.round(tf.sigmoid(X_extended_reduced - split_values_selected))

            tf.print('split_results', split_results, summarize=-1)

                        
            
            if True:
                tree_extended = []
                for i in range(self.depth):
                    duplicate_factor = 2**(self.depth-i)//2
                    row = tf.reshape(tf.constant([], tf.float32), shape=(split_results.shape[0],0))
                    for j in range(2**(i)):
                        value = split_results[:,2**(i)-1+j]
                        inverse_value = 1-split_results[:,2**(i)-1+j]

                        #row.extend(tf.stack([value]*duplicate_factor, axis=1))
                        #row.extend(tf.stack([inverse_value]*duplicate_factor, axis=1))

                        value_extended = tf.stack([value]*duplicate_factor, axis=1)
                        inverse_value_extended = tf.stack([inverse_value]*duplicate_factor, axis=1)

                        new_values = tf.concat([value_extended, inverse_value_extended], axis=1)

                        row = tf.concat([row, new_values], axis=1)
                        #row = tf.stack(value_extended, inverse_value_extended)
                        #for _ in range(duplicate_factor):
                        #    row.extend([value, inverse_value])
                    #print(row)
                    #print(tf.stack(row))
                    tree_extended.append(tf.stack(row))
                tree_extended = tf.stack(tree_extended)
                tree_extended = tf.transpose(tree_extended, perm=[1,0,2])
                tf.print(tree_extended)   
                tree_leaf_identifier = tf.reduce_prod(tree_extended, axis=1)
                #print(tree_leaf_identifier)
                tree_leaf_output = tree_leaf_identifier * leaf_classes_array
                #tf.print(tree_leaf_output)
                function_values_dhdt = tf.reduce_max(tree_leaf_output, axis= 1)
            else:
                for split_result in split_results:

                    path_list = []
                    #add_factor == 0
                    leaf_counter = 0
                    internal_counter = 0
                    for i in range(self.depth):
                        print('i', i)
                        index = 2**(i)-1+internal_counter
                        print('index', index)
                        value = split_result[index]
                        print('value', value)
                        if value == 0:
                            leaf_counter += 2**(self.depth-i-1)
                            internal_counter = internal_counter ** 2 + 1
                            print('internal_counter', internal_counter)
                            #add_factor += 0
                        #else:
                            #add_factor += 1

                            #print(value)
                    print(leaf_counter)
                    function_values_dhdt = leaf_classes_array[leaf_counter]
                    print(leaf_counter)
        else:
            internal_node_num_ = self.internal_node_num_
            leaf_node_num_ = self.leaf_node_num_

            split_values_num_params = self.number_of_variables * internal_node_num_
            split_index_num_params = self.number_of_variables * internal_node_num_
            leaf_classes_num_params = self.leaf_node_num_ 

            #split_values = tf.sigmoid(self.dt_params[:split_values_num_params])
            #split_index_array = self.dt_params[split_values_num_params:split_values_num_params+split_index_num_params]    
            #leaf_classes_array = tf.sigmoid(self.dt_params[split_values_num_params+split_index_num_params:])

            split_values = tf.sigmoid(self.split_values)
            split_index_array = self.split_index_array   
            leaf_classes_array = tf.sigmoid(self.leaf_classes_array)

            tf.print('split_values', split_values, summarize=-1)
            tf.print('split_index_array', split_index_array, summarize=-1)
            tf.print('leaf_classes_array', leaf_classes_array, summarize=-1)            
            
            
            for entry in X[:3]:

                path_list = []
                #add_factor == 0
                leaf_counter = 0
                internal_counter = 0
                for i in range(self.depth):
                    print('i', i)
                    internal_index = 2**(i)-1+internal_counter
                    print('internal_index', internal_index)

                    split_index_for_internal = tf.argmax(split_index_array[self.number_of_variables*internal_index:self.number_of_variables*(internal_index+1)])
                    split_values_for_internal = split_values[self.number_of_variables*internal_index+split_index_for_internal]

                    entry_for_internal = entry[split_index_for_internal]


                    #value = tf.reduce_max(split_values)#(split_index_array)
                    value = tf.round(tf.sigmoid(entry_for_internal - split_values_for_internal))
                    
                    print('value', value)
                    if value < 0.5:
                        leaf_counter += 2**(self.depth-i-1)
                        internal_counter = internal_counter ** 2 + 1
                        print('internal_counter', internal_counter)            
                function_values_dhdt = leaf_classes_array[leaf_counter]
    
        #tf.print(function_values_dhdt)        

        return function_values_dhdt  
           
    def predict(self, X):
        return self.forward(X)
        
    def backward(self, x,y):
        #optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)#tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)
        with tf.GradientTape(persistent=True) as tape:
            #tape.watch(self.dt_params)
            predicted = self.forward(x)
            current_loss = self.loss(y, predicted)
            
        tf.print('predicted', predicted)
        tf.print('current_loss', current_loss, summarize=-1)
        #tf.print('self.dt_params', self.dt_params, summarize=-1)
        grads = tape.gradient(current_loss, self.leaf_classes_array)
        self.optimizer.apply_gradients(zip([grads], [self.leaf_classes_array]))
        tf.print('grads', grads, summarize=-1)        
        
        grads = tape.gradient(current_loss, self.split_values)
        self.optimizer.apply_gradients(zip([grads], [self.split_values]))
        tf.print('grads', grads, summarize=-1)
        grads = tape.gradient(current_loss, self.split_index_array)
        self.optimizer.apply_gradients(zip([grads], [self.split_index_array]))
        tf.print('grads', grads, summarize=-1)

        #optimizer.apply_gradients(zip(grads, self.dt_params),
        #                          global_step=tf.compat.v1.train.get_or_create_global_step())     
        
        #self.optimizer.apply_gradients(zip([grads], [self.dt_params]))
        #tf.print('self.dt_params', self.dt_params, summarize=-1)
        return current_loss
        
    
    def apply_activation(self, dt_params):

        dt_params_activation = dt_params
        
        if self.function_representation_type == 1:
            pass
        elif self.function_representation_type == 2:
            pass
        elif self.function_representation_type >= 3:
            outputs_coeff_neurons_num_ = self.internal_node_num_ * self.number_of_variables

            if self.function_representation_type == 3:
                dt_params_activation[:outputs_coeff_neurons_num_].assign(tf.math.sigmoid(dt_params[:outputs_coeff_neurons_num_]))
            elif self.function_representation_type == 4:
                dt_params_activation[:outputs_coeff_neurons_num_].assign(sigmoid_squeeze(dt_params[:outputs_coeff_neurons_num_]))

            current_position = outputs_coeff_neurons_num_
            for outputs_index in range(self.internal_node_num_):
                outputs_identifer_neurons = self.number_of_variables

                dt_params_activation[current_position:current_position+outputs_identifer_neurons].assign(tf.math.softmax(dt_params[current_position:current_position+outputs_identifer_neurons]))
                current_position += outputs_identifer_neurons

            dt_params_activation[current_position:].assign(tf.math.sigmoid(dt_params[current_position:]))

        
        return dt_params_activation
    
    def get_shaped_parameters_for_decision_tree(self, parameter_array):

        internal_node_num_ = 2 ** self.depth - 1 
        leaf_node_num_ = 2 ** self.depth

        if self.function_representation_type == 1:

            splits_coeff = parameter_array[:internal_node_num_]
            splits_coeff = tf.clip_by_value(splits_coeff, clip_value_min=0, clip_value_max=1)
            splits_coeff_list = tf.split(splits_coeff, internal_node_num_)
            splits_index = tf.cast(tf.clip_by_value(tf.round(parameter_array[internal_node_num_:internal_node_num_*2]), clip_value_min=0, clip_value_max=self.number_of_variables-1), tf.int64)
            splits_index_list = tf.split(splits_index, internal_node_num_)

            splits_list = []
            for values_node, indices_node in zip(splits_coeff_list, splits_index_list):
                sparse_tensor = tf.sparse.SparseTensor(indices=tf.expand_dims(indices_node, axis=1), values=values_node, dense_shape=[self.number_of_variables])
                dense_tensor = tf.sparse.to_dense(sparse_tensor)
                splits_list.append(dense_tensor)             

            splits = tf.stack(splits_list)            

            leaf_classes = parameter_array[internal_node_num_*2:]  
            leaf_classes = tf.clip_by_value(leaf_classes, clip_value_min=0, clip_value_max=1)

        elif self.function_representation_type == 2:

            split_values_num_params = internal_node_num_ 
            split_index_num_params = self.number_of_variables * internal_node_num_
            leaf_classes_num_params = leaf_node_num_ 

            split_values = parameter_array[:split_values_num_params]
            split_values_list_by_internal_node = tf.split(split_values, internal_node_num_)

            split_index_array = parameter_array[split_values_num_params:split_values_num_params+split_index_num_params]    
            split_index_list_by_internal_node = tf.split(split_index_array, internal_node_num_)
            split_index_list_by_internal_node_by_decision_sparsity = []
            for tensor in split_index_list_by_internal_node:
                split_tensor = tf.split(tensor, 1)
                split_index_list_by_internal_node_by_decision_sparsity.append(split_tensor)
            split_index_list_by_internal_node_by_decision_sparsity_argmax = tf.split(tf.argmax(split_index_list_by_internal_node_by_decision_sparsity, axis=2), internal_node_num_)
            split_index_list_by_internal_node_by_decision_sparsity_argmax_new = []
            for tensor in split_index_list_by_internal_node_by_decision_sparsity_argmax:
                tensor_squeeze = tf.squeeze(tensor, axis=0)
                split_index_list_by_internal_node_by_decision_sparsity_argmax_new.append(tensor_squeeze)
            split_index_list_by_internal_node_by_decision_sparsity_argmax = split_index_list_by_internal_node_by_decision_sparsity_argmax_new    
            dense_tensor_list = []
            for indices_node, values_node in zip(split_index_list_by_internal_node_by_decision_sparsity_argmax,  split_values_list_by_internal_node):
                sparse_tensor = tf.sparse.SparseTensor(indices=tf.expand_dims(indices_node, axis=1), values=values_node, dense_shape=[self.number_of_variables])
                dense_tensor = tf.sparse.to_dense(sparse_tensor)
                dense_tensor_list.append(dense_tensor) 
            splits = tf.stack(dense_tensor_list)

            leaf_classes_array = parameter_array[split_values_num_params+split_index_num_params:]  
            split_index_list_by_leaf_node = tf.split(leaf_classes_array, leaf_node_num_)

            leaf_classes = tf.squeeze(tf.stack(split_index_list_by_leaf_node))

        elif self.function_representation_type >= 3:

            split_values_num_params = self.number_of_variables * internal_node_num_
            split_index_num_params = self.number_of_variables * internal_node_num_
            leaf_classes_num_params = leaf_node_num_ 

            split_values = parameter_array[:split_values_num_params]
            split_values_list_by_internal_node = tf.split(split_values, internal_node_num_)

            split_index_array = parameter_array[split_values_num_params:split_values_num_params+split_index_num_params]    
            split_index_list_by_internal_node = tf.split(split_index_array, internal_node_num_)         

            split_index_list_by_internal_node_max = tfa.seq2seq.hardmax(split_index_list_by_internal_node)#tfa.activations.sparsemax(split_index_list_by_internal_node)

            splits = tf.stack(tf.multiply(split_values_list_by_internal_node, split_index_list_by_internal_node_max))

            leaf_classes_array = parameter_array[split_values_num_params+split_index_num_params:]  
            split_index_list_by_leaf_node = tf.split(leaf_classes_array, leaf_node_num_)

            leaf_classes = tf.squeeze(tf.stack(split_index_list_by_leaf_node))



        return splits, leaf_classes


    def calculate_function_value_from_vanilla_decision_tree_parameter_single_sample_wrapper(self, 
                                                                                            internal_nodes, 
                                                                                            leaf_nodes, 
                                                                                            leaf_node_num_, 
                                                                                            internal_node_num_, 
                                                                                            maximum_depth, 
                                                                                            number_of_variables):

        #self.internal_nodes = tf.cast(self.internal_nodes, tf.float32)
        #self.leaf_nodes = tf.cast(self.leaf_nodes, tf.float32)   
        
        tf.print('internal_nodes', internal_nodes, summarize=-1)
        tf.print('leaf_nodes', leaf_nodes, summarize=-1)
        def calculate_function_value_from_vanilla_decision_tree_parameter_single_sample(x):

            x = tf.cast(x, tf.float32)     
            
            internal_nodes_split = tf.split(internal_nodes, internal_node_num_)
            internal_nodes_split_new = [[] for _ in range(maximum_depth)]
            for i, tensor in enumerate(internal_nodes_split):
                current_depth = np.ceil(np.log2((i+1)+1)).astype(np.int32)

                internal_nodes_split_new[current_depth-1].append(tf.squeeze(tensor, axis=0))

            internal_nodes_split = internal_nodes_split_new

            split_value_list = []
            for i in range(maximum_depth):
                current_depth = i+1
                num_nodes_current_layer = 2**current_depth - 1 - (2**(current_depth-1) - 1)
                split_value_list_per_depth = []
                for j in range(num_nodes_current_layer):
                    zero_identifier = tf.not_equal(internal_nodes_split[i][j], tf.zeros_like(internal_nodes_split[i][j]))
                    split_complete = tf.greater(x, tf.math.sigmoid(internal_nodes_split[i][j]))
                    split_value = tf.reduce_any(tf.logical_and(zero_identifier, split_complete))
                    split_value_filled = tf.fill( [2**(maximum_depth-current_depth)] , split_value)
                    split_value_neg_filled = tf.fill([2**(maximum_depth-current_depth)], tf.logical_not(split_value))
                    split_value_list_per_depth.append(tf.keras.backend.flatten(tf.stack([split_value_neg_filled, split_value_filled])))        
                split_value_list.append(tf.keras.backend.flatten(tf.stack(split_value_list_per_depth)))

            split_values = tf.cast(tf.reduce_all(tf.stack(split_value_list), axis=0), tf.float32)    
            leaf_classes = tf.cast(tf.math.sigmoid(leaf_nodes), tf.float32)
            #final_class_probability = 1-tf.reduce_max(split_values)                                                                                                                                        
            final_class_probability = 1-tf.reduce_max(tf.multiply(leaf_classes, split_values))                                                                                                                                            
            return final_class_probability

        return calculate_function_value_from_vanilla_decision_tree_parameter_single_sample


    def plot(self, normalizer_list=None, path='./dt_plot.png'):
        from anytree import Node, RenderTree
        from anytree.exporter import DotExporter
        

        parameter_array = self.apply_activation(self.dt_params)
        
        splits, leaf_classes = self.get_shaped_parameters_for_decision_tree(parameter_array)

        splits = splits.numpy()
        leaf_classes = leaf_classes.numpy()


        if normalizer_list is not None: 
            transpose = splits.transpose()
            transpose_normalized = []
            for i, column in enumerate(transpose):
                column_new = column
                if len(column_new[column_new != 0]) != 0:
                    column_new[column_new != 0] = normalizer_list[i].inverse_transform(column[column != 0].reshape(-1, 1)).ravel()
                #column_new = normalizer_list[i].inverse_transform(column.reshape(-1, 1)).ravel()
                transpose_normalized.append(column_new)
            splits = np.array(transpose_normalized).transpose()

        splits_by_layer = []
        for i in range(self.depth+1):
            start = 2**i - 1
            end = 2**(i+1) -1
            splits_by_layer.append(splits[start:end])

        nodes = {
        }
        #tree = Tree()
        for i, splits in enumerate(splits_by_layer):
            for j, split in enumerate(splits):
                if i == 0:
                    current_node_id = int(2**i - 1 + j)
                    name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
                    split_variable = np.argmax(np.abs(split))
                    split_value = np.round(split[split_variable], 3)
                    split_description = 'x' + str(split_variable) + ' <= '  + str(split_value)

                    nodes[name] = Node(name=name, display_name=split_description)

                    #tree.create_node(tag=split_description, identifier=name, data=None)            
                else:
                    current_node_id = int(2**i - 1 + j)
                    name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
                    parent_node_id = int(np.floor((current_node_id-1)/2))
                    parent_name = 'n' + str(parent_node_id)
                    split_variable = np.argmax(np.abs(split))
                    split_value = np.round(split[split_variable], 3)
                    split_description = 'x' + str(split_variable) + ' <= '  + str(split_value)

                    nodes[name] = Node(name=name, parent=nodes[parent_name], display_name=split_description)

                    #tree.create_node(tag=split_description, identifier=name, parent=parent_name, data=None)

        for j, leaf_class in enumerate(leaf_classes):
            i = self.depth
            current_node_id = int(2**i - 1 + j)
            name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
            parent_node_id = int(np.floor((current_node_id-1)/2))
            parent_name = 'n' + str(parent_node_id)
            #split_variable = np.argmax(np.abs(split))
            #split_value = np.round(split[split_variable], 3)
            split_description = str(np.round((1-leaf_class), 3))#'x' + str(split_variable) + ' <= '  + str(split_value)
            nodes[name] = Node(name=name, parent=nodes[parent_name], display_name=split_description)
            #tree.create_node(tag=split_description, identifier=name, parent=parent_name, data=None)        

            DotExporter(nodes['n0'], nodeattrfunc=lambda node: 'label="{}"'.format(node.display_name)).to_picture(path)


        return Image(path)#, nodes#nodes#tree        

        
    

In [10]:
X, y = make_classification(
    n_samples=10_000, n_features=5, n_informative=2, n_redundant=2, random_state=42
)

#todo: anpassen, dass nur basierend auf train data normalized
X, normalizer_list = normalize_real_world_data(X)

train_samples = 128#1000  # Samples used for training the models


X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=10_000 - train_samples,
    random_state=42
)

In [11]:
model_sklearn = DecisionTreeClassifier(max_depth=3, random_state=42)

model_sklearn.fit(X_train, y_train)

model_sklearn.score(X_test, y_test)

0.8461304700162074

In [12]:
X

array([[0.29962307, 0.34420182, 0.78918445, 0.32522398, 0.32361171],
       [0.48974096, 0.41928906, 0.42401393, 0.66532031, 0.76704839],
       [0.65659485, 0.60011619, 0.54951077, 0.48750183, 0.59648464],
       ...,
       [0.20006322, 0.18973097, 0.53357224, 0.62434102, 0.41132694],
       [0.49538044, 0.49099686, 0.68280369, 0.38745923, 0.32775163],
       [0.4299303 , 0.40670348, 0.58172411, 0.51258715, 0.43861696]])

In [13]:
model_dhdt = DHDT(
            depth=3,
            function_representation_type = 3,
            number_of_variables = 5,
            learning_rate=1e-2,
            loss='binary_crossentropy',#'binary_crossentropy',
            random_seed=41,
            verbosity=1)

model_dhdt.fit(X_train, y_train, batch_size=64, epochs=10, early_stopping_epochs=50)

y_test_model = model_dhdt.predict(X_test)
score_dhdt = accuracy_score(y_test, np.round(y_test_model))

print('Test Accuracy', score_dhdt)

[-0.147681668 -0.171174631 0.0654113144 ... 0.0471678823 -0.117765374 -0.0936949179]


  0%|          | 0/10 [00:00<?, ?it/s]

InaccessibleTensorError: in user code:

    File "/tmp/ipykernel_2567093/1609930587.py", line 163, in forward  *
        function_values_dhdt = tf.stack(function_values_dhdt)

    InaccessibleTensorError: <tf.Tensor 'while/add_31:0' shape=() dtype=float32> is out of scope and cannot be used here. Use return values, explicit Python locals or TensorFlow collections to access it.
    Please see https://www.tensorflow.org/guide/function#all_outputs_of_a_tffunction_must_be_return_values for more information.
    
    <tf.Tensor 'while/add_31:0' shape=() dtype=float32> was defined here:
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/runpy.py", line 192, in _run_module_as_main
          return _run_code(code, main_globals, None,
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/runpy.py", line 85, in _run_code
          exec(code, run_globals)
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
          app.launch_new_instance()
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
          app.start()
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 677, in start
          self.io_loop.start()
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
          self.asyncio_loop.run_forever()
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/asyncio/base_events.py", line 563, in run_forever
          self._run_once()
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/asyncio/base_events.py", line 1844, in _run_once
          handle._run()
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/asyncio/events.py", line 81, in _run
          self._context.run(self._callback, *self._args)
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue
          await self.process_one()
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 446, in process_one
          await dispatch(*args)
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell
          await result
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 648, in execute_request
          reply_content = await reply_content
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 353, in do_execute
          res = shell.run_cell(code, store_history=store_history, silent=silent)
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
          return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2898, in run_cell
          result = self._run_cell(
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2944, in _run_cell
          return runner(coro)
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
          coro.send(None)
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3169, in run_cell_async
          has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3361, in run_ast_nodes
          if (await self.run_code(code, result,  async_=asy)):
        File "/home/smarton/anaconda3/envs/XAI/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3441, in run_code
          exec(code_obj, self.user_global_ns, self.user_ns)
        File "/tmp/ipykernel_2567093/2201969884.py", line 10, in <module>
          model_dhdt.fit(X_train, y_train, batch_size=64, epochs=10, early_stopping_epochs=50)
        File "/tmp/ipykernel_2567093/1609930587.py", line 85, in fit
          current_loss = self.backward(X_batch, y_batch)
        File "/tmp/ipykernel_2567093/1609930587.py", line 319, in backward
          predicted = self.forward(x)
        File "/tmp/ipykernel_2567093/1609930587.py", line 114, in forward
          if True:
        File "/tmp/ipykernel_2567093/1609930587.py", line 126, in forward
          for entry in X:
        File "/tmp/ipykernel_2567093/1609930587.py", line 129, in forward
          for leaf_index, path in enumerate(paths):
        File "/tmp/ipykernel_2567093/1609930587.py", line 159, in forward
          result += self.leaf_classes_array[leaf_index*2] * path_result_left + self.leaf_classes_array[leaf_index*2+1] * path_result_right
    
    The tensor <tf.Tensor 'while/add_31:0' shape=() dtype=float32> cannot be accessed from FuncGraph(name=forward, id=139930238363200), because it was defined in FuncGraph(name=while_body_214, id=139929706349424), which is out of scope.


In [None]:
parameter_array = model_dhdt.dt_params
parameter_array

In [None]:
internal_node_num_ = model_dhdt.internal_node_num_
leaf_node_num_ = model_dhdt.leaf_node_num_

split_values_num_params = model_dhdt.number_of_variables * internal_node_num_
split_index_num_params = model_dhdt.number_of_variables * internal_node_num_
leaf_classes_num_params = leaf_node_num_ 

split_values = parameter_array[:split_values_num_params]
split_values_list_by_internal_node = tf.split(split_values, internal_node_num_)

split_index_array = parameter_array[split_values_num_params:split_values_num_params+split_index_num_params]    
split_index_list_by_internal_node = tf.split(split_index_array, internal_node_num_)         

split_index_list_by_internal_node_max = tfa.seq2seq.hardmax(split_index_list_by_internal_node)#tfa.activations.sparsemax(split_index_list_by_internal_node)

splits = tf.stack(tf.multiply(split_values_list_by_internal_node, split_index_list_by_internal_node_max))

leaf_classes_array = parameter_array[split_values_num_params+split_index_num_params:]  
split_index_list_by_leaf_node = tf.split(leaf_classes_array, leaf_node_num_)

print(split_values)
print(split_index_array)

print(leaf_classes_array)


In [None]:
split_index_array = tfa.seq2seq.hardmax(tf.reshape(split_index_array, (internal_node_num_, -1)))
split_index_array

In [None]:
split_index_array * tf.reshape(split_values, (internal_node_num_, -1))

In [None]:
split_values_selected = tf.reduce_sum(split_index_array * tf.reshape(split_values, (internal_node_num_, -1)), axis=1)+0.4
split_values_selected

In [None]:
X_train_extended = []
for entry in X_train[:3]:
    X_train_extended.append([entry]*internal_node_num_)
X_train_extended = np.array(X_train_extended)
X_train_extended

In [None]:
X_train_extended_reduced = tf.reduce_sum(split_index_array * X_train_extended, axis=2)
X_train_extended_reduced

In [None]:
split_results = tf.round(tf.sigmoid(X_train_extended_reduced/2 - split_values_selected))
split_results

In [None]:
tf.greater(X_train_extended_reduced/2, split_values_selected)

In [None]:
split_results

In [None]:
print(split_values)
print(split_index_array)

print(leaf_classes_array)

In [None]:
split_index_array = parameter_array[split_values_num_params:split_values_num_params+split_index_num_params]    
split_index_array

In [None]:

split_index_array

In [None]:
split_index_for_internal

In [None]:
split_values_for_internal

In [None]:
split_index_array[internal_node_num_*internal_index:internal_node_num_*(internal_index+1)]

In [None]:
number_of_variables

In [None]:
for entry in X_train[:3]:
    
    path_list = []
    #add_factor == 0
    leaf_counter = 0
    internal_counter = 0
    for i in range(depth):
        print('i', i)
        internal_index = 2**(i)-1+internal_counter
        print('internal_index', internal_index)
        
        split_index_for_internal = np.argmax(split_index_array[5*internal_index:5*(internal_index+1)])
        split_values_for_internal = split_values[5*internal_index+split_index_for_internal]
        
        entry_for_internal = entry[split_index_for_internal]
        
        
        value = tf.round(tf.sigmoid(entry_for_internal - split_values_for_internal))
        print('value', value)
        if value == 0:
            leaf_counter += 2**(depth-i-1)
            internal_counter = internal_counter ** 2 + 1
            print('internal_counter', internal_counter)
            #add_factor += 0
        #else:
            #add_factor += 1

            #print(value)
    print(leaf_counter)


In [None]:
for split_result in split_results:
    
    path_list = []
    #add_factor == 0
    leaf_counter = 0
    internal_counter = 0
    for i in range(depth):
        print('i', i)
        for j in (2):#range(2**(i)):
            print('i+j', 2**(i)-1+j)
            value = split_result[2**(i)-1+j+internal_counter]
            print('value', value)
            if value == 0:
                print(depth-i-1)
                leaf_counter += 2**(depth-i-1)
                internal_counter = internal_counter
                #add_factor += 0
            #else:
                #add_factor += 1

            #print(value)
    print(leaf_counter)


In [None]:
depth = 3

for split_result in split_results:

    tree_extended = []
    for i in range(depth):
        duplicate_factor = 2**(depth-i)//2
        row = []
        for j in range(2**(i)):
            value = split_result[2**(i)-1+j]
            inverse_value = 1-split_result[2**(i)-1+j]

            row.extend([value]*duplicate_factor)
            row.extend([inverse_value]*duplicate_factor)
            #for _ in range(duplicate_factor):
            #    row.extend([value, inverse_value])
        #print(tf.stack(row))
        tree_extended.append(tf.stack(row))
    tree_extended = tf.stack(tree_extended)
    print(tree_extended)

In [None]:
split_result

In [None]:
split_results

In [None]:
split_results[:,i+j]

In [None]:
row

In [None]:
value

In [None]:
duplicate_factor

In [None]:
tf.stack([value]*4, axis=1)

In [None]:
value_extended = tf.stack([value]*duplicate_factor, axis=1)
inverse_value_extended = tf.stack([inverse_value]*duplicate_factor, axis=1)

In [None]:
value_extended

In [None]:
inverse_value_extended

In [None]:
new_values = tf.concat([value_extended, inverse_value_extended], axis=1)
new_values

In [None]:
new_values = tf.concat([value_extended, inverse_value_extended], axis=1)

In [None]:
new_values.shape

In [None]:
tf.reshape(tf.constant([], tf.float32), shape=(3,0))

In [None]:
new_values

In [None]:
tf.concat([tf.reshape(tf.constant([], tf.float32), shape=(3,0)), new_values], axis=1)

In [None]:
new_values

In [None]:
split_index_array = tfa.seq2seq.hardmax(tf.reshape(split_index_array, (internal_node_num_, -1)))
split_values_selected = tf.reduce_sum(split_index_array * tf.reshape(split_values, (internal_node_num_, -1)), axis=1)

X_train_extended = []
for entry in X_train[:3]:
    X_train_extended.append([entry]*internal_node_num_)
X_train_extended = np.array(X_train_extended)
X_train_extended_reduced = tf.reduce_sum(split_index_array * X_train_extended, axis=2)

split_results = tf.round(tf.sigmoid(X_train_extended_reduced/2 - split_values_selected))

In [None]:



tree_extended = []
for i in range(depth):
    duplicate_factor = 2**(depth-i)//2
    row = tf.reshape(tf.constant([], tf.float32), shape=(split_results.shape[0],0))
    for j in range(2**(i)):
        value = split_results[:,i+j]
        inverse_value = 1-split_results[:,i+j]

        #row.extend(tf.stack([value]*duplicate_factor, axis=1))
        #row.extend(tf.stack([inverse_value]*duplicate_factor, axis=1))
        
        value_extended = tf.stack([value]*duplicate_factor, axis=1)
        inverse_value_extended = tf.stack([inverse_value]*duplicate_factor, axis=1)
        
        new_values = tf.concat([value_extended, inverse_value_extended], axis=1)
        
        row = tf.concat([row, new_values], axis=1)
        #row = tf.stack(value_extended, inverse_value_extended)
        #for _ in range(duplicate_factor):
        #    row.extend([value, inverse_value])
    #print(row)
    #print(tf.stack(row))
    tree_extended.append(tf.stack(row))
tree_extended = tf.stack(tree_extended)
tree_extended = tf.transpose(tree_extended, perm=[1,0,2])
print(tree_extended)

In [None]:
leaf_classes_array

In [None]:
tree_leaf_identifier = tf.reduce_prod(tree_extended, axis=1)
#print(tree_leaf_identifier)
tree_leaf_output = tree_leaf_identifier * leaf_classes_array
#print(tree_leaf_identifier_output)
y_pred = tf.reduce_max(tree_leaf_output, axis= 1)
#print(y_pred)

In [None]:
tree_extended = []
for i in range(depth):
    duplicate_factor = 2**(depth-i)//2
    row = tf.reshape(tf.constant([], tf.float32), shape=(split_results.shape[0],0))
    for j in range(2**(i)):
        value = split_results[:,i+j]
        inverse_value = 1-split_results[:,i+j]

        #row.extend(tf.stack([value]*duplicate_factor, axis=1))
        #row.extend(tf.stack([inverse_value]*duplicate_factor, axis=1))
        
        value_extended = tf.stack([value]*duplicate_factor, axis=1)
        inverse_value_extended = tf.stack([inverse_value]*duplicate_factor, axis=1)
        
        new_values = tf.concat([value_extended, inverse_value_extended], axis=1)
        
        row = tf.concat([row, new_values], axis=1)
        #row = tf.stack(value_extended, inverse_value_extended)
        #for _ in range(duplicate_factor):
        #    row.extend([value, inverse_value])
    #print(row)
    #print(tf.stack(row))
    tree_extended.append(tf.stack(row))
tree_extended = tf.stack(tree_extended)
tree_extended = tf.transpose(tree_extended, perm=[1,0,2])
print(tree_extended)

In [None]:
tf.reshape(tree_extended, (3,3,8))

In [None]:
internal_nodes_extended = model_dhdt.get_shaped_parameters_for_decision_tree(model_dhdt.dt_params)[0]
internal_nodes_extended

In [None]:
generate_paths --> go through paths to finde true path --> get leaf

In [None]:
for tensor in tf.unstack(internal_nodes_extended):
    print(tensor)

In [None]:
model_dhdt = DHDT(
            depth=3,
            function_representation_type = 3,
            number_of_variables = 5,
            learning_rate=1e-2,
            loss='mae',#'binary_crossentropy',
            random_seed=42,
            verbosity=1)

model_dhdt.fit(X_train, y_train, batch_size=64, epochs=2, early_stopping_epochs=50)

y_test_model = model_dhdt.predict(X_test)
score_dhdt = accuracy_score(y_test, np.round(y_test_model))

print('Test Accuracy', score_dhdt)

In [None]:
model_dhdt = DHDT(
            depth=3,
            function_representation_type = 3,
            number_of_variables = 5,
            learning_rate=1e-2,
            loss='mae',#'binary_crossentropy',
            random_seed=42,
            verbosity=1)

model_dhdt.fit(X_train, y_train, batch_size=64, epochs=10, early_stopping_epochs=50)

y_test_model = model_dhdt.predict(X_test)
score_dhdt = accuracy_score(y_test, np.round(y_test_model))

print('Test Accuracy', score_dhdt)

In [None]:
plt.figure(figsize=(15,8))
image = model_dhdt.plot()
display(image)

plt.figure(figsize=(15,8))
plot_tree(model_sklearn, fontsize=10) 
plt.show()