In [1]:
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from livelossplot import PlotLosses

import os
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

from IPython.display import Image
from IPython.display import display, clear_output

import pandas as pd

os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = '' #'true'

#######################################################################################################################################
import warnings
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import logging

tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(3)

from keras import backend as K
from keras.utils.generic_utils import get_custom_objects
def sigmoid_squeeze(x, factor=3):
    x = 1/(1+K.exp(-factor*x))
    return x  

import seaborn as sns
sns.set_style("darkgrid")


In [2]:
def make_batch(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]    

In [3]:
def normalize_real_world_data(X_data):
    normalizer_list = []
    if isinstance(X_data, pd.DataFrame):
        for column_name in X_data:
            scaler = MinMaxScaler()
            scaler.fit(X_data[column_name].values.reshape(-1, 1))
            X_data[column_name] = scaler.transform(X_data[column_name].values.reshape(-1, 1)).ravel()
            normalizer_list.append(scaler)
    else:
        for i, column in enumerate(X_data.T):
            scaler = MinMaxScaler()
            scaler.fit(column.reshape(-1, 1))
            X_data[:,i] = scaler.transform(column.reshape(-1, 1)).ravel()
            normalizer_list.append(scaler)
        
    return X_data, normalizer_list

In [4]:
class DHDT(tf.Module):
    
    def __init__(
            self,
            depth=3,
            number_of_variables = 5,
            squeeze_factor = 5,
            learning_rate=1e-3,
            loss='binary_crossentropy',#'mae',
            optimizer = 'adam',
            random_seed=42,
            verbosity=1):    
        
        
        self.depth = depth
        self.learning_rate = learning_rate
        self.loss = tf.keras.losses.get(loss)
        self.seed = random_seed
        self.verbosity = verbosity
        self.number_of_variables = number_of_variables
        self.squeeze_factor = squeeze_factor
        
        self.internal_node_num_ = 2 ** self.depth - 1 
        self.leaf_node_num_ = 2 ** self.depth
        
        tf.random.set_seed(self.seed)
                        
        maximum_depth = self.depth
        leaf_node_num_ = 2 ** maximum_depth
        internal_node_num_ = 2 ** maximum_depth - 1
                
        #internal_nodes, leaf_nodes = self.get_shaped_parameters_for_decision_tree(dt_params_activation)

        internal_node_num_ = self.internal_node_num_
        leaf_node_num_ = self.leaf_node_num_

        split_values_num_params = self.number_of_variables * internal_node_num_
        split_index_num_params = self.number_of_variables * internal_node_num_
        leaf_classes_num_params = self.leaf_node_num_         
        
        self.split_values = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(split_values_num_params,)),
                                      trainable=True,
                                      name='split_values')
        self.split_index_array = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(split_index_num_params,)),
                                      trainable=True,
                                      name='split_index_array')
        self.leaf_classes_array = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(leaf_classes_num_params,)),
                                      trainable=True,
                                      name='leaf_classes_array')
        
        self.optimizer = tf.keras.optimizers.get(optimizer)
        self.optimizer.learning_rate = self.learning_rate
                
        self.plotlosses = PlotLosses()    
        
    def fit(self, X_train, y_train, batch_size=32, epochs=100, early_stopping_epochs=5, valid_data=None):
        
        minimum_loss_epoch = np.inf
        epochs_without_improvement = 0        
        
        for current_epoch in tqdm(range(epochs)):
            tf.random.set_seed(self.seed + current_epoch)
            X_train = tf.random.shuffle(X_train, seed=self.seed + current_epoch)
            tf.random.set_seed(self.seed + current_epoch)
            y_train = tf.random.shuffle(y_train, seed=self.seed + current_epoch)
            
            loss_list = []
            for index, (X_batch, y_batch) in enumerate(zip(make_batch(X_train, batch_size), make_batch(y_train, batch_size))):
                current_loss = self.backward(X_batch, y_batch)
                loss_list.append(float(current_loss))
                
                if self.verbosity >= 2:
                    batch_idx = (index+1)*batch_size
                    msg = "Epoch: {:02d} | Batch: {:03d} | Loss: {:.5f} |"
                    print(msg.format(current_epoch, batch_idx, current_loss))                   
                  
            current_loss_epoch = np.mean(loss_list)
            if self.verbosity > 0:    
                msg = "Epoch: {:02d} | Loss: {:.5f} |"
                print(msg.format(current_epoch, current_loss_epoch))              

            
            if False:
                loss_dict = {'loss': current_loss_epoch}

                loss_dict['acc'] = accuracy_score(y_train, np.round(self.forward_hard(X_train)))
                if valid_data is not None:
                    loss_dict['val_loss'] = self.loss(valid_data[1], self.forward(valid_data[0]))
                    loss_dict['val_acc'] = accuracy_score(valid_data[1], np.round(self.forward_hard(valid_data[0])))

                self.plotlosses.update(loss_dict)#({'acc': 0.0, 'val_acc': 0.0, 'loss': np.mean(loss_list), 'val_loss': 0.0})
                self.plotlosses.send()            

            if current_loss_epoch < minimum_loss_epoch:
                minimum_loss_epoch = current_loss_epoch
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                
            if epochs_without_improvement >= early_stopping_epochs:
                break
    
    
    
    @tf.function(jit_compile=True)                    
    def forward(self, X):
        X = tf.dtypes.cast(tf.convert_to_tensor(X), tf.float32)       

        internal_node_num_ = self.internal_node_num_
        leaf_node_num_ = self.leaf_node_num_

        split_values_num_params = self.number_of_variables * internal_node_num_
        split_index_num_params = self.number_of_variables * internal_node_num_
        leaf_classes_num_params = self.leaf_node_num_             

        paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

        #split_index_array = tfa.seq2seq.hardmax(tf.reshape(split_index_array, (internal_node_num_, -1)))
        #function_values_dhdt = tf.reshape(tf.constant([], tf.float32), shape=(0,)) #[]
        #function_values_dhdt = tf.zeros(shape=(X.shape[0],)) #[]
        #entry_index = 0
        #for entry in tf.unstack(X):
            


        def process(entry):
            result = 0
            for leaf_index, path in enumerate(paths):
                path_result_left = 1
                path_result_right = 1
                for internal_node_index in path: 
                    #tf.print(path, internal_node_index)
                    #split_index = tfa.seq2seq.hardmax(self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])
                    split_index = tfa.activations.sparsemax(100 * self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])                        
                    
                    #split_values = tf.sigmoid(self.split_values)[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]
                    split_values = sigmoid_squeeze(self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]-0.5, self.squeeze_factor)
                    #split_values = self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]
                    
                    internal_node_split_value = tf.reduce_sum(split_index*split_values)
                    respective_input_value = tf.reduce_sum(split_index*entry)


                    #tf.print('internal_node_split_value', internal_node_split_value)
                    #tf.print('respective_input_value', respective_input_value)

                    #split_decision = tf.keras.activations.relu(tf.math.sign(respective_input_value - internal_node_split_value - 0.5))
                    split_decision = tf.sigmoid(100 * (respective_input_value - internal_node_split_value - 0.5))

                    #tf.print('split_decision', split_decision)


                    path_result_left *= split_decision
                    path_result_right *= (1 - split_decision)

                    #tf.print('path_result_left', path_result_left)
                    #tf.print('path_result_right', path_result_right)

                #tf.print('path_result_left', path_result_left, summarize=-1)
                #tf.print('path_result_right', path_result_right, summarize=-1)
                #tf.print('tf.sigmoid(self.leaf_classes_array)', tf.sigmoid(self.leaf_classes_array), summarize=-1)
                
                #result += tf.sigmoid(self.leaf_classes_array)[leaf_index*2] * path_result_left + tf.sigmoid(self.leaf_classes_array)[leaf_index*2+1] * path_result_right
                result += self.leaf_classes_array[leaf_index*2] * path_result_left + self.leaf_classes_array[leaf_index*2+1] * path_result_right
                #tf.print(result)
            return result
            #tf.print('RESULT', result)

            #function_values_dhdt.append(result)
            #tf.autograph.experimental.set_loop_options(
            #        shape_invariants=[(function_values_dhdt, tf.TensorShape([None]))]
            #    )            
            #function_values_dhdt = tf.concat([function_values_dhdt, [result]], 0)
            #function_values_dhdt[entry_index] = result
            #entry_index += 1
        #function_values_dhdt = tf.stack(function_values_dhdt)
        #tf.print('function_values_dhdt', function_values_dhdt)

        function_values_dhdt = tf.vectorized_map(process, X)
        
        return function_values_dhdt  
           
    
    @tf.function(jit_compile=True)                    
    def forward_hard(self, X):
        X = tf.dtypes.cast(tf.convert_to_tensor(X), tf.float32)       

        internal_node_num_ = self.internal_node_num_
        leaf_node_num_ = self.leaf_node_num_

        split_values_num_params = self.number_of_variables * internal_node_num_
        split_index_num_params = self.number_of_variables * internal_node_num_
        leaf_classes_num_params = self.leaf_node_num_             

        paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

        #split_index_array = tfa.seq2seq.hardmax(tf.reshape(split_index_array, (internal_node_num_, -1)))
        #function_values_dhdt = tf.reshape(tf.constant([], tf.float32), shape=(0,)) #[]
        #function_values_dhdt = tf.zeros(shape=(X.shape[0],)) #[]
        #entry_index = 0
        #for entry in tf.unstack(X):
            


        def process(entry):
            result = 0
            for leaf_index, path in enumerate(paths):
                path_result_left = 1
                path_result_right = 1
                for internal_node_index in path: 
                    #tf.print(path, internal_node_index)
                    split_index = tfa.seq2seq.hardmax(self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])
                    #split_index = tfa.activations.sparsemax(10 * self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])                        
                    
                    #split_values = tf.sigmoid(self.split_values)[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]
                    split_values = sigmoid_squeeze(self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]-0.5, self.squeeze_factor)
                    #split_values = self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]
                    
                    internal_node_split_value = tf.reduce_sum(split_index*split_values)
                    respective_input_value = tf.reduce_sum(split_index*entry)


                    #tf.print('internal_node_split_value', internal_node_split_value)
                    #tf.print('respective_input_value', respective_input_value)

                    #split_decision = tf.keras.activations.relu(tf.math.sign(respective_input_value - internal_node_split_value - 0.5))
                    #split_decision = tf.sigmoid(100 * (respective_input_value - internal_node_split_value - 0.5))
                    split_decision = tf.round(tf.sigmoid(respective_input_value - internal_node_split_value - 0.5))
                    #tf.print('split_decision', split_decision)


                    path_result_left *= split_decision
                    path_result_right *= (1 - split_decision)

                    #tf.print('path_result_left', path_result_left)
                    #tf.print('path_result_right', path_result_right)

                #tf.print('path_result_left', path_result_left, summarize=-1)
                #tf.print('path_result_right', path_result_right, summarize=-1)
                #tf.print('tf.sigmoid(self.leaf_classes_array)', tf.sigmoid(self.leaf_classes_array), summarize=-1)
                
                #result += tf.sigmoid(self.leaf_classes_array)[leaf_index*2] * path_result_left + tf.sigmoid(self.leaf_classes_array)[leaf_index*2+1] * path_result_right
                result += self.leaf_classes_array[leaf_index*2] * path_result_left + self.leaf_classes_array[leaf_index*2+1] * path_result_right
                #tf.print(result)
            return result
            #tf.print('RESULT', result)

            #function_values_dhdt.append(result)
            #tf.autograph.experimental.set_loop_options(
            #        shape_invariants=[(function_values_dhdt, tf.TensorShape([None]))]
            #    )            
            #function_values_dhdt = tf.concat([function_values_dhdt, [result]], 0)
            #function_values_dhdt[entry_index] = result
            #entry_index += 1
        #function_values_dhdt = tf.stack(function_values_dhdt)
        #tf.print('function_values_dhdt', function_values_dhdt)

        function_values_dhdt = tf.vectorized_map(process, X)
        
        return function_values_dhdt  
           
        
    def predict(self, X):
        return tf.sigmoid(self.forward_hard(X))
        
    def backward(self, x,y):
        #optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)#tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)
        with tf.GradientTape(persistent=True) as tape:
            predicted = self.forward(x)
            if self.loss.__name__  == 'binary_crossentropy':
                current_loss = self.loss(y, predicted, from_logits=True)
            else:
                current_loss = self.loss(y, predicted, from_logits=True)
        #tf.print('predicted', predicted)
        #tf.print('current_loss', current_loss, summarize=-1)
        grads = tape.gradient(current_loss, self.leaf_classes_array)
        self.optimizer.apply_gradients(zip([grads], [self.leaf_classes_array]))
        #tf.print('grads', grads, summarize=-1)        
        
        grads = tape.gradient(current_loss, self.split_values)
        self.optimizer.apply_gradients(zip([grads], [self.split_values]))
        #tf.print('grads', grads, summarize=-1)
        grads = tape.gradient(current_loss, self.split_index_array)
        self.optimizer.apply_gradients(zip([grads], [self.split_index_array]))
        #tf.print('grads', grads, summarize=-1)

        #                          global_step=tf.compat.v1.train.get_or_create_global_step())     
        
        return current_loss
        
    def plot(self, normalizer_list=None, path='./dt_plot.png'):
        from anytree import Node, RenderTree
        from anytree.exporter import DotExporter

        internal_node_num_ = 2 ** self.depth - 1 
        
        #split_values = self.split_values
        split_values = sigmoid_squeeze(self.split_values, self.squeeze_factor)
        split_values_list_by_internal_node = tf.split(split_values, internal_node_num_)

        split_index_array = self.split_index_array 
        split_index_list_by_internal_node = tf.split(split_index_array, internal_node_num_)         

        split_index_list_by_internal_node_max = tfa.seq2seq.hardmax(split_index_list_by_internal_node)#tfa.activations.sparsemax(split_index_list_by_internal_node)

        splits = tf.stack(tf.multiply(split_values_list_by_internal_node, split_index_list_by_internal_node_max))

        
        splits = splits.numpy()
        leaf_classes = tf.sigmoid(self.leaf_classes_array).numpy()


        if normalizer_list is not None: 
            transpose = splits.transpose()
            transpose_normalized = []
            for i, column in enumerate(transpose):
                column_new = column
                if len(column_new[column_new != 0]) != 0:
                    column_new[column_new != 0] = normalizer_list[i].inverse_transform(column[column != 0].reshape(-1, 1)).ravel()
                #column_new = normalizer_list[i].inverse_transform(column.reshape(-1, 1)).ravel()
                transpose_normalized.append(column_new)
            splits = np.array(transpose_normalized).transpose()

        splits_by_layer = []
        for i in range(self.depth+1):
            start = 2**i - 1
            end = 2**(i+1) -1
            splits_by_layer.append(splits[start:end])

        nodes = {
        }
        #tree = Tree()
        for i, splits in enumerate(splits_by_layer):
            for j, split in enumerate(splits):
                if i == 0:
                    current_node_id = int(2**i - 1 + j)
                    name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
                    split_variable = np.argmax(np.abs(split))
                    split_value = np.round(split[split_variable], 3)
                    split_description = 'x' + str(split_variable) + ' <= '  + str(split_value)

                    nodes[name] = Node(name=name, display_name=split_description)

                    #tree.create_node(tag=split_description, identifier=name, data=None)            
                else:
                    current_node_id = int(2**i - 1 + j)
                    name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
                    parent_node_id = int(np.floor((current_node_id-1)/2))
                    parent_name = 'n' + str(parent_node_id)
                    split_variable = np.argmax(np.abs(split))
                    split_value = np.round(split[split_variable], 3)
                    split_description = 'x' + str(split_variable) + ' <= '  + str(split_value)

                    nodes[name] = Node(name=name, parent=nodes[parent_name], display_name=split_description)

                    #tree.create_node(tag=split_description, identifier=name, parent=parent_name, data=None)

        for j, leaf_class in enumerate(leaf_classes):
            i = self.depth
            current_node_id = int(2**i - 1 + j)
            name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
            parent_node_id = int(np.floor((current_node_id-1)/2))
            parent_name = 'n' + str(parent_node_id)
            #split_variable = np.argmax(np.abs(split))
            #split_value = np.round(split[split_variable], 3)
            split_description = str(np.round((leaf_class), 3))#'x' + str(split_variable) + ' <= '  + str(split_value)
            nodes[name] = Node(name=name, parent=nodes[parent_name], display_name=split_description)
            #tree.create_node(tag=split_description, identifier=name, parent=parent_name, data=None)        

            DotExporter(nodes['n0'], nodeattrfunc=lambda node: 'label="{}"'.format(node.display_name)).to_picture(path)


        return Image(path)#, nodes#nodes#tree        

        
    

In [5]:
class DHDT(tf.Module):
    
    def __init__(
            self,
            depth=3,
            function_representation_type = 3,
            number_of_variables = 5,
            squeeze_factor = 5,
            learning_rate=1e-3,
            loss='binary_crossentropy',#'mae',
            optimizer = 'adam',
            random_seed=42,
            verbosity=1):    
        
        
        self.depth = depth
        self.learning_rate = learning_rate
        self.loss = tf.keras.losses.get(loss)
        self.seed = random_seed
        self.verbosity = verbosity
        self.function_representation_type = function_representation_type
        self.number_of_variables = number_of_variables
        self.squeeze_factor = squeeze_factor
        
        self.internal_node_num_ = 2 ** self.depth - 1 
        self.leaf_node_num_ = 2 ** self.depth
        
        tf.random.set_seed(self.seed)
        
        function_representation_length = ( 
          (2 ** self.depth - 1) * 2 + (2 ** self.depth)  if self.function_representation_type == 1 
          else (2 ** self.depth - 1) + ((2 ** self.depth - 1) * self.number_of_variables) + (2 ** self.depth) if self.function_representation_type == 2 
          else ((2 ** self.depth - 1) * self.number_of_variables * 2) + (2 ** self.depth)  if self.function_representation_type >= 3 
          else None
                                      )        
        
        self.dt_params =  tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(function_representation_length,)),
                                      trainable=True,
                                      name='dt_params')
        
        tf.print(self.dt_params)
        
        maximum_depth = self.depth
        leaf_node_num_ = 2 ** maximum_depth
        internal_node_num_ = 2 ** maximum_depth - 1
        
        #dt_params_activation = self.dt_params#self.apply_activation(self.dt_params)
        
        #internal_nodes, leaf_nodes = self.get_shaped_parameters_for_decision_tree(dt_params_activation)

        internal_node_num_ = self.internal_node_num_
        leaf_node_num_ = self.leaf_node_num_

        split_values_num_params = self.number_of_variables * internal_node_num_
        split_index_num_params = self.number_of_variables * internal_node_num_
        leaf_classes_num_params = self.leaf_node_num_         
        
        self.split_values = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(split_values_num_params,)),
                                      trainable=True,
                                      name='split_values')
        #tf.sigmoid(self.dt_params[:split_values_num_params])
        self.split_index_array = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(split_index_num_params,)),
                                      trainable=True,
                                      name='split_index_array')
        #self.dt_params[split_values_num_params:split_values_num_params+split_index_num_params]    
        self.leaf_classes_array = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(leaf_classes_num_params,)),
                                      trainable=True,
                                      name='leaf_classes_array')
        #tf.sigmoid(self.dt_params[split_values_num_params+split_index_num_params:])        
        
        self.optimizer = tf.keras.optimizers.get(optimizer)
        self.optimizer.learning_rate = self.learning_rate
                
    def fit(self, X, y, batch_size=32, epochs=100, early_stopping_epochs=5):
        
        minimum_loss_epoch = np.inf
        epochs_without_improvement = 0        
        
        for current_epoch in tqdm(range(epochs)):
            tf.random.set_seed(self.seed + current_epoch)
            X = tf.random.shuffle(X, seed=self.seed + current_epoch)
            tf.random.set_seed(self.seed + current_epoch)
            y = tf.random.shuffle(y, seed=self.seed + current_epoch)
            
            loss_list = []
            for index, (X_batch, y_batch) in enumerate(zip(make_batch(X, batch_size), make_batch(y, batch_size))):
                current_loss = self.backward(X_batch, y_batch)
                loss_list.append(float(current_loss))
                
                if self.verbosity >= 2:
                    batch_idx = (index+1)*batch_size
                    msg = "Epoch: {:02d} | Batch: {:03d} | Loss: {:.5f} |"
                    print(msg.format(current_epoch, batch_idx, current_loss))                   
                  
            if self.verbosity > 0:    
                msg = "Epoch: {:02d} | Loss: {:.5f} |"
                print(msg.format(current_epoch, np.mean(loss_list)))              
            
            current_loss_epoch = np.mean(loss_list)

            if current_loss_epoch < minimum_loss_epoch:
                minimum_loss_epoch = current_loss_epoch
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                
            if epochs_without_improvement >= early_stopping_epochs:
                break
    
    
    
    @tf.function(jit_compile=True)                    
    def forward(self, X):
        X = tf.dtypes.cast(tf.convert_to_tensor(X), tf.float32)       

        internal_node_num_ = self.internal_node_num_
        leaf_node_num_ = self.leaf_node_num_

        split_values_num_params = self.number_of_variables * internal_node_num_
        split_index_num_params = self.number_of_variables * internal_node_num_
        leaf_classes_num_params = self.leaf_node_num_             

        paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

        #split_index_array = tfa.seq2seq.hardmax(tf.reshape(split_index_array, (internal_node_num_, -1)))
        #function_values_dhdt = tf.reshape(tf.constant([], tf.float32), shape=(0,)) #[]
        #function_values_dhdt = tf.zeros(shape=(X.shape[0],)) #[]
        #entry_index = 0
        #for entry in tf.unstack(X):
            


        def process(entry):
            result = 0
            for leaf_index, path in enumerate(paths):
                path_result_left = 1
                path_result_right = 1
                for internal_node_index in path: 
                    #tf.print(path, internal_node_index)
                    #split_index = tfa.seq2seq.hardmax(self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])
                    split_index = tfa.activations.sparsemax(10 * self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])                        
                    
                    #split_values = tf.sigmoid(self.split_values)[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]
                    split_values = sigmoid_squeeze(self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]-0.5, self.squeeze_factor)
                    #split_values = self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]
                    
                    internal_node_split_value = tf.reduce_sum(split_index*split_values)
                    respective_input_value = tf.reduce_sum(split_index*entry)


                    #tf.print('internal_node_split_value', internal_node_split_value)
                    #tf.print('respective_input_value', respective_input_value)

                    #split_decision = tf.keras.activations.relu(tf.math.sign(respective_input_value - internal_node_split_value - 0.5))
                    split_decision = tf.sigmoid(100 * (respective_input_value - internal_node_split_value - 0.5))

                    #tf.print('split_decision', split_decision)


                    path_result_left *= split_decision
                    path_result_right *= (1 - split_decision)

                    #tf.print('path_result_left', path_result_left)
                    #tf.print('path_result_right', path_result_right)

                #tf.print('path_result_left', path_result_left, summarize=-1)
                #tf.print('path_result_right', path_result_right, summarize=-1)
                #tf.print('tf.sigmoid(self.leaf_classes_array)', tf.sigmoid(self.leaf_classes_array), summarize=-1)
                
                #result += tf.sigmoid(self.leaf_classes_array)[leaf_index*2] * path_result_left + tf.sigmoid(self.leaf_classes_array)[leaf_index*2+1] * path_result_right
                result += self.leaf_classes_array[leaf_index*2] * path_result_left + self.leaf_classes_array[leaf_index*2+1] * path_result_right
                #tf.print(result)
            return result
            #tf.print('RESULT', result)

            #function_values_dhdt.append(result)
            #tf.autograph.experimental.set_loop_options(
            #        shape_invariants=[(function_values_dhdt, tf.TensorShape([None]))]
            #    )            
            #function_values_dhdt = tf.concat([function_values_dhdt, [result]], 0)
            #function_values_dhdt[entry_index] = result
            #entry_index += 1
        #function_values_dhdt = tf.stack(function_values_dhdt)
        #tf.print('function_values_dhdt', function_values_dhdt)

        function_values_dhdt = tf.vectorized_map(process, X)
        
        return function_values_dhdt  
           
    def predict(self, X):
        return tf.sigmoid(self.forward(X))
        
    def backward(self, x,y):
        #optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)#tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)
        with tf.GradientTape(persistent=True) as tape:
            #tape.watch(self.dt_params)
            predicted = self.forward(x)
            current_loss = self.loss(y, predicted, from_logits=True)
            
        #tf.print('predicted', predicted)
        #tf.print('current_loss', current_loss, summarize=-1)
        #tf.print('self.dt_params', self.dt_params, summarize=-1)
        grads = tape.gradient(current_loss, self.leaf_classes_array)
        self.optimizer.apply_gradients(zip([grads], [self.leaf_classes_array]))
        #tf.print('grads', grads, summarize=-1)        
        
        grads = tape.gradient(current_loss, self.split_values)
        self.optimizer.apply_gradients(zip([grads], [self.split_values]))
        #tf.print('grads', grads, summarize=-1)
        grads = tape.gradient(current_loss, self.split_index_array)
        self.optimizer.apply_gradients(zip([grads], [self.split_index_array]))
        #tf.print('grads', grads, summarize=-1)

        #optimizer.apply_gradients(zip(grads, self.dt_params),
        #                          global_step=tf.compat.v1.train.get_or_create_global_step())     
        
        #self.optimizer.apply_gradients(zip([grads], [self.dt_params]))
        #tf.print('self.dt_params', self.dt_params, summarize=-1)
        return current_loss
        
    def plot(self, normalizer_list=None, path='./dt_plot.png'):
        from anytree import Node, RenderTree
        from anytree.exporter import DotExporter

        internal_node_num_ = 2 ** self.depth - 1 
        
        #split_values = self.split_values
        split_values = sigmoid_squeeze(self.split_values, self.squeeze_factor)
        split_values_list_by_internal_node = tf.split(split_values, internal_node_num_)

        split_index_array = self.split_index_array 
        split_index_list_by_internal_node = tf.split(split_index_array, internal_node_num_)         

        split_index_list_by_internal_node_max = tfa.seq2seq.hardmax(split_index_list_by_internal_node)#tfa.activations.sparsemax(split_index_list_by_internal_node)

        splits = tf.stack(tf.multiply(split_values_list_by_internal_node, split_index_list_by_internal_node_max))

        
        splits = splits.numpy()
        leaf_classes = tf.sigmoid(self.leaf_classes_array).numpy()


        if normalizer_list is not None: 
            transpose = splits.transpose()
            transpose_normalized = []
            for i, column in enumerate(transpose):
                column_new = column
                if len(column_new[column_new != 0]) != 0:
                    column_new[column_new != 0] = normalizer_list[i].inverse_transform(column[column != 0].reshape(-1, 1)).ravel()
                #column_new = normalizer_list[i].inverse_transform(column.reshape(-1, 1)).ravel()
                transpose_normalized.append(column_new)
            splits = np.array(transpose_normalized).transpose()

        splits_by_layer = []
        for i in range(self.depth+1):
            start = 2**i - 1
            end = 2**(i+1) -1
            splits_by_layer.append(splits[start:end])

        nodes = {
        }
        #tree = Tree()
        for i, splits in enumerate(splits_by_layer):
            for j, split in enumerate(splits):
                if i == 0:
                    current_node_id = int(2**i - 1 + j)
                    name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
                    split_variable = np.argmax(np.abs(split))
                    split_value = np.round(split[split_variable], 3)
                    split_description = 'x' + str(split_variable) + ' <= '  + str(split_value)

                    nodes[name] = Node(name=name, display_name=split_description)

                    #tree.create_node(tag=split_description, identifier=name, data=None)            
                else:
                    current_node_id = int(2**i - 1 + j)
                    name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
                    parent_node_id = int(np.floor((current_node_id-1)/2))
                    parent_name = 'n' + str(parent_node_id)
                    split_variable = np.argmax(np.abs(split))
                    split_value = np.round(split[split_variable], 3)
                    split_description = 'x' + str(split_variable) + ' <= '  + str(split_value)

                    nodes[name] = Node(name=name, parent=nodes[parent_name], display_name=split_description)

                    #tree.create_node(tag=split_description, identifier=name, parent=parent_name, data=None)

        for j, leaf_class in enumerate(leaf_classes):
            i = self.depth
            current_node_id = int(2**i - 1 + j)
            name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
            parent_node_id = int(np.floor((current_node_id-1)/2))
            parent_name = 'n' + str(parent_node_id)
            #split_variable = np.argmax(np.abs(split))
            #split_value = np.round(split[split_variable], 3)
            split_description = str(np.round((leaf_class), 3))#'x' + str(split_variable) + ' <= '  + str(split_value)
            nodes[name] = Node(name=name, parent=nodes[parent_name], display_name=split_description)
            #tree.create_node(tag=split_description, identifier=name, parent=parent_name, data=None)        

            DotExporter(nodes['n0'], nodeattrfunc=lambda node: 'label="{}"'.format(node.display_name)).to_picture(path)


        return Image(path)#, nodes#nodes#tree  

In [12]:
class DHDT(tf.Module):
    
    def __init__(
            self,
            depth=3,
            number_of_variables = 5,
            squeeze_factor = 5,
            learning_rate=1e-3,
            loss='binary_crossentropy',#'mae',
            optimizer = 'adam',
            random_seed=42,
            verbosity=1):    
        
        
        self.depth = depth
        self.learning_rate = learning_rate
        self.loss = tf.keras.losses.get(loss)
        self.seed = random_seed
        self.verbosity = verbosity
        self.number_of_variables = number_of_variables
        self.squeeze_factor = squeeze_factor
        
        self.internal_node_num_ = 2 ** self.depth - 1 
        self.leaf_node_num_ = 2 ** self.depth
        
        tf.random.set_seed(self.seed)
        
        function_representation_length = ((2 ** self.depth - 1) * self.number_of_variables * 2) + (2 ** self.depth)
        
        #dt_params =  tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(function_representation_length,)),
        #                              trainable=True,
        #                              name='dt_params')
        
        
        maximum_depth = self.depth
        leaf_node_num_ = 2 ** maximum_depth
        internal_node_num_ = 2 ** maximum_depth - 1
        
        #dt_params_activation = self.dt_params#self.apply_activation(self.dt_params)
        
        #internal_nodes, leaf_nodes = self.get_shaped_parameters_for_decision_tree(dt_params_activation)

        internal_node_num_ = self.internal_node_num_
        leaf_node_num_ = self.leaf_node_num_

        split_values_num_params = self.number_of_variables * internal_node_num_
        split_index_num_params = self.number_of_variables * internal_node_num_
        leaf_classes_num_params = self.leaf_node_num_         
        
        self.split_values = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(split_values_num_params,)),
                                      trainable=True,
                                      name='split_values')
        tf.print(self.split_values, summarize=-1)
        self.split_index_array = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(split_index_num_params,)),
                                      trainable=True,
                                      name='split_index_array')
        tf.print(self.split_index_array, summarize=-1)
        self.leaf_classes_array = tf.Variable(tf.keras.initializers.GlorotUniform(seed=self.seed)(shape=(leaf_classes_num_params,)),
                                      trainable=True,
                                      name='leaf_classes_array')
        tf.print(self.leaf_classes_array, summarize=-1)
        
        self.optimizer = tf.keras.optimizers.get(optimizer)
        self.optimizer.learning_rate = self.learning_rate  
        
    def fit(self, X, y, batch_size=32, epochs=100, early_stopping_epochs=5):
        
        minimum_loss_epoch = np.inf
        epochs_without_improvement = 0        
        
        for current_epoch in tqdm(range(epochs)):
            tf.random.set_seed(self.seed + current_epoch)
            X = tf.random.shuffle(X, seed=self.seed + current_epoch)
            tf.random.set_seed(self.seed + current_epoch)
            y = tf.random.shuffle(y, seed=self.seed + current_epoch)
            
            loss_list = []
            for index, (X_batch, y_batch) in enumerate(zip(make_batch(X, batch_size), make_batch(y, batch_size))):
                current_loss = self.backward(X_batch, y_batch)
                loss_list.append(float(current_loss))
                
                if self.verbosity >= 2:
                    batch_idx = (index+1)*batch_size
                    msg = "Epoch: {:02d} | Batch: {:03d} | Loss: {:.5f} |"
                    print(msg.format(current_epoch, batch_idx, current_loss))                   
                  
            if self.verbosity > 0:    
                msg = "Epoch: {:02d} | Loss: {:.5f} |"
                print(msg.format(current_epoch, np.mean(loss_list)))              
            
            current_loss_epoch = np.mean(loss_list)

            if current_loss_epoch < minimum_loss_epoch:
                minimum_loss_epoch = current_loss_epoch
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                
            if epochs_without_improvement >= early_stopping_epochs:
                break
    
    
    
    @tf.function(jit_compile=True)                    
    def forward(self, X):
        X = tf.dtypes.cast(tf.convert_to_tensor(X), tf.float32)       

        internal_node_num_ = self.internal_node_num_
        leaf_node_num_ = self.leaf_node_num_

        split_values_num_params = self.number_of_variables * internal_node_num_
        split_index_num_params = self.number_of_variables * internal_node_num_
        leaf_classes_num_params = self.leaf_node_num_             

        paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

        #split_index_array = tfa.seq2seq.hardmax(tf.reshape(split_index_array, (internal_node_num_, -1)))
        #function_values_dhdt = tf.reshape(tf.constant([], tf.float32), shape=(0,)) #[]
        #function_values_dhdt = tf.zeros(shape=(X.shape[0],)) #[]
        #entry_index = 0
        #for entry in tf.unstack(X):
            


        def process(entry):
            result = 0
            for leaf_index, path in enumerate(paths):
                path_result_left = 1
                path_result_right = 1
                for internal_node_index in path: 
                    #tf.print(path, internal_node_index)
                    #split_index = tfa.seq2seq.hardmax(self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])
                    split_index = tfa.activations.sparsemax(100 * self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])                        
                    
                    #split_values = tf.sigmoid(self.split_values)[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]
                    split_values = sigmoid_squeeze(self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]-0.5, self.squeeze_factor)
                    #split_values = self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]
                    
                    internal_node_split_value = tf.reduce_sum(split_index*split_values)
                    respective_input_value = tf.reduce_sum(split_index*entry)


                    #tf.print('internal_node_split_value', internal_node_split_value)
                    #tf.print('respective_input_value', respective_input_value)

                    #split_decision = tf.keras.activations.relu(tf.math.sign(respective_input_value - internal_node_split_value - 0.5))
                    split_decision = tf.sigmoid(100 * (respective_input_value - internal_node_split_value - 0.5))

                    #tf.print('split_decision', split_decision)


                    path_result_left *= split_decision
                    path_result_right *= (1 - split_decision)

                    #tf.print('path_result_left', path_result_left)
                    #tf.print('path_result_right', path_result_right)

                #tf.print('path_result_left', path_result_left, summarize=-1)
                #tf.print('path_result_right', path_result_right, summarize=-1)
                #tf.print('tf.sigmoid(self.leaf_classes_array)', tf.sigmoid(self.leaf_classes_array), summarize=-1)
                
                #result += tf.sigmoid(self.leaf_classes_array)[leaf_index*2] * path_result_left + tf.sigmoid(self.leaf_classes_array)[leaf_index*2+1] * path_result_right
                result += self.leaf_classes_array[leaf_index*2] * path_result_left + self.leaf_classes_array[leaf_index*2+1] * path_result_right
                #tf.print(result)
            return result
            #tf.print('RESULT', result)

            #function_values_dhdt.append(result)
            #tf.autograph.experimental.set_loop_options(
            #        shape_invariants=[(function_values_dhdt, tf.TensorShape([None]))]
            #    )            
            #function_values_dhdt = tf.concat([function_values_dhdt, [result]], 0)
            #function_values_dhdt[entry_index] = result
            #entry_index += 1
        #function_values_dhdt = tf.stack(function_values_dhdt)
        #tf.print('function_values_dhdt', function_values_dhdt)

        function_values_dhdt = tf.vectorized_map(process, X)
        
        return function_values_dhdt  
           
    
    @tf.function(jit_compile=True)                    
    def forward_hard(self, X):
        X = tf.dtypes.cast(tf.convert_to_tensor(X), tf.float32)       

        internal_node_num_ = self.internal_node_num_
        leaf_node_num_ = self.leaf_node_num_

        split_values_num_params = self.number_of_variables * internal_node_num_
        split_index_num_params = self.number_of_variables * internal_node_num_
        leaf_classes_num_params = self.leaf_node_num_             

        paths = [[0,1,3], [0,1,4], [0,2,5], [0,2,6]]

        #split_index_array = tfa.seq2seq.hardmax(tf.reshape(split_index_array, (internal_node_num_, -1)))
        #function_values_dhdt = tf.reshape(tf.constant([], tf.float32), shape=(0,)) #[]
        #function_values_dhdt = tf.zeros(shape=(X.shape[0],)) #[]
        #entry_index = 0
        #for entry in tf.unstack(X):
            


        def process(entry):
            result = 0
            for leaf_index, path in enumerate(paths):
                path_result_left = 1
                path_result_right = 1
                for internal_node_index in path: 
                    #tf.print(path, internal_node_index)
                    split_index = tfa.seq2seq.hardmax(self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])
                    #split_index = tfa.activations.sparsemax(10 * self.split_index_array[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)])                        
                    
                    #split_values = tf.sigmoid(self.split_values)[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]
                    split_values = sigmoid_squeeze(self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]-0.5, self.squeeze_factor)
                    #split_values = self.split_values[self.number_of_variables*internal_node_index:self.number_of_variables*(internal_node_index+1)]
                    
                    internal_node_split_value = tf.reduce_sum(split_index*split_values)
                    respective_input_value = tf.reduce_sum(split_index*entry)


                    #tf.print('internal_node_split_value', internal_node_split_value)
                    #tf.print('respective_input_value', respective_input_value)

                    #split_decision = tf.keras.activations.relu(tf.math.sign(respective_input_value - internal_node_split_value - 0.5))
                    #split_decision = tf.sigmoid(100 * (respective_input_value - internal_node_split_value - 0.5))
                    split_decision = tf.round(tf.sigmoid(respective_input_value - internal_node_split_value - 0.5))
                    #tf.print('split_decision', split_decision)


                    path_result_left *= split_decision
                    path_result_right *= (1 - split_decision)

                    #tf.print('path_result_left', path_result_left)
                    #tf.print('path_result_right', path_result_right)

                #tf.print('path_result_left', path_result_left, summarize=-1)
                #tf.print('path_result_right', path_result_right, summarize=-1)
                #tf.print('tf.sigmoid(self.leaf_classes_array)', tf.sigmoid(self.leaf_classes_array), summarize=-1)
                
                #result += tf.sigmoid(self.leaf_classes_array)[leaf_index*2] * path_result_left + tf.sigmoid(self.leaf_classes_array)[leaf_index*2+1] * path_result_right
                result += self.leaf_classes_array[leaf_index*2] * path_result_left + self.leaf_classes_array[leaf_index*2+1] * path_result_right
                #tf.print(result)
            return result
            #tf.print('RESULT', result)

            #function_values_dhdt.append(result)
            #tf.autograph.experimental.set_loop_options(
            #        shape_invariants=[(function_values_dhdt, tf.TensorShape([None]))]
            #    )            
            #function_values_dhdt = tf.concat([function_values_dhdt, [result]], 0)
            #function_values_dhdt[entry_index] = result
            #entry_index += 1
        #function_values_dhdt = tf.stack(function_values_dhdt)
        #tf.print('function_values_dhdt', function_values_dhdt)

        function_values_dhdt = tf.vectorized_map(process, X)
        
        return function_values_dhdt  
           
        
    def predict(self, X):
        return tf.sigmoid(self.forward_hard(X))
        
    def backward(self, x,y):
        #optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)#tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)
        with tf.GradientTape(persistent=True) as tape:
            predicted = self.forward(x)
            if self.loss.__name__  == 'binary_crossentropy':
                current_loss = self.loss(y, predicted, from_logits=True)
            else:
                current_loss = self.loss(y, predicted, from_logits=True)
        #tf.print('predicted', predicted)
        #tf.print('current_loss', current_loss, summarize=-1)
        grads = tape.gradient(current_loss, self.leaf_classes_array)
        self.optimizer.apply_gradients(zip([grads], [self.leaf_classes_array]))
        #tf.print('grads', grads, summarize=-1)        
        
        grads = tape.gradient(current_loss, self.split_values)
        self.optimizer.apply_gradients(zip([grads], [self.split_values]))
        #tf.print('grads', grads, summarize=-1)
        grads = tape.gradient(current_loss, self.split_index_array)
        self.optimizer.apply_gradients(zip([grads], [self.split_index_array]))
        #tf.print('grads', grads, summarize=-1)

        #                          global_step=tf.compat.v1.train.get_or_create_global_step())     
        
        return current_loss
        
    def plot(self, normalizer_list=None, path='./dt_plot.png'):
        from anytree import Node, RenderTree
        from anytree.exporter import DotExporter

        internal_node_num_ = 2 ** self.depth - 1 
        
        #split_values = self.split_values
        split_values = sigmoid_squeeze(self.split_values, self.squeeze_factor)
        split_values_list_by_internal_node = tf.split(split_values, internal_node_num_)

        split_index_array = self.split_index_array 
        split_index_list_by_internal_node = tf.split(split_index_array, internal_node_num_)         

        split_index_list_by_internal_node_max = tfa.seq2seq.hardmax(split_index_list_by_internal_node)#tfa.activations.sparsemax(split_index_list_by_internal_node)

        splits = tf.stack(tf.multiply(split_values_list_by_internal_node, split_index_list_by_internal_node_max))

        
        splits = splits.numpy()
        leaf_classes = tf.sigmoid(self.leaf_classes_array).numpy()


        if normalizer_list is not None: 
            transpose = splits.transpose()
            transpose_normalized = []
            for i, column in enumerate(transpose):
                column_new = column
                if len(column_new[column_new != 0]) != 0:
                    column_new[column_new != 0] = normalizer_list[i].inverse_transform(column[column != 0].reshape(-1, 1)).ravel()
                #column_new = normalizer_list[i].inverse_transform(column.reshape(-1, 1)).ravel()
                transpose_normalized.append(column_new)
            splits = np.array(transpose_normalized).transpose()

        splits_by_layer = []
        for i in range(self.depth+1):
            start = 2**i - 1
            end = 2**(i+1) -1
            splits_by_layer.append(splits[start:end])

        nodes = {
        }
        #tree = Tree()
        for i, splits in enumerate(splits_by_layer):
            for j, split in enumerate(splits):
                if i == 0:
                    current_node_id = int(2**i - 1 + j)
                    name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
                    split_variable = np.argmax(np.abs(split))
                    split_value = np.round(split[split_variable], 3)
                    split_description = 'x' + str(split_variable) + ' <= '  + str(split_value)

                    nodes[name] = Node(name=name, display_name=split_description)

                    #tree.create_node(tag=split_description, identifier=name, data=None)            
                else:
                    current_node_id = int(2**i - 1 + j)
                    name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
                    parent_node_id = int(np.floor((current_node_id-1)/2))
                    parent_name = 'n' + str(parent_node_id)
                    split_variable = np.argmax(np.abs(split))
                    split_value = np.round(split[split_variable], 3)
                    split_description = 'x' + str(split_variable) + ' <= '  + str(split_value)

                    nodes[name] = Node(name=name, parent=nodes[parent_name], display_name=split_description)

                    #tree.create_node(tag=split_description, identifier=name, parent=parent_name, data=None)

        for j, leaf_class in enumerate(leaf_classes):
            i = self.depth
            current_node_id = int(2**i - 1 + j)
            name = 'n' + str(current_node_id)#'l' + str(i) + 'n' + str(j)
            parent_node_id = int(np.floor((current_node_id-1)/2))
            parent_name = 'n' + str(parent_node_id)
            #split_variable = np.argmax(np.abs(split))
            #split_value = np.round(split[split_variable], 3)
            split_description = str(np.round((leaf_class), 3))#'x' + str(split_variable) + ' <= '  + str(split_value)
            nodes[name] = Node(name=name, parent=nodes[parent_name], display_name=split_description)
            #tree.create_node(tag=split_description, identifier=name, parent=parent_name, data=None)        

            DotExporter(nodes['n0'], nodeattrfunc=lambda node: 'label="{}"'.format(node.display_name)).to_picture(path)


        return Image(path)#, nodes#nodes#tree        

        
    

In [7]:
X, y = make_classification(
    n_samples=10_000, n_features=5, n_informative=2, n_redundant=2, random_state=42
)

#todo: anpassen, dass nur basierend auf train data normalized
X, normalizer_list = normalize_real_world_data(X)

train_samples = 1_000#1000  # Samples used for training the models


X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=10_000 - train_samples,
    random_state=42
)

In [8]:
model_sklearn = DecisionTreeClassifier(max_depth=3, random_state=42)

model_sklearn.fit(X_train, y_train)

model_sklearn.score(X_test, y_test)

0.8867777777777778

In [13]:
model_dhdt = DHDT(
            depth=3,
            number_of_variables = 5,
            learning_rate=1e-3,
            squeeze_factor = 5,
            loss='binary_crossentropy',#'binary_crossentropy',
            random_seed=40,
            verbosity=1)

model_dhdt.fit(X_train, y_train, batch_size=64, epochs=500, early_stopping_epochs=20)

y_test_model = model_dhdt.predict(X_test)
score_dhdt = accuracy_score(y_test, np.round(y_test_model))

print('Test Accuracy', score_dhdt)

[-0.0153064132 -0.0897364467 -0.255949974 0.102142185 -0.0305482149 0.0117638409 -0.0961396396 -0.0327639878 -0.00911542773 -0.276941836 -0.288011432 -0.149366185 0.0131245553 0.0817540586 0.171625912 0.0424082279 -0.147876471 0.215894341 -0.150019452 0.277546763 -0.122481659 -0.0410619676 0.288627446 0.134136051 0.21903801 0.232999921 -0.234535635 0.0170893669 -0.0714436173 -0.206922442 -0.241696537 -0.194186896 -0.0684360713 -0.195885241 -0.151363343]
[0.189929694 -0.124219164 -0.239768669 -0.128166318 0.201738775 0.202801287 -0.085945785 0.107319593 -0.23032847 0.0156517923 0.281928062 -0.253920019 -0.171836779 -0.241361693 -0.0246061087 0.24662739 -0.0789196789 -0.230450898 -0.0202791095 0.225266933 0.24908185 0.0753646791 0.155470759 0.142386407 -0.187254876 -0.0962447673 0.218839943 -0.243292749 -0.0288981497 0.156297654 -0.245937482 -0.0587791353 0.0103982389 -0.290770262 0.283521712]
[-0.107723773 0.0664796233 -0.384002119 0.42544347 -0.265771687 -0.113815188 -0.606380284 -0.57

  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 00 | Loss: 0.76033 |
Epoch: 01 | Loss: 0.73876 |
Epoch: 02 | Loss: 0.71627 |
Epoch: 03 | Loss: 0.69645 |
Epoch: 04 | Loss: 0.67766 |
Epoch: 05 | Loss: 0.66109 |
Epoch: 06 | Loss: 0.64623 |
Epoch: 07 | Loss: 0.63128 |
Epoch: 08 | Loss: 0.61912 |
Epoch: 09 | Loss: 0.60673 |
Epoch: 10 | Loss: 0.59432 |
Epoch: 11 | Loss: 0.58153 |
Epoch: 12 | Loss: 0.57466 |
Epoch: 13 | Loss: 0.56440 |
Epoch: 14 | Loss: 0.55809 |
Epoch: 15 | Loss: 0.55187 |
Epoch: 16 | Loss: 0.54527 |
Epoch: 17 | Loss: 0.53455 |
Epoch: 18 | Loss: 0.52688 |
Epoch: 19 | Loss: 0.51489 |
Epoch: 20 | Loss: 0.50788 |
Epoch: 21 | Loss: 0.50093 |
Epoch: 22 | Loss: 0.49079 |
Epoch: 23 | Loss: 0.48354 |
Epoch: 24 | Loss: 0.48060 |
Epoch: 25 | Loss: 0.47382 |
Epoch: 26 | Loss: 0.46606 |
Epoch: 27 | Loss: 0.46154 |
Epoch: 28 | Loss: 0.45523 |
Epoch: 29 | Loss: 0.45242 |
Epoch: 30 | Loss: 0.44982 |
Epoch: 31 | Loss: 0.44446 |
Epoch: 32 | Loss: 0.44293 |
Epoch: 33 | Loss: 0.43974 |
Epoch: 34 | Loss: 0.43670 |
Epoch: 35 | Loss: 0.

In [14]:
model_dhdt = DHDT(
            depth=3,
            number_of_variables = 5,
            learning_rate=1e-3,
            squeeze_factor = 5,
            loss='binary_crossentropy',#'binary_crossentropy',
            random_seed=41,
            verbosity=1)

model_dhdt.fit(X_train, y_train, batch_size=64, epochs=500, early_stopping_epochs=20)

y_test_model = model_dhdt.predict(X_test)
score_dhdt = accuracy_score(y_test, np.round(y_test_model))

print('Test Accuracy', score_dhdt)

[-0.220465124 -0.255536348 0.0976486206 -0.00916129351 -0.261932015 0.0230374038 0.288691342 0.170627534 -0.0870554894 0.0270647407 0.110997587 0.0621287823 0.132869 0.00538137555 0.148039192 -0.0770659447 0.265260041 0.0907332 -0.18583867 -0.235048532 0.178994954 -0.00596177578 -0.0612244904 0.125658631 0.137933135 0.250172377 -0.129651144 0.209979832 -0.282800853 0.132009596 -0.253193587 0.117978752 0.0385412872 0.142327785 0.246568859]
[0.126232684 -0.0315164328 -0.292304039 -0.138535246 0.243391097 -0.034647882 -0.114259422 -0.0921471864 0.264761925 0.0376400054 -0.0463365316 -0.14340657 -0.104664683 0.204574943 -0.118991494 0.0154892206 -0.154772684 -0.113403723 -0.278707117 0.134550452 0.209358096 -0.1278 0.00668478 -0.142500341 -0.109868616 -0.00778451562 0.0010073185 0.228656173 -0.0195489824 -0.0541208386 0.047627449 0.287532687 0.0835378468 0.209075212 -0.138658658]
[0.0462907553 -0.342950016 0.22330308 0.216890454 0.582392871 -0.526290298 -0.610656202 -0.54491812]


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 00 | Loss: 0.71917 |
Epoch: 01 | Loss: 0.71401 |
Epoch: 02 | Loss: 0.70973 |
Epoch: 03 | Loss: 0.70609 |
Epoch: 04 | Loss: 0.70351 |
Epoch: 05 | Loss: 0.70050 |
Epoch: 06 | Loss: 0.69879 |
Epoch: 07 | Loss: 0.69634 |
Epoch: 08 | Loss: 0.69463 |
Epoch: 09 | Loss: 0.69227 |
Epoch: 10 | Loss: 0.69054 |
Epoch: 11 | Loss: 0.68854 |
Epoch: 12 | Loss: 0.68682 |
Epoch: 13 | Loss: 0.68586 |
Epoch: 14 | Loss: 0.68366 |
Epoch: 15 | Loss: 0.68248 |
Epoch: 16 | Loss: 0.68033 |
Epoch: 17 | Loss: 0.67910 |
Epoch: 18 | Loss: 0.67734 |
Epoch: 19 | Loss: 0.67525 |
Epoch: 20 | Loss: 0.67403 |
Epoch: 21 | Loss: 0.67166 |
Epoch: 22 | Loss: 0.67065 |
Epoch: 23 | Loss: 0.66849 |
Epoch: 24 | Loss: 0.66705 |
Epoch: 25 | Loss: 0.66564 |
Epoch: 26 | Loss: 0.66349 |
Epoch: 27 | Loss: 0.66179 |
Epoch: 28 | Loss: 0.66071 |
Epoch: 29 | Loss: 0.65924 |
Epoch: 30 | Loss: 0.65671 |
Epoch: 31 | Loss: 0.65493 |
Epoch: 32 | Loss: 0.65332 |
Epoch: 33 | Loss: 0.65131 |
Epoch: 34 | Loss: 0.64846 |
Epoch: 35 | Loss: 0.

SystemError: PyEval_EvalFrameEx returned a result with an error set

In [9]:
model_dhdt = DHDT(
            depth=3,
            number_of_variables = 5,
            learning_rate=1e-3,
            squeeze_factor = 5,
            loss='binary_crossentropy',#'binary_crossentropy',
            random_seed=40,
            verbosity=1)

model_dhdt.fit(X_train, y_train, batch_size=64, epochs=500, early_stopping_epochs=20)

y_test_model = model_dhdt.predict(X_test)
score_dhdt = accuracy_score(y_test, np.round(y_test_model))

print('Test Accuracy', score_dhdt)

[0.10405615 -0.0325394273 -0.0746977776 0.00976884365 0.0798775554 -0.0226446092 0.199392498 -0.0884800851 -0.157206744 -0.210958183 -0.283841044 0.250602424 -0.0695579201 -0.157768637 0.203923196 0.07348001 0.08279562 0.12547636 -0.187254116 0.280626178 -0.0511720628 -0.0946338 0.0703919232 0.0186030269 0.0230322182 -0.0131372511 0.0924431384 0.149752051 -0.209223688 -0.232426018 0.0220719576 0.25241524 -0.0870755315 -0.163386226 -0.23524265]
[-0.193196684 -0.147729188 -0.171274662 0.00118607283 0.0514366925 0.269179761 -0.0124358237 -0.198199391 0.201756209 -0.0928686559 0.209533036 0.285333335 -0.23075369 0.0342013538 0.0404126644 0.258410096 0.0399357677 -2.68816948e-05 0.18228367 -0.180432796 0.224031985 -0.0702865124 0.262633741 0.130812496 0.264650702 -0.112750098 0.14068687 0.110420108 0.264017045 -0.141295969 -0.276891381 -0.0761598498 0.0672431588 0.127154201 0.223339498]
[0.579931796 -0.0352647901 0.427904189 -0.24518615 -0.0250062943 -0.300857484 0.514985263 0.309282303]


  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 00 | Loss: 0.63631 |
Epoch: 01 | Loss: 0.62735 |
Epoch: 02 | Loss: 0.61837 |
Epoch: 03 | Loss: 0.60899 |
Epoch: 04 | Loss: 0.60255 |
Epoch: 05 | Loss: 0.59557 |
Epoch: 06 | Loss: 0.58913 |
Epoch: 07 | Loss: 0.58325 |
Epoch: 08 | Loss: 0.57815 |
Epoch: 09 | Loss: 0.57276 |
Epoch: 10 | Loss: 0.56644 |
Epoch: 11 | Loss: 0.56276 |
Epoch: 12 | Loss: 0.55956 |
Epoch: 13 | Loss: 0.55407 |
Epoch: 14 | Loss: 0.55059 |
Epoch: 15 | Loss: 0.54930 |
Epoch: 16 | Loss: 0.54421 |
Epoch: 17 | Loss: 0.54221 |
Epoch: 18 | Loss: 0.54021 |
Epoch: 19 | Loss: 0.53661 |
Epoch: 20 | Loss: 0.53426 |
Epoch: 21 | Loss: 0.53008 |
Epoch: 22 | Loss: 0.52868 |
Epoch: 23 | Loss: 0.52588 |
Epoch: 24 | Loss: 0.52580 |
Epoch: 25 | Loss: 0.52182 |
Epoch: 26 | Loss: 0.51721 |
Epoch: 27 | Loss: 0.51485 |
Epoch: 28 | Loss: 0.51376 |
Epoch: 29 | Loss: 0.51153 |
Epoch: 30 | Loss: 0.50900 |
Epoch: 31 | Loss: 0.50688 |
Epoch: 32 | Loss: 0.50597 |
Epoch: 33 | Loss: 0.50317 |
Epoch: 34 | Loss: 0.50192 |
Epoch: 35 | Loss: 0.

In [10]:
model_dhdt = DHDT(
            depth=3,
            number_of_variables = 5,
            learning_rate=1e-3,
            squeeze_factor = 5,
            loss='binary_crossentropy',#'binary_crossentropy',
            random_seed=41,
            verbosity=1)

model_dhdt.fit(X_train, y_train, batch_size=64, epochs=500, early_stopping_epochs=20)

y_test_model = model_dhdt.predict(X_test)
score_dhdt = accuracy_score(y_test, np.round(y_test_model))

print('Test Accuracy', score_dhdt)

[0.230341315 -0.290073931 -0.265134513 0.0122451186 -0.234274849 0.0229974687 -0.0359121859 -0.160120264 -0.253641635 0.0109185278 0.264867 -0.0410477817 0.00220483541 0.255836725 -0.017172426 0.196480244 0.170074552 0.259053111 0.00863462687 0.206366539 -0.206700683 0.0341176689 -0.0680171251 -0.0647429228 0.120285153 -0.275357276 0.00239357352 -0.0682737082 0.102100372 0.288813055 0.0274158418 0.244256496 0.186001599 0.029227972 -0.226596504]
[0.256135583 0.0570820272 -0.140065849 -0.265346497 -0.0383561552 -0.279242098 0.114590496 -0.160129622 0.250803411 -0.226013869 -0.100860551 -0.000622838736 -0.274319649 -0.0326150954 -0.00306400657 -0.243238807 -0.0329400897 0.135204017 -0.194108516 -0.114808559 -0.128485322 -0.20707804 -0.277358353 -0.236792535 0.107991159 -0.0141676664 -0.135069653 -0.000762105 0.000439673662 -0.0071644485 0.243114769 0.0424708426 -0.104558229 -0.0628411 0.212780595]
[0.41184479 0.347100198 -0.502791405 0.129827738 0.468471467 -0.0732898712 0.269770801 0.351

  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 00 | Loss: 0.74299 |
Epoch: 01 | Loss: 0.72826 |
Epoch: 02 | Loss: 0.71319 |
Epoch: 03 | Loss: 0.69539 |
Epoch: 04 | Loss: 0.67779 |
Epoch: 05 | Loss: 0.66187 |
Epoch: 06 | Loss: 0.64740 |
Epoch: 07 | Loss: 0.63489 |
Epoch: 08 | Loss: 0.62193 |
Epoch: 09 | Loss: 0.60977 |
Epoch: 10 | Loss: 0.59580 |
Epoch: 11 | Loss: 0.58325 |
Epoch: 12 | Loss: 0.57173 |
Epoch: 13 | Loss: 0.56155 |
Epoch: 14 | Loss: 0.54962 |
Epoch: 15 | Loss: 0.54021 |
Epoch: 16 | Loss: 0.53216 |
Epoch: 17 | Loss: 0.52412 |
Epoch: 18 | Loss: 0.51492 |
Epoch: 19 | Loss: 0.50763 |
Epoch: 20 | Loss: 0.50130 |
Epoch: 21 | Loss: 0.49350 |
Epoch: 22 | Loss: 0.48797 |
Epoch: 23 | Loss: 0.48162 |
Epoch: 24 | Loss: 0.47680 |
Epoch: 25 | Loss: 0.47269 |
Epoch: 26 | Loss: 0.46577 |
Epoch: 27 | Loss: 0.46136 |
Epoch: 28 | Loss: 0.45699 |
Epoch: 29 | Loss: 0.45272 |
Epoch: 30 | Loss: 0.44699 |
Epoch: 31 | Loss: 0.44236 |
Epoch: 32 | Loss: 0.43929 |
Epoch: 33 | Loss: 0.43586 |
Epoch: 34 | Loss: 0.43399 |
Epoch: 35 | Loss: 0.

In [11]:
z

NameError: name 'z' is not defined

In [None]:
model_dhdt = DHDT(
            depth=3,
            number_of_variables = 5,
            learning_rate=1e-3,
            squeeze_factor = 5,
            loss='binary_crossentropy',#'binary_crossentropy',
            random_seed=41,
            verbosity=0)

model_dhdt.fit(X_train, y_train, batch_size=64, epochs=500, early_stopping_epochs=20)

y_test_model = model_dhdt.predict(X_test)
score_dhdt = accuracy_score(y_test, np.round(y_test_model))

print('Test Accuracy', score_dhdt)

In [None]:
model_dhdt = DHDT(
            depth=3,
            number_of_variables = 5,
            learning_rate=1e-3,
            squeeze_factor = 5,
            loss='binary_crossentropy',#'binary_crossentropy',
            random_seed=41,
            verbosity=1)

model_dhdt.fit(X_train, y_train, batch_size=64, epochs=500, early_stopping_epochs=20)

y_test_model = model_dhdt.predict(X_test)
score_dhdt = accuracy_score(y_test, np.round(y_test_model))

print('Test Accuracy', score_dhdt)

In [None]:
plt.figure(figsize=(15,8))
image = model_dhdt.plot()
display(image)

plt.figure(figsize=(15,8))
plot_tree(model_sklearn, fontsize=10) 
plt.show()

In [None]:
X_test[:5]

In [None]:
model_dhdt.dt_params

In [None]:
tf.sigmoid(model_dhdt.dt_params)

In [None]:
model_dhdt.predict(X_test[:5])

In [None]:
plt.figure(figsize=(15,8))
image = model_dhdt.plot()
display(image)

plt.figure(figsize=(15,8))
plot_tree(model_sklearn, fontsize=10) 
plt.show()