<pre>
                |
                |
                A
              _/X\_
              \/X\/
               |V|
               |A|
               |V|
              /XXX\
              |\/\|
              |/\/|
              |\/\|
              |/\/|
              |\/\|
              |/\/|
             IIIIIII
             |\/_\/|
            /\// \\/\
            |/|   |\|
           /\X/___\X/\
          IIIIIIIIIIIII
         /`-\/XXXXX\/-`\
       /`.-'/\|/I\|/\'-.`\
      /`\-/_.-"` `"-._ \-/\
     /.-'.'           '.'-.\
   /`\-/               \-/`\
 _/`-'/`_               _`\'-`\_
`"""""""`                `""""""`

Paris Housing ANN + Decision Forest
by <b>Alin Cijov</b>
</pre>

In [None]:
import math
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import initializers

from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

<h1 id="dataset" style="color:black; background:white; border:0.5px dotted black;"> 
    <left>Dataset
        <a class="anchor-link" href="#dataset" target="_self">¶</a>
    </left>
</h1>

In [None]:
def get_data(path='../input/paris-housing-classification/ParisHousingClass.csv'):
    """Load the Paris housing classification CSV and derive the model inputs.

    Args:
        path: Location of the CSV file. Defaults to the Kaggle input path
            this notebook was written against.

    Returns:
        A tuple ``(df, features, target, targets)``:
            df: the full frame exactly as read from disk.
            features: every column of ``df`` except the last two.
            target: the ``category`` column encoded as integer codes,
                numbered in order of first appearance.
            targets: one-hot encoding of ``category`` (one column per class).
    """
    df = pd.read_csv(path)

    # Integer code per category, numbered in order of first appearance.
    category_to_idx = {name: idx for idx, name in enumerate(df['category'].unique())}

    features = df.iloc[:, :-2]
    # ``map`` is a plain lookup and always yields integer codes here, whereas
    # ``replace`` on an object column relies on pandas' implicit downcasting.
    target = df['category'].map(category_to_idx)
    targets = pd.get_dummies(df['category'])

    return df, features, target, targets

In [None]:
# Load the dataset once; `features` and `targets` are also read further down
# to size the decision forest (number of input features / number of classes).
df, features, target, targets = get_data()
# Preview the first rows of the raw frame.
df.head()

In [None]:
# Hold out 33% of the rows as a final test set; the fixed random_state keeps
# the split the same between runs. The split is not stratified.
train, test = train_test_split(df, test_size=0.33, random_state=42)

<h1 id="tree" style="color:black; background:white; border:0.5px dotted black;"> 
    <left>Decision Forest
        <a class="anchor-link" href="#tree" target="_self">¶</a>
    </left>
</h1>

In [None]:
class NeuralDecisionTree(keras.Model):
    """Differentiable decision tree with soft (probabilistic) routing.

    Every input is routed to all ``2 ** depth`` leaves with some probability;
    the prediction is the routing-probability-weighted mix of the per-leaf
    class distributions.
    """
    def __init__(self, depth: int, num_features: int, used_features_rate: float, num_classes: int) -> None:
        """Create the feature mask, the leaf class weights and the routing layer.

        Args:
            depth: Number of routing levels; the tree has ``2 ** depth`` leaves.
            num_features: Width of the input feature vector.
            used_features_rate: Fraction of the input features this tree
                sees; ``int(num_features * used_features_rate)`` are sampled.
            num_classes: Number of output classes.
        """
        super(NeuralDecisionTree, self).__init__()
        self.depth = depth
        self.num_leaves = 2 ** depth
        self.num_classes = num_classes

        # Create a mask for the randomly selected features.
        # The sample is drawn without replacement from NumPy's global RNG, so
        # it differs between runs unless that RNG is seeded beforehand.
        num_used_features = int(num_features * used_features_rate)
        one_hot = np.eye(num_features)
        sampled_feature_indicies = np.random.choice(
            np.arange(num_features), num_used_features, replace=False
        )
        # Rows of the identity matrix for the sampled features:
        # shape [num_used_features, num_features].
        self.used_features_mask = one_hot[sampled_feature_indicies]

        # Initialize the weights of the classes in leaves.
        # Unnormalised scores; `call` turns them into probabilities via softmax.
        self.pi = tf.Variable(
            initial_value=tf.random_normal_initializer()(
                shape=[self.num_leaves, self.num_classes]
            ),
            dtype="float32",
            trainable=True,
        )

        # Initialize the stochastic routing layer.
        # It has num_leaves units, but `call` only reads units
        # 1 .. num_leaves - 1 (one per internal node); unit 0 is never used.
        self.decision_fn = layers.Dense(
            units=self.num_leaves, activation="sigmoid", name="decision"
        )

    def call(self, features):
        """Return class probabilities of shape ``[batch_size, num_classes]``.

        Args:
            features: Tensor of shape ``[batch_size, num_features]``.
        """
        batch_size = tf.shape(features)[0]

        # Apply the feature mask to the input features.
        features = tf.matmul(
            features, self.used_features_mask, transpose_b=True
        )  # [batch_size, num_used_features]
        # Compute the routing probabilities.
        decisions = tf.expand_dims(
            self.decision_fn(features), axis=2
        )  # [batch_size, num_leaves, 1]
        # Concatenate the routing probabilities with their complements.
        decisions = layers.concatenate(
            [decisions, 1 - decisions], axis=2
        )  # [batch_size, num_leaves, 2]

        # mu holds the probability of reaching each node of the current
        # level; the root is reached with probability 1.
        mu = tf.ones([batch_size, 1, 1])

        # Node indices of the current level are [begin_idx, end_idx);
        # the root is node 1.
        begin_idx = 1
        end_idx = 2
        # Traverse the tree in breadth-first order.
        for level in range(self.depth):
            mu = tf.reshape(mu, [batch_size, -1, 1])  # [batch_size, 2 ** level, 1]
            mu = tf.tile(mu, (1, 1, 2))  # [batch_size, 2 ** level, 2]
            level_decisions = decisions[
                :, begin_idx:end_idx, :
            ]  # [batch_size, 2 ** level, 2]
            # Probability of reaching each child = probability of reaching
            # the parent times the parent's branch probability.
            mu = mu * level_decisions  # [batch_size, 2**level, 2]
            # The next level starts where this one ended and is twice as wide.
            begin_idx = end_idx
            end_idx = begin_idx + 2 ** (level + 1)

        mu = tf.reshape(mu, [batch_size, self.num_leaves])  # [batch_size, num_leaves]
        probabilities = keras.activations.softmax(self.pi)  # [num_leaves, num_classes]
        # Mix the per-leaf class distributions by the leaf-reaching probabilities.
        outputs = tf.matmul(mu, probabilities)  # [batch_size, num_classes]
        return outputs

In [None]:
class NeuralDecisionForest(keras.Model):
    """Ensemble of ``NeuralDecisionTree`` models whose outputs are averaged."""

    def __init__(self, num_trees, depth, num_features, used_features_rate, num_classes):
        """Build ``num_trees`` independent trees.

        Args:
            num_trees: Number of trees in the ensemble; must be at least 1.
            depth: Depth passed to every tree.
            num_features: Width of the input feature vector.
            used_features_rate: Fraction of features each tree samples.
            num_classes: Number of output classes.

        Raises:
            ValueError: If ``num_trees`` is smaller than 1.
        """
        super().__init__()
        if num_trees < 1:
            raise ValueError("num_trees must be at least 1, got %r" % (num_trees,))
        # Kept on the instance so `call` does not depend on a notebook-level
        # global of the same name.
        self.num_classes = num_classes
        # Each tree draws its own random subset of the input features.
        self.ensemble = [
            NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)
            for _ in range(num_trees)
        ]

    def call(self, inputs):
        """Return the mean of the trees' outputs, ``[batch_size, num_classes]``."""
        # Start from a [batch_size, num_classes] matrix of zeros.
        batch_size = tf.shape(inputs)[0]
        outputs = tf.zeros([batch_size, self.num_classes])

        # Sum the outputs of all trees ...
        for tree in self.ensemble:
            outputs += tree(inputs)
        # ... and divide by the ensemble size to get the average.
        outputs /= len(self.ensemble)
        return outputs

<h1 id="params" style="color:black; background:white; border:0.5px dotted black;"> 
    <left>Hyperparameters
        <a class="anchor-link" href="#params" target="_self">¶</a>
    </left>
</h1>

In [None]:
# NOTE(review): none of these four values is read by the cells visible in this
# notebook; the training cell passes its own literals to `compile`/`fit`
# (learning rates 0.0001 / 0.001, batch_size 256, epochs 50). Confirm whether
# they are leftovers or should be wired into the training cell.
learning_rate = 0.01
batch_size = 265
num_epochs = 10
hidden_units = [64, 64]

In [None]:
# Forest shape: 10 trees of depth 10, each seeing every input feature.
num_trees = 10
depth = 10
used_features_rate = 1.0

# Sizes derived from the loaded data: one class per one-hot column, one
# feature per feature column.
# NOTE(review): the training cell calls the forest on the 16-unit Dense
# output, so `num_features` has to equal that width - confirm for other data.
num_classes = len(targets.columns)
num_features = len(features.columns)

forest_model = NeuralDecisionForest(
    num_trees=num_trees,
    depth=depth,
    num_features=num_features,
    used_features_rate=used_features_rate,
    num_classes=num_classes,
)

<h1 id="model" style="color:black; background:white; border:0.5px dotted black;"> 
    <left>ANN Model
        <a class="anchor-link" href="#model" target="_self">¶</a>
    </left>
</h1>

In [None]:
def get_embed_layer():
    """Return a new, untrained embedding layer for the 16 integer inputs.

    Each input value (an index below 100000) is mapped to a single learned
    scalar.
    """
    embedding = layers.Embedding(input_dim=100000, output_dim=1, input_length=16)
    return embedding

In [None]:
def get_model(embed_layer, num_features=16, num_classes=2):
    """Build the ANN graph on top of ``embed_layer``.

    Args:
        embed_layer: Embedding layer applied to the raw integer inputs.
        num_features: Width of the input vector. Defaults to 16, the value
            that was previously hard-coded.
        num_classes: Number of units in the softmax output. Defaults to 2,
            the value that was previously hard-coded.

    Returns:
        A tuple ``(inputs, dense, output)``: the input tensor, the output of
        the first 16-unit dense layer (used as the feature tensor for other
        heads), and the softmax output of the ANN.
    """
    inputs = layers.Input(shape=(num_features,))
    x = embed_layer(inputs)
    x = layers.Flatten()(x)

    # Shared representation; returned so another head can be attached to it.
    dense = layers.Dense(
        16,
        activation='relu',
        kernel_initializer='random_uniform',
        bias_initializer=initializers.Constant(0.1),
    )(x)

    x = layers.Dropout(0.3)(dense)
    x = layers.Dense(50, activation='relu')(x)
    # NOTE(review): two Dropout(0.3) layers are stacked back to back here,
    # which looks like an accidental duplicate. Kept as-is so the model is
    # unchanged - confirm whether a single Dropout was intended.
    x = layers.Dropout(0.3)(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(20, activation='relu')(x)
    output = layers.Dense(num_classes, activation='softmax')(x)

    return inputs, dense, output

<h1 id="add" style="color:black; background:white; border:0.5px dotted black;"> 
    <left>Additional Functions
        <a class="anchor-link" href="#add" target="_self">¶</a>
    </left>
</h1>

In [None]:
# Loss and metric shared by both models: categorical cross-entropy on the
# one-hot targets.
metrics = [keras.metrics.CategoricalCrossentropy()]
loss = keras.losses.CategoricalCrossentropy()

# Stop once val_loss has not improved by more than 1e-7 for 2 epochs and
# roll back to the best weights seen.
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=1e-7,
    patience=2,
    baseline=None,
    restore_best_weights=True,
    verbose=0,
)

# Halve the learning rate when val_loss plateaus, never going below 1e-6
# (the same value as the original 10e-7 literal).
plateau = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=2,
    min_delta=1e-7,
    cooldown=0,
    min_lr=1e-6,
    verbose=0,
)

<h1 id="train" style="color:black; background:white; border:0.5px dotted black;"> 
    <left>Training
        <a class="anchor-link" href="#train" target="_self">¶</a>
    </left>
</h1>

In [None]:
# Cross-validation configuration.
N_FOLDS = 3
SEED = 2021

# Prediction buffers, one row per sample and one column per class (2):
# `oof_*` collect out-of-fold predictions on `train`, `pred_*` accumulate the
# fold-averaged predictions on the hold-out `test` split.
oof_embedding = np.zeros((len(train), 2))
pred_embedding = np.zeros((len(test), 2))

oof_forest = np.zeros((len(train), 2))
pred_forest = np.zeros((len(test), 2))

skf = StratifiedKFold(n_splits=N_FOLDS, random_state=SEED, shuffle=True)

In [None]:
# Fold-independent views, computed once.
# `targets` is indexed like the full `df`, while the fold indices produced by
# `skf.split(train, ...)` are positions within `train`. Select the `train`
# rows of `targets` by label first so features and labels stay aligned.
train_features = train.iloc[:, :-2]
train_targets = targets.loc[train.index]
assert train_targets.index.equals(train.index)
# Hold-out features, in the same raw form the models are fitted on.
holdout_features = test.iloc[:, :-2]

for fold, (tr_idx, ts_idx) in enumerate(skf.split(train, train.iloc[:, -1])):
    print("\n - - - -      Fold: %2d      - - - - \n" % (fold + 1))

    # Training / validation parts of this fold (positions within `train`).
    X_train = train_features.iloc[tr_idx]
    y_train = train_targets.iloc[tr_idx]
    X_test = train_features.iloc[ts_idx]
    y_test = train_targets.iloc[ts_idx]

    inputs, dense, output = get_model(get_embed_layer())

    # A fresh forest per fold: reusing one instance would carry weights that
    # were already fitted on this fold's validation rows in earlier folds.
    fold_forest = NeuralDecisionForest(
        num_trees, depth, num_features, used_features_rate, num_classes
    )

    # Both models share the input, the embedding and the first dense layer.
    model_embedding = tf.keras.Model(inputs, output)
    model_forest = tf.keras.Model(inputs, fold_forest(dense))

    # ANN
    model_embedding.compile(
        tf.keras.optimizers.Adam(learning_rate=0.0001),
        loss=loss,
        metrics=metrics,
    )

    model_embedding.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=50,
        verbose=0,
        batch_size=256,
        callbacks=[es, plateau],
    )

    oof_embedding[ts_idx] = model_embedding.predict(X_test)
    score_embedding = log_loss(y_test, oof_embedding[ts_idx])
    print("Fold %2d, Loss for ANN model: %1.3f" % ((fold + 1), score_embedding))
    # Predict on the raw hold-out features; the model applies its own trained
    # embedding, so the input must not be pre-embedded by another layer.
    pred_embedding += model_embedding.predict(holdout_features) / N_FOLDS

    # Decision Forest (trained after the ANN, on top of the shared layers)
    model_forest.compile(
        tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=loss,
        metrics=metrics,
    )

    model_forest.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        batch_size=256,
        epochs=50,
        verbose=0,
        callbacks=[es, plateau],
    )

    oof_forest[ts_idx] = model_forest.predict(X_test)
    score_forest = log_loss(y_test, oof_forest[ts_idx])
    print("Fold %2d, Loss for decision forest: %1.3f" % ((fold + 1), score_forest))

    pred_forest += model_forest.predict(holdout_features) / N_FOLDS

<h1 id="result" style="color:black; background:white; border:0.5px dotted black;"> 
    <left>Results
        <a class="anchor-link" href="#result" target="_self">¶</a>
    </left>
</h1>

## Losses

In [None]:
# One-hot labels of the hold-out split, built from its last column.
one_hot_test = pd.get_dummies(test.iloc[:, -1])

# Log loss of the fold-averaged hold-out predictions of each model.
score_embedding = log_loss(y_true=one_hot_test, y_pred=pred_embedding)
print("ANN Model Loss : %1.3f" % (score_embedding,))

score_forest = log_loss(y_true=one_hot_test, y_pred=pred_forest)
print("Decision Forest Loss: %1.3f" % (score_forest,))

## Accuracies

In [None]:
def get_accuracy(test, pred):
    """Return the accuracy of ``pred`` against one-hot labels ``test``.

    Args:
        test: Object with a ``.values`` 2-D array of one-hot labels
            (one row per sample).
        pred: 2-D array of per-class scores, one row per sample.

    Both are reduced to class indices with an arg-max over the columns before
    being compared.
    """
    true_classes = np.argmax(test.values, axis=1)
    predicted_classes = np.argmax(pred, axis=1)
    return accuracy_score(true_classes, predicted_classes)

In [None]:
# Hold-out accuracy of the fold-averaged predictions of each model.
acc_embedding = get_accuracy(test=one_hot_test, pred=pred_embedding)
print("ANN Model Accuracy : %1.3f" % (acc_embedding,))

acc_forest = get_accuracy(test=one_hot_test, pred=pred_forest)
print("Decision Forest Accuracy: %1.3f" % (acc_forest,))