In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import os
import json
import datetime
import dill
import tqdm
from queue import PriorityQueue

In [2]:
class hier_softmax:
    def __init__(self, tree, contex_size, model):
        self._tree_tools = TreeTools()
        self.str2weight = {}
        #create a weight matrix and bias vector for each node in the tree
        for i, subtree in enumerate(self._tree_tools._get_subtrees(tree)):
            self.str2weight["softmax_node_"+str(i)+"_w"] = model.add_parameters((len(subtree), contex_size))
            self.str2weight["softmax_node_" + str(i) + "_b"] = model.add_parameters(len(subtree))
        
        #create a dictionary from each value to its path
        value_to_path_and_nodes_dict = {}
        for path, value in self._tree_tools._get_leaves_paths(tree):
            nodes = self._tree_tools._get_nodes(tree, path)
            value_to_path_and_nodes_dict[data.char2int[value]] = path, nodes
        self.value_to_path_and_nodes_dict = value_to_path_and_nodes_dict
        self.model = model
        self.tree = tree
    
    #get the loss on a given value (for training)
    def get_loss(self, context, value):
        loss = []
        path, nodes = self.value_to_path_and_nodes_dict[value]
        for p, n in zip(path, nodes):
            w = dy.parameter(self.str2weight["softmax_node_"+str(n)+"_w"])
            b = dy.parameter(self.str2weight["softmax_node_" + str(n) + "_b"])
            probs = tf.nn.softmax(w*context+b)
            #loss.append(-tf.math.log(dy.pick(probs, p)))
            print(probs)
            print(p)
        #return dy.esum(loss)

    #get the most likely
    def generate(self, context):
        best_value = None
        best_loss = float(100000)
        for value in self.value_to_path_and_nodes_dict:
            loss = self.get_loss(context, value)
            if loss < best_loss:
                best_loss = loss
                best_value = value
        return best_value

## NOTE
Intermediate Nodes are indexed by **Preorder Traversal**

In [3]:
vocab_size = 16293
num_of_nodes = vocab_size - 1   # num of intermediate nodes

In [4]:
with open("outputs/tree_mapping.json", "r") as j:
    tree_mapping = json.load(j)     # category : [path, nodes in path]
tree_mapping = {int(i):j for i, j in tree_mapping.items()}      # As JSON converts key values to string
dict(list(tree_mapping.items())[:5]) 

{2192: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]],
 2396: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]],
 2647: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14]],
 1579: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14]],
 2247: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16]]}

In [19]:
train_set = np.genfromtxt("data/SEG_Wavenet_train_set.csv", delimiter="\n", dtype=np.float32)
val_set = np.genfromtxt("data/SEG_Wavenet_val_set.csv", delimiter="\n", dtype=np.float32)
dataset = np.r_[train_set, val_set][:5000]

In [20]:
dataset

array([0., 0., 0., ..., 1., 1., 1.], dtype=float32)

In [21]:
x_train = tf.data.Dataset.from_tensor_slices(dataset)
x_train = x_train.batch(1)

In [24]:
y_train = tf.data.Dataset.from_tensor_slices(dataset.astype(np.int32))
y_train = y_train.batch(1)

In [25]:
train_data = tf.data.Dataset.zip((x_train, y_train))

    @tf.function
    def call(self, inputs, training=False):
        self.cache = tf.zeros([num_nodes])

        total_loss = tf.TensorArray(dtype=tf.float32, size=vocab_size)
        for i in tf.range(self.vocab_size):
            loss = self.get_loss(inputs, i)
            total_loss = total_loss.write(i, tf.reduce_prod(loss, axis=-1))
        return total_loss.stack()
    
    @tf.function
    def get_loss(self, x, category):
        path = tf.gather(self.path_matrix, category)
        nodes = tf.gather(self.nodes_matrix, category)

        loss = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
        for i in tf.range(self.num_nodes):
            n = tf.gather(nodes, i)
            if n == -1:
                break
            w = tf.gather(self.softmax_weights, n)
            d = self.decision_function.lookup(tf.gather(path, i))
            sigma = tf.math.sigmoid(tf.tensordot(tf.matmul(x, w), d, axes=0))
            loss = loss.write(i, sigma)
        return tf.reduce_prod(loss.stack(), axis=0)

In [26]:
class HierachicalSoftmax(keras.Model):
    def __init__(self, hidden_shape, tree_mapping, vocab_size):
        super().__init__()

        self.hidden_shape = hidden_shape        # == input_shape[-1]
        self.vocab_size = vocab_size
        self.num_nodes = self.vocab_size - 1

        path_matrix = np.full([self.vocab_size, self.num_nodes], 0, dtype=np.float32)
        nodes_matrix = np.full([self.vocab_size, self.num_nodes], -1, dtype=np.int32)

        decision_function = {1:1, 0:-1}
        for category, (path, nodes) in tree_mapping.items():
            nodes_matrix[category][:len(path)] = nodes
            for i, p in enumerate(path):
                path_matrix[category][i] = decision_function[p]

        self.path_matrix = tf.constant(path_matrix)
        self.nodes_matrix = tf.constant(nodes_matrix)

    #def build(self, input_shape):
        self.initializer = keras.initializers.GlorotNormal()
        self.softmax_weights = tf.Variable(self.initializer(shape=(self.num_nodes, self.hidden_shape)))
    
    @tf.function
    def call(self, inputs, training=False):
        total_loss = tf.TensorArray(dtype=tf.float32, size=vocab_size)
        cache = tf.zeros([self.num_nodes])

        for category in tf.range(self.vocab_size):
            path = tf.gather(self.path_matrix, category)
            nodes = tf.gather(self.nodes_matrix, category)

            loss = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
            for i in tf.range(self.num_nodes):
                n = tf.gather(nodes, i)
                if n == -1:
                    break

                c = tf.gather(cache, n)
                if c == 0:
                    w = tf.gather(self.softmax_weights, n)
                    d = tf.gather(path, i)
                    sigma = tf.squeeze(tf.math.sigmoid(tf.tensordot(tf.matmul(inputs, w), d, axes=0)), axis=0)
                    cache = tf.tensor_scatter_nd_update(cache, [[n]], sigma)
                else:
                    sigma = tf.expand_dims(tf.gather(cache, n), axis=0)
                loss = loss.write(i, sigma)
            t = tf.reduce_prod(loss.stack(), axis=0)
            tf.print(t)
            total_loss = total_loss.write(i, t)
        return total_loss.stack()

    @tf.function
    def get_loss(self, x, category):
        path = tf.gather(self.path_matrix, category)
        nodes = tf.gather(self.nodes_matrix, category)

        loss = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
        for i in tf.range(self.num_nodes):
            n = tf.gather(nodes, i)
            if n == -1:
                break

            w = tf.gather(self.softmax_weights, n)
            d = tf.gather(path, i)
            sigma = tf.math.sigmoid(tf.tensordot(tf.matmul(x, w), d, axes=0))
            loss = loss.write(i, sigma)
        return tf.reduce_prod(loss.stack(), axis=0)

    def train_step(self, data):
        x, y = data
        y_true = tf.cast(y, tf.float32)

        with tf.GradientTape() as tape:
            y_pred = self.get_loss(x, y)
            loss = self.compiled_loss(y_true, y_pred)
            
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, y_pred)

        return {m.name: m.result() for m in self.metrics}

In [27]:
hsm = HierachicalSoftmax(1, tree_mapping, vocab_size)

import time
start = time.time()
hsm(x)
time.time() - start()

In [32]:
hsm(tf.constant(1, dtype=tf.float32, shape=[1, 1]))

ValueError: in user code:

    <ipython-input-26-437bb1fc5528>:44 call  *
        sigma = tf.squeeze(tf.math.sigmoid(tf.tensordot(tf.matmul(inputs, w), d, axes=0)), axis=0)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper  **
        return target(*args, **kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py:3254 matmul
        a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_math_ops.py:5642 mat_mul
        name=name)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:744 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py:593 _create_op_internal
        compute_device)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:3485 _create_op_internal
        op_def=op_def)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1975 __init__
        control_input_ops, op_def)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1815 _create_c_op
        raise ValueError(str(e))

    ValueError: Shape must be rank 2 but is rank 1 for '{{node while/while/cond_1/cond/MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](while/while/cond_1/cond/MatMul/inputs, while/while/cond_1/cond/Identity)' with input shapes: [1,1], [1].


In [12]:
for x in x_train.take(1):
    print(hsm.get_loss(x, tf.constant(1, dtype=tf.int32, shape=[1])))

ValueError: in user code:

    <ipython-input-9-437bb1fc5528>:67 get_loss  *
        sigma = tf.math.sigmoid(tf.tensordot(tf.matmul(x, w), d, axes=0))
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper  **
        return target(*args, **kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py:3254 matmul
        a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_math_ops.py:5642 mat_mul
        name=name)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:744 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py:593 _create_op_internal
        compute_device)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:3485 _create_op_internal
        op_def=op_def)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1975 __init__
        control_input_ops, op_def)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1815 _create_c_op
        raise ValueError(str(e))

    ValueError: Shape must be rank 2 but is rank 1 for '{{node while/cond_1/MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](while/cond_1/MatMul/x, while/cond_1/Identity)' with input shapes: [1], [16292,1].


In [28]:
hsm.compile(optimizer=keras.optimizers.Nadam(), loss="categorical_crossentropy")

In [29]:
hsm.fit(train_data, epochs=1)

ValueError: in user code:

    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function  *
        return step_function(self, iterator)
    <ipython-input-26-437bb1fc5528>:67 get_loss  *
        sigma = tf.math.sigmoid(tf.tensordot(tf.matmul(x, w), d, axes=0))
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper  **
        return target(*args, **kwargs)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py:3254 matmul
        a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_math_ops.py:5642 mat_mul
        name=name)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:744 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py:593 _create_op_internal
        compute_device)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:3485 _create_op_internal
        op_def=op_def)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1975 __init__
        control_input_ops, op_def)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1815 _create_c_op
        raise ValueError(str(e))

    ValueError: Shape must be rank 2 but is rank 1 for '{{node while/cond_1/MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](while/cond_1/MatMul/x, while/cond_1/Identity)' with input shapes: [?], [16292,1].
