In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import sys
import os

# Put the parent directory on the import path so the `src` imports below resolve
# when the notebook is run from its own folder.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)


from src.features.sequences import SequenceHandler
from src.features.knowledge import HierarchyKnowledge
from src.models import GramEmbedding

In [2]:
# Toy dataset: one row per sequence, each sequence is a list of visits,
# and each visit is a list of feature labels.
toy_sequences = [
    [['a', 'b'], ['a', 'c']],          # sequence1: visit1, visit2
    [['a', 'b', 'c'], ['a'], ['d']],   # sequence2
    [['a', 'b'], ['a', 'd']],          # sequence3
]
sequence_df = pd.DataFrame({'sequence': toy_sequences})

handler = SequenceHandler(flatten=True)
split = handler.transform_train_test_split(sequence_df, 'sequence')

# Re-join the train and test parts along the sample axis so the whole toy
# dataset can be inspected at once.
combined_x = tf.concat(values=[split.train_x, split.test_x], axis=0)
combined_y = tf.concat(values=[split.train_y, split.test_y], axis=0)

print(combined_x.shape) # (dataset_size, max_length, feature_size)
print(combined_y.shape) # (dataset_size, 1, feature_size)

Transforming splitted sequences to tensors: 100%|██████████| 1/1 [00:00<00:00,  4.99it/s]
Transforming splitted sequences to tensors: 100%|██████████| 2/2 [00:00<00:00, 1000.43it/s](3, 2, 4)
(3, 1, 4)



In [3]:
# (parent, child) edges of the toy hierarchy; the leaves are the labels used in
# the sequences above, and 'abcd3' is the only node that never appears as a child.
hierarchy_edges = [
    ('a1', 'a'),
    ('b1', 'b'),
    ('cd1', 'c'),
    ('cd1', 'd'),
    ('ab2', 'a1'),
    ('ab2', 'b1'),
    ('abcd3', 'cd1'),
    ('abcd3', 'ab2'),
]
hierarchy_df = pd.DataFrame(hierarchy_edges, columns=['parent', 'child'])

knowledge = HierarchyKnowledge()
knowledge.build_hierarchy_from_df(hierarchy_df, split.vocab)

# Show the label -> index mapping, then one description per hierarchy node.
print(knowledge.extended_vocab)
print('\n'.join(map(str, knowledge.nodes.values())))

Building Hierarchy from df: 8it [00:00, 2673.66it/s]{'a': 0, 'd': 1, 'b': 2, 'c': 3, 'a1': 4, 'cd1': 5, 'ab2': 6, 'b1': 7, 'abcd3': 8}
Node for idx 0 (label: a)
<-Parent nodes: 4(a1)
->Child nodes: 
Node for idx 1 (label: d)
<-Parent nodes: 5(cd1)
->Child nodes: 
Node for idx 2 (label: b)
<-Parent nodes: 7(b1)
->Child nodes: 
Node for idx 3 (label: c)
<-Parent nodes: 5(cd1)
->Child nodes: 
Node for idx 4 (label: a1)
<-Parent nodes: 6(ab2)
->Child nodes: 0(a)
Node for idx 5 (label: cd1)
<-Parent nodes: 8(abcd3)
->Child nodes: 3(c),1(d)
Node for idx 6 (label: ab2)
<-Parent nodes: 8(abcd3)
->Child nodes: 4(a1),7(b1)
Node for idx 7 (label: b1)
<-Parent nodes: 6(ab2)
->Child nodes: 2(b)
Node for idx 8 (label: abcd3)
<-Parent nodes: 
->Child nodes: 5(cd1),6(ab2)



In [4]:
embedding_size = 8

# One trainable (1, embedding_size) row vector per entry of the extended
# vocabulary, keyed by the entry's index and named after its label.
embeddings = {
    idx: tf.Variable(
        initial_value=tf.random.normal(shape=(1, embedding_size)),
        trainable=True,
        name=name,
    )
    for name, idx in knowledge.extended_vocab.items()
}

# Collect the embeddings of the leaf nodes only, in node iteration order.
all_embeddings = [
    embeddings[leaf.label_idx]
    for leaf in knowledge.nodes.values()
    if leaf.is_leaf()
]
concatenated_embeddings = tf.concat(all_embeddings, axis=0)
concatenated_embeddings.shape # (num_leaf_nodes, embedding_size)

TensorShape([4, 8])

In [5]:
# Build, for every leaf node, a (num_nodes, embedding_size) matrix whose row x
# holds the embedding of node x if x is the leaf itself or one of its
# ancestors, and an all-zero row otherwise.

# The zero row is identical for every slot, so create it once instead of once
# per (leaf, node) pair.
zero_embedding = tf.constant(0, shape=(embeddings[0].shape), dtype='float32')
num_nodes = len(knowledge.extended_vocab)

ancestor_embeddings = {}
for idx, node in knowledge.nodes.items():
    if not node.is_leaf():
        continue
    # The leaf is included in its own "ancestor" set.
    ancestor_idxs = set(node.get_ancestor_label_idxs() + [idx])
    ancestor_embeddings[idx] = tf.concat(
        [embeddings[x] if x in ancestor_idxs else zero_embedding for x in range(num_nodes)],
        axis=0,
    )

print(ancestor_embeddings[0].shape) # shape: (num_nodes, embedding_size)
all_ancestor_embeddings = [
    ancestor_embeddings[node.label_idx] for node in knowledge.nodes.values() if node.is_leaf()
]
# Stack the per-leaf matrices along a new leading axis. The previous
# `tf.concat([all_ancestor_embeddings], axis=1)` produced the same tensor only
# because the nested list was implicitly packed into a single tensor first.
concatenated_ancestor_embeddings = tf.stack(all_ancestor_embeddings, axis=0)
concatenated_ancestor_embeddings.shape # (num_leaf_nodes, num_nodes, embedding_size)

(9, 8)


TensorShape([4, 9, 8])

In [6]:
# Layers of the additive attention scorer used in the next cell:
# w1 projects the leaf embeddings, w2 projects the ancestor embeddings,
# and u maps the tanh of their sum to a single score.
attention_units = 16
w1 = tf.keras.layers.Dense(attention_units)
w2 = tf.keras.layers.Dense(attention_units)
u = tf.keras.layers.Dense(units=1)

In [7]:
# Additive attention of every leaf node over the rows of its ancestor matrix.

# (leaf_nodes, 1, embedding_size): the inserted axis lets each leaf embedding
# broadcast against all node rows of its ancestor matrix.
con2 = tf.expand_dims(concatenated_embeddings, 1)
score = u(tf.nn.tanh(w1(con2) + w2(concatenated_ancestor_embeddings)))
print(score.shape)
# Normalise over the node axis (axis=1) so that the weights of each leaf sum
# to 1 across the nodes it attends to. Normalising over axis=0 would instead
# make the weights sum to 1 across the leaves for every node.
# NOTE(review): the zero-padded non-ancestor rows still receive a share of the
# softmax mass; they contribute nothing to the weighted sum below, but the
# weights of the real ancestors then sum to less than 1. Consider masking.
attention_weights = tf.nn.softmax(score, axis=1)
print(attention_weights.shape) # (leaf_nodes, all_nodes, 1)
context_vector = attention_weights * concatenated_ancestor_embeddings
print(context_vector.shape) # (leaf_nodes, all_nodes, embedding_size)
# Weighted sum over the node axis yields one vector per leaf.
context_vector = tf.reduce_sum(context_vector, axis=1)
context_vector # shape: (leaf_nodes, embedding_size)

(4, 9, 1)
(4, 9, 1)
(4, 9, 8)


<tf.Tensor: shape=(4, 8), dtype=float32, numpy=
array([[ 0.13523413, -0.26464623, -0.68057   , -0.20489092, -0.13474315,
         0.08108522, -0.2112375 ,  0.10981014],
       [-0.04848791, -0.31979886,  0.27716798,  0.16883737,  0.3178332 ,
        -0.49723762,  0.4665865 ,  0.8277112 ],
       [-0.11086635, -0.27435493, -0.29858112,  1.2453539 ,  0.11035765,
        -0.1973583 ,  0.25268143, -0.83086956],
       [-0.4768219 , -0.7192046 ,  0.5580113 ,  0.27178335,  0.2834713 ,
        -1.1895331 ,  0.38526332,  0.662086  ]], dtype=float32)>

In [8]:
# Sanity check for the matmul in the next cell: show the first input sequence
# and the sum of the context vectors of labels 'a' and 'b'.
# A bare expression that is not the last line of a cell is evaluated and
# discarded, so the input has to be printed explicitly to be visible.
print(combined_x[0])

# Look the row indices up by label instead of hard-coding 0 and 2.
# NOTE(review): assumes row i of context_vector belongs to the leaf whose
# label index is i, as the hard-coded indices did - confirm.
a_idx = knowledge.extended_vocab['a']
b_idx = knowledge.extended_vocab['b']
context_vector[a_idx] + context_vector[b_idx]

<tf.Tensor: shape=(8,), dtype=float32, numpy=
array([ 0.02436779, -0.53900117, -0.9791511 ,  1.040463  , -0.0243855 ,
       -0.11627308,  0.04144393, -0.72105944], dtype=float32)>

In [9]:

# Multiply every visit vector by the per-leaf context vectors; the
# (leaf_nodes, embedding_size) matrix is applied to each sequence in the batch.
print(combined_x.shape)
tf.linalg.matmul(a=combined_x, b=context_vector) # shape: (dataset_size, max_length, embedding_size)


(3, 2, 4)


<tf.Tensor: shape=(3, 2, 8), dtype=float32, numpy=
array([[[ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.02436779, -0.53900117, -0.9791511 ,  1.040463  ,
         -0.0243855 , -0.11627308,  0.04144393, -0.72105944]],

       [[ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [-0.45245412, -1.2582058 , -0.42113984,  1.3122463 ,
          0.25908577, -1.3058062 ,  0.42670727, -0.05897343]],

       [[-0.45245412, -1.2582058 , -0.42113984,  1.3122463 ,
          0.25908577, -1.3058062 ,  0.42670727, -0.05897343],
        [ 0.13523413, -0.26464623, -0.68057   , -0.20489092,
         -0.13474315,  0.08108522, -0.2112375 ,  0.10981014]]],
      dtype=float32)>

In [12]:
# Next-visit prediction model: GRAM embedding -> LSTM -> per-label output.

# Take the sequence length from the data instead of hard-coding it, so the
# cell keeps working when the toy sequences change.
max_length = split.train_x.shape[1]
vocab_size = len(split.vocab)

input_layer = tf.keras.layers.Input(shape=(max_length, vocab_size))
embedding_layer = GramEmbedding(knowledge)
prediction_model = tf.keras.models.Sequential([
    input_layer,
    embedding_layer,
    tf.keras.layers.LSTM(32),
    # BinaryCrossentropy with its default from_logits=False expects
    # probabilities in [0, 1]; sigmoid provides them, whereas relu is
    # unbounded above and can output exactly 0.
    tf.keras.layers.Dense(vocab_size, activation='sigmoid'),
])
prediction_model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.optimizers.Adam(),
    metrics=['CategoricalAccuracy'],
)

In [13]:
# The targets carry a singleton middle axis (combined_y prints as
# (dataset_size, 1, feature_size)), while the model emits
# (batch, feature_size). Left as is, the element-wise loss broadcasts the two
# to (batch, batch, feature_size) and compares every prediction with every
# target, so drop that axis before fitting.
train_targets = split.train_y[:, 0]
prediction_model.fit(x=split.train_x, y=train_targets, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x221c25dd4c0>