In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import sys
import os

# Put the parent of the working directory on the import path so that the
# `src.*` imports that follow can be resolved.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)


from src.features.sequences import SequenceHandler
from src.features.knowledge import HierarchyKnowledge
from src.models import GramEmbedding

In [2]:
# Toy dataset: one row per sequence; a sequence is a list of visits and a
# visit is a list of symbols.
sequence_df = pd.DataFrame(
    {
        'sequence': [
            # sequence1
            [
                ['a', 'b'],  # visit1
                ['a', 'c'],  # visit2
            ],
            # sequence2
            [['a', 'b', 'c'], ['a'], ['d']],
            # sequence3
            [['a', 'b'], ['a', 'd']],
        ],
    }
)

handler = SequenceHandler(flatten=True)
split = handler.transform_train_test_split(sequence_df, 'sequence')

# Re-join the train and test parts along the first (dataset) axis so the
# whole toy dataset can be inspected at once.
combined_x = tf.concat(values=(split.train_x, split.test_x), axis=0)
combined_y = tf.concat(values=(split.train_y, split.test_y), axis=0)

print(combined_x.shape) # (dataset_size, max_length, feature_size)
print(combined_y.shape) # (dataset_size, 1, feature_size)

Transforming splitted sequences to tensors: 100%|██████████| 3/3 [00:00<00:00, 15.51it/s]
Transforming splitted sequences to tensors: 100%|██████████| 1/1 [00:00<?, ?it/s](4, 2, 4)
(4, 1, 4)



In [3]:
# Hierarchy edges, one (parent, child) pair per row.
hierarchy_df = pd.DataFrame(
    [
        ('a1', 'a'),
        ('b1', 'b'),
        ('cd1', 'c'),
        ('cd1', 'd'),
        ('ab2', 'a1'),
        ('ab2', 'b1'),
        ('abcd3', 'cd1'),
        ('abcd3', 'ab2'),
    ],
    columns=['parent', 'child'],
)

knowledge = HierarchyKnowledge()
knowledge.build_hierarchy_from_df(hierarchy_df, split.vocab)

# Show the label -> index mapping, then one description per node.
print(knowledge.extended_vocab)
print(*knowledge.nodes.values(), sep='\n')

Building Hierarchy from df: 8it [00:00, 8013.96it/s]{'a': 0, 'd': 1, 'b': 2, 'c': 3, 'ab2': 4, 'b1': 5, 'a1': 6, 'cd1': 7, 'abcd3': 8}
Node for idx 0 (label: a)
<-Parent nodes: 6(a1)
->Child nodes: 
Node for idx 1 (label: d)
<-Parent nodes: 7(cd1)
->Child nodes: 
Node for idx 2 (label: b)
<-Parent nodes: 5(b1)
->Child nodes: 
Node for idx 3 (label: c)
<-Parent nodes: 7(cd1)
->Child nodes: 
Node for idx 4 (label: ab2)
<-Parent nodes: 8(abcd3)
->Child nodes: 6(a1),5(b1)
Node for idx 5 (label: b1)
<-Parent nodes: 4(ab2)
->Child nodes: 2(b)
Node for idx 6 (label: a1)
<-Parent nodes: 4(ab2)
->Child nodes: 0(a)
Node for idx 7 (label: cd1)
<-Parent nodes: 8(abcd3)
->Child nodes: 3(c),1(d)
Node for idx 8 (label: abcd3)
<-Parent nodes: 
->Child nodes: 7(cd1),4(ab2)



In [4]:
# One trainable (1, embedding_size) row vector per entry of the extended
# vocabulary, keyed by the entry's index.
embedding_size = 8
embeddings = {}
for name, idx in knowledge.extended_vocab.items():
    embeddings[idx] = tf.Variable(
        tf.random.normal([1, embedding_size]), trainable=True, name=name
    )

# Keep only the rows of leaf nodes, in the iteration order of `knowledge.nodes`.
all_embeddings = [
    embeddings[leaf.label_idx]
    for leaf in knowledge.nodes.values()
    if leaf.is_leaf()
]
concatenated_embeddings = tf.concat(values=all_embeddings, axis=0)
concatenated_embeddings.shape # (num_leaf_nodes, embedding_size)

TensorShape([4, 8])

In [5]:
# For every leaf node build a (num_nodes, embedding_size) matrix whose row x
# holds the embedding of node x if x is the leaf itself or one of its
# ancestors, and zeros otherwise.
ancestor_embeddings = {}

# Loop-invariant placeholder for "not an ancestor" rows; same shape and dtype
# as a single embedding row.
zero_embedding = tf.zeros_like(embeddings[0])
num_nodes = len(knowledge.extended_vocab)

for idx, node in knowledge.nodes.items():
    if not node.is_leaf():
        continue
    # The leaf counts as its own ancestor so its own embedding takes part too.
    ancestor_idxs = set(node.get_ancestor_label_idxs()) | {idx}
    id_ancestor_embeddings = [
        embeddings[x] if x in ancestor_idxs else zero_embedding
        for x in range(num_nodes)
    ]
    ancestor_embeddings[idx] = tf.concat(id_ancestor_embeddings, axis=0)

print(ancestor_embeddings[0].shape) # shape: (num_nodes, embedding_size)
all_ancestor_embeddings = [
    ancestor_embeddings[node.label_idx] for node in knowledge.nodes.values() if node.is_leaf()
]
# Stack the per-leaf matrices along a new leading axis. The previous
# `tf.concat([list_of_tensors], axis=1)` produced the same tensor only because
# the nested list was implicitly packed before a no-op concat.
concatenated_ancestor_embeddings = tf.stack(all_ancestor_embeddings, axis=0)
concatenated_ancestor_embeddings.shape # (num_leaf_nodes, num_nodes, embedding_size)

(9, 8)


TensorShape([4, 9, 8])

In [6]:
# Dense layers of the attention scorer: w1 and w2 each project to 16 units,
# and u reduces the combined projection to a single score.
w1, w2 = (tf.keras.layers.Dense(units=16) for _ in range(2))
u = tf.keras.layers.Dense(units=1)

In [7]:
# Additive attention of every leaf over the rows of its ancestor matrix.
# Add a length-1 node axis so the leaf projection broadcasts over all nodes.
con2 = tf.expand_dims(concatenated_embeddings, 1)
score = u(tf.nn.tanh(
    w1(con2) + w2(concatenated_ancestor_embeddings)
))
print(score.shape)

# Rows that are entirely zero are the padding for "not an ancestor of this
# leaf". They still get a non-zero score through the layer biases and the
# leaf projection, so exclude them before normalising.
ancestor_mask = tf.reduce_any(
    tf.not_equal(concatenated_ancestor_embeddings, 0.0), axis=-1, keepdims=True
)
masked_score = tf.where(ancestor_mask, score, tf.constant(-1e9, dtype=score.dtype))

# Normalise over the node axis (axis=1): the weighted sum below reduces over
# that same axis, so each leaf's weights must sum to 1 across its ancestors.
# (The previous axis=0 normalised across leaves instead.)
attention_weights = tf.nn.softmax(masked_score, axis=1)
print(attention_weights.shape) # (leaf_nodes, all_nodes, 1)
context_vector = attention_weights * concatenated_ancestor_embeddings
print(context_vector.shape) # (leaf_nodes, all_nodes, embedding_size)
context_vector = tf.reduce_sum(context_vector, axis=1) 
context_vector # shape: (leaf_nodes, embedding_size)

(4, 9, 1)
(4, 9, 1)
(4, 9, 8)


<tf.Tensor: shape=(4, 8), dtype=float32, numpy=
array([[ 0.4391212 , -0.69954044, -0.69012654,  0.166058  ,  0.5658248 ,
         0.1925041 ,  0.28310868,  0.47520086],
       [ 0.15381241, -0.22646004,  0.2454637 ,  0.12253262, -0.3956595 ,
         0.08820544, -0.09927468,  0.55573636],
       [ 0.3088606 , -0.7868706 , -0.0760816 ,  0.47396648,  0.77842563,
         0.29273698,  0.7843944 ,  0.32635233],
       [-0.30848473, -0.15757278,  0.33354178, -0.14685214, -0.33262885,
         0.20864192, -0.01028858,  0.7099187 ]], dtype=float32)>

In [8]:
# Only the last expression of a cell is displayed, so the first sample has to
# be printed explicitly to appear next to the summed context vectors.
print(combined_x[0])
# Sum of the context vectors in rows 0 and 2; in the recorded run these indices
# map to 'a' and 'b' in the printed vocabulary -- verify after re-running.
context_vector[0] + context_vector[2]

<tf.Tensor: shape=(8,), dtype=float32, numpy=
array([ 0.7479818 , -1.4864111 , -0.7662082 ,  0.6400245 ,  1.3442504 ,
        0.48524106,  1.0675031 ,  0.8015532 ], dtype=float32)>

In [9]:

# Multiplying the encoded visits by the per-leaf context vectors gives, for
# each visit, the sum of the context vectors weighted by that visit's entries.
print(combined_x.shape)
combined_x @ context_vector # shape: (dataset_size, max_length, embedding_size)


(4, 2, 4)


<tf.Tensor: shape=(4, 2, 8), dtype=float32, numpy=
array([[[ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.43949705, -1.6439838 , -0.4326664 ,  0.49317235,
          1.0116216 ,  0.69388294,  1.0572145 ,  1.5114719 ]],

       [[ 0.43949705, -1.6439838 , -0.4326664 ,  0.49317235,
          1.0116216 ,  0.69388294,  1.0572145 ,  1.5114719 ],
        [ 0.4391212 , -0.69954044, -0.69012654,  0.166058  ,
          0.5658248 ,  0.1925041 ,  0.28310868,  0.47520086]],

       [[ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.7479818 , -1.4864111 , -0.7662082 ,  0.6400245 ,
          1.3442504 ,  0.48524106,  1.0675031 ,  0.8015532 ]],

       [[ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.7479818 , -1.4864111 , -0.7662082 ,  0.6400245 ,
          1.3442504 ,

In [10]:
# Input dimensions are taken from the data instead of being hard-coded:
# train_x is (dataset_size, max_length, feature_size).
max_length = int(split.train_x.shape[1])
vocab_size = len(split.vocab)

input_layer = tf.keras.layers.Input(shape=(max_length, vocab_size))
# Kept as a separate name because later cells inspect the layer's variables.
embedding_layer = GramEmbedding(knowledge)
prediction_model = tf.keras.models.Sequential([
    input_layer,
    embedding_layer,
    tf.keras.layers.LSTM(32),
    # BinaryCrossentropy (from_logits=False) expects a probability in [0, 1]
    # per symbol; sigmoid provides that, whereas the previous relu is
    # unbounded above and has zero gradient for negative pre-activations.
    tf.keras.layers.Dense(vocab_size, activation='sigmoid'),
])
prediction_model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.optimizers.Adam(),
    # NOTE(review): CategoricalAccuracy compares argmax positions only; confirm
    # that this is the intended metric if targets can contain several symbols.
    metrics=['CategoricalAccuracy'],
)

In [11]:
# Number of trainable variables registered on the GramEmbedding layer.
len(embedding_layer.trainable_variables)

8

In [15]:
# Snapshot of the embedding layer's state, stored under `old_*` names so it
# can be compared with a later snapshot.
old_ctx, old_weights = embedding_layer._calculate_attention_embeddings()
old_embs, old_anc_embs = (
    tf.constant(state.value())
    for state in (
        embedding_layer.concatenated_embeddings,
        embedding_layer.concatenated_ancestor_embeddings,
    )
)
old_embs

<tf.Tensor: shape=(4, 1, 16), dtype=float32, numpy=
array([[[ 0.45860285, -0.8094061 , -0.8881423 ,  0.6915797 ,
          0.7778867 , -0.16019487, -0.42384362,  0.9732113 ,
         -1.0849925 ,  0.12832978, -0.16205223, -0.05826806,
         -0.6269017 ,  0.6797151 ,  0.4429526 ,  0.88718   ]],

       [[ 1.742705  , -0.33816293, -0.32940295, -0.20686728,
         -0.33488658,  1.2455566 ,  0.3894918 , -0.06009046,
         -0.5491302 , -0.2999603 ,  1.2351984 , -0.0372693 ,
         -0.26207778, -0.768544  ,  0.18959118, -0.55703205]],

       [[ 0.23887655, -1.1413323 ,  0.34748358,  0.83838624,
          0.93078667, -0.51384854, -1.3149151 ,  0.7438372 ,
         -0.73259443,  0.32347718, -0.5855996 ,  0.34120414,
          1.5915494 , -0.46074736, -2.0714567 , -0.37715694]],

       [[ 0.38439468, -0.12188402,  0.43615142, -2.144266  ,
         -0.26360708, -0.0507897 , -1.078655  , -0.08635159,
         -1.0205619 , -0.20950972, -1.3936907 ,  0.69263345,
         -1.3156145 ,  0

In [16]:
# train_y is (dataset_size, 1, feature_size) while the model outputs
# (batch, feature_size). Left as is, the loss broadcasts the two to
# (batch, batch, feature_size) and scores every prediction against every
# target, so drop the singleton axis first.
prediction_model.fit(x=split.train_x, y=tf.squeeze(split.train_y, axis=1), epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x1d509057c10>

In [18]:
# Second snapshot of the embedding layer's state, stored under `new_*` names
# for comparison with the `old_*` snapshot.
new_ctx, new_weights = embedding_layer._calculate_attention_embeddings()
new_embs, new_anc_embs = (
    tf.constant(state.value())
    for state in (
        embedding_layer.concatenated_embeddings,
        embedding_layer.concatenated_ancestor_embeddings,
    )
)
new_embs

<tf.Tensor: shape=(4, 1, 16), dtype=float32, numpy=
array([[[ 0.467861  , -0.81769806, -0.87941426,  0.6844128 ,
          0.77731776, -0.15368856, -0.43322623,  0.96499074,
         -1.0764097 ,  0.1366386 , -0.152081  , -0.05109737,
         -0.6203225 ,  0.6825094 ,  0.4504309 ,  0.8964409 ]],

       [[ 1.73377   , -0.32968634, -0.3297837 , -0.19798316,
         -0.3440358 ,  1.2542542 ,  0.3801561 , -0.06910255,
         -0.540053  , -0.30874735,  1.2443237 , -0.04612953,
         -0.27087656, -0.77742785,  0.19920804, -0.5474403 ]],

       [[ 0.23195578, -1.1435264 ,  0.33953387,  0.84111685,
          0.93766993, -0.506261  , -1.3061062 ,  0.75275457,
         -0.7328707 ,  0.3138227 , -0.5791674 ,  0.33276343,
          1.5857468 , -0.46848828, -2.06588   , -0.38590994]],

       [[ 0.39466083, -0.13193208,  0.42709947, -2.1541717 ,
         -0.2734999 , -0.06069435, -1.0889127 , -0.07704251,
         -1.0301846 , -0.19982052, -1.4033588 ,  0.7023139 ,
         -1.305849  ,  0

In [22]:
# Element-wise difference between the two embedding snapshots.
tf.subtract(old_embs, new_embs)

<tf.Tensor: shape=(4, 1, 16), dtype=float32, numpy=
array([[[-0.00925815,  0.00829196, -0.00872803,  0.00716692,
          0.00056893, -0.00650631,  0.00938261,  0.00822055,
         -0.00858283, -0.00830881, -0.00997123, -0.00717069,
         -0.00657916, -0.00279433, -0.0074783 , -0.00926095]],

       [[ 0.00893497, -0.00847659,  0.00038075, -0.00888412,
          0.00914922, -0.00869763,  0.0093357 ,  0.00901208,
         -0.00907719,  0.00878707, -0.00912535,  0.00886023,
          0.00879878,  0.00888383, -0.00961685, -0.00959176]],

       [[ 0.00692077,  0.00219417,  0.00794971, -0.00273061,
         -0.00688326, -0.00758755, -0.00880885, -0.00891739,
          0.00027627,  0.00965449, -0.00643218,  0.0084407 ,
          0.00580263,  0.00774091, -0.00557661,  0.008753  ]],

       [[-0.01026616,  0.01004806,  0.00905195,  0.00990582,
          0.00989282,  0.00990465,  0.01025772, -0.00930908,
          0.00962269, -0.0096892 ,  0.00966811, -0.00968045,
         -0.00976551, -0