In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import sys
import os

# Resolve the parent of the current working directory and register it on
# sys.path (once) so the `src.*` imports further down can be found.
module_path = os.path.abspath(os.pardir)
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)


from src.features.sequences import SequenceHandler
from src.features.knowledge import HierarchyKnowledge
from src.models import GramEmbedding

In [None]:
# Toy dataset: three sequences, each a list of visits, each visit a list of
# feature labels.
visits_per_sequence = [
    [['a', 'b'], ['a', 'c']],         # sequence1: visit1, visit2
    [['a', 'b', 'c'], ['a'], ['d']],  # sequence2
    [['a', 'b'], ['a', 'd']],         # sequence3
]
sequence_df = pd.DataFrame({'sequence': visits_per_sequence})

# Encode the 'sequence' column and split it into train/test partitions.
handler = SequenceHandler(flatten=True)
split = handler.transform_train_test_split(sequence_df, 'sequence')

# Re-join both partitions along the first axis to inspect the full dataset.
combined_x = tf.concat((split.train_x, split.test_x), axis=0)
combined_y = tf.concat((split.train_y, split.test_y), axis=0)

print(combined_x.shape) # (dataset_size, max_length, feature_size)
print(combined_y.shape) # (dataset_size, 1, feature_size)

In [None]:
# (parent, child) edges of the toy hierarchy, one tuple per edge.
hierarchy_edges = [
    ('a1', 'a'),
    ('b1', 'b'),
    ('cd1', 'c'),
    ('cd1', 'd'),
    ('ab2', 'a1'),
    ('ab2', 'b1'),
    ('abcd3', 'cd1'),
    ('abcd3', 'ab2'),
]
hierarchy_df = pd.DataFrame(hierarchy_edges, columns=['parent', 'child'])

# Build the hierarchy on top of the vocabulary produced by the sequence split.
knowledge = HierarchyKnowledge()
knowledge.build_hierarchy_from_df(hierarchy_df, split.vocab)

print(knowledge.extended_vocab)
# One node per line; print() applies str() to every node.
print(*knowledge.nodes.values(), sep='\n')

In [None]:
# One trainable (1, embedding_size) variable per entry of the extended
# vocabulary, keyed by the entry's index and named after its label.
embedding_size = 8
embeddings = {
    idx: tf.Variable(
        initial_value=tf.random.normal(shape=(1, embedding_size)),
        trainable=True,
        name=name,
    )
    for name, idx in knowledge.extended_vocab.items()
}

# Keep only the leaf nodes' embeddings, in the iteration order of knowledge.nodes.
all_embeddings = [
    embeddings[node.label_idx]
    for node in knowledge.nodes.values()
    if node.is_leaf()
]
concatenated_embeddings = tf.concat(all_embeddings, axis=0)
concatenated_embeddings.shape # (num_leaf_nodes, embedding_size)

In [None]:
# For every leaf node, build a (num_nodes, embedding_size) matrix whose row x is
# the embedding of node x when x is the leaf itself or one of its ancestors,
# and a row of zeros otherwise.

# Placeholder row for nodes that are not ancestors; built once instead of on
# every comprehension iteration.
zero_embedding = tf.zeros_like(embeddings[0])

ancestor_embeddings = {}
for idx, node in knowledge.nodes.items():
    if not node.is_leaf():
        continue
    # The leaf's own index is included alongside its ancestors' indices.
    ancestor_idxs = set(node.get_ancestor_label_idxs() + [idx])
    id_ancestor_embeddings = [
        embeddings[x] if x in ancestor_idxs else zero_embedding
        for x in range(len(knowledge.extended_vocab))
    ]
    ancestor_embeddings[idx] = tf.concat(id_ancestor_embeddings, axis=0)

# Every entry has the same shape, so inspect the first one rather than
# assuming that index 0 is a leaf.
print(next(iter(ancestor_embeddings.values())).shape) # shape: (num_nodes, embedding_size)
all_ancestor_embeddings = [
    ancestor_embeddings[node.label_idx] for node in knowledge.nodes.values() if node.is_leaf()
]
# Stack the per-leaf matrices along a new leading axis. The previous
# tf.concat([list], axis=1) produced the same tensor only because the nested
# list was implicitly packed into a single tensor first.
concatenated_ancestor_embeddings = tf.stack(all_ancestor_embeddings, axis=0)
concatenated_ancestor_embeddings.shape # (num_leaf_nodes, num_nodes, embedding_size)

In [None]:
# Layers of the attention scorer used further down as u(tanh(w1(...) + w2(...))):
# two 16-unit projections and a single-unit layer that yields the score.
w1, w2 = (tf.keras.layers.Dense(units=16) for _ in range(2))
u = tf.keras.layers.Dense(units=1)

In [None]:
# Attention of every leaf node over its row of (zero-padded) ancestor embeddings.

# Insert a singleton axis so the leaf projection broadcasts against the
# per-node projections in the sum below.
con2 = tf.expand_dims(concatenated_embeddings, 1)
score = u(tf.nn.tanh(
    w1(con2) + w2(concatenated_ancestor_embeddings)
))
print(score.shape)
# Normalise over axis 1 (the node axis) so each leaf's weights sum to 1.
# This is the same axis that is summed out below; axis=0 would instead
# normalise across leaves for a fixed node.
# NOTE(review): rows of non-ancestor nodes are all-zero embeddings but still
# receive a score, so they take part of the softmax mass - consider masking
# their scores before the softmax; confirm the intended behaviour.
attention_weights = tf.nn.softmax(score, axis=1)
print(attention_weights.shape) # (leaf_nodes, all_nodes, 1)
context_vector = attention_weights * concatenated_ancestor_embeddings
print(context_vector.shape) # (leaf_nodes, all_nodes, embedding_size)
# Weighted sum over the node axis gives one vector per leaf.
context_vector = tf.reduce_sum(context_vector, axis=1)
context_vector # shape: (leaf_nodes, embedding_size)

In [None]:
# A notebook cell only displays its last expression, so the first sample is
# printed explicitly; otherwise its value would be silently discarded.
print(combined_x[0])
# Sum of the context vectors at indices 0 and 2.
# NOTE(review): presumably these are the features active in combined_x[0] -
# verify against the printed sample.
context_vector[0] + context_vector[2]

In [None]:

# Multiply the encoded inputs with the per-leaf context vectors; the last axis
# of combined_x is contracted against the first axis of context_vector.
print(combined_x.shape)
tf.matmul(combined_x, context_vector) # shape: (dataset_size, max_length, embedding_size)


In [None]:
# Prediction model: GramEmbedding -> LSTM -> one output per vocabulary entry.
max_length = 2  # NOTE(review): must match the time axis of split.train_x passed to fit()
vocab_size = len(split.vocab)

input_layer = tf.keras.layers.Input(shape=(max_length, vocab_size))
embedding_layer = GramEmbedding(knowledge)
prediction_model = tf.keras.models.Sequential([
    input_layer,
    embedding_layer,
    tf.keras.layers.LSTM(32),
    # BinaryCrossentropy() defaults to from_logits=False and therefore expects
    # probabilities in [0, 1]. 'sigmoid' guarantees that range, whereas the
    # previous 'relu' output was unbounded above and exactly 0 for negative
    # pre-activations.
    tf.keras.layers.Dense(vocab_size, activation='sigmoid'),
])
prediction_model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.optimizers.Adam(),
    metrics=['CategoricalAccuracy'],
)

In [None]:
# Show how many trainable variables the GramEmbedding layer currently exposes.
len(embedding_layer.trainable_variables)

In [None]:
# Snapshot the embedding layer's state so it can be compared with the state
# captured after prediction_model.fit(...) has run.
# NOTE(review): the two return values are presumably the context vectors and
# the attention weights - confirm against GramEmbedding.
old_ctx, old_weights = embedding_layer._calculate_attention_embeddings()
# tf.constant(...) copies the current values out of the layer's variables.
old_embs = tf.constant(embedding_layer.concatenated_embeddings.value())
old_anc_embs = tf.constant(
    embedding_layer.concatenated_ancestor_embeddings.value()
)
old_embs

In [None]:
# Train on the training partition only, for 100 epochs.
prediction_model.fit(
    x=split.train_x,
    y=split.train_y,
    epochs=100,
)

In [None]:
# Capture the embedding layer's state again, using the same calls as the
# earlier old_* snapshot, so the two can be compared.
# NOTE(review): the two return values are presumably the context vectors and
# the attention weights - confirm against GramEmbedding.
new_ctx, new_weights = embedding_layer._calculate_attention_embeddings()
# tf.constant(...) copies the current values out of the layer's variables.
new_embs = tf.constant(embedding_layer.concatenated_embeddings.value())
new_anc_embs = tf.constant(
    embedding_layer.concatenated_ancestor_embeddings.value()
)
new_embs

In [None]:
# Element-wise difference between the two embedding snapshots; entries that
# are non-zero changed between the old_* and new_* captures.
tf.subtract(old_embs, new_embs)