In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import keras
from keras import layers
import tensorflow as tf
from keras.callbacks import ModelCheckpoint

Loading the embedded data and zero-padding every sequence to a common length

In [60]:
# Read the pre-computed embeddings from Google Drive.
# allow_pickle=True makes NumPy unpickle object arrays, which can execute
# arbitrary code, so only load archives that come from a trusted source.
loaded_data = np.load(
    "/content/drive/MyDrive/Colab files/Hackaton/embedded_data.npz",
    allow_pickle=True,
)

# Keep the two embedded text fields side by side, one row per sample.
train_data = pd.DataFrame(
    {column: loaded_data[column] for column in ("question", "raw_data")}
)

def add_mask(df_column):
  """Zero-pad every sequence in a column to the column's longest sequence.

  Despite the name, no mask is produced: shorter sequences simply get all-zero
  entries appended along their first axis.

  Args:
    df_column: pandas Series whose cells are indexable containers; element
      ``[0]`` of each cell is the sequence to pad, an array-like whose first
      axis is the sequence length.

  Returns:
    pandas Series with the same index whose cells are NumPy arrays that all
    share the same first-axis length. Each array keeps the dtype of its input.
  """
  # Normalise every cell to an ndarray up front so all rows come back as the
  # same kind of object, whether or not they needed padding.
  sequences = df_column.apply(lambda cell: np.asarray(cell[0]))
  max_length = sequences.apply(len).max()

  def pad(sequence):
    # Pad only at the end of axis 0. The remaining axes are taken from the data
    # instead of a hard-coded feature width, and np.pad keeps the input dtype
    # (concatenating integer zeros would upcast float32 rows to float64).
    pad_width = [(0, max_length - len(sequence))] + [(0, 0)] * (sequence.ndim - 1)
    return np.pad(sequence, pad_width)

  return sequences.apply(pad)

# Replace both embedded columns with their zero-padded versions so that every
# row within a column has the same sequence length.
train_data = train_data.assign(
    question=add_mask(train_data["question"]),
    raw_data=add_mask(train_data["raw_data"]),
)

Defining the Transformer block and the position embedding layer

In [8]:
# Shared feature width: the output size of TransformerBlock's feed-forward
# sub-layer and of PositionEmbedding's embedding table.
embed_dim = 1024

class TransformerBlock(layers.Layer):
  """Transformer encoder block: self-attention followed by a feed-forward net.

  Each of the two sub-layers is followed by dropout, a residual connection and
  layer normalization. The feed-forward net projects back to the module-level
  ``embed_dim`` so that the residual addition is shape-compatible.

  Args:
    num_heads: Number of attention heads.
    key_dim: Size of each attention head for query and key.
    dropout: Drop rate applied to the output of both sub-layers.
    **kwargs: Standard ``keras.layers.Layer`` keyword arguments (e.g. ``name``).
  """

  def __init__(self, num_heads, key_dim, dropout, **kwargs):
    super().__init__(**kwargs)
    # Kept so that get_config() can describe how to rebuild this layer.
    self.num_heads = num_heads
    self.key_dim = key_dim
    self.dropout_rate = dropout

    self.attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)
    self.feed_forward = keras.Sequential(
        [layers.Dense(2048, activation="relu"),
         layers.Dense(embed_dim)]
    )
    self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
    # Use the caller-supplied rate; it was previously accepted but ignored in
    # favour of a fixed 0.1.
    self.dropout1 = layers.Dropout(dropout)
    self.dropout2 = layers.Dropout(dropout)

  def call(self, inputs):
    """Apply the block to ``inputs`` and return a tensor of the same shape."""
    # Self-attention: the inputs serve as both query and value.
    attention_output = self.dropout1(self.attention(inputs, inputs))
    output1 = self.layernorm1(inputs + attention_output)
    feed_forward_output = self.dropout2(self.feed_forward(output1))
    return self.layernorm2(output1 + feed_forward_output)

  def get_config(self):
    """Return the constructor arguments so the layer can be serialized."""
    config = super().get_config()
    config.update({
        "num_heads": self.num_heads,
        "key_dim": self.key_dim,
        "dropout": self.dropout_rate,
    })
    return config

class PositionEmbedding(layers.Layer):
  """Adds a learned position embedding to a batch of sequences.

  Args:
    input_dim: Number of rows in the position table, i.e. the longest sequence
      length the layer supports. Longer sequences are not supported.
    **kwargs: Standard ``keras.layers.Layer`` keyword arguments (e.g. ``name``).
  """

  def __init__(self, input_dim, **kwargs):
    super().__init__(**kwargs)
    self.input_dim = input_dim
    self.pos_emb = layers.Embedding(input_dim=self.input_dim, output_dim=embed_dim)

  def call(self, X):
    """Return ``X`` plus the embeddings of positions ``0 .. length - 1``."""
    # Look up only as many positions as the input actually has (its
    # second-to-last axis). Always looking up all `input_dim` positions made
    # the addition fail for any sequence shorter than the table.
    positions = tf.range(tf.shape(X)[-2])
    return X + self.pos_emb(positions)

  def get_config(self):
    """Return the constructor arguments so the layer can be serialized."""
    config = super().get_config()
    config.update({"input_dim": self.input_dim})
    return config

In [None]:
# PositionEmbedding needs the number of positions its table must cover; calling
# it without that argument raises a TypeError. Every padded row in a column has
# the same length, so the first row gives the sequence length.
# NOTE(review): assumes the model is fed the padded `question` column; use
# `raw_data` here instead if that is the actual model input.
max_seq_len = len(train_data["question"].iloc[0])

# Inputs are sequences of `embed_dim`-wide vectors.
input_layer = layers.Input(shape=[None, embed_dim])
embedding_layer = PositionEmbedding(max_seq_len)(input_layer)
# NOTE(review): key_dim is the per-head size in MultiHeadAttention, so 8 heads
# of width embed_dim is unusually large; embed_dim // num_heads is the common
# choice. Confirm this is intended.
x = TransformerBlock(num_heads=8, key_dim=embed_dim, dropout=0.4)(embedding_layer)
# Collapse the sequence axis to one vector per sample before classifying.
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dense(512, activation="relu")(x)
x = layers.Dropout(0.4)(x)
# 32-way softmax; presumably one unit per target class. Verify against labels.
outputs = layers.Dense(32, activation="softmax")(x)

model = keras.Model(input_layer, outputs)
model.compile(loss="SparseCategoricalCrossentropy", optimizer=keras.optimizers.Adam(), metrics=["accuracy"])
keras.utils.plot_model(model, show_shapes=True)