## Import Libraries

In [None]:
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Bidirectional, TimeDistributed   #layers required for network
from tensorflow.keras.layers import Layer, Conv1D, Softmax, Concatenate ,Dropout, MaxPool1D        #layers required for network
from tensorflow.keras.backend import expand_dims, tile, concatenate, shape, batch_dot, squeeze     #functions required for network
import tensorflow.keras.backend as K                                                               #to build metric
from tensorflow.keras.models import Model                                                          #to build model
from tensorflow.keras.callbacks import TensorBoard                                                 #tensorboard
import tensorflow as tf                                                                            #other functions
from tqdm import tqdm                                                                              #track progress
import numpy as np                                                                                 #for numpy operations
import pickle                                                                                      #loading tokenizers

Using TensorFlow backend.
  import pandas.util.testing as tm


In [None]:
# Sanity check of the runtime: eager-execution flag first, TensorFlow version second.
print(tf.executing_eagerly(), tf.__version__, sep="\n")

True
2.2.0


In [None]:
# Print the devices TensorFlow reports for this machine (CPU / GPU / XLA entries).
# NOTE(review): `tensorflow.python.*` is not a public namespace; `tf.config.list_physical_devices()`
# looks like the public alternative — confirm before switching.
from tensorflow.python.client import device_lib 
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 8522293018181653761
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 2748057066765156713
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 11171406046308396574
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15701401920
locality {
  bus_id: 1
  links {
  }
}
incarnation: 6799705826100236329
physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"
]


In [None]:
# Download NLTK's 'punkt' tokenizer data into the local nltk_data directory.
# NOTE(review): no visible cell calls nltk afterwards — presumably a leftover from preprocessing; confirm.
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Mount Google Drive at /content/drive (Colab only). The arrays, tokenizers, GloVe file
# and model checkpoints used below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Sequence-length limits, taken from the preprocessing step.
question_max = 32    # question length in tokens (becomes question_timesteps)
context_max = 340    # context length in tokens (becomes context_timesteps)
char_max = 15        # size of the last axis of the character-level inputs

## Loading all the Required Variables from disk

In [None]:
#train input arrays
# `load` is never imported or defined in this notebook, so the bare call raised
# NameError on a fresh kernel; use the already-imported numpy namespace instead.
train_context_word_padded = np.load("drive/My Drive/Colab Notebooks/dataset/train_arrays/train_context_word_padded.npy")
train_question_word_padded = np.load("drive/My Drive/Colab Notebooks/dataset/train_arrays/train_question_word_padded.npy")
train_context_char_padded = np.load("drive/My Drive/Colab Notebooks/dataset/train_arrays/train_context_char_padded.npy")
train_question_char_padded = np.load("drive/My Drive/Colab Notebooks/dataset/train_arrays/train_question_char_padded.npy")

In [None]:
#test input arrays
# `load` is never imported or defined in this notebook, so the bare call raised
# NameError on a fresh kernel; use the already-imported numpy namespace instead.
test_context_word_padded = np.load("drive/My Drive/Colab Notebooks/dataset/test_arrays/test_context_word_padded.npy")
test_question_word_padded = np.load("drive/My Drive/Colab Notebooks/dataset/test_arrays/test_question_word_padded.npy")
test_context_char_padded = np.load("drive/My Drive/Colab Notebooks/dataset/test_arrays/test_context_char_padded.npy")
test_question_char_padded = np.load("drive/My Drive/Colab Notebooks/dataset/test_arrays/test_question_char_padded.npy")

In [None]:
#output arrays (answer start / end targets for the train and test splits)
# `load` is never imported or defined in this notebook, so the bare call raised
# NameError on a fresh kernel; use the already-imported numpy namespace instead.
y_start_train = np.load("drive/My Drive/Colab Notebooks/dataset/train_arrays/y_start_train.npy")
y_end_train = np.load("drive/My Drive/Colab Notebooks/dataset/train_arrays/y_end_train.npy")
y_start_test = np.load("drive/My Drive/Colab Notebooks/dataset/test_arrays/y_start_test.npy")
y_end_test = np.load("drive/My Drive/Colab Notebooks/dataset/test_arrays/y_end_test.npy")

In [None]:
# Restore the word tokenizer from its pickle on Drive. Later cells use it as a
# word -> index mapping (`.items()`, `len(...)`).
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
word_tokenizer_path = 'drive/My Drive/Colab Notebooks/dataset/word_tokenizer.pickle'
with open(word_tokenizer_path, 'rb') as handle:
    word_tokenizer = pickle.load(handle)

In [None]:
# Restore the character tokenizer from its pickle on Drive. Later cells read its
# `.word_index` mapping (character -> index).
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
char_tokenizer_path = 'drive/My Drive/Colab Notebooks/dataset/char_tokenizer.pickle'
with open(char_tokenizer_path, 'rb') as handle:
    char_tokenizer = pickle.load(handle)

## Defining Embedding Matrix For Word

In [None]:
# Parse the GloVe text file into a {word (str): vector (float32 array)} lookup.
# The file is read as bytes; each line is split on whitespace, the first field is the
# token (decoded as UTF-8) and the remaining fields are the vector components.
GLOVE_PATH = "/content/drive/My Drive/Colab Notebooks/glove.6B.100d.txt"

enc_data = {}
with open(GLOVE_PATH, 'rb') as f:
    for line in tqdm(f):
        fields = line.split()
        enc_data[fields[0].decode('utf-8')] = np.asarray(fields[1:], "float32")

# Set of every token that has a GloVe vector.
glove_words = set(enc_data)

400000it [00:16, 24229.76it/s]


In [None]:
# Word-embedding weight table: row i receives the GloVe vector of the word whose
# tokenizer index is i. Rows that are never assigned (words missing from GloVe, and
# any index no word maps to) stay all-zero. 100 is the GloVe vector width used here.
embedding_matrix_word = np.zeros((len(word_tokenizer)+1, 100))

count = 0    # how many vocabulary words were found in GloVe
for word, i in word_tokenizer.items():
    if word not in enc_data:
        continue
    embedding_matrix_word[i] = enc_data[word]
    count += 1

In [None]:
# Share of the tokenizer vocabulary that has a GloVe vector, in percent.
coverage_pct = count/len(word_tokenizer)*100
print("Percentage of words covered by Glove vectors:", coverage_pct)

Percentage of words covered by Glove vectors: 40.99284551060365


## Defining Embedding Matrix For Characters

In [None]:
# One-hot character "embedding" table.
# Row 0 is all zeros; after it comes one row per entry of char_tokenizer.word_index
# (in the mapping's iteration order), with a single 1 at column (index - 1).
n_chars = len(char_tokenizer.word_index)
char_indices = np.fromiter(char_tokenizer.word_index.values(), dtype=np.int64, count=n_chars)

embedding_matrix_char = np.vstack([
    np.zeros((1, n_chars)),
    np.eye(n_chars)[char_indices - 1],    # picking rows of the identity yields the one-hot rows
])

## Defining the Layers of the Model

### Word Embedding Layer

In [None]:
class word_embedding_layer(Layer):
    """Frozen word-embedding lookup applied to a (question, context) pair.

    A single ``Embedding`` (initialised from the module-level
    ``embedding_matrix_word``, ``trainable=False``) is shared by both inputs.

    Args:
        input_dim: number of rows in the embedding table.
        output_dim: width of each embedding vector.
        input_len: value forwarded to ``Embedding(input_length=...)``.
    """

    def __init__(self, input_dim, output_dim, input_len):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.input_len = input_len
        # The inner layer's name is derived from this layer's auto-generated name.
        self.word_embed = Embedding(
            self.input_dim,
            self.output_dim,
            weights=[embedding_matrix_word],
            input_length=input_len,
            trainable=False,
            name=self._name + "_layer",
        )

    def build(self, input_shape):
        # No weights of its own; the wrapped Embedding builds itself on first call.
        self.built = True

    def call(self, inputs):
        """Return ``(embedded_question, embedded_context)`` for ``inputs = [question, context]``."""
        question, context = inputs
        embedded_question = self.word_embed(question)
        embedded_context = self.word_embed(context)
        return embedded_question, embedded_context

    def get_config(self):
        """Base layer config extended with this layer's constructor arguments."""
        return {
            **super().get_config(),
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'input_len': self.input_len,
        }

### Character Embedding Layer

In [None]:
class char_embedding_layer(Layer):
    """Frozen character-embedding lookup applied to a (question, context) pair.

    The ``Embedding`` is initialised from the module-level
    ``embedding_matrix_char`` with ``trainable=False`` and wrapped in
    ``TimeDistributed`` so it is applied at every step along axis 1.

    Args:
        input_dim: number of rows in the embedding table.
        output_dim: width of each embedding vector.
        input_len: value forwarded to ``Embedding(input_length=...)``.
    """

    def __init__(self, input_dim, output_dim, input_len):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.input_len = input_len
        self.char_embed = Embedding(
            self.input_dim,
            self.output_dim,
            weights=[embedding_matrix_char],
            input_length=input_len,
            trainable=False,
        )
        self.timed = TimeDistributed(self.char_embed)

    def build(self, input_shape):
        # No weights of its own; the wrapped layers build themselves on first call.
        self.built = True

    def call(self, inputs):
        """Return ``(embedded_question, embedded_context)`` for ``inputs = [question, context]``."""
        question, context = inputs
        embedded_question = self.timed(question)
        embedded_context = self.timed(context)
        return embedded_question, embedded_context

    def get_config(self):
        """Base layer config extended with this layer's constructor arguments."""
        return {
            **super().get_config(),
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'input_len': self.input_len,
        }

In [None]:
class char_cnn_layer(Layer):
    """Character CNN: a time-distributed ``Conv1D`` followed by a max over axis 2.

    The same convolution is shared by the question and the context.

    Args:
        n_filters: number of convolution filters.
        filter_width: kernel size of the convolution.
    """

    def __init__(self, n_filters, filter_width):
        super().__init__()
        self.n_filters = n_filters
        self.filter_width = filter_width
        self.conv = Conv1D(self.n_filters, self.filter_width)
        self.timed = TimeDistributed(self.conv)

    def build(self, input_shape):
        # No weights of its own; the wrapped layers build themselves on first call.
        self.built = True

    def _convolve_and_pool(self, char_embedded):
        """Apply the shared convolution, then take the maximum along axis 2."""
        return tf.math.reduce_max(self.timed(char_embedded), 2)

    def call(self, inputs):
        """Return ``(question_features, context_features)`` for ``inputs = [question, context]``."""
        question, context = inputs
        question_features = self._convolve_and_pool(question)
        context_features = self._convolve_and_pool(context)
        return question_features, context_features

    def get_config(self):
        """Base layer config extended with this layer's constructor arguments."""
        return {
            **super().get_config(),
            'n_filters': self.n_filters,
            'filter_width': self.filter_width,
        }

### Highway Layer

In [None]:
class highway_input_layer(Layer):
    """Joins word-level and character-level features along axis 2.

    Input order:  ``[question_word, context_word, question_char, context_char]``.
    Output order: ``(context, question)`` — note the context comes first.
    """

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        # Stateless layer: nothing to create.
        self.built = True

    def call(self, inputs):
        question_word, context_word, question_char, context_char = inputs
        question_features = concatenate([question_word, question_char], axis=2)
        context_features = concatenate([context_word, context_char], axis=2)
        return context_features, question_features

In [None]:
class highway_layer(Layer):
    """Single highway block: ``gate * relu_branch + (1 - gate) * inputs``.

    Both branches are ``Dense(200)`` layers, so the last dimension of ``inputs``
    has to be compatible with 200 for the blend to be valid.

    Args:
        name: layer name, stored directly on ``_name``.
    """

    def __init__(self, name):
        super().__init__()
        self._name = name
        self.normal = Dense(200, activation="relu")      # transform branch
        self.gate = Dense(200, activation="sigmoid")     # gate in (0, 1)

    def build(self, input_shape):
        # No weights of its own; the Dense sub-layers build themselves on first call.
        self.built = True

    def call(self, inputs):
        transformed = self.normal(inputs)
        gate_values = self.gate(inputs)
        return gate_values*transformed + (1-gate_values)*inputs

    def get_config(self):
        """Base layer config with ``name`` set from this layer's stored name."""
        return {**super().get_config(), 'name': self._name}

### Contextual Layer

In [None]:
class contextual_layer(Layer):
    """Bidirectional GRU encoder that returns the full output sequence.

    Args:
        output_dim: number of GRU units per direction.
        name: layer name, stored directly on ``_name``.
    """

    def __init__(self, output_dim, name):
        super().__init__()
        self.output_dim = output_dim
        self._name = name
        recurrent = GRU(
            self.output_dim,
            return_sequences=True,
            dropout=0.2,
            kernel_initializer=tf.keras.initializers.glorot_uniform(seed=67),
        )
        self.contextual = Bidirectional(recurrent)

    def build(self, input_shape):
        # No weights of its own; the wrapped GRU builds itself on first call.
        self.built = True

    def call(self, inputs):
        encoded = self.contextual(inputs)
        return encoded

    def get_config(self):
        """Base layer config extended with ``output_dim`` and ``name``."""
        return {
            **super().get_config(),
            'output_dim': self.output_dim,
            'name': self._name,
        }

### Attention Layer

In [None]:
class attention_input_layer(Layer):
    """Builds the pairwise feature tensor fed to the similarity scorer.

    For ``inputs = [H, U]``, ``H`` gets a new axis 2 and is tiled ``shape(U)[1]``
    times along it; ``U`` gets a new axis 1 and is tiled ``shape(H)[1]`` times
    along it. The result is ``[h, u, h * u]`` concatenated on the last axis.
    """

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        # Stateless layer: nothing to create.
        self.built = True

    def call(self, inputs):
        H, U = inputs

        # Tile multiples are assembled at run time from the other tensor's axis-1 length.
        h_multiples = concatenate([[1,1],[shape(U)[1]],[1]],0)
        u_multiples = concatenate([[1],[shape(H)[1]],[1,1]],0)

        h_tiled = tile(expand_dims(H, axis=2), h_multiples)
        u_tiled = tile(expand_dims(U, axis=1), u_multiples)

        return concatenate([h_tiled, u_tiled, h_tiled * u_tiled], axis=-1)

In [None]:
class attention_layer(Layer):
    """Scores the pairwise features with a linear ``Dense(1)`` and drops axis 3,
    producing the similarity matrix."""

    def __init__(self):
        super().__init__()
        self.dense = Dense(
            1,
            activation="linear",
            kernel_initializer=tf.keras.initializers.glorot_uniform(seed=54),
        )

    def build(self, input_shape):
        # No weights of its own; the Dense sub-layer builds itself on first call.
        self.built = True

    def call(self, inputs):
        scores = self.dense(inputs)
        # Dense(1) leaves a trailing axis of size 1; remove it.
        return squeeze(scores, 3)

In [None]:
class c2q_q2c_layer(Layer):
    """Computes both attention directions from ``inputs = [sim_matrix, H, U]``.

    * c2q: softmax of ``sim_matrix`` over its last axis, batch-dotted with ``U``.
    * q2c: max of ``sim_matrix`` over axis 2, softmax over the last axis,
      batch-dotted with ``H``, then repeated ``shape(H)[1]`` times along a new axis 1.

    Returns:
        ``(c2q, q2c)``.
    """

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        # Stateless layer: nothing to create.
        self.built = True

    def call(self, inputs):
        sim_matrix, H, U = inputs

        c2q_weights = tf.nn.softmax(sim_matrix, -1)
        c2q = batch_dot(c2q_weights, U)

        q2c_weights = tf.nn.softmax(tf.math.reduce_max(sim_matrix, 2), -1)
        q2c_vector = batch_dot(q2c_weights, H)
        q2c = tile(expand_dims(q2c_vector, axis=1),[1,shape(H)[1],1])

        return c2q, q2c

### Modelling Layer

In [None]:
class modelling_input_layer(Layer):
    """Forms ``G = [H, c2q, H * c2q, H * q2c]`` by concatenating on axis 2,
    for ``inputs = [H, c2q, q2c]``."""

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        # Stateless layer: nothing to create.
        self.built = True

    def call(self, inputs):
        H, c2q, q2c = inputs
        pieces = [H, c2q, (H*c2q), (H*q2c)]
        return concatenate(pieces, axis=2)

In [None]:
class modelling_layer(Layer):
    """Two stacked bidirectional GRUs (dropout 0.2), both returning sequences.

    Args:
        output_dim: number of GRU units per direction in each of the two layers.
    """

    def __init__(self, output_dim):
        super().__init__()
        self.output_dim = output_dim
        self.modelling1 = Bidirectional(GRU(self.output_dim, return_sequences=True, dropout=0.2))
        self.modelling2 = Bidirectional(GRU(self.output_dim, return_sequences=True, dropout=0.2))

    def build(self, input_shape):
        # No weights of its own; the wrapped GRUs build themselves on first call.
        self.built = True

    def call(self, inputs):
        first_pass = self.modelling1(inputs)
        second_pass = self.modelling2(first_pass)
        return second_pass

    def get_config(self):
        """Base layer config extended with ``output_dim``."""
        return {**super().get_config(), 'output_dim': self.output_dim}

### Output Layer

In [None]:
class input_to_start(Layer):
    """Concatenates ``G`` and ``M`` on axis 2 to form the start-head input."""

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        # Stateless layer: nothing to create.
        self.built = True

    def call(self, inputs):
        G, M = inputs
        return concatenate([G, M], axis=2)

In [None]:
class output_start(Layer):
    """Start-position head.

    Pipeline: linear ``Dense(1)`` -> ``Dropout(0.2)`` -> squeeze axis 2 ->
    softmax (``tf.nn.softmax`` default axis).
    """

    def __init__(self):
        super().__init__()
        self.dense = Dense(
            1,
            activation="linear",
            kernel_initializer=tf.keras.initializers.glorot_uniform(seed=35),
        )
        self.dropout = Dropout(0.2)

    def build(self, input_shape):
        # No weights of its own; the Dense sub-layer builds itself on first call.
        self.built = True

    def call(self, inputs):
        logits = self.dropout(self.dense(inputs))
        # Dense(1) leaves a trailing axis of size 1; drop it before normalising.
        return tf.nn.softmax(squeeze(logits, axis=2))

In [None]:
class input_to_end(Layer):
    """Builds the end-head input: ``M`` goes through one more bidirectional GRU
    and the result is concatenated with ``G`` on axis 2.

    Args:
        output_dim: number of GRU units per direction.
    """

    def __init__(self, output_dim):
        super().__init__()
        self.output_dim = output_dim
        recurrent = GRU(
            self.output_dim,
            return_sequences=True,
            dropout=0.2,
            kernel_initializer=tf.keras.initializers.glorot_uniform(seed=5),
        )
        self.end = Bidirectional(recurrent)

    def build(self, input_shape):
        # No weights of its own; the wrapped GRU builds itself on first call.
        self.built = True

    def call(self, inputs):
        G, M = inputs
        return concatenate([G, self.end(M)], axis=2)

    def get_config(self):
        """Base layer config extended with ``output_dim``."""
        return {**super().get_config(), 'output_dim': self.output_dim}

In [None]:
class output_end(Layer):
    """End-position head.

    Pipeline: linear ``Dense(1)`` -> ``Dropout(0.2)`` -> squeeze axis 2 ->
    softmax (``tf.nn.softmax`` default axis).
    """

    def __init__(self):
        super().__init__()
        self.dense = Dense(
            1,
            activation="linear",
            kernel_initializer=tf.keras.initializers.glorot_uniform(seed=85),
        )
        self.dropout = Dropout(0.2)

    def build(self, input_shape):
        # No weights of its own; the Dense sub-layer builds itself on first call.
        self.built = True

    def call(self, inputs):
        logits = self.dropout(self.dense(inputs))
        # Dense(1) leaves a trailing axis of size 1; drop it before normalising.
        return tf.nn.softmax(squeeze(logits, axis=2))

## Define Model

In [None]:
#https://medium.com/@aakashgoel12/how-to-add-user-defined-function-get-f1-score-in-keras-metrics-3013f979ce0d
def f1_score(y_true, y_pred):    #taken from old keras source code
    """Batch-level F1 computed from rounded values.

    Each tensor is clipped to [0, 1] and rounded before being summed, so an entry
    counts as positive only when it rounds to 1. ``K.epsilon()`` is added to every
    denominator to avoid division by zero.

    Args:
        y_true: target tensor.
        y_pred: prediction tensor of the same shape.

    Returns:
        Scalar tensor ``2 * precision * recall / (precision + recall + eps)``.
    """
    def _rounded_count(values):
        # number of entries that round to 1 after clipping into [0, 1]
        return K.sum(K.round(K.clip(values, 0, 1)))

    true_positives = _rounded_count(y_true * y_pred)
    possible_positives = _rounded_count(y_true)
    predicted_positives = _rounded_count(y_pred)

    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())

    return 2*(precision*recall)/(precision+recall+K.epsilon())

In [None]:
def bidaf_model(question_timesteps, context_timesteps, hidden_size, char_vocab, n_filters, filter_width):
    """Assemble the BiDAF network out of the notebook's custom layers.

    Reads the module-level ``char_max``, ``word_tokenizer`` and ``char_tokenizer``.
    Layers are created in a fixed order, each one right before it is applied.

    Args:
        question_timesteps: length of the question inputs.
        context_timesteps: length of the context inputs.
        hidden_size: word-embedding width; also the unit count of every GRU.
        char_vocab: character-embedding width.
        n_filters: number of character-CNN filters.
        filter_width: kernel size of the character CNN.

    Returns:
        A ``Model`` named "bidaf". Inputs, in order: question word tokens, context
        word tokens, question char tokens, context char tokens. Outputs:
        ``[start, end]``, the results of the two softmax heads.
    """
    # ---- inputs ----
    q_word_ids = Input(shape=(question_timesteps,), name = 'question_word_tokens')
    c_word_ids = Input(shape=(context_timesteps,), name = 'context_word_tokens')
    q_char_ids = Input(shape=(question_timesteps,char_max,), name = 'question_char_tokens')
    c_char_ids = Input(shape=(context_timesteps,char_max,), name = 'context_char_tokens')

    # ---- word embedding (one shared lookup) ----
    word_embedder = word_embedding_layer(len(word_tokenizer)+1, hidden_size, q_word_ids.shape[-1])
    q_word_vecs, c_word_vecs = word_embedder([q_word_ids, c_word_ids])

    # ---- character embedding (one shared lookup) ----
    char_embedder = char_embedding_layer(len(char_tokenizer.word_index)+1, char_vocab, q_char_ids.shape[-1])
    q_char_vecs, c_char_vecs = char_embedder([q_char_ids, c_char_ids])

    # ---- character CNN ----
    char_cnn = char_cnn_layer(n_filters, filter_width)
    q_char_feats, c_char_feats = char_cnn([q_char_vecs, c_char_vecs])

    # ---- highway: merge word + char features; output order is (context, question) ----
    merge_features = highway_input_layer()
    c_merged, q_merged = merge_features([q_word_vecs, c_word_vecs, q_char_feats, c_char_feats])
    # question and context each get their own highway_layer instance
    q_highway = highway_layer("question_highway")(q_merged)
    c_highway = highway_layer("context_highway")(c_merged)

    # ---- contextual encoders (separate instances for question and context) ----
    q_encoded = contextual_layer(hidden_size, "question_contextual")(q_highway)
    c_encoded = contextual_layer(hidden_size, "context_contextual")(c_highway)

    # ---- attention flow ----
    pair_features = attention_input_layer()([c_encoded, q_encoded])
    similarity = attention_layer()(pair_features)
    c2q, q2c = c2q_q2c_layer()([similarity, c_encoded, q_encoded])

    # ---- modelling ----
    G = modelling_input_layer()([c_encoded, c2q, q2c])
    M = modelling_layer(hidden_size)(G)

    # ---- output heads ----
    start_in = input_to_start()([G,M])
    start = output_start()(start_in)
    end_in = input_to_end(hidden_size)([G,M])
    end = output_end()(end_in)

    return Model(inputs=[q_word_ids,c_word_ids,q_char_ids,c_char_ids], outputs=[start, end], name="bidaf")

### Build Model

In [None]:
#defining various parameters of the model and building it

#input timesteps
question_timesteps = question_max
context_timesteps = context_max

#output dimensions of word and character embedding
# hidden_size is both the word-embedding width (it must match the 100 columns of
# embedding_matrix_word) and the unit count of every GRU in the model.
hidden_size = 100
# The char Embedding is initialised from embedding_matrix_char, so its output width
# has to equal that matrix's column count. Derive it instead of hard-coding 1218,
# which only works for one particular character vocabulary.
char_vocab = embedding_matrix_char.shape[1]

#character CNN filters and width
n_filters = 100
filter_width = 3

# Drop any state left by a previous build so layer names start from scratch.
tf.keras.backend.clear_session()
model = bidaf_model(question_timesteps, context_timesteps, hidden_size, char_vocab, n_filters, filter_width)

# summary() prints the table itself and returns None; wrapping it in print()
# appended a stray "None" line to the output.
model.summary()

Model: "bidaf"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
question_char_tokens (InputLaye [(None, 32, 15)]     0                                            
__________________________________________________________________________________________________
context_char_tokens (InputLayer [(None, 340, 15)]    0                                            
__________________________________________________________________________________________________
question_word_tokens (InputLaye [(None, 32)]         0                                            
__________________________________________________________________________________________________
context_word_tokens (InputLayer [(None, 340)]        0                                            
______________________________________________________________________________________________

In [None]:
#defining loss function and optimizer
# Categorical cross-entropy with default settings: from_logits is not set, and both
# model heads already end in a softmax. The same loss object scores the start and
# the end head in the training / validation steps.
loss_function = tf.keras.losses.CategoricalCrossentropy(reduction='auto')
# Nadam with a fixed learning rate of 5e-4.
optimizer = tf.keras.optimizers.Nadam(learning_rate=0.0005)

## Data Pipeline Execution

### Prepare Data Pipeline

In [None]:
##creating input dataset(train and test) using tf.data
# Each element is a 4-tuple in the order (question words, context words, question chars,
# context chars) — the same order as the `inputs=[...]` list of the Model built in bidaf_model.
train_inputs = tf.data.Dataset.from_tensor_slices((train_question_word_padded, train_context_word_padded, train_question_char_padded, train_context_char_padded))
test_inputs = tf.data.Dataset.from_tensor_slices((test_question_word_padded, test_context_word_padded, test_question_char_padded, test_context_char_padded))

In [None]:
##creating output dataset(train and test) using tf.data
# Each element is a (start target, end target) pair, matching the model's [start, end] outputs.
train_targets = tf.data.Dataset.from_tensor_slices((y_start_train, y_end_train))
test_targets = tf.data.Dataset.from_tensor_slices((y_start_test, y_end_test))

In [None]:
#shuffling and split to batches
# Training data: pair inputs with targets, shuffle with a 500-element buffer, batch by 32
# and prefetch so input preparation overlaps with the training step.
train_dataset = tf.data.Dataset.zip((train_inputs, train_targets)).shuffle(500).batch(32).prefetch(tf.data.experimental.AUTOTUNE)
# Validation data is NOT shuffled: order is irrelevant for evaluation, and shuffling changed
# which examples landed in the final (partial) batch, so the per-batch-averaged validation
# loss / F1 could differ slightly from run to run.
test_dataset = tf.data.Dataset.zip((test_inputs, test_targets)).batch(32).prefetch(tf.data.experimental.AUTOTUNE)

### Model Training

In [None]:
#https://udai.gitbook.io/practical-ml/nn/training-and-debugging-of-nn
#steps to be performed in each training step
@tf.function
def train_step(input_vector, output_vector,loss_fn):
    """Run one optimisation step on a batch using the module-level ``model`` and ``optimizer``.

    Args:
        input_vector: batch of model inputs, passed to ``model`` as ``inputs``.
        output_vector: pair ``(start targets, end targets)``.
        loss_fn: callable ``loss_fn(y_true, y_pred)`` applied to each head. The original
            body ignored this argument and always used the global ``loss_function``.

    Returns:
        ``(loss_start, loss_end, output_predicted, gradients)``, where ``gradients``
        lines up with ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        #forward propagation
        output_predicted = model(inputs=input_vector, training=True)
        #loss: one term per head, summed for back-propagation
        loss_start = loss_fn(output_vector[0], output_predicted[0])
        loss_end = loss_fn(output_vector[1], output_predicted[1])
        loss_final = loss_start + loss_end
    #getting gradients
    gradients = tape.gradient(loss_final, model.trainable_variables)
    #applying gradients
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss_start, loss_end, output_predicted, gradients

In [None]:
#https://udai.gitbook.io/practical-ml/nn/training-and-debugging-of-nn
#steps to be performed in each validation step
@tf.function
def val_step(input_vector, output_vector, loss_fn):
    """Evaluate one batch with the module-level ``model`` in inference mode (no weight update).

    Args:
        input_vector: batch of model inputs, passed to ``model`` as ``inputs``.
        output_vector: pair ``(start targets, end targets)``.
        loss_fn: callable ``loss_fn(y_true, y_pred)`` applied to each head. The original
            body ignored this argument and always used the global ``loss_function``.

    Returns:
        ``(loss_start, loss_end, output_predicted)``.
    """
    #getting output of validation data
    output_predicted = model(inputs=input_vector, training=False)
    #loss calculation
    loss_start = loss_fn(output_vector[0], output_predicted[0])
    loss_end = loss_fn(output_vector[1], output_predicted[1])
    return loss_start, loss_end, output_predicted

In [None]:
#batch size
# NOTE(review): BATCH_SIZE is not referenced by the visible cells — the tf.data pipeline
# and the `iters` computation both hard-code 32. Keep the three in sync.
BATCH_SIZE=32
##number of epochs
EPOCHS=12

In [None]:
# Running-mean accumulators, one per tracked quantity: start/end loss and start/end F1
# for both the training and the validation pass. Each is fed once per batch and reset
# at the start of every epoch by the training loop.
(
    train_start_loss, train_end_loss,
    val_start_loss, val_end_loss,
    train_start_f1, train_end_f1,
    val_start_f1, val_end_f1,
) = [
    tf.keras.metrics.Mean(name=metric_name)
    for metric_name in (
        'train_start_loss', 'train_end_loss',
        'val_start_loss', 'val_end_loss',
        'train_start_f1', 'train_end_f1',
        'val_start_f1', 'val_end_f1',
    )
]

In [None]:
#tensorboard file writers
# Separate writers (and log directories) for training and validation summaries.
wtrain = tf.summary.create_file_writer(logdir='logdir_train')
wval = tf.summary.create_file_writer(logdir='logdir_val')

In [None]:
#to get the iteration number for recording logs
# Batches per epoch = ceil(64769 / 32); 64769 is presumably the number of training
# examples and 32 the batch size used by the tf.data pipeline — confirm if either changes.
# np.ceil is used because `math` is never imported in this notebook (math.ceil raised NameError).
iters = int(np.ceil(64769/32))
#for model checkpointing: lowest summed validation loss seen so far
best_loss=100

In [None]:
# Batch count for the progress bar, read once from the dataset's cardinality.
# The original evaluated len(list(train_dataset)) at the start of every epoch, which
# ran a complete extra pass over the training data just to count the batches.
n_train_batches = int(tf.data.experimental.cardinality(train_dataset).numpy())
if n_train_batches < 0:
    # negative values are tf.data's "infinite" / "unknown" markers: show no total
    n_train_batches = None

for epoch in range(EPOCHS):

    #resetting the states of the loss and metrics
    for epoch_metric in (train_start_loss, train_end_loss, val_start_loss, val_end_loss,
                         train_start_f1, train_end_f1, val_start_f1, val_end_f1):
        epoch_metric.reset_states()

    ##counter for train loop iteration
    counter = 0

    #iterating over train data batch by batch
    for text_seq, label_seq in tqdm(iterable=train_dataset, total=n_train_batches):
        #train step
        loss_start_, loss_end_, pred_out, gradients = train_step(text_seq, label_seq, loss_function)
        #adding loss to train loss
        train_start_loss(loss_start_)
        train_end_loss(loss_end_)
        #global step number used as the x-axis of the per-step TensorBoard curves
        temp_step = epoch*iters+counter
        counter = counter + 1

        #calculating f1 for batch
        f1_start = f1_score(label_seq[0], pred_out[0])
        f1_end = f1_score(label_seq[1], pred_out[1])
        train_start_f1(f1_start)
        train_end_f1(f1_end)

        ##tensorboard
        with tf.name_scope('per_step_training'):
            with wtrain.as_default():
                tf.summary.scalar("start_loss", loss_start_, step=temp_step)
                tf.summary.scalar("end_loss", loss_end_, step=temp_step)
                tf.summary.scalar('f1_start', f1_start, step=temp_step)
                tf.summary.scalar('f1_end', f1_end, step=temp_step)
        with tf.name_scope("per_batch_gradients"):
            with wtrain.as_default():
                # one histogram per trainable variable; gradients[i] belongs to trainable_variables[i]
                for i in range(len(model.trainable_variables)):
                    name_temp = model.trainable_variables[i].name
                    tf.summary.histogram(name_temp, gradients[i], step=temp_step)


    #validation data
    for text_seq_val, label_seq_val in test_dataset:
        #getting val output
        loss_val_start, loss_val_end, pred_out_val = val_step(text_seq_val, label_seq_val, loss_function)

        val_start_loss(loss_val_start)
        val_end_loss(loss_val_end)

        #calculating metric
        f1_start_val = f1_score(label_seq_val[0], pred_out_val[0])
        f1_end_val = f1_score(label_seq_val[1], pred_out_val[1])
        val_start_f1(f1_start_val)
        val_end_f1(f1_end_val)


    #printing
    template = '''Epoch {}, Train Start Loss: {:0.6f}, Start F1 Score: {:0.5f}, Train End Loss: {:0.6f}, End F1 Score: {:0.5f},
    Val Start Loss: {:0.6f}, Val Start F1 Score: {:0.5f}, Val End Loss: {:0.6f}, Val End F1 Score: {:0.5f}'''

    print(template.format(epoch+1, train_start_loss.result(), train_start_f1.result(), 
                          train_end_loss.result(), train_end_f1.result(),
                          val_start_loss.result(), val_start_f1.result(),
                          val_end_loss.result(), val_end_f1.result()))


    #checkpoint: keep the model with the lowest summed (start + end) validation loss
    val_total_loss = val_start_loss.result()+val_end_loss.result()
    if val_total_loss<best_loss:
        model.save("drive/My Drive/Colab Notebooks/new_model/model")
        best_loss=val_total_loss

    #tensorboard
    with tf.name_scope("per_epoch_loss_metric"):
        with wtrain.as_default():
            tf.summary.scalar("start_loss", train_start_loss.result().numpy(), step=epoch)
            tf.summary.scalar("end_loss", train_end_loss.result().numpy(), step=epoch)
            tf.summary.scalar('start_f1', train_start_f1.result().numpy(), step=epoch)
            tf.summary.scalar('end_f1', train_end_f1.result().numpy(), step=epoch)
        with wval.as_default():
            tf.summary.scalar("start_loss", val_start_loss.result().numpy(), step=epoch)
            tf.summary.scalar("end_loss", val_end_loss.result().numpy(), step=epoch)
            tf.summary.scalar('start_f1', val_start_f1.result().numpy(), step=epoch)
            tf.summary.scalar('end_f1', val_end_f1.result().numpy(), step=epoch)

100%|██████████| 2025/2025 [23:57<00:00,  1.41it/s]


Epoch 1, Train Start Loss: 3.943047, Start F1 Score: 0.04568, Train End Loss: 3.737789, End F1 Score: 0.05230,
    Val Start Loss: 3.040010, Val Start F1 Score: 0.08044, Val End Loss: 2.798793, Val End F1 Score: 0.10350
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: drive/My Drive/Colab Notebooks/new_model/model/assets


100%|██████████| 2025/2025 [23:56<00:00,  1.41it/s]


Epoch 2, Train Start Loss: 3.390445, Start F1 Score: 0.12948, Train End Loss: 3.202414, End F1 Score: 0.14917,
    Val Start Loss: 3.023395, Val Start F1 Score: 0.11656, Val End Loss: 2.646866, Val End F1 Score: 0.14655
INFO:tensorflow:Assets written to: drive/My Drive/Colab Notebooks/new_model/model/assets


100%|██████████| 2025/2025 [24:47<00:00,  1.36it/s]


Epoch 3, Train Start Loss: 2.928725, Start F1 Score: 0.28020, Train End Loss: 2.746392, End F1 Score: 0.31691,
    Val Start Loss: 2.024346, Val Start F1 Score: 0.36717, Val End Loss: 1.816877, Val End F1 Score: 0.44213
INFO:tensorflow:Assets written to: drive/My Drive/Colab Notebooks/new_model/model/assets


100%|██████████| 2025/2025 [24:15<00:00,  1.39it/s]


Epoch 4, Train Start Loss: 2.641253, Start F1 Score: 0.37690, Train End Loss: 2.476184, End F1 Score: 0.41913,
    Val Start Loss: 1.830245, Val Start F1 Score: 0.44721, Val End Loss: 1.644780, Val End F1 Score: 0.50447
INFO:tensorflow:Assets written to: drive/My Drive/Colab Notebooks/new_model/model/assets


100%|██████████| 2025/2025 [23:56<00:00,  1.41it/s]


Epoch 5, Train Start Loss: 2.522749, Start F1 Score: 0.41432, Train End Loss: 2.356189, End F1 Score: 0.45976,
    Val Start Loss: 1.769097, Val Start F1 Score: 0.46996, Val End Loss: 1.606666, Val End F1 Score: 0.51616
INFO:tensorflow:Assets written to: drive/My Drive/Colab Notebooks/new_model/model/assets


100%|██████████| 2025/2025 [23:51<00:00,  1.41it/s]


Epoch 6, Train Start Loss: 2.408644, Start F1 Score: 0.44738, Train End Loss: 2.250348, End F1 Score: 0.49462,
    Val Start Loss: 1.726372, Val Start F1 Score: 0.48292, Val End Loss: 1.540575, Val End F1 Score: 0.54831
INFO:tensorflow:Assets written to: drive/My Drive/Colab Notebooks/new_model/model/assets


100%|██████████| 2025/2025 [23:50<00:00,  1.42it/s]


Epoch 7, Train Start Loss: 2.327476, Start F1 Score: 0.46988, Train End Loss: 2.166238, End F1 Score: 0.51891,
    Val Start Loss: 1.697856, Val Start F1 Score: 0.51075, Val End Loss: 1.529177, Val End F1 Score: 0.56338
INFO:tensorflow:Assets written to: drive/My Drive/Colab Notebooks/new_model/model/assets


100%|██████████| 2025/2025 [23:42<00:00,  1.42it/s]


Epoch 8, Train Start Loss: 2.235740, Start F1 Score: 0.49687, Train End Loss: 2.101116, End F1 Score: 0.54083,
    Val Start Loss: 1.670072, Val Start F1 Score: 0.51971, Val End Loss: 1.508179, Val End F1 Score: 0.57820
INFO:tensorflow:Assets written to: drive/My Drive/Colab Notebooks/new_model/model/assets


100%|██████████| 2025/2025 [23:51<00:00,  1.41it/s]


Epoch 9, Train Start Loss: 2.175614, Start F1 Score: 0.51717, Train End Loss: 2.046705, End F1 Score: 0.55964,
    Val Start Loss: 1.651155, Val Start F1 Score: 0.52256, Val End Loss: 1.483043, Val End F1 Score: 0.57675
INFO:tensorflow:Assets written to: drive/My Drive/Colab Notebooks/new_model/model/assets


100%|██████████| 2025/2025 [23:52<00:00,  1.41it/s]


Epoch 10, Train Start Loss: 2.142618, Start F1 Score: 0.52822, Train End Loss: 1.984587, End F1 Score: 0.57578,
    Val Start Loss: 1.651103, Val Start F1 Score: 0.53271, Val End Loss: 1.479795, Val End F1 Score: 0.58703
INFO:tensorflow:Assets written to: drive/My Drive/Colab Notebooks/new_model/model/assets


100%|██████████| 2025/2025 [23:47<00:00,  1.42it/s]


Epoch 11, Train Start Loss: 2.079690, Start F1 Score: 0.54090, Train End Loss: 1.938480, End F1 Score: 0.59007,
    Val Start Loss: 1.658940, Val Start F1 Score: 0.54067, Val End Loss: 1.515988, Val End F1 Score: 0.59370


100%|██████████| 2025/2025 [23:41<00:00,  1.42it/s]


Epoch 12, Train Start Loss: 2.042344, Start F1 Score: 0.55424, Train End Loss: 1.890511, End F1 Score: 0.60424,
    Val Start Loss: 1.652715, Val Start F1 Score: 0.53162, Val End Loss: 1.498028, Val End F1 Score: 0.59067


In [None]:
#saving tensorboard logs
# Copies every file matching *.v2 from the two local log directories to Drive.
# The destination folders must already exist on Drive.
%cp logdir_train/*.v2 "drive/My Drive/Colab Notebooks/logdir/train/"
%cp logdir_val/*.v2 "drive/My Drive/Colab Notebooks/logdir/val/"

In [None]:
def print_predictions(data_point):
  """Print the question, context and predicted answer span for one test record.

  Uses the module-level ``model``, ``word_tokenizer`` and ``test_*_padded`` arrays.

  Args:
    data_point: row index into the test arrays.

  The predicted span runs from ``argmax`` of the start output to ``argmax`` of the
  end output (inclusive); nothing is printed for it when the end precedes the start.
  """
  # Reverse vocabulary (index -> word), built once per call instead of scanning the
  # whole vocabulary for every printed token. setdefault keeps the first word seen for
  # an index, which is what the original list(...).index(...) lookup returned.
  index_to_word = {}
  for word, index in word_tokenizer.items():
    index_to_word.setdefault(index, word)

  def print_tokens(token_ids):
    # token id 0 is padding: stop at the first one
    for token_id in token_ids:
      if token_id==0:
        break
      print(index_to_word[token_id], end = ' ')

  print("Question:")
  print_tokens(test_question_word_padded[data_point])
  print("\nContext:")
  print_tokens(test_context_word_padded[data_point])
  print("\nPredicted Answer:")

  # All four inputs must come from the SAME record. The original fed the character
  # inputs of record 15 ([15:16]) regardless of data_point, so every prediction other
  # than record 15 mixed one example's words with another example's characters.
  record = slice(data_point, data_point+1)
  start, end = model.predict([test_question_word_padded[record], test_context_word_padded[record],
                              test_question_char_padded[record], test_context_char_padded[record]])
  # Stops early if the span reaches padding (the original lookup raised on id 0).
  print_tokens(test_context_word_padded[data_point][start.argmax():end.argmax()+1])
  print("\n")

## Sample Predictions

In [None]:
# Row indices of the test records to display.
data_points = [8,15,52,152,332]
for i in data_points:
  print_predictions(i)

Question:
What kind of fishing tourism occurs on the island ? 
Context:
The tourist industry is heavily based on the promotion of Napoleon 's imprisonment . A golf course also exists and the possibility for sportfishing tourism is great . Three hotels operate on the island but the arrival of tourists is directly linked to the arrival and departure schedule of the RMS St Helena . Some 3,200 short-term visitors arrived on the island in 2013 . 
Predicted Answer:
sportfishing 

Question:
Who measured the circumference of the Earth ? 
Context:
Hellenistic Geometers such as Archimedes ( c. 287 – 212 BC ) , Apollonius of Perga ( c. 262 – c. 190 BC ) , and Euclid ( c. 325 – 265 BC ) , whose Elements became the most important textbook in mathematics until the 19th century , built upon the work of the Hellenic era Pythagoreans . Euclid developed proofs for the Pythagorean Theorem , for the infinitude of primes , and worked on the ﬁve Platonic solids . Eratosthenes used his knowledge of geometry 

In [None]:
#saving the final-epoch model to disk
# Saved next to, not over, ".../new_model/model". The training loop writes the
# best-validation-loss checkpoint to that folder via a relative path, and with Colab's
# default working directory (/content — TODO confirm) the original absolute path here
# resolved to the same folder, so this save replaced the best checkpoint with the
# last-epoch weights.
model.save("/content/drive/My Drive/Colab Notebooks/new_model/final_model")

INFO:tensorflow:Assets written to: /content/drive/My Drive/Colab Notebooks/new_model/model/assets
