## Importing Libraries

In [2]:
import pickle                                                       #to load tokenizers

import numpy as np                                                  #to build padded input arrays
import pandas as pd                                                 #to import test data
import tensorflow as tf                                             #to import the model
from keras.preprocessing.sequence import pad_sequences              #to pad sequences to max length
from nltk.tokenize import word_tokenize                             #to tokenize sentences

  return f(*args, **kwds)
Using TensorFlow backend.


## Creating Model Class

In [7]:
class bidaf():
    """Inference wrapper around a saved question-answering model.

    The wrapper loads a Keras model plus two pickled tokenizers, converts a
    (question, context) pair of strings into four fixed-size integer arrays
    (word ids and character ids for each text) and prints the context tokens
    lying between the model's predicted start and end positions.

    Attributes set by ``__init__``:
        model: object returned by ``tf.keras.models.load_model``.
        word_tokenizer: mapping from word to integer id; must contain the
            special keys ``'UNK'`` and ``'PAD'``.
        char_tokenizer: object exposing ``texts_to_sequences``.
        context_max / question_max: number of tokens each text is padded or
            truncated to.
        char_max: number of characters each token is padded or truncated to.
    """

    def __init__(self, model_path="bidaf/model/",
                 word_tokenizer_path='bidaf/word_tokenizer.pickle',
                 char_tokenizer_path='bidaf/char_tokenizer.pickle',
                 context_max=340, question_max=32, char_max=15):
        """Load the model and tokenizers.

        Args:
            model_path: directory of the saved Keras model.
            word_tokenizer_path: pickle file holding the word -> id mapping.
            char_tokenizer_path: pickle file holding the character tokenizer.
            context_max: fixed context length (in tokens) fed to the model.
            question_max: fixed question length (in tokens) fed to the model.
            char_max: fixed number of characters kept per token.

        The length limits presumably have to match the shapes the model was
        trained with -- confirm before overriding the defaults.
        """
        self.model = tf.keras.models.load_model(model_path)
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # these paths at tokenizer files you created or otherwise trust.
        with open(word_tokenizer_path, 'rb') as handle:
            self.word_tokenizer = pickle.load(handle)
        with open(char_tokenizer_path, 'rb') as handle:
            self.char_tokenizer = pickle.load(handle)
        self.context_max = context_max
        self.question_max = question_max
        self.char_max = char_max

    def _encode(self, tokens):
        """Return ``(word_ids, char_id_lists)`` for a list of string tokens."""
        word_ids = []
        char_ids = []
        for token in tokens:
            # Out-of-vocabulary words share the 'UNK' id; characters are
            # always encoded from the original token text.
            if token in self.word_tokenizer:
                word_ids.append(self.word_tokenizer[token])
            else:
                word_ids.append(self.word_tokenizer['UNK'])
            char_ids.append(self.char_tokenizer.texts_to_sequences([token])[0])
        return word_ids, char_ids

    def _pad_words(self, word_ids, max_len):
        """Truncate/pad ``word_ids`` with the 'PAD' id to exactly ``max_len``."""
        padded = list(word_ids[:max_len])
        missing = max_len - len(padded)
        if missing > 0:
            padded.extend([self.word_tokenizer['PAD']] * missing)
        return np.array(padded, dtype=np.int32)

    def _pad_chars(self, char_ids, max_len):
        """Build a zero-padded ``(max_len, char_max)`` int32 array of char ids."""
        padded = np.zeros((max_len, self.char_max), dtype=np.int32)
        # Truncate on both axes so the result always lines up with the word
        # array produced by _pad_words for the same text.
        for row, ids in enumerate(char_ids[:max_len]):
            kept = list(ids[:self.char_max])
            if kept:
                padded[row, :len(kept)] = kept
        return padded

    def __get_tokens(self):
        """Tokenize ``self.question``/``self.context`` and encode them as ids.

        Both attributes are replaced by their token lists; the id lists are
        stored in the ``*_word_tokens`` and ``*_char_tokens`` attributes.
        """
        self.question = word_tokenize(self.question)
        self.context = word_tokenize(self.context)

        self.question_word_tokens, self.question_char_tokens = self._encode(self.question)
        self.context_word_tokens, self.context_char_tokens = self._encode(self.context)

    def __get_padded_word_sequences(self):
        """Store fixed-length word-id arrays for the question and the context."""
        self.question_word_padded = self._pad_words(self.question_word_tokens, self.question_max)
        self.context_word_padded = self._pad_words(self.context_word_tokens, self.context_max)

    def __get_padded_char_sequences(self):
        """Store fixed-size char-id arrays for the question and the context."""
        self.question_char_padded = self._pad_chars(self.question_char_tokens, self.question_max)
        self.context_char_padded = self._pad_chars(self.context_char_tokens, self.context_max)

    def predict(self, question, context):
        """Print the answer span the model predicts for ``question``.

        Args:
            question: question text as a string.
            context: passage text as a string; only the first ``context_max``
                tokens are given to the model.

        Prints the context tokens from the predicted start position to the
        predicted end position (inclusive), separated by spaces. Nothing is
        printed but an empty line when the end precedes the start or the
        span falls entirely inside the padding.
        """
        self.question = question
        self.context = context
        self.__get_tokens()
        self.__get_padded_word_sequences()
        self.__get_padded_char_sequences()

        # The padded arrays describe a single example, so prepend a batch
        # axis of size 1 before handing them to the model.
        inputs = [features[np.newaxis, ...] for features in (
            self.question_word_padded, self.context_word_padded,
            self.question_char_padded, self.context_char_padded)]
        # Assumes the two outputs hold one score per context position for a
        # batch of one example -- TODO confirm against the saved model.
        start, end = self.model.predict(inputs)

        first = int(np.argmax(start))
        last = int(np.argmax(end))
        # word_tokenizer maps word -> id, so ids cannot be looked up in it;
        # read the answer from the tokenized context instead, which also
        # keeps out-of-vocabulary words readable. Slicing quietly drops
        # positions that fall in the padding.
        print(' '.join(self.context[first:last + 1]))

## Loading the Dataset to obtain test data

In [None]:
# Load the pickled test-set DataFrame; later cells read its 'question' and 'context' columns.
df_answerable = pd.read_pickle("bidaf/df_test.pkl")

In [None]:
# Input question and context as plain strings. Here one example is taken from
# the test set; any other pair of strings can be substituted.
SAMPLE_ROW = 80184  # index label of the example row (looked up by label via .loc)
question = df_answerable.loc[SAMPLE_ROW, 'question']
context = df_answerable.loc[SAMPLE_ROW, 'context']

In [None]:
# Instantiate the wrapper: loads the saved model and both pickled tokenizers from the bidaf/ directory.
model = bidaf()



## Prediction

In [5]:
# Show the sample that will be fed to the model: question first, then its context passage.
print(question, context, sep='\n')

Who contributed to the American studies programs at Yale and University of Wyoming?
The American studies program reflected the worldwide anti-Communist ideological struggle. Norman Holmes Pearson, who worked for the Office of Strategic Studies in London during World War II, returned to Yale and headed the new American studies program, in which scholarship quickly became an instrument of promoting liberty. Popular among undergraduates, the program sought to instruct them in the fundamentals of American civilization and thereby instill a sense of nationalism and national purpose. Also during the 1940s and 1950s, Wyoming millionaire William Robertson Coe made large contributions to the American studies programs at Yale University and at the University of Wyoming. Coe was concerned to celebrate the 'values' of the Western United States in order to meet the "threat of communism."


In [None]:
# Predict the answer span for the sample; predict() prints the answer tokens rather than returning them.
model.predict(question, context)