In [None]:
import pandas as pd 
import numpy as np

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Bidirectional, CuDNNLSTM, Dense, Embedding, Dropout, Input, CuDNNGRU, GRU, LSTM
from keras.layers import Conv1D, GlobalMaxPooling1D, MaxPooling1D, GlobalAveragePooling1D, concatenate,AveragePooling1D
from keras.models import Model
from keras.callbacks import ModelCheckpoint,ReduceLROnPlateau

from gensim.models import Word2Vec

from sklearn.metrics import confusion_matrix, f1_score 
from sklearn.model_selection import train_test_split

import pickle
import re
import string

In [None]:
!wget http://nlp.stanford.edu/data/glove.840B.300d.zip

In [None]:
!unzip glove.840B.300d.zip

In [None]:
# Build a {token: vector} lookup from the GloVe text file.
#
# Each line is "<token> <v1> ... <vN>" separated by single spaces. The vector is
# read from the LAST GLOVE_DIM fields and everything before it is treated as the
# token, so a token that itself contains a space is still parsed instead of
# being discarded (splitting on any whitespace and using values[1:] fails on
# such lines).
GLOVE_DIM = 300  # vector width of the glove.840B.300d file

embeddings_index = dict()
# Explicit encoding: the default is platform-dependent.
with open('glove.840B.300d.txt', encoding='utf-8') as glove:
  for line in glove:
    values = line.rstrip().split(' ')
    word = ' '.join(values[:-GLOVE_DIM])
    try:
      coefs = np.asarray(values[-GLOVE_DIM:], dtype='float32')
    except ValueError:
      # Non-numeric field in the vector part: report the token and skip the line.
      print(word)
      continue
    if coefs.shape[0] != GLOVE_DIM or not word:
      # Short or token-less line: report and skip rather than store a bad vector.
      print(word)
      continue
    embeddings_index[word] = coefs

print('Found %s word vectors.' % len(embeddings_index))

In [None]:
# Load the labelled dataset. Later cells read its "query", "previous_intent"
# and "current_intent" columns.
data = pd.read_csv('schedule_flight.csv')

In [None]:
# Shuffle all rows and renumber the index. The fixed random_state makes the
# shuffle repeatable, so the seeded train/test split further down yields the
# same partition after every kernel restart.
data = data.sample(frac=1, random_state=10).reset_index(drop=True)

In [None]:
# Display the first rows of `data` as a sanity check.
data.head()

In [None]:
# One-hot encode the previous intent and discard the 'no' indicator column, so
# rows whose previous_intent is 'no' end up as an all-zero vector.
dumPREV = pd.get_dummies(data["previous_intent"]).drop(columns=['no'])

In [None]:
# Display the first rows of the previous-intent one-hot frame.
dumPREV.head()

In [None]:
# Text-model hyperparameters shared by the cells below.
embed_size = 300 # width of each word vector; passed to the Embedding layers

maxlen = 30 # sequence length handed to pad_sequences for every query

In [None]:
# Raw query strings as a NumPy array (input to the tokenizer).
queries = data["query"].values

In [None]:
# Fit the tokenizer on the raw query text and convert every query into a
# fixed-length row of integer token ids.
#
# The raw text is re-read from `data` rather than from `queries`: this cell
# rebinds `queries` to the padded id matrix, so reading from `queries` would
# make a second run of the cell fit the tokenizer on integers instead of text.
raw_queries = data["query"].values

tokenizer = Tokenizer()
tokenizer.fit_on_texts(raw_queries)
queries = pad_sequences(tokenizer.texts_to_sequences(raw_queries), maxlen)

In [None]:
# Wrap the padded token-id matrix in a DataFrame so it can be joined with the
# one-hot columns.
df = pd.DataFrame(queries)

In [None]:
# Join on the row index: previous-intent one-hot columns first, token-id
# columns after. The training cells slice features positionally
# ([:, :4] and [:, 4:]), so this column order matters.
# NOTE(review): that slicing assumes dumPREV has exactly 4 columns — confirm against the data.
dataset = dumPREV.join(df, how='outer')

In [None]:
# Display the first rows of the combined feature frame.
dataset.head()

In [None]:
# Vocabulary learned by the tokenizer (word -> integer id) and its size.
word_index = tokenizer.word_index
nb_words = len(word_index)
print(nb_words)
# NOTE(review): this bare expression displays the entire vocabulary dict as the cell output.
word_index

In [None]:
# One-hot encode the target label (current intent); used as `y` below.
dumCURR = pd.get_dummies(data["current_intent"])

In [None]:
# Display the first rows of the one-hot target frame.
dumCURR.head()

In [None]:
# Feature matrix (one-hot previous intent + token ids) and one-hot targets as
# plain NumPy arrays.
x = dataset.values
y = dumCURR.values
print("Shape X ", x.shape)
print("Shape Y ", y.shape)

In [None]:
# Hold out 5% of the rows as a test set; random_state fixes the partition for a
# given row order.
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size = 0.05, random_state = 10)

In [None]:
# Report the shapes of both partitions as a sanity check on the split.
print("Shape of X, Y train", train_x.shape, train_y.shape)
print("Shape of X, Y test", test_x.shape, test_y.shape)

In [None]:
# Allocate the embedding table: one row per tokenizer id plus one extra row,
# because the matrix is indexed directly by the ids stored in word_index.
# The width comes from embed_size (not a literal) so it always agrees with the
# Embedding layers that receive this matrix as initial weights.
nb_words = len(word_index)
embedding_matrix = np.zeros((nb_words+1, embed_size))

In [None]:
# Copy the pre-trained vector of every vocabulary word that GloVe knows into
# its row; words without a pre-trained vector keep their all-zero row.
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [None]:
# Model 1: pre-initialised embedding -> bidirectional GRU over the query,
# concatenated with the previous-intent one-hot vector, then a dense classifier.
inp1 = Input(shape = (maxlen,))  # padded token ids; length must match pad_sequences

# Intermediate tensors get their own names so the global feature matrix `x`
# (the input of train_test_split) is not overwritten when this cell runs.
embedded = Embedding(nb_words+1, embed_size, weights = [embedding_matrix])(inp1)

biout = Bidirectional(GRU(30, return_sequences=False))(embedded)

inp2 = Input(shape = (4,))  # previous-intent one-hot (first 4 feature columns)
merged = concatenate([biout, inp2])

hidden = Dense(64, activation="relu")(merged)
hidden = Dropout(0.2)(hidden)
output = Dense(7, activation="softmax")(hidden)  # 7 output classes
model = Model(inputs=[inp1, inp2], outputs=output)

model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

In [None]:
# Write the model to weights.hdf5 whenever the monitored validation accuracy
# reaches a new maximum (save_best_only=True, mode='max').
# NOTE(review): the metric key is 'val_acc' in older Keras releases; newer ones
# report 'val_accuracy' — confirm against the installed version.
checkpoint = ModelCheckpoint("weights.hdf5", monitor='val_acc', verbose=0, save_best_only=True, mode='max')

callbacks = [checkpoint]

In [None]:
epochs = 8  # number of single-epoch fit/evaluate rounds

# Integer class labels of the held-out split (argmax of each one-hot row).
y_test = [np.argmax(row) for row in test_y]

# Inputs in model order: token-id columns first, previous-intent one-hot second.
train_inputs = [train_x[:, 4:], train_x[:, :4]]
test_inputs = [test_x[:, 4:], test_x[:, :4]]

for epoch in range(epochs):
    # Train for one epoch, then score the held-out split.
    model.fit(train_inputs, train_y, batch_size=128, epochs=1, callbacks=callbacks, validation_split=0.1)
    pred_glove_val_y = model.predict(test_inputs, batch_size=128, verbose=1)

    y_pred = [np.argmax(probs) for probs in pred_glove_val_y]

    score = f1_score(y_test, y_pred, average='micro')
    print(confusion_matrix(y_test, y_pred))

    print("Val F1 Score: {:.4f}".format(score))

In [None]:
# Model 2: same layout as the first model, but a 1-D convolution followed by
# average pooling sits between the embedding and the bidirectional GRU.
inp1 = Input(shape = (maxlen,))  # padded token ids; length must match pad_sequences

# Intermediate tensors get their own names so the global feature matrix `x`
# (the input of train_test_split) is not overwritten when this cell runs.
embedded = Embedding(nb_words+1, embed_size, weights = [embedding_matrix])(inp1)

# No activation is passed, so this layer uses Conv1D's default activation.
conv = Conv1D(filters = 100, kernel_size = 3, strides=1)(embedded)
pooled = AveragePooling1D()(conv)
biout = Bidirectional(GRU(30, return_sequences=False))(pooled)

inp2 = Input(shape = (4,))  # previous-intent one-hot (first 4 feature columns)
merged = concatenate([biout, inp2])

hidden = Dense(32, activation="relu")(merged)
hidden = Dropout(0.2)(hidden)
output = Dense(7, activation="softmax")(hidden)  # 7 output classes
modelwithCONV = Model(inputs=[inp1, inp2], outputs=output)

modelwithCONV.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
modelwithCONV.summary()

In [None]:
# Write the model to weightswithCONV.hdf5 whenever the monitored validation
# accuracy reaches a new maximum (save_best_only=True, mode='max').
# `checkpoint` and `callbacks` are rebound here, so any fit call executed after
# this cell uses this checkpoint file.
# NOTE(review): the metric key is 'val_acc' in older Keras releases; newer ones
# report 'val_accuracy' — confirm against the installed version.
checkpoint = ModelCheckpoint("weightswithCONV.hdf5", monitor='val_acc', verbose=0, save_best_only=True, mode='max')

callbacks = [checkpoint]

In [None]:
import numpy as np

In [None]:
# Scratch cell: evaluates np.repeat(3, 4) for display only; the result is not
# assigned to any name.
np.repeat(3,4)

In [None]:
epochs = 10  # number of single-epoch fit/evaluate rounds

# Integer class labels of the held-out split (argmax of each one-hot row).
y_test = [np.argmax(row) for row in test_y]

# Inputs in model order: token-id columns first, previous-intent one-hot second.
train_inputs = [train_x[:, 4:], train_x[:, :4]]
test_inputs = [test_x[:, 4:], test_x[:, :4]]

for epoch in range(epochs):
    # Train for one epoch, then score the held-out split.
    modelwithCONV.fit(train_inputs, train_y, batch_size=128, epochs=1, callbacks=callbacks, validation_split=0.1)
    pred_glove_val_y = modelwithCONV.predict(test_inputs, batch_size=128, verbose=1)

    y_pred = [np.argmax(probs) for probs in pred_glove_val_y]

    score = f1_score(y_test, y_pred, average='micro')
    print(confusion_matrix(y_test, y_pred))

    print("Val F1 Score: {:.4f}".format(score))

In [None]:
def transformMP(a):
  """One-hot encode a 1-based previous-intent id.

  Args:
    a: integer id. Values 1..4 select slot ``a - 1``; any other value
      (including the -1 sentinel) selects no slot.

  Returns:
    NumPy array of shape (1, 4) with at most one element set to 1.
  """
  one_hot=[0]*4
  # Explicit range check: with only an `a != -1` test, 0 and other negative ids
  # would set a slot through negative indexing, and ids above 4 would raise
  # IndexError.
  if 1 <= a <= len(one_hot):
    one_hot[a-1]=1

  return np.array([one_hot])

def transformQ(query):
  """Turn one raw query string into a padded token-id array.

  The string is tokenised with the notebook-level `tokenizer`, passed through
  `pad_sequences` with `maxlen`, and returned as a NumPy array holding one row.
  """
  token_ids = tokenizer.texts_to_sequences([query])
  padded = pad_sequences(token_ids, maxlen)
  return np.array(padded)

In [None]:
# Quick check of transformQ on a single-word query.
transformQ("indore")

In [None]:
# Interactive demo: read a query and an optional previous-intent id, run the
# CONV model, and print the predicted intent with its probability.
# Labels are indexed by the model's output position.
# NOTE(review): assumes this order matches the dumCURR columns — confirm.
intent = ['affirmation', 'book', 'cancel', 'check-in', 'greet', 'negation', 'status']

print("Type 'quit' or 'exit' as the query to stop.\n")

while True:

  print("book=1; cancel=2; check-in=3; status=4\n")
  query = input("Enter Query ")

  # Explicit way out, so the cell can finish without a kernel interrupt.
  if query.strip().lower() in ('quit', 'exit'):
    break

  try:
    a = int(input("Enter previous intent "))
  except ValueError:
    # Blank or non-numeric answer means "no previous intent". Only ValueError is
    # caught, so KeyboardInterrupt still stops the loop.
    a = -1

  # Ids outside the menu are treated as "no previous intent".
  if not 1 <= a <= 4:
    a = -1

  oh = transformMP(a)
  q = transformQ(query)

  pred_glove_val_y = modelwithCONV.predict([q, oh])

  y_pred = np.argmax(pred_glove_val_y[0])
  print(intent[y_pred],  pred_glove_val_y[0][y_pred], "\n\n")