In [1]:
import pandas as pd
import numpy as np
import requests
import urllib.request as request
from sklearn.preprocessing import OneHotEncoder
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from keras.preprocessing.text import Tokenizer
from nltk.stem.lancaster import LancasterStemmer
import json 
import random
import re
import string
from nltk.stem.snowball import SnowballStemmer
from keras.models import Sequential,load_model
from keras.layers import Dense, LSTM, Bidirectional, Embedding, Dropout
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.sequence import pad_sequences
import nltk
nltk.download('stopwords')
stopword = set(stopwords.words('english')) 

Using TensorFlow backend.
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\kb\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Loading data
### Loading it directly from GitHub. No need to save in local!

In [2]:
def data_loader(url):
    resp = requests.get(url)
    data = json.loads(resp.text)
    print("Data loaded successfully!")
    print("The dataset has following keys =",data.keys())
    return data

# Data Pre-processing

In [3]:

def data_preprocessing(data):
    # extracting test ,train and validation data from the .json file into dataframe
    full_test_data = pd.DataFrame(data['test'],columns=['text','label'])
    full_train_data = pd.DataFrame(data['train'],columns=['text','label'])
    full_val_data = pd.DataFrame(data['val'],columns=['text','label'])

    #identifying all labels in the dataset and storing it in a list.
    label_list=full_train_data['label'].unique().tolist()

    # Using random getting 20 random labels from the total labels

    k=0
    d={}
    while(k!=20):
        x = random.randint(0,len(label_list))
        if x in d:
            continue
        d[x]=x
        k+=1
    for i in d:
        selected.append(label_list[i-1])

    #Getting the train data with the selected labels
    train_20labels=pd.DataFrame()
    for i in selected:
        df=full_train_data[full_train_data['label']==i]
        frames=[train_20labels,df]
        train_20labels=pd.concat(frames)

    #Getting the test data with the selected labels
    test_20labels=pd.DataFrame()
    for i in selected:
        df=full_test_data[full_test_data['label']==i]
        frames=[test_20labels,df]
        test_20labels=pd.concat(frames)
    
    #Getting the val data with the selected labels
    val_20labels=pd.DataFrame()
    for i in selected:
        df=full_val_data[full_val_data['label']==i]
        frames=[val_20labels,df]
        val_20labels=pd.concat(frames)
    return train_20labels,test_20labels,val_20labels





## Data Cleanising

In [4]:
def data_cleansing():
    # Removing punctuations
    def punctuation_removal(text):
        no_punct=[words for words in text if words not in string.punctuation]
        words_wo_punct=''.join(no_punct)
        return words_wo_punct

    # Tokenize strings
    def tokenize(text):
        split=re.split("\W+",text) 
        return split

    # Remove stopwords
    def remove_stopwords(text):
        text=[word for word in text if word not in stopword]
        return text

    #Defining Stemmer
    def stemmer(text):
        s_stemmer = SnowballStemmer(language='english')
        for word in text:
            text=[s_stemmer.stem(word) for word in text]
        return text
    train_20labels['text']=train_20labels['text'].apply(lambda x: punctuation_removal(x))
    train_20labels['text']=train_20labels['text'].apply(lambda x: tokenize(x.lower()))
    train_20labels['text']=train_20labels['text'].apply(lambda x: remove_stopwords(x))
    #train_20labels['text']=train_20labels['text'].apply(lambda x: stemmer(x))

    test_20labels['text']=test_20labels['text'].apply(lambda x: punctuation_removal(x))
    test_20labels['text']=test_20labels['text'].apply(lambda x: tokenize(x.lower()))
    test_20labels['text']=test_20labels['text'].apply(lambda x: remove_stopwords(x))
    #test_20labels['text']=test_20labels['text'].apply(lambda x: stemmer(x))

    val_20labels['text']=val_20labels['text'].apply(lambda x: punctuation_removal(x))
    val_20labels['text']=val_20labels['text'].apply(lambda x: tokenize(x.lower()))
    val_20labels['text']=val_20labels['text'].apply(lambda x: remove_stopwords(x))
    #val_20labels['text']=val_20labels['text'].apply(lambda x: stemmer(x))
    train_20labels['label']=train_20labels['label'].str.replace('_','')
    test_20labels['label']=test_20labels['label'].str.replace('_','')
    val_20labels['label']=val_20labels['label'].str.replace('_','')

# Data Representation and feature selection

In [5]:
d={}
def data_representation():
    def changing1(t,x):
        a=[0 for i in range(20)]
        a[t.word_index.get(x)-1]=1
        d[t.word_index.get(x)-1]=x
        return a

    def changing(x):
        q=t.texts_to_sequences(x)
        r=[]
        if len(q)!=0:
            for i in q:
                r.append(i[0])
        c=max_length-len(r)
        z=[0 for i in range(c)]
        r=r+z
        return np.asarray(r)

    r=pd.DataFrame()
    frames=[train_20labels['text'],test_20labels['text'],val_20labels['text']]
    r['text']=pd.concat(frames)
    s=set()

    ##Input tokenizer for vectorizing features ie. text data for all test,train and val dataframe
    t = Tokenizer()
    t.fit_on_texts(r['text'])
    for i in range(len(r['text'])):
        for j in range(len(r['text'].iloc[i])):
            s.add(r['text'].iloc[i][j])

    vocab_size=len(s)
    max_length=int(train_20labels['text'].str.len().max())

    #applying the changes to train/test/val datarame
    train_20labels['matrix']= train_20labels['text'].apply(lambda x:changing(x))
    test_20labels['matrix']= test_20labels['text'].apply(lambda x: changing(x))
    val_20labels['matrix']= val_20labels['text'].apply(lambda x: changing(x))


    ##Output tokenizer for encoding labels

    ot=Tokenizer()
    ot.fit_on_texts(train_20labels['label'].values.tolist())


    #applying the changes to train/test/val datarame
    train_20labels['elabel']= train_20labels['label'].apply(lambda x: changing1(ot,x))
    test_20labels['elabel']= test_20labels['label'].apply(lambda x: changing1(ot,x))
    val_20labels['elabel']= val_20labels['label'].apply(lambda x: changing1(ot,x))
    train_20labels['elabel'] = np.array(train_20labels['elabel']).reshape(len(train_20labels['elabel']), 1)
    test_20labels['elabel'] = np.array(test_20labels['elabel']).reshape(len(test_20labels['elabel']), 1)
    val_20labels['elabel'] = np.array(val_20labels['elabel']).reshape(len(val_20labels['elabel']), 1)

    return vocab_size,max_length

In [6]:
def data_preperation():
    train_X=train_20labels['matrix']
    train_Y =train_20labels['elabel']
    val_X =val_20labels['matrix']
    val_Y =val_20labels['elabel']
    test_X=test_20labels['matrix']
    test_Y =test_20labels['elabel']

    # reshaping train_X and train_Y
    train_X=train_X.to_numpy()
    train_X=np.vstack(train_X)
    train_Y=train_Y.to_numpy()
    train_Y=np.vstack(train_Y)

    # reshaping val_X and val_Y
    val_X=val_X.to_numpy()
    val_X=np.vstack(val_X)
    val_Y=val_Y.to_numpy()
    val_Y=np.vstack(val_Y)

    # reshaping test_X and test_Y
    test_X=test_X.to_numpy()
    test_X=np.vstack(test_X)
    test_Y=test_Y.to_numpy()
    test_Y=np.vstack(test_Y)
    return train_X,test_X,val_X,train_Y,test_Y,val_Y

In [7]:
def LSTM_model(vocab_size, max_length,train_X,val_X,train_Y,val_Y):
    def create_model(vocab_size, max_length):
        model = Sequential()
        model.add(Embedding(vocab_size, 64, input_length = max_length, trainable = False))
        model.add(Bidirectional(LSTM(256)))
        model.add(Dense(32, activation = "relu"))
        model.add(Dropout(0.5))
        model.add(Dense(20, activation = "softmax"))
        return model

    model = create_model(vocab_size+1, max_length)
    model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])
    model.summary()
    filename = 'model.h5'
    checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

    hist = model.fit(train_X, train_Y, epochs = 150, batch_size = 16, shuffle=True, validation_data = (val_X, val_Y), callbacks = [checkpoint])

In [8]:
def predict(test_X):
    def predictions():
        model = load_model("model.h5")
        pred=model.predict(test_X)
        return pred
    pred=predictions()
    def get_final_output(pred, classes):
      print(len(pred))  
      for i in range(len(pred)):
        predictions = pred[i]
        classes = np.array(classes)
        ids = np.argsort(-predictions)
        classes = classes[ids]
        predictions = -np.sort(-predictions)
        print("Testing for statement :", question[i])
        print('\n Results are:')
        for i in range(pred.shape[1]):
            print("%s has confidence = %s" % (classes[i], (predictions[i])))
        print('------------------------------------------------------------------------')
    p=list(selected)
    for i in range(len(selected)):
        p[i]=p[i].replace('_','')
    get_final_output(pred, p)


In [9]:
data=data_loader("https://raw.githubusercontent.com/clinc/oos-eval/master/data/data_full.json")
selected=[]
train_20labels,test_20labels,val_20labels = data_preprocessing(data)
finaldf=test_20labels
question=[]
for i in range(len(finaldf)):
    question.append(finaldf['text'].iloc[i])
print("\nRandomly Selected 20 labels from in-scope content are =\n", selected)
data_cleansing()
vocab_size,max_length=data_representation()
train_X,test_X,val_X,train_Y,test_Y,val_Y=data_preperation()
LSTM_model(vocab_size, max_length,train_X,val_X,train_Y,val_Y)


Data loaded successfully!
The dataset has following keys = dict_keys(['oos_val', 'val', 'train', 'oos_test', 'test', 'oos_train'])

Randomly Selected 20 labels from in-scope content are =
 ['taxes', 'uber', 'meaning_of_life', 'calendar', 'who_do_you_work_for', 'change_language', 'schedule_maintenance', 'flight_status', 'gas', 'food_last', 'bill_balance', 'gas_type', 'tell_joke', 'exchange_rate', 'travel_alert', 'reminder', 'nutrition_info', 'direct_deposit', 'credit_score', 'todo_list']
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 12, 64)            103168    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 512)               657408    
_________________________________________________________________
dense_1 (Dense)              (None, 32)                16416     
________________________


Epoch 00032: val_loss did not improve from 0.83400
Epoch 33/150

Epoch 00033: val_loss did not improve from 0.83400
Epoch 34/150

Epoch 00034: val_loss did not improve from 0.83400
Epoch 35/150

Epoch 00035: val_loss improved from 0.83400 to 0.81824, saving model to model.h5
Epoch 36/150

Epoch 00036: val_loss did not improve from 0.81824
Epoch 37/150

Epoch 00037: val_loss did not improve from 0.81824
Epoch 38/150

Epoch 00038: val_loss did not improve from 0.81824
Epoch 39/150

Epoch 00039: val_loss did not improve from 0.81824
Epoch 40/150

Epoch 00040: val_loss did not improve from 0.81824
Epoch 41/150

Epoch 00041: val_loss did not improve from 0.81824
Epoch 42/150

Epoch 00042: val_loss did not improve from 0.81824
Epoch 43/150

Epoch 00043: val_loss did not improve from 0.81824
Epoch 44/150

Epoch 00044: val_loss did not improve from 0.81824
Epoch 45/150

Epoch 00045: val_loss did not improve from 0.81824
Epoch 46/150

Epoch 00046: val_loss improved from 0.81824 to 0.80161, sav


Epoch 00073: val_loss did not improve from 0.80161
Epoch 74/150

Epoch 00074: val_loss did not improve from 0.80161
Epoch 75/150

Epoch 00075: val_loss did not improve from 0.80161
Epoch 76/150

Epoch 00076: val_loss did not improve from 0.80161
Epoch 77/150

Epoch 00077: val_loss did not improve from 0.80161
Epoch 78/150

Epoch 00078: val_loss did not improve from 0.80161
Epoch 79/150

Epoch 00079: val_loss did not improve from 0.80161
Epoch 80/150

Epoch 00080: val_loss did not improve from 0.80161
Epoch 81/150

Epoch 00081: val_loss did not improve from 0.80161
Epoch 82/150

Epoch 00082: val_loss did not improve from 0.80161
Epoch 83/150

Epoch 00083: val_loss did not improve from 0.80161
Epoch 84/150

Epoch 00084: val_loss did not improve from 0.80161
Epoch 85/150

Epoch 00085: val_loss did not improve from 0.80161
Epoch 86/150

Epoch 00086: val_loss did not improve from 0.80161
Epoch 87/150

Epoch 00087: val_loss did not improve from 0.80161
Epoch 88/150

Epoch 00088: val_loss di


Epoch 00114: val_loss did not improve from 0.80161
Epoch 115/150

Epoch 00115: val_loss did not improve from 0.80161
Epoch 116/150

Epoch 00116: val_loss did not improve from 0.80161
Epoch 117/150

Epoch 00117: val_loss did not improve from 0.80161
Epoch 118/150

Epoch 00118: val_loss did not improve from 0.80161
Epoch 119/150

Epoch 00119: val_loss did not improve from 0.80161
Epoch 120/150

Epoch 00120: val_loss did not improve from 0.80161
Epoch 121/150

Epoch 00121: val_loss did not improve from 0.80161
Epoch 122/150

Epoch 00122: val_loss did not improve from 0.80161
Epoch 123/150

Epoch 00123: val_loss did not improve from 0.80161
Epoch 124/150

Epoch 00124: val_loss did not improve from 0.80161
Epoch 125/150

Epoch 00125: val_loss did not improve from 0.80161
Epoch 126/150

Epoch 00126: val_loss did not improve from 0.80161
Epoch 127/150

Epoch 00127: val_loss did not improve from 0.80161
Epoch 128/150

Epoch 00128: val_loss did not improve from 0.80161
Epoch 129/150

Epoch 001

In [10]:
predict(test_X)

600
Testing for statement : tell me what my federal taxes amount to

 Results are:
taxes has confidence = 0.99999917
exchangerate has confidence = 4.7941506e-07
travelalert has confidence = 3.6658116e-07
changelanguage has confidence = 5.9607177e-09
schedulemaintenance has confidence = 4.1931267e-10
directdeposit has confidence = 1.6830262e-10
billbalance has confidence = 1.2866262e-10
calendar has confidence = 4.8195413e-11
reminder has confidence = 1.92795e-11
telljoke has confidence = 5.0924677e-12
foodlast has confidence = 9.658825e-13
todolist has confidence = 2.6138523e-13
nutritioninfo has confidence = 3.3556305e-14
gas has confidence = 2.800821e-14
whodoyouworkfor has confidence = 1.036129e-14
meaningoflife has confidence = 1.575015e-15
gastype has confidence = 2.4032318e-16
flightstatus has confidence = 2.104941e-17
uber has confidence = 1.5697453e-19
creditscore has confidence = 4.7401658e-21
------------------------------------------------------------------------
Testing for

flightstatus has confidence = 6.788974e-08
travelalert has confidence = 3.6529757e-10
telljoke has confidence = 2.0613289e-10
directdeposit has confidence = 1.5788246e-10
------------------------------------------------------------------------
Testing for statement : what is the reason humans even exist

 Results are:
calendar has confidence = 0.41147757
taxes has confidence = 0.18714744
billbalance has confidence = 0.14266798
whodoyouworkfor has confidence = 0.07564159
nutritioninfo has confidence = 0.046396647
creditscore has confidence = 0.0319234
flightstatus has confidence = 0.026241591
exchangerate has confidence = 0.019294612
travelalert has confidence = 0.018783225
gas has confidence = 0.012944864
foodlast has confidence = 0.010475854
schedulemaintenance has confidence = 0.008674843
meaningoflife has confidence = 0.00570533
reminder has confidence = 0.0019192412
uber has confidence = 0.00022078231
telljoke has confidence = 0.00014585411
gastype has confidence = 0.0001341257
cha

Testing for statement : please speak in tagalog

 Results are:
flightstatus has confidence = 0.99991214
todolist has confidence = 3.509004e-05
gas has confidence = 3.4354183e-05
directdeposit has confidence = 9.735476e-06
calendar has confidence = 3.622911e-06
travelalert has confidence = 2.2686738e-06
schedulemaintenance has confidence = 1.0878459e-06
nutritioninfo has confidence = 9.950453e-07
telljoke has confidence = 6.269457e-07
changelanguage has confidence = 1.6020728e-07
whodoyouworkfor has confidence = 6.912966e-08
creditscore has confidence = 5.5102042e-08
foodlast has confidence = 3.7660712e-08
uber has confidence = 2.4491946e-08
reminder has confidence = 1.2610439e-10
taxes has confidence = 9.51717e-11
gastype has confidence = 3.2818206e-11
exchangerate has confidence = 1.270861e-11
billbalance has confidence = 5.5814763e-12
meaningoflife has confidence = 3.1396734e-12
------------------------------------------------------------------------
Testing for statement : speak in 

------------------------------------------------------------------------
Testing for statement : is my flight, dl123 on time

 Results are:
schedulemaintenance has confidence = 0.9999759
exchangerate has confidence = 1.41444325e-05
gastype has confidence = 4.869741e-06
billbalance has confidence = 4.602591e-06
creditscore has confidence = 3.6277387e-07
calendar has confidence = 6.4150754e-08
whodoyouworkfor has confidence = 1.8284183e-08
telljoke has confidence = 1.2290788e-08
reminder has confidence = 1.0560317e-08
meaningoflife has confidence = 2.899889e-09
nutritioninfo has confidence = 2.3647042e-09
travelalert has confidence = 2.201512e-09
todolist has confidence = 8.715004e-10
foodlast has confidence = 3.1916515e-11
directdeposit has confidence = 2.1753802e-11
flightstatus has confidence = 1.118494e-11
gas has confidence = 1.0397347e-12
changelanguage has confidence = 3.0898823e-14
uber has confidence = 8.057285e-16
taxes has confidence = 2.0577332e-17
---------------------------

gastype has confidence = 3.7298204e-08
telljoke has confidence = 3.568767e-08
meaningoflife has confidence = 3.2435523e-09
------------------------------------------------------------------------
Testing for statement : what do i owe this month on all my bills

 Results are:
reminder has confidence = 0.99945575
directdeposit has confidence = 0.0002415952
changelanguage has confidence = 0.0001842721
travelalert has confidence = 8.015533e-05
billbalance has confidence = 2.3639566e-05
flightstatus has confidence = 6.1763667e-06
foodlast has confidence = 3.0584008e-06
calendar has confidence = 1.965729e-06
whodoyouworkfor has confidence = 1.7803209e-06
creditscore has confidence = 1.2478454e-06
taxes has confidence = 1.7035435e-07
meaningoflife has confidence = 1.224036e-07
schedulemaintenance has confidence = 1.0086387e-07
telljoke has confidence = 6.579092e-08
gastype has confidence = 5.4229005e-08
gas has confidence = 2.8670089e-08
todolist has confidence = 1.229288e-08
nutritioninfo ha

creditscore has confidence = 0.0053418977
foodlast has confidence = 0.0023863243
meaningoflife has confidence = 0.00031645817
directdeposit has confidence = 0.0001507518
uber has confidence = 9.595289e-05
gastype has confidence = 6.699889e-05
schedulemaintenance has confidence = 4.2529533e-05
changelanguage has confidence = 1.8413784e-05
travelalert has confidence = 1.704846e-05
nutritioninfo has confidence = 1.3008842e-05
reminder has confidence = 7.853837e-06
whodoyouworkfor has confidence = 3.7460927e-06
flightstatus has confidence = 1.097654e-06
billbalance has confidence = 6.8726524e-07
gas has confidence = 3.5535598e-07
calendar has confidence = 4.0836035e-08
telljoke has confidence = 3.2640128e-09
todolist has confidence = 3.0179063e-13
------------------------------------------------------------------------
Testing for statement : what is the current exchange rate between mxn and eur

 Results are:
whodoyouworkfor has confidence = 0.9996012
schedulemaintenance has confidence = 

schedulemaintenance has confidence = 2.4847554e-05
exchangerate has confidence = 5.743781e-06
whodoyouworkfor has confidence = 2.0553102e-06
telljoke has confidence = 1.8221583e-06
billbalance has confidence = 6.7183856e-07
creditscore has confidence = 2.1771129e-07
travelalert has confidence = 1.9067365e-07
foodlast has confidence = 1.06877266e-07
gas has confidence = 6.24202e-08
taxes has confidence = 5.2991197e-08
directdeposit has confidence = 6.377014e-09
todolist has confidence = 2.5202427e-09
uber has confidence = 6.461516e-10
meaningoflife has confidence = 4.94872e-10
flightstatus has confidence = 1.4920976e-10
changelanguage has confidence = 1.4195935e-10
reminder has confidence = 1.7674025e-11
nutritioninfo has confidence = 1.4532947e-13
------------------------------------------------------------------------
Testing for statement : tell me nutritional info for burger

 Results are:
flightstatus has confidence = 0.9999864
whodoyouworkfor has confidence = 8.573635e-06
uber has

Testing for statement : give me my credit rating

 Results are:
schedulemaintenance has confidence = 0.9999988
todolist has confidence = 9.410383e-07
reminder has confidence = 1.9366787e-07
gas has confidence = 4.1932175e-08
changelanguage has confidence = 1.8543977e-08
uber has confidence = 3.0585672e-09
gastype has confidence = 1.209257e-09
nutritioninfo has confidence = 6.0163496e-10
meaningoflife has confidence = 1.2385396e-11
directdeposit has confidence = 1.144706e-11
telljoke has confidence = 9.933492e-12
whodoyouworkfor has confidence = 1.9885286e-13
exchangerate has confidence = 6.305922e-14
taxes has confidence = 2.2437875e-15
calendar has confidence = 7.489616e-16
flightstatus has confidence = 8.8672365e-17
creditscore has confidence = 5.616548e-17
foodlast has confidence = 2.8562695e-17
travelalert has confidence = 1.004084e-17
billbalance has confidence = 4.436649e-19
------------------------------------------------------------------------
Testing for statement : please te