In [1]:
import pickle
import re
import pandas as pd
import numpy as np

## Torch imports
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## Sklearn imports
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, log_loss

## NLP Libraries
import spacy
from sklearn.model_selection import train_test_split
from nltk import download
import gensim
from nltk.corpus import stopwords
spacy_en = spacy.load('en')
download('stopwords')

Using TensorFlow backend.
  return f(*args, **kwds)


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/rsilvei/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
!ls

baseline_handcrafted_features.ipynb   intents_5.csv
baseline_handcrafted_features_2.ipynb intents_6.csv
dataset_apiai_9.csv                   intents_7.csv
dataset_apiai_vocab_9.pkl             intents_8.csv
intents_1.csv                         intents_9.csv
intents_10.csv                        test.pkl
intents_11.csv                        test_complete.pkl
intents_12.csv                        test_dataset_to_pickle.ipynb
intents_13.csv                        torch_vectors_bow_baselines.ipynb
intents_2.csv                         torch_vectors_lstm_gru_word2vec.ipynb
intents_3.csv                         train.pkl
intents_4.csv


In [3]:
train = pd.read_csv('dataset_apiai_9.csv', delimiter=";")
print("Train size: {}".format(len(train)))
train.tail()

Train size: 788


Unnamed: 0,usersays,intent
783,Modify Vincent smoking status,workerTobaccoUsageStatus.update
784,Martha smokes,workerTobaccoUsageStatus.update
785,Bill start using tobacco,workerTobaccoUsageStatus.update
786,Margarite start smoking,workerTobaccoUsageStatus.update
787,Employee quit smoking,workerTobaccoUsageStatus.update


In [4]:
## Filtering out useless classes
train = train[train['intent']!='init.greetings']
train = train[train['intent']!='Default Welcome Intent']
print("Train size: {}".format(len(train)))

Train size: 777


In [5]:
!ls

baseline_handcrafted_features.ipynb   intents_5.csv
baseline_handcrafted_features_2.ipynb intents_6.csv
dataset_apiai_9.csv                   intents_7.csv
dataset_apiai_vocab_9.pkl             intents_8.csv
intents_1.csv                         intents_9.csv
intents_10.csv                        test.pkl
intents_11.csv                        test_complete.pkl
intents_12.csv                        test_dataset_to_pickle.ipynb
intents_13.csv                        torch_vectors_bow_baselines.ipynb
intents_2.csv                         torch_vectors_lstm_gru_word2vec.ipynb
intents_3.csv                         train.pkl
intents_4.csv


In [6]:
with open('test_complete.pkl', 'rb') as f:
    test = pickle.load(f)

In [7]:
test= test.sample(frac=1).reset_index(drop=True)
test=test.rename(index=str, columns={"Intents": "usersays", "label": "intent"})
test.tail()

Unnamed: 0,usersays,intent
795,update my worker birth day,workerBirthInformation.update
796,i want to change my labour birth information,workerBirthInformation.update
797,need form i9,worker.usI9Screening.section1.complete
798,modifications required in birth information,personBirthInformation.update
799,how to change labourer email id,workerPersonalEmail.update


In [9]:
train_full = train.append([test]).reset_index(drop=True)
train_full=train_full.sample(frac=1).reset_index(drop=True)
print(len(train_full))
train_full.tail()

1577


Unnamed: 0,usersays,intent
1572,how to change employee email id,workerPersonalEmail.update
1573,"E agent, can you please upload this person doc...",personDocument.upload
1574,Please adjust personal phone number,personPersonalPhoneNumber.update
1575,Changes in worker electronic mail,workerPersonalEmail.update
1576,modify worker email address,workerPersonalEmail.update


In [10]:
train_full[train_full['intent']=='question.detect']

Unnamed: 0,usersays,intent
151,Who is Danny Ocean compensation partner?,question.detect
163,What languages Danny speaks?,question.detect
194,What is Teresa compensation?,question.detect
227,Who is my supervisor?,question.detect
311,Who is New York supervisor?,question.detect
425,What's Danny's's salary?,question.detect
536,What is Corey email?,question.detect
593,Who is Teresa Brown HR partner?,question.detect
603,What are the contacts of Violet?,question.detect
642,Which languages Teresa speaks?,question.detect


## 2. Preprocessing

In [11]:
def transformText(text, do_stop=False, do_stem=False):
    
    stops = set(stopwords.words("english"))
    
    # Convert text to lower
    text = text.lower()
    
    # Removing non ASCII chars    
    text = re.sub(r'[^\x00-\x7f]',r' ',text)
    
    # Strip multiple whitespaces
    text = gensim.corpora.textcorpus.strip_multiple_whitespaces(text)
    
    # Removing all the stopwords
    
    if (do_stop==True):
        filtered_words = [word for word in text.split() if word not in stops]
    else:
        filtered_words = [word for word in text.split()]

    # Removing all the tokens with lesser than 3 characters
    filtered_words = gensim.corpora.textcorpus.remove_short(filtered_words, minsize=3)
    
    # Preprocessed text after stop words removal
    text = " ".join(filtered_words)
    
    # Remove the punctuation
    text = gensim.parsing.preprocessing.strip_punctuation2(text)
    
    # Strip all the numerics
    text = gensim.parsing.preprocessing.strip_numeric(text)
    
    # Strip multiple whitespaces
    text = gensim.corpora.textcorpus.strip_multiple_whitespaces(text)
    
    if (do_stem==True):
        # Stemming
        text = gensim.parsing.preprocessing.stem_text(text)
    return text

In [12]:
train_full['usersays_preprocessed']=train_full['usersays'].apply(lambda x: transformText(x,do_stop=False,do_stem=True))
train_full.tail()

Unnamed: 0,usersays,intent,usersays_preprocessed
1572,how to change employee email id,workerPersonalEmail.update,how chang employe email
1573,"E agent, can you please upload this person doc...",personDocument.upload,agent can you pleas upload thi person document
1574,Please adjust personal phone number,personPersonalPhoneNumber.update,pleas adjust person phone number
1575,Changes in worker electronic mail,workerPersonalEmail.update,chang worker electron mail
1576,modify worker email address,workerPersonalEmail.update,modifi worker email address


In [13]:
test['usersays_preprocessed']=test['usersays'].apply(lambda x: transformText(x,do_stop=False,do_stem=True))
test.tail()

Unnamed: 0,usersays,intent,usersays_preprocessed
795,update my worker birth day,workerBirthInformation.update,updat worker birth dai
796,i want to change my labour birth information,workerBirthInformation.update,want chang labour birth inform
797,need form i9,worker.usI9Screening.section1.complete,need form
798,modifications required in birth information,personBirthInformation.update,modif requir birth inform
799,how to change labourer email id,workerPersonalEmail.update,how chang labour email


## Applying handcrafted features

### 1. NER detection

In [15]:
spacy_en = spacy.load('en')

In [16]:
doc = spacy_en(''.join("update Bruno marital status"))

In [17]:
doc.ents

(Bruno,)

In [18]:
for names in doc.ents:
    print(names)
    if names.label_== "PERSON":
         print("Has person: {}".format(names.text))

Bruno


In [19]:
def has_person(text):
    doc = spacy_en(''.join(text))
    person=0
    for names in doc.ents:
        if names.label_== "PERSON":
            person = 1
           # print(names.text)
    return person

In [20]:
has_person("update Roberto job update e-mail")

1

In [22]:
train_full['has_person']=train_full['usersays'].apply(lambda x: has_person(x))
train_full[train_full['has_person']==1][0:10]

Unnamed: 0,usersays,intent,usersays_preprocessed,has_person
131,Bill wants to change his LGBT status,workerLGBT.update,bill want chang hi lgbt statu,1
146,modifications in Worker LGBT details,workerLGBT.update,modif worker lgbt detail,1
151,Who is Danny Ocean compensation partner?,question.detect,who danni ocean compens partner,1
160,modify Anthony Albright phone number,workerPersonalPhoneNumber.update,modifi anthoni albright phone number,1
163,What languages Danny speaks?,question.detect,what languag danni speak,1
222,Update Roberto phone,workerPersonalPhoneNumber.update,updat roberto phone,1
289,need to change my worker Marital Status,workerMaritalStatus.update,need chang worker marit statu,1
348,edit Bruno's phone,workerPersonalPhoneNumber.update,edit bruno s phone,1
355,edit Bruno's email,workerPersonalEmail.update,edit bruno s email,1
394,edit Bruno's e-mail,workerPersonalEmail.update,edit bruno s e mail,1


### 2. Question detection

In [23]:
train_full[train_full['intent']=='question.detect']

Unnamed: 0,usersays,intent,usersays_preprocessed,has_person
151,Who is Danny Ocean compensation partner?,question.detect,who danni ocean compens partner,1
163,What languages Danny speaks?,question.detect,what languag danni speak,1
194,What is Teresa compensation?,question.detect,what teresa compens,0
227,Who is my supervisor?,question.detect,who supervisor,0
311,Who is New York supervisor?,question.detect,who new york supervisor,0
425,What's Danny's's salary?,question.detect,what s danni s s salari,1
536,What is Corey email?,question.detect,what corei email,1
593,Who is Teresa Brown HR partner?,question.detect,who teresa brown partner,1
603,What are the contacts of Violet?,question.detect,what ar the contact violet,0
642,Which languages Teresa speaks?,question.detect,which languag teresa speak,0


In [85]:
list_question =['who','where','when','what','how','which']
list_question

['who', 'where', 'when', 'what', 'how', 'which']

In [99]:
msg = "is the main supervisor? "

In [132]:
import re
mo = re.search('\?',"Is This It Nope,??? that's fine")
if mo: 
    print(mo.group)
    print(mo.re)
else: 
    print("sem")

<built-in method group of _sre.SRE_Match object at 0x123389920>
re.compile('\\?')


In [133]:
def has_question_mark(text):
    a = re.search('\?',text)
    if a:
        return 1
    else:
        return 0

In [101]:
def has_question_inits(text):
    a = any(set(list_question).intersection(text.lower().split()))
    if a:
        return 1
    else:
        return 0

In [108]:
has_question_inits("I am")

0

In [140]:
has_question_mark("What this is the true value of life?")

1

In [180]:
train_full['has_question_init']=train_full['usersays'].apply(lambda x: has_question_inits(x))
train_full[train_full['has_question_init']==1][0:10]

Unnamed: 0,usersays,intent,usersays_preprocessed,has_person,has_question_init,has_question_mark
22,I want know how to change my worker mobile num...,workerPersonalPhoneNumber.update,want know how chang worker mobil number,0,1,0
78,if i want to change my worker mobile number wh...,workerPersonalPhoneNumber.update,want chang worker mobil number what need,0,1,0
91,how to update birth information,personBirthInformation.update,how updat birth inform,0,1,0
93,where to update my personal email address,personPersonalEmail.update,where updat person email address,0,1,0
100,How to change my worker number,workerPersonalPhoneNumber.update,how chang worker number,0,1,0
103,What to do to change my worker new electronic ...,workerPersonalEmail.update,what chang worker new electron mail,0,1,0
117,How to change my personal number,personPersonalPhoneNumber.update,how chang person number,0,1,0
136,how to update employee number,workerPersonalPhoneNumber.update,how updat employe number,0,1,0
142,how to change my lobour details,workerTobaccoUsageStatus.update,how chang lobour detail,0,1,0
151,Who is Danny Ocean compensation partner?,question.detect,who danni ocean compens partner,1,1,1


In [179]:
train_full['has_question_mark']=train_full['usersays'].apply(lambda x: has_question_mark(x))
train_full[train_full['has_question_mark']==1][0:10]

Unnamed: 0,usersays,intent,usersays_preprocessed,has_person,has_question_init,has_question_mark
151,Who is Danny Ocean compensation partner?,question.detect,who danni ocean compens partner,1,1,1
163,What languages Danny speaks?,question.detect,what languag danni speak,1,1,1
194,What is Teresa compensation?,question.detect,what teresa compens,0,1,1
227,Who is my supervisor?,question.detect,who supervisor,0,1,1
311,Who is New York supervisor?,question.detect,who new york supervisor,0,1,1
425,What's Danny's's salary?,question.detect,what s danni s s salari,1,0,1
536,What is Corey email?,question.detect,what corei email,1,1,1
593,Who is Teresa Brown HR partner?,question.detect,who teresa brown partner,1,1,1
603,What are the contacts of Violet?,question.detect,what ar the contact violet,0,1,1
642,Which languages Teresa speaks?,question.detect,which languag teresa speak,0,1,1


In [178]:
train_full[train_full['intent']=="question.detect"][0:10]

Unnamed: 0,usersays,intent,usersays_preprocessed,has_person,has_question_init,has_question_mark
151,Who is Danny Ocean compensation partner?,question.detect,who danni ocean compens partner,1,1,1
163,What languages Danny speaks?,question.detect,what languag danni speak,1,1,1
194,What is Teresa compensation?,question.detect,what teresa compens,0,1,1
227,Who is my supervisor?,question.detect,who supervisor,0,1,1
311,Who is New York supervisor?,question.detect,who new york supervisor,0,1,1
425,What's Danny's's salary?,question.detect,what s danni s s salari,1,0,1
536,What is Corey email?,question.detect,what corei email,1,1,1
593,Who is Teresa Brown HR partner?,question.detect,who teresa brown partner,1,1,1
603,What are the contacts of Violet?,question.detect,what ar the contact violet,0,1,1
642,Which languages Teresa speaks?,question.detect,which languag teresa speak,0,1,1


In [177]:
train_full[0:10]

Unnamed: 0,usersays,intent,usersays_preprocessed,has_person,has_question_init,has_question_mark
0,Help me to hold job requisition,jobRequisition.hold,help hold job requisit,0,0,0
1,required to update my mail,personPersonalEmail.update,requir updat mail,0,0,0
2,Anulate job requisition,jobRequisition.cancel,anul job requisit,0,0,0
3,I need to upload an person document,personDocument.upload,need upload person document,0,0,0
4,updates in LGBT status,personLGBT.update,updat lgbt statu,0,0,0
5,rem job board,jobBoard.delete,rem job board,0,0,0
6,Please submit job app internal,internalJobApplication.submit,pleas submit job app intern,0,0,0
7,change date of birth,personBirthInformation.update,chang date birth,0,0,0
8,Change employee LGBT status,workerLGBT.update,chang employe lgbt statu,0,0,0
9,just change my worker electronic mail id,workerPersonalEmail.update,just chang worker electron mail,0,0,0


## Including handcradted features in the train/test split

In [144]:
x_train, x_valid, y_train, y_valid = train_test_split(list(zip(train_full['usersays_preprocessed'],
                                                               train_full['has_person'],
                                                               train_full['has_question_init'],
                                                               train_full['has_question_mark'])),
                                                      train_full['intent'], 
                                                      test_size=0.2)

In [145]:
x_test = np.array(test['usersays_preprocessed'])
y_test = np.array(test['intent'])

In [176]:
x_train[0:10]

[('elimin search', 0, 0, 0),
 ('need chang lobour tobacco detail', 0, 0, 0),
 ('updat marit statu', 0, 0, 0),
 ('birthdai inform', 0, 0, 0),
 ('chang employe tobacco statu', 0, 0, 0),
 ('chang tobacco indic', 0, 0, 0),
 ('set employ search', 0, 0, 0),
 ('want chang lgbt statu', 0, 0, 0),
 ('negoti for job offer evalu', 0, 0, 0),
 ('chang employe number', 0, 0, 0)]

In [147]:
train_data=list(zip(x_train,y_train))
train_data[0:5]

[(('elimin search', 0, 0, 0), 'jobSearch.delete'),
 (('need chang lobour tobacco detail', 0, 0, 0),
  'workerTobaccoUsageStatus.update'),
 (('updat marit statu', 0, 0, 0), 'personMaritalStatus.update'),
 (('birthdai inform', 0, 0, 0), 'personBirthInformation.update'),
 (('chang employe tobacco statu', 0, 0, 0), 'workerTobaccoUsageStatus.update')]

In [148]:
train_data[0][0]

('elimin search', 0, 0, 0)

In [149]:
valid_data=list(zip(x_valid,y_valid))
valid_data[-5:-1]

[(('updat labor email', 0, 0, 0), 'workerPersonalEmail.update'),
 (('chang tobacco indic', 0, 0, 0), 'personTobaccoUsageStatus.update'),
 (('how chang worker electron mail', 0, 1, 0), 'workerPersonalEmail.update'),
 (('modifi person gender', 0, 0, 0), 'personLGBT.update')]

In [169]:
## Build Vocabulary
word_to_ix = {}
for (sent,_,_,_) in list(x_train) + list(x_valid):
    for word in sent.split():
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

In [171]:
len(word_to_ix)

398

In [172]:
## Build Vocabulary
label_to_ix = {}
for label in list(y_train)+list(y_valid)+list(y_test):
    for word in label.split():
        if word not in label_to_ix:
            label_to_ix[word]=len(label_to_ix)

In [174]:
len(label_to_ix)

71

In [175]:
VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = len(label_to_ix)
VOCAB_SIZE,NUM_LABELS

(398, 71)

## Building  iterable datasets

In [182]:
train_data=list(zip(x_train,y_train))
train_data[0:5]

[(('elimin search', 0, 0, 0), 'jobSearch.delete'),
 (('need chang lobour tobacco detail', 0, 0, 0),
  'workerTobaccoUsageStatus.update'),
 (('updat marit statu', 0, 0, 0), 'personMaritalStatus.update'),
 (('birthdai inform', 0, 0, 0), 'personBirthInformation.update'),
 (('chang employe tobacco statu', 0, 0, 0), 'workerTobaccoUsageStatus.update')]

In [183]:
valid_data=list(zip(x_valid,y_valid))
valid_data[-5:-1]

[(('updat labor email', 0, 0, 0), 'workerPersonalEmail.update'),
 (('chang tobacco indic', 0, 0, 0), 'personTobaccoUsageStatus.update'),
 (('how chang worker electron mail', 0, 1, 0), 'workerPersonalEmail.update'),
 (('modifi person gender', 0, 0, 0), 'personLGBT.update')]

## Model 2 - BoW Classifier with Handcrafted features

In [150]:
class BoWClassifier(nn.Module):
    def __init__(self, num_labels, vocab_size):
        super(BoWClassifier, self).__init__()
        
        ## Defining parameters for linear model
        self.linear = nn.Linear(vocab_size, num_labels)
    
    def forward(self, bow_vec):
        ## do the foward pass and implement non-linearity
        return F.log_softmax(self.linear(bow_vec))

In [152]:
def make_bow_vector2(sentence, feat1, feat2, feat3, word_to_ix):
    vec = torch.zeros(len(word_to_ix))
    for word in sentence.split():
        if word in word_to_ix:
            vec[word_to_ix[word]] += 1
    sample_person = torch.from_numpy(np.array(feat1)).view(1,-1).type(torch.FloatTensor)
    sample_quest_init = torch.from_numpy(np.array(feat2)).view(1,-1).type(torch.FloatTensor)
    sample_quest_mark = torch.from_numpy(np.array(feat3)).view(1,-1).type(torch.FloatTensor)
    vec=vec.view(1,-1)
    sample = torch.cat((vec,sample_person,sample_quest_init,sample_quest_mark),dim=1)
    return sample

In [187]:
def make_target(label, label_to_idx):
    return torch.LongTensor([label_to_idx[label]])

In [188]:
x_train[64][0]

'pleas updat job requisit locat'

In [189]:
make_bow_vector2(x_train[64][0],x_train[64][1],x_train[64][2],x_train[64][3],word_to_ix)



Columns 0 to 12 
    0     0     0     0     0     0     0     1     0     0     0     0     0

Columns 13 to 25 
    0     0     0     0     0     0     0     1     0     0     0     0     0

Columns 26 to 38 
    0     0     0     1     1     0     0     0     0     0     0     0     0

Columns 39 to 51 
    0     0     0     0     0     0     0     0     0     0     0     0     0

Columns 52 to 64 
    0     0     0     0     0     0     0     0     0     0     0     0     0

Columns 65 to 77 
    0     0     0     0     0     0     0     0     0     0     0     0     0

Columns 78 to 90 
    0     0     0     0     0     0     0     0     0     0     0     0     0

Columns 91 to 103 
    0     0     0     0     0     0     0     0     0     0     0     0     0

Columns 104 to 116 
    1     0     0     0     0     0     0     0     0     0     0     0     0

Columns 117 to 129 
    0     0     0     0     0     0     0     0     0     0     0     0     0

Columns 130 to 142 
    

In [195]:
train_data[64][1]

'jobRequisitionLocation.update'

In [212]:
n=654
sample_phrase=make_bow_vector2(train_data[n][0][0],train_data[n][0][1],train_data[n][0][2],train_data[n][0][3],word_to_ix)
print(">> SENTENCE: {}".format(train_data[n][0][0]))
print(">> HAS PERSON FEATURE: {}".format(train_data[n][0][1]))
print(">> HAS QUESTION INIT FEATURE: {}".format(train_data[n][0][2]))
print(">> HAS QUESTION MAKR FEATURE: {}".format(train_data[n][0][3]))
print(">> CLASS: {}".format(train_data[n][1]))
print(">> INPUT SIZE: {}".format(sample_phrase.size()))
print(">> INPUT FORMAT: {}".format(type(sample_phrase)))

>> SENTENCE: make job offer revok
>> HAS PERSON FEATURE: 0
>> HAS QUESTION INIT FEATURE: 0
>> HAS QUESTION MAKR FEATURE: 0
>> CLASS: jobOffer.revoke
>> INPUT SIZE: torch.Size([1, 401])
>> INPUT FORMAT: <class 'torch.FloatTensor'>


In [213]:
model_bow_2 = BoWClassifier(NUM_LABELS, VOCAB_SIZE+3)
model_bow_2

BoWClassifier (
  (linear): Linear (401 -> 71)
)

In [214]:
## simple forward to see if its working
out=model_bow_2(Variable(sample_phrase))
out

Variable containing:

Columns 0 to 9 
-4.3428 -4.2625 -4.2607 -4.3020 -4.3123 -4.1854 -4.3706 -4.3744 -4.2301 -4.1232

Columns 10 to 19 
-4.2645 -4.2484 -4.2259 -4.2154 -4.2763 -4.1861 -4.2879 -4.2229 -4.2693 -4.3056

Columns 20 to 29 
-4.2168 -4.4251 -4.2557 -4.3818 -4.2582 -4.2990 -4.2005 -4.2926 -4.1660 -4.3229

Columns 30 to 39 
-4.4117 -4.3395 -4.2278 -4.1930 -4.2705 -4.3051 -4.3038 -4.2107 -4.2852 -4.1809

Columns 40 to 49 
-4.3406 -4.3249 -4.3699 -4.3150 -4.1761 -4.2583 -4.1949 -4.2876 -4.2506 -4.3054

Columns 50 to 59 
-4.1668 -4.2438 -4.4089 -4.2566 -4.2047 -4.2840 -4.3219 -4.2301 -4.3007 -4.2841

Columns 60 to 69 
-4.2920 -4.2687 -4.1741 -4.2494 -4.2674 -4.1824 -4.2277 -4.2558 -4.1551 -4.2384

Columns 70 to 70 
-4.1527
[torch.FloatTensor of size 1x71]

## Training

In [215]:
loss_function_2 = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer_2 = optim.SGD(params=model_bow_2.parameters(), lr=learning_rate)

In [216]:
valid_data[0:5]

[(('eval job offer', 0, 0, 0), 'jobOffer.evaluate'),
 (('chang electron mail', 0, 0, 0), 'personPersonalEmail.update'),
 (('would like chang worker electron mail', 0, 0, 0),
  'workerPersonalEmail.update'),
 (('request chang phone number', 0, 0, 0), 'personPersonalPhoneNumber.update'),
 (('worker start smoke', 0, 0, 0), 'workerTobaccoUsageStatus.update')]

In [224]:
batch_size = 30
n_iters = 5000000
num_epochs = n_iters/(len(x_train))/batch_size
num_epochs=int(num_epochs)
num_epochs

132

In [228]:
iter = 0
for epoch in range(num_epochs):
    for (sent,label) in train_data:
        # Step 1 - clear the gradients
        model_bow_2.zero_grad()
        optimizer_2.zero_grad()

        ## Step 2- Prepare input and label
        bow_vec = Variable(make_bow_vector2(sent[0],sent[1],sent[2],sent[3],word_to_ix))
        target = Variable(make_target(label, label_to_ix))
        
        # Step 3 - Run forward pass
        output = model_bow_2(bow_vec)
        #print("Log probabilities - {}".format(log_probs))
        
        # Step 4 - Compute loss, gradients, update parameters
        loss = loss_function_2(output, target)
        loss.backward()
        optimizer_2.step()
        
        iter+=1      
        ## Calculate final accuracy
        if iter % 5000 ==0:
            correct = 0
            total = 0
            for (sent,label) in valid_data:
                bow_vec = Variable(make_bow_vector2(sent[0],sent[1],sent[2],sent[3],word_to_ix))
                target = Variable(make_target(label, label_to_ix))
                output = model_bow_2(bow_vec)
                _,predicted = torch.max(output.data,1)
                total += target.size(0)
                correct += (predicted == make_target(label, label_to_ix)).sum()
            accuracy = 100 * correct/total
            print('Iterations: {}. Loss: {}. Accuracy: {}'.format(iter,loss.data[0],accuracy))

Iterations: 5000. Loss: 0.3448725938796997. Accuracy: 86.70886075949367
Iterations: 10000. Loss: 0.05238308012485504. Accuracy: 86.70886075949367
Iterations: 15000. Loss: 0.006602008361369371. Accuracy: 86.70886075949367
Iterations: 20000. Loss: 0.0006064579356461763. Accuracy: 86.70886075949367
Iterations: 25000. Loss: 0.009901398792862892. Accuracy: 86.70886075949367
Iterations: 30000. Loss: 0.035417523235082626. Accuracy: 86.70886075949367
Iterations: 35000. Loss: 0.06232452392578125. Accuracy: 86.70886075949367
Iterations: 40000. Loss: 0.0876854956150055. Accuracy: 86.70886075949367
Iterations: 45000. Loss: 0.012323392555117607. Accuracy: 86.70886075949367
Iterations: 50000. Loss: 0.005013932008296251. Accuracy: 86.70886075949367
Iterations: 55000. Loss: 0.18863515555858612. Accuracy: 86.70886075949367
Iterations: 60000. Loss: 0.025143731385469437. Accuracy: 86.70886075949367
Iterations: 65000. Loss: 0.02315877377986908. Accuracy: 86.70886075949367
Iterations: 70000. Loss: 0.014534

In [234]:
input_phrase = "is going on"
input_phrase_preprocessed = transformText(input_phrase,do_stop=False,do_stem=True)
person=has_person(input_phrase)
question_init=has_question_inits(input_phrase)
question_mark=has_question_mark(input_phrase)

bow_vec = Variable(make_bow_vector2(input_phrase_preprocessed, person,question_init, question_mark, word_to_ix))
print("-"*20 + " INPUT "+"-"*20)
print("SENTENCE           = {}".format(input_phrase))
print("SENTENCE PROCESSED = {}".format(input_phrase_preprocessed))
print("PERSON             = {}".format(person))
print("QUESTION INIT      = {}".format(question_init))
print("QUESTION MARK      = {}".format(question_mark))

print("-"*20 + " PREDICTION "+"-"*20)
log_probs = model_bow_2(bow_vec)
_,predicted = torch.max(log_probs.data,1)
pred_label=list(label_to_ix.keys())[list(label_to_ix.values()).index(predicted[0])]
print("PRED       = {}".format(predicted[0]))
print("PRED LABEL = {}".format(pred_label))

-------------------- INPUT --------------------
SENTENCE           = is going on
SENTENCE PROCESSED = go
PERSON             = 0
QUESTION INIT      = 0
QUESTION MARK      = 0
-------------------- PREDICTION --------------------
PRED       = 47
PRED LABEL = worker.usI9Screening.section1.complete


In [242]:
def get_reply_proba_2(msg):
    input_phrase_preprocessed = transformText(msg,do_stop=False,do_stem=True)
    person=has_person(msg)
    question_init=has_question_inits(msg)
    question_mark=has_question_mark(msg)

    bow_vec = Variable(make_bow_vector2(input_phrase_preprocessed, person,question_init, question_mark, word_to_ix))
    log_probs = model_bow_2(bow_vec)
    _,predicted = torch.max(log_probs.data,1)
    pred_label=list(label_to_ix.keys())[list(label_to_ix.values()).index(predicted[0])]
    ## Getting probabilities
    probs = F.softmax(log_probs.data)
    probs = probs.data[0].numpy()
    probs_frame = pd.DataFrame({'probs':probs,'label':list(label_to_ix)})
    probs_frame=probs_frame.sort_values(['probs'], ascending=False)[0:3].reset_index(drop=True)
    return pred_label,probs_frame,person,question_init,question_mark

In [259]:
label,probs,person,question,mark = get_reply_proba_2("update my worker e-mail")
print("PERSON             = {}".format(person))
print("QUESTION INIT      = {}".format(question))
print("QUESTION MARK      = {}".format(mark))
probs

PERSON             = 0
QUESTION INIT      = 0
QUESTION MARK      = 0


Unnamed: 0,label,probs
0,workerPersonalEmail.update,0.994281
1,workerPersonalPhoneNumber.update,0.004002
2,personPersonalEmail.update,0.001277
