In [11]:
import spacy

In [12]:
NER = spacy.load("en_core_web_sm")

raw_text = 'I would like to book a table for ten people  five at the evening'
sent = NER(raw_text)

for word in sent.ents:
    print(word.text,word.label_)

ten CARDINAL
five CARDINAL
evening TIME


In [3]:
for chunk in sent:
    print(chunk, chunk.pos_)

I PRON
would AUX
like VERB
to PART
book VERB
a DET
table NOUN
for ADP
ten NUM
people NOUN
  SPACE
friday PROPN
at ADP
9 NUM
pm NOUN


In [4]:
for chunk in sent.noun_chunks:
    print(chunk)

I
a table
ten people
9 pm


In [5]:
import json
with open("intent.json","r") as f:
    intents = json.load(f)

In [6]:
dico = []
corpus = []
data = []
tags = []

def tokenizer(text):
    tokens = NER(text)
    tokens = [token.lemma_ for token in tokens if token.lemma_.isalpha()]
    return tokens

for intent in intents['intents']:
    tag = intent[('tag')]
    dico += tokenizer(tag)
    for pattern in intent['patterns']:
        tags.append(tag)
        pattern_tokens = tokenizer(pattern)
        dico += pattern_tokens
        data.append([pattern_tokens, tag])
        corpus.append(' '.join(pattern_tokens))
    for resp in intent['responses']:
        dico += tokenizer(resp)
        


In [7]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import OneHotEncoder

dico_tags = {tag : i for (i, tag) in enumerate(set(tags))}

vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(corpus)
y_train = [dico_tags[tag] for tag in tags]

In [8]:
from torch.utils.data import Dataset, DataLoader
import numpy as np

class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train.toarray())
        self.x_data = X_train.toarray()
        self.y_data = np.array(y_train)
        
    def __getitem__(self, idx):
        return self.x_data[idx], self.y_data[idx]
    
    def __len__(self):
        return self.n_samples
batch_size = 8

    
dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset, 
                            batch_size=batch_size, 
                            shuffle=True, 
                            num_workers=2)

In [9]:
import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

hidden_size = 8
output_size = len(set(tags))
input_size = len(X_train.toarray()[0])
learning_rate = 0.001
n_epochs=300


class NeuralNet(nn.Module):
    
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(input_size, num_classes)
        self.relu = nn.ReLU()
        
    def forward(self,x):
        out =self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(x)
        return out
    
model = NeuralNet(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)



In [10]:
for epoch in range(n_epochs):
    for (words,labels) in train_loader:
        words = words.to(device).float()
        labels = labels.to(device).float()
        
        
        outputs = model(words).float()
        loss = criterion(outputs, labels.long())
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 50 == 0:
        print(f"epoch: {epoch+1}/{n_epochs}, loss: {loss.item():.4f}")
print(f"final loss, loss: {loss.item():.4f}")

epoch: 50/300, loss: 0.9496
epoch: 100/300, loss: 0.5372
epoch: 150/300, loss: 0.3842
epoch: 200/300, loss: 0.1293
epoch: 250/300, loss: 0.0474
epoch: 300/300, loss: 0.0972
final loss, loss: 0.0972


In [25]:
model.eval()
test_sentence = 'I would like to book a table for tomorow evening'

vect_sent = vectorizer.transform([' '.join(tokenizer(test_sentence))]).toarray()
output = model(torch.tensor(vect_sent).to(device).float())
list(dico_tags.keys())[torch.softmax(output, dim=1).argmax().item()]

'number of people'

In [24]:
set(tags)

{'booking',
 'datetime',
 'funny',
 'goodbye',
 'greeting',
 'number of people',
 'payments',
 'thanks'}

In [3]:
verbs = ['want', 'would like', 'wish']
complements = ['to book a table', 'a table']
times = ['this morning', 'for lunch', 'for this evening', 'for tomorow evening']

bookings = [f"I {verb} {complement} {time}" for verb in verbs for time in times for complement in complements]
bookings

['I want to book a table this morning',
 'I want a table this morning',
 'I want to book a table for lunch',
 'I want a table for lunch',
 'I want to book a table this evening',
 'I want a table this evening',
 'I want to book a table tomorow evening',
 'I want a table tomorow evening',
 'I would like to book a table this morning',
 'I would like a table this morning',
 'I would like to book a table for lunch',
 'I would like a table for lunch',
 'I would like to book a table this evening',
 'I would like a table this evening',
 'I would like to book a table tomorow evening',
 'I would like a table tomorow evening',
 'I wish to book a table this morning',
 'I wish a table this morning',
 'I wish to book a table for lunch',
 'I wish a table for lunch',
 'I wish to book a table this evening',
 'I wish a table this evening',
 'I wish to book a table tomorow evening',
 'I wish a table tomorow evening']

In [31]:
from fuzzywuzzy import process

listing = ['Hello', 'Hi', '9', 'like']
process.extract('Hlo', listing)

[('Hello', 75), ('Hi', 45), ('like', 29), ('9', 0)]

In [30]:
tokens = NER("I don't want today I want tomorrow")

print(' '.join(token.lemma_ for token in tokens if not token.is_stop))

want today want tomorrow
