# 1.0 Introduction to conversational software

In [1]:
import time

def respond(message):
    print('BOT: I can hear you! You said {}'.format(message))

def send_message(message):
    print('USER: {}'.format(message))
    time.sleep(2)
    return respond(message)

In [2]:
send_message('Hello!')

USER: Hello!
BOT: I can hear you! You said Hello!


# 1.1 Creating a personality

In [3]:
responses = {
    "What's your name?": "My name is Echobot",
    "What's the weather today?": "It's sunny!"
}

def respond(message):
    if message in responses:
        return responses[message]

In [4]:
respond("What's your name?")

'My name is Echobot'

In [5]:
responses = {
    "What's today's weather?": "It's {} today"
}

weatherToday = 'cloudy'

def respond(message):
    if message in responses:
        return responses[message].format(weatherToday)

In [6]:
respond("What's today's weather?")

"It's cloudy today"

In [7]:
responses = {
    "What's your name?": [
        "My name echobot",
        "They call me echobot",
        "The name's bot, echobot"
    ]
}

import random

def respond(message):
    if message in responses:
        return random.choice(responses[message])

In [8]:
respond("What's your name?")

'They call me echobot'

In [9]:
responses = ['Tell me more!', "Why do you think that?"]

import random

def respond(message):
    return random.choice(responses)

In [10]:
respond("I think you're really great")

'Why do you think that?'

# 1.2 Text processing with regex

In [11]:
import re

pattern = "Do you remember .*"
message = "Do you remember when you ate strawberries in the garden"
match = re.search(pattern, message)

if match:
    print("String matches!")

String matches!


In [12]:
pattern = "if (.*)"
message = "What would happen if bots took over the world!"
match = re.search(pattern, message)
match.group(0)

'if bots took over the world!'

In [13]:
match.group(1)

'bots took over the world!'

In [14]:
import re

def swap_pronouns(phrase):
    if 'I' in phrase:
        return re.sub('I', 'you', phrase)
    if 'my' in phrase:
        return re.sub('my', 'your', phrase)
    else:
        return phrase

In [15]:
swap_pronouns('I walk my dog')

'you walk my dog'

In [16]:
pattern = "do you remember (.*)"
message = "do you remember when you ate strawberries in the garden"
phrase = re.search(pattern, message)
phrase.group(0)

'do you remember when you ate strawberries in the garden'

In [17]:
phrase.group(1)

'when you ate strawberries in the garden'

In [18]:
phrase = swap_pronouns(phrase.group(1))
phrase

'when you ate strawberries in the garden'

# 2.0 Understanding intents and entities

- Intents
- Entities: NER (Named entity recognition)

## Regular expression to recognize intents and exercises
- Simpler than machine learning approaches
- Highly computationally efficient
- Debugging regex is difficult

In [19]:
re.search(r"(hello|hey|hi)", "hey there!") is not None

True

In [20]:
re.search(r"(hello|hey|hi)", "which one?") is not None

True

In [21]:
re.search(r"\b(hello|hey|hi)\b", "hey there!") is not None

True

In [22]:
re.search(r"\b(hello|hey|hi)\b", "which one?") is not None

False

In [23]:
# Entity recognition
pattern = re.compile('[A-Z]{1}[a-z]*')

message = """
Mary is a friend of mine,
she studied at Oxford and
now works at Google
"""

In [24]:
pattern.findall(message)

['Mary', 'Oxford', 'Google']

# 2.1 Word vectors

- Try to represent meaning of words
- Words with similar context have similar vector
- GloVe algorithm (cousin of word2vec)
- spaCy

In [25]:
import spacy

In [26]:
nlp = spacy.load('en')
nlp.vocab.vectors_length

ValueError: 1792000 exceeds max_bin_len(1048576)

In [None]:
doc = nlp('hello can you help me?')

for token in doc:
    print("{}: {}".format(token, token.vector[:3]))

## Similarity

- Direction of vectors matters
- "Distance" between words = angle between the vectors
- Cosine similarity:
    - 1: if vectors point in the same direction
    - 0: if they are perpendicular
    - -1: if they point in opposite direction

In [None]:
doc = nlp("cat")
doc.similarity(nlp("can"))

In [None]:
doc.similarity(nlp("dog"))

# 2.2 Intents and classification

- A classifier predicts the intent label from a sentence
- Use training data to tune classifer
- Use testing data to evaluate performance
- Accuracy: fraction of correctly predicted labels

In [None]:
from collections import defaultdict
import gzip
import numpy as np
import os
import pandas as pd
import pickle

In [None]:
os.system

In [None]:
os.environ

In [None]:
os.getcwd()

In [None]:
os.listdir()

In [None]:
os.uname()

In [None]:
sorted(os.listdir('data'))

In [None]:
names = ['label', 'sentence']

intentsDF = pd.read_csv('data/atis/atis_intents.csv', names=names)
trainDF = pd.read_csv('data/atis/atis_intents_train.csv', names=names)
testDF = pd.read_csv('data/atis/atis_intents_test.csv', names=names)

In [None]:
intentsDF.info()

In [None]:
trainDF.info()

In [None]:
testDF.info()

In [None]:
trainSentences = trainDF['sentence'].values.tolist()
trainSentences[:2]

In [None]:
trainLabels = trainDF['label'].values.tolist()
trainLabels[:2]

In [None]:
X_train_shape = (len(trainSentences), nlp.vocab.vectors_length)
X_train_shape

In [None]:
X_train = np.zeros(X_train_shape)
X_train

```python
for sentence in trainSentences:
    X_train[i, :] = nlp(sentence).vector
```

## Nearest neighbor classification

Simplest solution:
- Look for the labeled example that's most similar
- Use its intent as a best guess

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
sampleTestMessage = testDF.iloc[0, 1]
sampleTestMessage

In [None]:
test_x = nlp(sampleTestMessage).vector
np.shape(test_x)

```python
scores = [cosine_similarity(X[i,:], test_x) for i in range(len(trainSentences))]

trainLabels[np.argmax(scores)]
```

## Support vector machines

SVM / SVC

```python
from sklearn.svm import SVC
clf = SVC()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
```

# 2.3 Entity extraction

- Keywords don't work for entities you haven't seen before
- Use contextual clues:
    - Spelling
    - Capitalization
    - Words occurring before and after
- Pattern recognition

In [None]:
doc = nlp("my friend Mary has worked at Google since 2009")
doc

In [None]:
for ent in doc.ents:
    print(ent.text, ent.label_)

## Roles

In [None]:
import re

In [None]:
pattern1 = re.compile('.* from (.*) to (.*)')
string1 = "I want a flight from Tel Aviv to Bucharest"
pattern.match(string1)

In [None]:
pattern2 = re.compile('.* to (.*) from (.*)')
string2 = 'show me flights to Shanghai from Singapore'
pattern2.match(string2)

## Dependency parsing

In [None]:
doc = nlp('a flight to Shanghai from Singapore')
shanghai, singapore = doc[3], doc[5]

In [None]:
list(shanghai.ancestors)

In [None]:
list(singapore.ancestors)

In [None]:
doc = nlp("let's see that jacket in red and some blue jeans")

# [jacket, jeans]
items = [doc[4], doc[10]]

# [red, blue]
colours = [doc[6], doc[9]]

for color in colours:
    for tok in color.ancestors:
        if tok in items:
            print("color {} belongs to item {}".format(color, tok))
            break

# 2.4 Robust NLU with Rasa

- Library for intent recognition and entity extraction
- Based on spaCy, scikit-learn and other libraries
- Built-in support for chatbot specific tasks

In [None]:
from rasa_nlu.training_data import load_data

In [None]:
trainingData = load_data("./training_data.json")