In [1]:
import random
from pathlib import Path
from typing import Sequence
from typing import Tuple

from putput import ComboOptions
from putput import Pipeline
import random

In [2]:
# Pattern definition file passed as the first argument to every Pipeline built
# in this notebook. The path is relative, so it depends on the working
# directory the notebook is run from.
pattern_def_path = Path('patterns.yml')

In [3]:
# Sampling options handed to every Pipeline below via `combo_options_map`.
# NOTE(review): 'DEFAULT' presumably applies to every utterance pattern, as it
# does in the hook maps later in this notebook -- confirm against putput docs.
combo_options_map = dict(
    DEFAULT=ComboOptions(max_sample_size=5, with_replacement=False),
)

# The two pipeline stages:

1) **Expansion**: expands the pattern definition file into an 'utterance_combo', 'tokens', and 'groups' for each utterance pattern.

2) **Combination**: yields a sequence of 'utterance', 'handled_tokens', and 'handled_groups'
- Applies handlers from 'token_handler_map' and 'group_handler_map' and is subject to constraints specified in 'combo_options_map'

Each pipeline stage can be hooked into to alter its output by defining an **expansion_hooks_map** and/or a **combo_hooks_map**.

## Expansion

### Expansion Hook: default output

In [4]:
def print_expansion_hook(utterance_combination, tokens, groups):
    """Expansion hook that prints its three inputs and passes them through.

    Args:
        utterance_combination: value printed after 'utterance_combination: '.
        tokens: value printed after 'tokens: '.
        groups: value printed after 'groups: ' (followed by a blank line).

    Returns:
        The tuple (utterance_combination, tokens, groups), unchanged.
    """
    labelled_values = (
        ('utterance_combination: {}', utterance_combination),
        ('tokens: {}', tokens),
        ('groups: {}\n', groups),
    )
    for template, value in labelled_values:
        print(template.format(value))
    return utterance_combination, tokens, groups

In [5]:
# Each key names the utterance pattern its hook tuple applies to; the special
# key DEFAULT applies the hooks to all patterns.
expansion_hooks_map = dict(DEFAULT=(print_expansion_hook,))

In [6]:
# Pipeline customised only with the printing expansion hook.
p = Pipeline(
    pattern_def_path,
    expansion_hooks_map=expansion_hooks_map,
    combo_options_map=combo_options_map,
)
print('This is the raw output of the expansion stage: \n')
# Consume the whole flow so the hooks run; the collected items are not used.
x = list(p.flow(disable_progress_bar=True))

This is the raw output of the expansion stage: 

utterance_combination: (('hey speaker', 'hey sound system'), ('play',), ('Tom Waits', 'Kanye'))
tokens: ('WAKE', 'PLAY', 'ARTIST')
groups: (('None', 1), ('PLAY_ARTIST', 2))

utterance_combination: (('hey speaker', 'hey sound system'), ('play',), ('Tabletop Joe', 'Flashing Lights'))
tokens: ('WAKE', 'PLAY', 'SONG')
groups: (('None', 1), ('PLAY_SONG', 2))



### Expansion Hook: insert random words

In [7]:
def insert_random(utterance_component, chance, *, random_words=None, rng=None):
    """Return the phrases of ``utterance_component`` with filler words randomly inserted.

    Each phrase is handled independently: with probability ``chance`` exactly
    one filler word is inserted at a random word boundary (before the first
    word, after the last word, or between two words). Otherwise the phrase is
    kept as is.

    Note that a phrase which receives a filler is split on whitespace and
    re-joined with single spaces, so its internal whitespace is normalised.

    Args:
        utterance_component: iterable of phrase strings.
        chance: probability that a given phrase receives a filler. Values
            <= 0 never insert; values >= 1 always insert.
        random_words: optional non-empty sequence of filler words to draw
            from. Defaults to ('hmm', 'uh', 'um', 'please').
        rng: optional object exposing ``random()``, ``randint()`` and
            ``choice()`` (for example a seeded ``random.Random``), so results
            can be made reproducible. Defaults to the ``random`` module.

    Returns:
        A tuple of phrases with the same length and order as the input.
    """
    if random_words is None:
        random_words = ('hmm', 'uh', 'um', 'please')
    if rng is None:
        rng = random

    new_utterance_component = []
    for phrase in utterance_component:
        if rng.random() < chance:
            words = phrase.split()
            # randint is inclusive on both ends, so len(words) means "append".
            insert_index = rng.randint(0, len(words))
            words.insert(insert_index, rng.choice(random_words))
            phrase = ' '.join(words)
        new_utterance_component.append(phrase)
    return tuple(new_utterance_component)

This is an expansion hook that randomly inserts filler words into each utterance component.

In [8]:
def insert_filler_randomly(utterance_combination, tokens, groups):
    """Expansion hook that runs ``insert_random`` on every utterance component.

    Each component is passed to ``insert_random`` with a chance of .6;
    ``tokens`` and ``groups`` are returned untouched.

    Returns:
        (tuple of processed components, tokens, groups)
    """
    return (
        tuple(insert_random(component, .6) for component in utterance_combination),
        tokens,
        groups,
    )

This time we will specify that we only want to apply this hook to a specific pattern instead of DEFAULT.
Note that we are also printing the output and that these hooks are applied in order.

In [9]:
# Per-pattern hooks: the filler hook runs before the printing hook for the
# artist pattern, while the song pattern is only printed.
expansion_hooks_map = dict((
    ('WAKE, PLAY_ARTIST', (insert_filler_randomly, print_expansion_hook)),
    ('WAKE, PLAY_SONG', (print_expansion_hook,)),
))

In [10]:
# Pipeline using the per-pattern expansion hooks (fillers + printing).
p = Pipeline(
    pattern_def_path,
    expansion_hooks_map=expansion_hooks_map,
    combo_options_map=combo_options_map,
)
print('This is the output of the expansion stage with random filler words: \n')
# Consume the whole flow so the hooks run; the collected items are not used.
x = list(p.flow(disable_progress_bar=True))

This is the output of the expansion stage with random filler words: 

utterance_combination: (('hey speaker hmm', 'hey sound please system'), ('play hmm',), ('Tom Waits please', 'Kanye'))
tokens: ('WAKE', 'PLAY', 'ARTIST')
groups: (('None', 1), ('PLAY_ARTIST', 2))

utterance_combination: (('hey speaker', 'hey sound system'), ('play',), ('Tabletop Joe', 'Flashing Lights'))
tokens: ('WAKE', 'PLAY', 'SONG')
groups: (('None', 1), ('PLAY_SONG', 2))



We can see above that the random filler hook was only applied to the specified utterance pattern.

## Combination

### Combination Hook: default output

In [11]:
def print_combination_hook(utterance, handled_tokens, handled_groups):
    """Combination hook that prints its three inputs and passes them through.

    Prints one labelled line per argument (the last one followed by a blank
    line) and returns (utterance, handled_tokens, handled_groups) unchanged.
    """
    passthrough = (utterance, handled_tokens, handled_groups)
    templates = ('utterance: {}', 'handled_tokens: {}', 'handled_groups: {}\n')
    for template, value in zip(templates, passthrough):
        print(template.format(value))
    return passthrough

In [12]:
# Apply the printing combination hook under the DEFAULT key.
combo_hooks_map = dict(DEFAULT=(print_combination_hook,))

In [13]:
# Pipeline customised only with the printing combination hook.
p = Pipeline(
    pattern_def_path,
    combo_hooks_map=combo_hooks_map,
    combo_options_map=combo_options_map,
)
print('This is the raw output of the combination stage: \n')
# Consume the whole flow so the hooks run; the collected items are not used.
x = list(p.flow(disable_progress_bar=True))

This is the raw output of the combination stage: 

utterance: hey sound system play Kanye
handled_tokens: ('[WAKE(hey sound system)]', '[PLAY(play)]', '[ARTIST(Kanye)]')
handled_groups: ('{None([WAKE(hey sound system)])}', '{PLAY_ARTIST([PLAY(play)] [ARTIST(Kanye)])}')

utterance: hey speaker play Tom Waits
handled_tokens: ('[WAKE(hey speaker)]', '[PLAY(play)]', '[ARTIST(Tom Waits)]')
handled_groups: ('{None([WAKE(hey speaker)])}', '{PLAY_ARTIST([PLAY(play)] [ARTIST(Tom Waits)])}')

utterance: hey speaker play Kanye
handled_tokens: ('[WAKE(hey speaker)]', '[PLAY(play)]', '[ARTIST(Kanye)]')
handled_groups: ('{None([WAKE(hey speaker)])}', '{PLAY_ARTIST([PLAY(play)] [ARTIST(Kanye)])}')

utterance: hey sound system play Tom Waits
handled_tokens: ('[WAKE(hey sound system)]', '[PLAY(play)]', '[ARTIST(Tom Waits)]')
handled_groups: ('{None([WAKE(hey sound system)])}', '{PLAY_ARTIST([PLAY(play)] [ARTIST(Tom Waits)])}')

utterance: hey speaker play Flashing Lights
handled_tokens: ('[WAKE(hey spe

### Combination Hook: extract tokens and groups 

In [14]:
def token_extractor(handled_item):
    """Return the text between the first '[' and the first '(' of ``handled_item``.

    For example '[WAKE(hey speaker)]' yields 'WAKE'. Raises ValueError (from
    ``str.index``) when either delimiter is missing.
    """
    name_start = handled_item.index('[') + 1
    name_stop = handled_item.index('(')
    return handled_item[name_start:name_stop]

def group_name_extractor(handled_group):
    """Return the text between the first '{' and the first '(' of ``handled_group``.

    For example '{PLAY_ARTIST([PLAY(play)] [ARTIST(Kanye)])}' yields
    'PLAY_ARTIST'. Raises ValueError (from ``str.index``) when either
    delimiter is missing.
    """
    name_start = handled_group.index('{') + 1
    name_stop = handled_group.index('(')
    return handled_group[name_start:name_stop]

In [15]:
def extract_tokens_and_groups(utterance, handled_tokens, handled_groups):
    """Combination hook that prints the utterance with bare token and group names.

    The names are obtained with ``token_extractor`` and
    ``group_name_extractor`` for display only; the hook returns its three
    arguments unchanged.
    """
    print('utterance: {}'.format(utterance))
    token_names = [token_extractor(item) for item in handled_tokens]
    print('handled_tokens: {}'.format(token_names))
    group_names = [group_name_extractor(item) for item in handled_groups]
    print('handled_groups: {}\n'.format(group_names))
    return utterance, handled_tokens, handled_groups

In [16]:
# Apply the name-extracting combination hook under the DEFAULT key.
combo_hooks_map = dict(DEFAULT=(extract_tokens_and_groups,))

In [17]:
# Pipeline customised only with the name-extracting combination hook.
p = Pipeline(
    pattern_def_path,
    combo_hooks_map=combo_hooks_map,
    combo_options_map=combo_options_map,
)
print('This is the output of the combination stage after we extract tokens and groups: \n')
# Consume the whole flow so the hooks run; the collected items are not used.
x = list(p.flow(disable_progress_bar=True))

This is the output of the combination stage after we extract tokens and groups: 

utterance: hey speaker play Tom Waits
handled_tokens: ['WAKE', 'PLAY', 'ARTIST']
handled_groups: ['None', 'PLAY_ARTIST']

utterance: hey speaker play Kanye
handled_tokens: ['WAKE', 'PLAY', 'ARTIST']
handled_groups: ['None', 'PLAY_ARTIST']

utterance: hey sound system play Tom Waits
handled_tokens: ['WAKE', 'PLAY', 'ARTIST']
handled_groups: ['None', 'PLAY_ARTIST']

utterance: hey sound system play Kanye
handled_tokens: ['WAKE', 'PLAY', 'ARTIST']
handled_groups: ['None', 'PLAY_ARTIST']

utterance: hey sound system play Flashing Lights
handled_tokens: ['WAKE', 'PLAY', 'SONG']
handled_groups: ['None', 'PLAY_SONG']

utterance: hey speaker play Flashing Lights
handled_tokens: ['WAKE', 'PLAY', 'SONG']
handled_groups: ['None', 'PLAY_SONG']

utterance: hey sound system play Tabletop Joe
handled_tokens: ['WAKE', 'PLAY', 'SONG']
handled_groups: ['None', 'PLAY_SONG']

utterance: hey speaker play Tabletop Joe
handled_

# Token and Group Handlers

In [18]:
def _iob_token_handler(token, phrase):
    tokens = ['{}-{}'.format('B' if i == 0 else 'I', token)
              for i, _ in enumerate(phrase.replace(" '", "'").split())]
    return ' '.join(tokens)

def _iob_group_handler(group_name, handled_tokens):
    num_tokens = 0
    for tokenized_phrase in handled_tokens:
        num_tokens += len(tokenized_phrase.split())
    groups = ['{}-{}'.format('B' if i == 0 else 'I', group_name)
              for i in range(num_tokens)]
    return ' '.join(groups)

In [19]:
# Register the IOB handlers under the DEFAULT key for tokens and for groups.
token_handler_map = dict(DEFAULT=_iob_token_handler)
group_handler_map = dict(DEFAULT=_iob_group_handler)

In [20]:
# Pipeline using the IOB token and group handlers instead of hooks.
p = Pipeline(
    pattern_def_path,
    token_handler_map=token_handler_map,
    group_handler_map=group_handler_map,
    combo_options_map=combo_options_map,
)
print('This is the output after applying the token and group handlers to convert to IOB format: \n')
for utterance, tokens, groups in p.flow(disable_progress_bar=True):
    # One labelled line per field; the trailing '\n' leaves a blank line
    # between consecutive utterances.
    print(
        'utterance: {}'.format(utterance),
        'tokens: {}'.format(' '.join(tokens)),
        'groups: {}\n'.format(' '.join(groups)),
        sep='\n',
    )

This is the output after applying the token and group handlers to convert to IOB format: 

utterance: hey speaker play Kanye
tokens: B-WAKE I-WAKE B-PLAY B-ARTIST
groups: B-None I-None B-PLAY_ARTIST I-PLAY_ARTIST

utterance: hey sound system play Tom Waits
tokens: B-WAKE I-WAKE I-WAKE B-PLAY B-ARTIST I-ARTIST
groups: B-None I-None I-None B-PLAY_ARTIST I-PLAY_ARTIST I-PLAY_ARTIST

utterance: hey speaker play Tom Waits
tokens: B-WAKE I-WAKE B-PLAY B-ARTIST I-ARTIST
groups: B-None I-None B-PLAY_ARTIST I-PLAY_ARTIST I-PLAY_ARTIST

utterance: hey sound system play Kanye
tokens: B-WAKE I-WAKE I-WAKE B-PLAY B-ARTIST
groups: B-None I-None I-None B-PLAY_ARTIST I-PLAY_ARTIST

utterance: hey speaker play Flashing Lights
tokens: B-WAKE I-WAKE B-PLAY B-SONG I-SONG
groups: B-None I-None B-PLAY_SONG I-PLAY_SONG I-PLAY_SONG

utterance: hey sound system play Tabletop Joe
tokens: B-WAKE I-WAKE I-WAKE B-PLAY B-SONG I-SONG
groups: B-None I-None I-None B-PLAY_SONG I-PLAY_SONG I-PLAY_SONG

utterance: hey so