In [1]:
import itertools
from collections import namedtuple


class PhraseSlot(namedtuple('PhraseSlot', ['phrase', 'slot'])):
    """
    Immutable ``(phrase, slot)`` pair: a text fragment plus the optional name
    of the slot that the fragment fills.

    The subclass exists to give ``slot`` a default of ``None`` and to validate
    and normalise both fields at construction time:

    * ``phrase`` must be a ``str`` that is non-empty after stripping whitespace;
    * ``slot`` is either ``None`` (the phrase fills no slot) or a ``str`` that
      is non-empty after stripping whitespace.

    Both fields are stored with surrounding whitespace removed.

    Raises:
        AssertionError: if a field has the wrong type or is blank. The error
            is raised explicitly rather than through an ``assert`` statement,
            so the validation is still performed when Python runs with ``-O``.
    """

    __slots__ = ()

    def __new__(cls, phrase, slot=None):
        phrase = cls._clean(phrase, 'phrase')
        if slot is not None:
            slot = cls._clean(slot, 'slot')
        return super().__new__(cls, phrase, slot)

    @staticmethod
    def _clean(value, field):
        # Validate one field and return it stripped. AssertionError is kept as
        # the exception type so that existing callers catching it keep working.
        if not isinstance(value, str):
            raise AssertionError(f'{value} must be of type `str`, got {type(value)}')
        value = value.strip()
        if not value:
            raise AssertionError(f'{field} must be non empty')
        return value


def make_samples(*phrase_slots):
    """
    Builds slot-filling samples from the Cartesian product of the given groups.

    inputs:
        *phrase_slots: any number of iterables; one element is taken from each
            of them, in order, to form a combination
    outputs: list with one `make_slot` result per combination
    """

    samples = []
    for combination in itertools.product(*phrase_slots):
        samples.append(make_slot(combination))
    return samples

def make_slot(phrase_slots, sep=' '):
    """
    Joins phrases into a single text and describes where each slot lies in it.

    Every slot is a dict with the following structure:

        {'start': index of the first character of the slot value in the text,
         'end': index just past the last character (text[start:end] is the value),
         'len': length of slot value # redundant: always end - start
         'title': name of slot,
         'text': slot value}

    inputs:
        phrase_slots: iterable of (phrase, slot) pairs such as PhraseSlot;
            a pair whose slot is None contributes text but no slot entry.
            The iterable is consumed exactly once, so generators and other
            one-shot iterators are supported.
        sep: separator between phrases when concatenating
    outputs: pair of text and slots
    """

    phrases = []
    slots = []
    offset = 0  # position in the joined text where the current phrase starts
    for phrase, slot in phrase_slots:
        phrases.append(phrase)
        end = offset + len(phrase)
        if slot is not None:
            slots.append({'start': offset,
                          'end': end,
                          'len': end - offset,
                          'title': slot,
                          'text': phrase})
        # The next phrase begins after this one and the separator that follows it.
        offset = end + len(sep)

    return sep.join(phrases), slots

In [2]:
def test_phrase_slot():
    """
    Smoke-checks PhraseSlot validation, printing one line per case in a fixed order.

    A case prints `ok` when the constructor behaves as expected (invalid
    arguments are rejected with AssertionError, valid ones are accepted) and
    `not ok` otherwise. Any other exception type is deliberately not caught,
    so a mistake in a test case surfaces instead of being reported as a pass.
    """

    def check(expect_rejected, *args, **kwargs):
        # Build PhraseSlot(*args, **kwargs) and report whether the outcome matches.
        try:
            PhraseSlot(*args, **kwargs)
        except AssertionError:
            rejected = True
        else:
            rejected = False
        print('ok' if rejected == expect_rejected else 'not ok')

    def empty_phrase():
        check(True, '')

    def not_string_phrase():
        check(True, 1)

    def empty_slot():
        check(True, '1', '')

    def not_string_slot():
        check(True, '1', 1)

    def all_non_empty_strings():
        check(False, '1', '1')

    def non_empty_string_phrase_with_not_slot():
        check(False, '1')

    def phrase_with_space():
        check(True, ' ')

    def slot_with_space():
        # The keyword is `phrase`; the former `word=` raised TypeError, so the
        # whitespace-only slot was never actually validated.
        check(True, phrase='a', slot=' ')

    empty_phrase()
    not_string_phrase()
    empty_slot()
    not_string_slot()
    all_non_empty_strings()
    non_empty_string_phrase_with_not_slot()
    phrase_with_space()
    slot_with_space()


# Run the PhraseSlot checks; each of the eight cases prints `ok` when it passes.
test_phrase_slot()

ok
ok
ok
ok
ok
ok
ok
ok


In [3]:
# Opening fragments: plain text that fills no slot.
begin_phrases = [PhraseSlot(text) for text in ('я хочу купить', 'где купить')]

# Alternative values for the product slot.
# NOTE(review): the title 'Товра' looks like a typo for 'Товар' -- confirm before
# changing it, since the title is emitted verbatim in every generated sample.
slots = [PhraseSlot(text, 'Товра') for text in ('велик', 'велосипед')]

# Single value for the price slot.
price = [PhraseSlot('60k', 'price')]


# One sample per combination: 2 openings x 2 products x 1 price.
samples = make_samples(begin_phrases, slots, price)
samples
# Disabled leftovers from interactive exploration
# (check_bad_samples is not defined in the cells shown here):
# check_bad_samples(samples)
# [d for d, _ in samples]

[('я хочу купить велик 60k',
  [{'start': 14, 'end': 19, 'len': 5, 'title': 'Товра', 'text': 'велик'},
   {'start': 20, 'end': 23, 'len': 3, 'title': 'price', 'text': '60k'}]),
 ('я хочу купить велосипед 60k',
  [{'start': 14, 'end': 23, 'len': 9, 'title': 'Товра', 'text': 'велосипед'},
   {'start': 24, 'end': 27, 'len': 3, 'title': 'price', 'text': '60k'}]),
 ('где купить велик 60k',
  [{'start': 11, 'end': 16, 'len': 5, 'title': 'Товра', 'text': 'велик'},
   {'start': 17, 'end': 20, 'len': 3, 'title': 'price', 'text': '60k'}]),
 ('где купить велосипед 60k',
  [{'start': 11, 'end': 20, 'len': 9, 'title': 'Товра', 'text': 'велосипед'},
   {'start': 21, 'end': 24, 'len': 3, 'title': 'price', 'text': '60k'}])]