In [2]:
import itertools
import sys
from nltk.grammar import Nonterminal

In [84]:
def generate(grammar, start=None, depth=None, n=None):
    """
    Generates an iterator of all sentences from a CFG.

    :param grammar: The Grammar used to generate sentences.
    :param start: The Nonterminal from which to start generate sentences.
    :param depth: The maximal depth of the generated tree.
    :param n: The maximum number of sentences to return.
    :return: An iterator of lists of terminal tokens.
    """
    if not start:
        start = grammar.start()
    if depth is None:
        depth = sys.maxsize

    iter = _generate_all(grammar, [start], depth)

    if n:
        iter = itertools.islice(iter, n)

    return iter



def _generate_all(grammar, items, depth):
    if items:
        try:
            for frag1 in _generate_one(grammar, items[0], depth):
                for frag2 in _generate_all(grammar, items[1:], depth):
                    yield frag1 + frag2
        except RuntimeError as _error:
            if _error.message == "maximum recursion depth exceeded":
                # Helpful error message while still showing the recursion stack.
                raise RuntimeError(
                    "The grammar has rule(s) that yield infinite recursion!!"
                )
            else:
                raise
    else:
        yield []


def _generate_one(grammar, item, depth):
    if depth > 0:
        if isinstance(item, Nonterminal):
            for prod in grammar.productions(lhs=item):
                for frag in _generate_all(grammar, prod.rhs(), depth - 1):
                    yield frag
        else:
            yield [item]

O =   ["name", "date of birth", "first name", "last name", "amount owed", "address", "registration number", "company number"]
Os = (" [{0}] | ".format(1 / len(O))).join(["'{0}'".format(o) for o in O])            
Os += " [{0}] ".format(1 / len(O))

Det = ["the"] 
Dets = "'{0}' [1.0]".format(Det[0])

Conj = ["and"]
Conjs = "'{0}' [1.0]".format(Conj[0])

Comm = [","]
Comms = "'{0}' [1.0]".format(Comm[0])

Pos = ["of", "to"]
Poss = (" [{0}] | ".format(1 / len(Pos))).join(["'{0}'".format(p) for p in Pos])            
Poss += " [{0}] ".format(1 / len(Pos))

O2 = ["employee", "customer", "client", "company", "payee", "recipient"]
O2s = (" [{0}] | ".format(1 / len(O2))).join(["'{0}'".format(o2) for o2 in O2])            
O2s += " [{0}] ".format(1 / len(O2))

question_grammar = """
  S -> QP T [1.0]
  QP -> Q PR [1.0]
  PR -> Det OP [0.333] | Det O Conj O Pos Det O2 [0.333] | Det O Comm O Conj O Pos Det O2 [0.333]
  OP -> O Pos Det O2 [0.5] | O2 Ap O [0.5]
  Q -> 'what is' [1]
  Det -> """ + Dets + """
  Conj -> """ + Conjs + """
  Comm -> """ + Comms + """
  O -> """ + Os + """
  Pos -> """ + Poss + """
  Ap -> 's' [1.0]
  O2 -> """ + O2s + """
  T -> '?' [1.0]
"""
question_grammar



answer_grammar = """
  S -> Det O Pos Det O2 V SL [0.25] | Det O Pos Det O2 V SL Conj Det O V SL [0.25] | Det O2 Ap O V SL [0.25] | Det O2 Ap O V SL Conj O V SL [0.25]
  O2 -> """ + O2s + """
  V -> 'is' [1.0]
  SL -> '{SLOT}' [1.0]
  Ap -> 's' [1.0]
  O -> """ + Os + """
  Det -> """ + Dets + """
  Conj -> """ + Conjs + """
  Comm -> """ + Comms + """
  Pos -> """ + Poss 

answer_grammar

"\n  S -> Det O Pos Det O2 V SL [0.25] | Det O Pos Det O2 V SL Conj Det O V SL [0.25] | Det O2 Ap O V SL [0.25] | Det O2 Ap O V SL Conj O V SL [0.25]\n  O2 -> 'employee' [0.16666666666666666] | 'customer' [0.16666666666666666] | 'client' [0.16666666666666666] | 'company' [0.16666666666666666] | 'payee' [0.16666666666666666] | 'recipient' [0.16666666666666666] \n  V -> 'is' [1.0]\n  SL -> '{SLOT}' [1.0]\n  Ap -> 's' [1.0]\n  O -> 'name' [0.125] | 'date of birth' [0.125] | 'first name' [0.125] | 'last name' [0.125] | 'amount owed' [0.125] | 'address' [0.125] | 'registration number' [0.125] | 'company number' [0.125] \n  Det -> 'the' [1.0]\n  Conj -> 'and' [1.0]\n  Comm -> ',' [1.0]\n  Pos -> 'of' [0.5] | 'to' [0.5] "

In [85]:
from nltk.grammar import CFG, PCFG
N = 10000
grammar_q = PCFG.fromstring(question_grammar)
grammar_a = PCFG.fromstring(answer_grammar)

with open('./out/questions.csv', 'w') as outfile:
    for n, sent in enumerate(generate(grammar_q, n=N), 1):
        #print('%3d. %s' % (n, ' '.join(sent)))
        outfile.write(' '.join(sent) + '\n')

with open('./out/answers.csv', 'w') as outfile:
    for n, sent in enumerate(generate(grammar_a, n=N), 1):
        print('%3d. %s' % (n, ' '.join(sent)))

  1. the name of the employee is {SLOT}
  2. the name of the customer is {SLOT}
  3. the name of the client is {SLOT}
  4. the name of the company is {SLOT}
  5. the name of the payee is {SLOT}
  6. the name of the recipient is {SLOT}
  7. the name to the employee is {SLOT}
  8. the name to the customer is {SLOT}
  9. the name to the client is {SLOT}
 10. the name to the company is {SLOT}
 11. the name to the payee is {SLOT}
 12. the name to the recipient is {SLOT}
 13. the date of birth of the employee is {SLOT}
 14. the date of birth of the customer is {SLOT}
 15. the date of birth of the client is {SLOT}
 16. the date of birth of the company is {SLOT}
 17. the date of birth of the payee is {SLOT}
 18. the date of birth of the recipient is {SLOT}
 19. the date of birth to the employee is {SLOT}
 20. the date of birth to the customer is {SLOT}
 21. the date of birth to the client is {SLOT}
 22. the date of birth to the company is {SLOT}
 23. the date of birth to the payee is {SLOT}
 2

In [161]:
questions = list(" ".join(g) for g in generate(grammar_q, n=N))
answers = list(" ".join(g) for g in generate(grammar_a, n=N))
questions

['what is the name of the employee ?',
 'what is the name of the customer ?',
 'what is the name of the client ?',
 'what is the name of the company ?',
 'what is the name of the payee ?',
 'what is the name of the recipient ?',
 'what is the name to the employee ?',
 'what is the name to the customer ?',
 'what is the name to the client ?',
 'what is the name to the company ?',
 'what is the name to the payee ?',
 'what is the name to the recipient ?',
 'what is the date of birth of the employee ?',
 'what is the date of birth of the customer ?',
 'what is the date of birth of the client ?',
 'what is the date of birth of the company ?',
 'what is the date of birth of the payee ?',
 'what is the date of birth of the recipient ?',
 'what is the date of birth to the employee ?',
 'what is the date of birth to the customer ?',
 'what is the date of birth to the client ?',
 'what is the date of birth to the company ?',
 'what is the date of birth to the payee ?',
 'what is the date of bir

In [162]:
import re, string
from bs4 import BeautifulSoup
from os import listdir
from os.path import isfile, join
import random
from random import randint
import datetime


path = "/virtualmachines/data/companies"
files = [join(path,f) for f in listdir(path) if isfile(join(path, f)) and f.endswith(".xml")]

def generate_names():
    with open(random.choice(files), "r") as infile:
        for line in infile:
            z = re.search(r'<NonIndividualNameText>([^</]+)</NonIndividualNameText>',line)
            if z:
                yield " ".join([n.capitalize() for n in z.group(1).replace("&amp;", "&").split(" ")])
names = generate_names()                
def generate_date():
    formats = ['%d, %b %Y', '%d %b %Y', '%d %B %Y', '%d %m %Y','%d-%m-%Y', '%d/%m/%Y','%Y-%m-%d', '%Y/%m/%d']
    return datetime.date(randint(1950,2025), randint(1,12),randint(1,28)).strftime(random.choice(formats))

def generate_id():
    return ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(random.randint(3,20))).upper()
    
def generate_amount():
    ret_string = "";
    if random.random() > 0.5:
        ret_string = random.choice(["US","AU","A", "US$", "$US", "AU$", "$AU", "A$", "$A", "GBP", "GBP£", "£GBP"])
        
    if random.random() > 0.5:
        ret_string = ret_string + "{:,}".format(random.randint(0,100000))
    else:
        ret_string = ret_string + "{:}".format(random.randint(0,100000))
    
    if random.random() > 0.25:
        ret_string += "." + str(random.randint(0,99));
    return ret_string

def generate_address():
    ret_string = str(random.randint(1,1000))
    if random.random() > 0.25:
        ret_string += random.choice(string.ascii_uppercase)
    if random.random() > 0.25:
        ret_string += "/" + str(random.randint(1,1000))
    ret_string += " " + random.choice(next(names).split(" ")).capitalize()
    ret_string += " " + random.choice(["St", "Street", "Place", "Pl", "Road", "Rd", "Way", "Wy", "Alley", "All", "Lane", "Ln"])
    if random.random() > 0.25:
        ret_string += ","
    ret_string += " " + random.choice(next(names).split(" ")).capitalize()
    if random.random() > 0.25:
        ret_string += ","
    ret_string += " " + random.choice(["SA", "South Australia", "Victoria", "Queensland", "QLD", "Qld", "New South Wales", "NSW", "Greensborough", "VA", "Virginia", "Massachusetts", "MA", 
                                       "Tasmania", "Tas", "WA", "TX", "Texas", "Washington", "HI", "Hawaii"])
    if random.random() > 0.25:        
        ret_string += " " + "".join(random.choice(string.digits) for _ in range(random.randint(4,8)))
    return ret_string


In [235]:
answers

['the name of the employee is {SLOT}',
 'the name of the customer is {SLOT}',
 'the name of the client is {SLOT}',
 'the name of the company is {SLOT}',
 'the name of the payee is {SLOT}',
 'the name of the recipient is {SLOT}',
 'the name to the employee is {SLOT}',
 'the name to the customer is {SLOT}',
 'the name to the client is {SLOT}',
 'the name to the company is {SLOT}',
 'the name to the payee is {SLOT}',
 'the name to the recipient is {SLOT}',
 'the date of birth of the employee is {SLOT}',
 'the date of birth of the customer is {SLOT}',
 'the date of birth of the client is {SLOT}',
 'the date of birth of the company is {SLOT}',
 'the date of birth of the payee is {SLOT}',
 'the date of birth of the recipient is {SLOT}',
 'the date of birth to the employee is {SLOT}',
 'the date of birth to the customer is {SLOT}',
 'the date of birth to the client is {SLOT}',
 'the date of birth to the company is {SLOT}',
 'the date of birth to the payee is {SLOT}',
 'the date of birth to th

In [242]:
filled = []
for answer in answers:
    answer = answer.replace(" s ", "'s ")    
    slots = []
    while "{SLOT}" in answer:
        for slot, gen_func in {r"(first name|last name|name)":lambda: next(names), 
                               r"(date of birth)": generate_date, 
                               r"(amount owed)": generate_amount, 
                               r"(address)": generate_address,
                               r"(registration number|company number)": generate_id}.items():
            search = re.search(slot + r" ([^{SLOT}]+) {SLOT}", answer)
            if search:
                fill = gen_func()
                prev = answer
                answer = re.sub(slot + r" ([^{SLOT}]+) {SLOT}", r"\1 \2 " + fill, answer, 1)

                pos = answer.find(fill)
                for slot in slots:
                    for slot_pair in slots[i]:
                        if slot_pair[0] >= pos:
                            slot_pair[0] += len(fill) - len("{SLOT}")
                        if slot_pair[1] >= pos:
                            slot_pair[1] += len(fill) - len("{SLOT}")
                        extract = answer[slot_pair[0]:slot_pair[1]]
                        if extract not in O:
                            print(extract)
                        
                slot_name_span = [answer.find(search.groups()[0]), answer.find(search.groups()[0]) + len(search.groups()[0])]
                fill_span = [answer.find(fill), answer.find(fill) + len(fill)]
                extract = answer[slot_name_span[0]:slot_name_span[1]]
                if extract not in O:
                    print(extract)                
                slots.append([slot_name_span, fill_span])
                
        valid = re.search("(name|address|birth|number) to|owed of|number to|date of birth of the company", answer) == None
        valid = valid and all([len(re.findall(o, answer)) <= 1 for o in O if o != "'name'"])
    filled.append((answer, valid, slots))
#filled

C.a Francis & P.t Francis
25
19
the name of the employee is C.a Francis & P.t Francis
28
Taxi Driver
11
5
the name of the customer is Taxi Driver
28
G.j Ekin & J.v Pritchard
24
18
the name of the client is G.j Ekin & J.v Pritchard
26
Brett A Mclennan & Andrew S Downing
35
29
the name of the company is Brett A Mclennan & Andrew S Downing
27
D Mulholland & R.m Mulholland
29
23
the name of the payee is D Mulholland & R.m Mulholland
25
Leo Nichols Contracting
23
17
the name of the recipient is Leo Nichols Contracting
29
Angela Washington
17
11
the name to the employee is Angela Washington
28
Thomas M Winter & Simone L Harland
34
28
the name to the customer is Thomas M Winter & Simone L Harland
28
The Trustee For Bw Family Trust
31
25
the name to the client is The Trustee For Bw Family Trust
26
Dr John Parker
14
8
the name to the company is Dr John Parker
27
The Trustee For Dragon Dai Family Trust
39
33
the name to the payee is The Trustee For Dragon Dai Family Trust
25
Abul Syed & Co
14
8


Elbee Spa
9
3
the date of birth to the company is {SLOT} and the first name is Elbee Spa
65
22 Nov 1965
11
5
the date of birth to the company is 22 Nov 1965 and the first name is Elbee Spa
36
Elbee Spa
S Lee Consulting
16
10
the date of birth to the company is {SLOT} and the last name is S Lee Consulting
64
05/09/1982
10
4
the date of birth to the company is 05/09/1982 and the last name is S Lee Consulting
36
S Lee Consulting
07 02 2012
10
4
the date of birth to the company is 07 02 2012 and the amount owed is {SLOT}
36
339.6
5
-1
the date of birth to the company is 07 02 2012 and the amount owed is 339.6
70
07 02 2012
21, Oct 1990
12
6
the date of birth to the company is 21, Oct 1990 and the address is {SLOT}
36
484Q/760 Gonzalez Place Champery, VA 399593
43
37
the date of birth to the company is 21, Oct 1990 and the address is 484Q/760 Gonzalez Place Champery, VA 399593
68
21, Oct 1990
27, Apr 2024
12
6
the date of birth to the company is 27, Apr 2024 and the registration number is {

21
15
the amount owed of the recipient is {SLOT} and the name is H.g Creed & C.d Smith
59
43677.94
8
2
the amount owed of the recipient is 43677.94 and the name is H.g Creed & C.d Smith
36
H.g Creed & C.d Smith
08 08 1958
10
4
the amount owed of the recipient is {SLOT} and the date of birth is 08 08 1958
68
82627.28
8
2
the amount owed of the recipient is 82627.28 and the date of birth is 08 08 1958
36
08 08 1958
Diamond Valley Pistol Club
26
20
the amount owed of the recipient is {SLOT} and the first name is Diamond Valley Pistol Club
65
93187
5
-1
the amount owed of the recipient is 93187 and the first name is Diamond Valley Pistol Club
36
Diamond Valley Pistol Club
Martin Searles
14
8
the amount owed of the recipient is {SLOT} and the last name is Martin Searles
64
615.48
6
0
the amount owed of the recipient is 615.48 and the last name is Martin Searles
36
Martin Searles
GBP74017.10
11
5
the amount owed of the recipient is GBP74017.10 and the amount owed is {SLOT}
36
17,125.68
9
3
t

NOBXAFKMB7PEA2UC
16
10
the registration number to the recipient is ACNLFTTCYW7 and the registration number is NOBXAFKMB7PEA2UC
87
ACNLFTTCYW7
65TIS
5
-1
the registration number to the recipient is 65TIS and the company number is {SLOT}
44
E8JZLL79XWOO8FL
15
9
the registration number to the recipient is 65TIS and the company number is E8JZLL79XWOO8FL
76
65TIS
The Trustee For Aor Family Trust
32
26
the company number of the employee is {SLOT} and the name is The Trustee For Aor Family Trust
61
NYPZMEKNQN2NEH3
15
9
the company number of the employee is NYPZMEKNQN2NEH3 and the name is The Trustee For Aor Family Trust
38
The Trustee For Aor Family Trust
05, Apr 1969
12
6
the company number of the employee is {SLOT} and the date of birth is 05, Apr 1969
70
TJKGA0
6
0
the company number of the employee is TJKGA0 and the date of birth is 05, Apr 1969
38
05, Apr 1969
K.l Payne & T.v Payne
21
15
the company number of the employee is {SLOT} and the first name is K.l Payne & T.v Payne
67
Z0JMG7JQX

the customer's company number is AVNXB5CE4TKVPQGMLR and name is The Trustee For The Thibault Family Trust
33
The Trustee For The Thibault Family Trust
18-03-1997
10
4
the customer's company number is {SLOT} and date of birth is 18-03-1997
61
C7AGUV0N1NLULJ5K450
19
13
the customer's company number is C7AGUV0N1NLULJ5K450 and date of birth is 18-03-1997
33
18-03-1997
James L & Lorraine M Norton
27
21
the customer's company number is {SLOT} and first name is James L & Lorraine M Norton
58
MGTA3GCBHEEMHHONU93
19
13
the customer's company number is MGTA3GCBHEEMHHONU93 and first name is James L & Lorraine M Norton
33
James L & Lorraine M Norton
Phil Benson
11
5
the customer's company number is {SLOT} and last name is Phil Benson
57
JD4EJGDEZXPHIOIDV4W3
20
14
the customer's company number is JD4EJGDEZXPHIOIDV4W3 and last name is Phil Benson
33
Phil Benson
13683
5
-1
the customer's company number is {SLOT} and amount owed is 13683
59
YLVPGROR77EKJYFEA
17
11
the customer's company number is YLVP

04 04 1967
10
4
the recipient's registration number is {SLOT} and date of birth is 04 04 1967
67
BPUYTJTVZISOCDVJLT
18
12
the recipient's registration number is BPUYTJTVZISOCDVJLT and date of birth is 04 04 1967
39
04 04 1967
Kevin Brennan Autos Pty Ltd & Dunrunnin Pty Limited
51
45
the recipient's registration number is {SLOT} and first name is Kevin Brennan Autos Pty Ltd & Dunrunnin Pty Limited
64
OEDKANM7RHFET6UMVQLM
20
14
the recipient's registration number is OEDKANM7RHFET6UMVQLM and first name is Kevin Brennan Autos Pty Ltd & Dunrunnin Pty Limited
39
Kevin Brennan Autos Pty Ltd & Dunrunnin Pty Limited
G Wheatley & P.a Wheatley
25
19
the recipient's registration number is {SLOT} and last name is G Wheatley & P.a Wheatley
63
LAMGMDPV
8
2
the recipient's registration number is LAMGMDPV and last name is G Wheatley & P.a Wheatley
39
G Wheatley & P.a Wheatley
19,292.19
9
3
the recipient's registration number is {SLOT} and amount owed is 19,292.19
65
JJ59E2UXLYSHNZCD1E
18
12
the recipie

In [244]:
with open("out/answers.csv", "w") as outfile:
    for line in filled:
        outfile.write(line[0].replace(",", "{COMMA}"))
        outfile.write(",")
        outfile.write(str(line[1] == True))
        outfile.write("\n")
        
with open("out/question_answer_spans.csv", "w") as outfile:
    for line in filled:
        if line[1]:
            slot_spans = list(map(lambda x: x[0], line[2]))
            slot_names = [line[0][slot[0]:slot[1]] for slot in slot_spans]
            q = random.choice([q for q in questions if all([slot_name in q for slot_name in slot_names])])
            outfile.write(q.replace(",", "{COMMA}"))
            outfile.write(",")
            outfile.write(line[0].replace(",", "{COMMA}"))
            outfile.write(",")
            for span_pair in line[2]:
                outfile.write("|".join(map(str, itertools.chain.from_iterable(span_pair))))
            outfile.write("\n")

('the name of the employee is C.a Francis & P.t Francis', True, [[[4, 8], [28, 53]]])
['name']
('the name of the customer is Taxi Driver', True, [[[4, 8], [28, 39]]])
['name']
('the name of the client is G.j Ekin & J.v Pritchard', True, [[[4, 8], [26, 50]]])
['name']
('the name of the company is Brett A Mclennan & Andrew S Downing', True, [[[4, 8], [27, 62]]])
['name']
('the name of the payee is D Mulholland & R.m Mulholland', True, [[[4, 8], [25, 54]]])
['name']
('the name of the recipient is Leo Nichols Contracting', True, [[[4, 8], [29, 52]]])
['name']
('the date of birth of the employee is 14 January 1977', True, [[[4, 17], [37, 52]]])
['date of birth']
('the date of birth of the customer is 20 04 1983', True, [[[4, 17], [37, 47]]])
['date of birth']
('the date of birth of the client is 12 01 1989', True, [[[4, 17], [35, 45]]])
['date of birth']
('the date of birth of the payee is 2007/01/13', True, [[[4, 17], [34, 44]]])
['date of birth']
('the date of birth of the recipient is 21

['first name', 'date of birth']
('the first name of the recipient is The Trustee For Rch Properties Trust and the amount owed is $A55186.50', True, [[[4, 14], [35, 71]], [[80, 91], [95, 105]]])
['first name', 'amount owed']
('the first name of the recipient is Catlin Family Trust & Sixsmith Family Trust and the address is 355E Sarah Rd Nguyen, VA 35939', True, [[[4, 14], [35, 78]], [[87, 94], [98, 128]]])
['first name', 'address']
('the first name of the recipient is Magnus B Agren and the registration number is VUEDGL', True, [[[4, 14], [35, 49]], [[58, 77], [81, 87]]])
['first name', 'registration number']
('the first name of the recipient is Kosta and the company number is G2FPRLFP3', True, [[[4, 14], [35, 40]], [[49, 63], [67, 76]]])
['first name', 'company number']
('the last name of the employee is The Trustee For Hogan Super Fund and the date of birth is 03 09 1983', True, [[[4, 13], [33, 65]], [[74, 87], [91, 101]]])
['last name', 'date of birth']
('the last name of the employe

("the employee's first name is The Trustee For The Sunset Beach Unit Trust", True, [[[15, 25], [29, 72]]])
['first name']
("the employee's last name is The Trustee For I.d.c. Family Trust", True, [[[15, 24], [28, 63]]])
['last name']
("the employee's amount owed is A$56,995.23", True, [[[15, 26], [30, 41]]])
['amount owed']
("the employee's address is 575L/48 Nolan St, Km, Tas 34002409", True, [[[15, 22], [26, 60]]])
['address']
("the employee's registration number is NATBYHNV", True, [[[15, 34], [38, 46]]])
['registration number']
("the employee's company number is GERRN2TLNYXV", True, [[[15, 29], [33, 45]]])
['company number']
("the customer's name is Four Plus Two", True, [[[15, 19], [23, 36]]])
['name']
("the customer's date of birth is 05 06 1994", True, [[[15, 28], [32, 42]]])
['date of birth']
("the customer's first name is Bonjim Pty Ltd & Loxon Pty Ltd", True, [[[15, 25], [29, 59]]])
['first name']
("the customer's last name is Partnership Of Metropole Pty Ltd & Markianda Pty 

("the client's address is 773Y/855 Rebecca St, 1, Greensborough 71730 and registration number is LHJ2TJVVL5WVGR", True, [[[13, 20], [24, 67]], [[72, 91], [95, 109]]])
['address', 'registration number']
("the client's address is 277Q/223 Trust Alley Furbish, Qld 79353437 and company number is CKAP8DGP0ZOVR", True, [[[13, 20], [24, 66]], [[71, 85], [89, 102]]])
['address', 'company number']
("the client's registration number is C5YL and name is A.w.pyle", True, [[[45, 49], [53, 61]], [[13, 32], [36, 40]]])
['name', 'registration number']
("the client's registration number is OREFS6ERB3B7TCW1Q and date of birth is 1975-10-06", True, [[[58, 71], [75, 85]], [[13, 32], [36, 53]]])
['date of birth', 'registration number']
("the client's registration number is QFIM7IYHCYAPGUBPCNY and first name is P & A Sarakinis", True, [[[60, 70], [74, 89]], [[13, 32], [36, 55]]])
['first name', 'registration number']
("the client's registration number is 4DM8DJUK0ZIYFC1DN6BF and last name is Jay Lupton", Tr

In [None]:
  Q -> 'what' [0.1428] | 'who' [0.1428] | 'where' [0.1428] | 'when' [0.1428] | 'how much' [0.1428] | 'will' [0.1428] | 'which' [0.1428]
