In [3]:
import itertools
import sys
from nltk.grammar import Nonterminal

In [22]:
def generate(grammar, start=None, depth=None, n=None):
    """
    Generates an iterator of all sentences from a CFG.

    Sentences are produced lazily by ``_generate_all``; nothing is expanded
    until the returned iterator is consumed.

    :param grammar: The Grammar used to generate sentences.
    :param start: The Nonterminal from which to start generating sentences.
        Defaults to ``grammar.start()`` when omitted.
    :param depth: The maximal depth of the generated tree.  Defaults to
        ``sys.maxsize`` when omitted.
    :param n: The maximum number of sentences to return.  ``None`` means no
        limit; ``0`` yields no sentences at all.
    :return: An iterator of lists of terminal tokens.
    """
    if not start:
        start = grammar.start()
    if depth is None:
        depth = sys.maxsize

    sentences = _generate_all(grammar, [start], depth)

    # Test against None instead of truthiness: with ``if n:`` a caller asking
    # for n=0 sentences silently received the full, unbounded stream.
    if n is not None:
        sentences = itertools.islice(sentences, n)

    return sentences



def _generate_all(grammar, items, depth):
    """
    Yield every token list derivable from a sequence of grammar symbols.

    The first symbol is expanded with ``_generate_one`` and each result is
    concatenated with every expansion of the remaining symbols.

    :param grammar: The Grammar whose productions are expanded.
    :param items: Sequence of symbols to expand, left to right.
    :param depth: Depth budget, passed unchanged to ``_generate_one``.
    :return: A generator of lists of terminal tokens.  An empty ``items``
        yields exactly one empty list.
    :raises RuntimeError: With a grammar-specific message when the
        interpreter's recursion limit is hit during expansion; any other
        ``RuntimeError`` is re-raised unchanged.
    """
    if items:
        try:
            for frag1 in _generate_one(grammar, items[0], depth):
                for frag2 in _generate_all(grammar, items[1:], depth):
                    yield frag1 + frag2
        except RuntimeError as _error:
            # Exceptions have no ``.message`` attribute on Python 3, so the
            # old check raised AttributeError here and hid the real problem.
            # Match on the prefix of str() because the interpreter may append
            # detail after "maximum recursion depth exceeded".
            if str(_error).startswith("maximum recursion depth exceeded"):
                # Helpful error message while still showing the recursion stack.
                raise RuntimeError(
                    "The grammar has rule(s) that yield infinite recursion!!"
                )
            else:
                raise
    else:
        yield []


def _generate_one(grammar, item, depth):
    """
    Expand a single grammar symbol into the token lists it can produce.

    :param grammar: The Grammar whose productions are consulted.
    :param item: A Nonterminal to expand; any other object is emitted
        unchanged as a one-token list.
    :param depth: Depth budget.  When it is not positive, nothing is yielded.
    :return: A generator of lists of terminal tokens.
    """
    # Depth budget exhausted: this branch contributes no sentences.
    if not depth > 0:
        return

    # Anything that is not a Nonterminal is passed through as a single token.
    if not isinstance(item, Nonterminal):
        yield [item]
        return

    # Try each production for this nonterminal, spending one level of depth.
    for production in grammar.productions(lhs=item):
        for tokens in _generate_all(grammar, production.rhs(), depth - 1):
            yield tokens


# Grammar source text for simple "what is the ... ?" questions.  Every
# alternative carries a bracketed weight; the string is parsed with
# PCFG.fromstring further down in this notebook.
# NOTE(review): the O2 rule writes its weights *before* each terminal, unlike
# every other rule here -- confirm the parser accepts both placements.
# NOTE(review): Ap expands to a bare 's' (presumably a possessive suffix);
# tokens are later joined with single spaces -- confirm that is intended.
demo_grammar = """
  S -> QP T [1.0]
  QP -> Q PR [1.0]
  PR -> Det OP [0.333] | Det O Conj O Pos Det O2 [0.333] | Det O Comm O Conj O Pos Det O2 [0.333]
  OP -> O Pos Det O2 [0.5] | O2 Ap O [0.5]
  Q -> 'what is' [1] 
  Det -> 'the' [1.0]
  Conj -> 'and' [1.0] 
  Comm -> ',' [1.0]
  O -> 'name' [0.166666666] | 'date of birth' [0.166666666] | 'first name' [0.166666666] | 'last name' [0.166666666] | 'amount owed' [0.166666666] | 'address' [0.166666666]
  Pos -> 'of' [0.9] | 'to' [0.1]
  Ap -> 's' [1.0]
  O2 -> [0.166666666] 'employee' | [0.166666666] 'customer' | [0.166666666] 'client' | [0.166666666] 'company' | [0.166666666] 'payee' | [0.166666666] 'recipient'
  T -> '?' [1.0]
"""

In [27]:
from nltk.grammar import CFG, PCFG
# Upper bound on the number of sentences written to the output file.
N = 10000
# Destination file: one generated sentence per line.
OUTPUT_PATH = './generated.out'

print('Generating the first %d sentences for demo grammar:' % (N,))
print(demo_grammar)
grammar = PCFG.fromstring(demo_grammar)

# generate() in this notebook enumerates productions exhaustively and never
# reads the rule weights, so the output is deterministic.
with open(OUTPUT_PATH, 'w') as outfile:
    for sent in generate(grammar, n=N):
        # Tokens are joined with single spaces to form the sentence text.
        outfile.write(' '.join(sent) + '\n')

Generating the first 10000 sentences for demo grammar:

  S -> QP T [1.0]
  QP -> Q PR [1.0]
  PR -> Det OP [0.333] | Det O Conj O Pos Det O2 [0.333] | Det O Comm O Conj O Pos Det O2 [0.333]
  OP -> O Pos Det O2 [0.5] | O2 Ap O [0.5]
  Q -> 'what is' [1] 
  Det -> 'the' [1.0]
  Conj -> 'and' [1.0] 
  Comm -> ',' [1.0]
  O -> 'name' [0.166666666] | 'date of birth' [0.166666666] | 'first name' [0.166666666] | 'last name' [0.166666666] | 'amount owed' [0.166666666] | 'address' [0.166666666]
  Pos -> 'of' [0.9] | 'to' [0.1]
  Ap -> 's' [1.0]
  O2 -> [0.166666666] 'employee' | [0.166666666] 'customer' | [0.166666666] 'client' | [0.166666666] 'company' | [0.166666666] 'payee' | [0.166666666] 'recipient'
  T -> '?' [1.0]



In [None]:
def name_generator():
    """
    Placeholder for a name generator; not implemented yet.

    NOTE(review): the original cell held only the ``def`` line, which is a
    syntax error on its own.  The intended behavior is not shown anywhere in
    this notebook -- confirm with the author before filling it in.

    :raises NotImplementedError: Always, until a real implementation exists.
    """
    raise NotImplementedError("name_generator is not implemented yet")

In [None]:
# Draft alternative for the Q rule of demo_grammar (grammar notation, not Python):
#   Q -> 'what' [0.1428] | 'who' [0.1428] | 'where' [0.1428] | 'when' [0.1428] | 'how much' [0.1428] | 'will' [0.1428] | 'which' [0.1428]
