Skip to content
This repository has been archived by the owner on Jan 30, 2023. It is now read-only.

Commit

Permalink
Boltzmann: document the generator module
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerl13 committed Jun 19, 2019
1 parent 75ec37c commit 8b40cc1
Showing 1 changed file with 178 additions and 22 deletions.
200 changes: 178 additions & 22 deletions src/sage/combinat/boltzmann_sampling/generator.pyx
@@ -1,14 +1,83 @@
# coding: utf-8
# cython: profile=True
r"""Boltzmann generator for Context-free grammars.
This module provides functions for generating combinatorial objects (i.e.
objects described by a combinatorial specification, see
:ref:`sage.combinat.bolzmann_sampling.grammar`) according to the Boltzmann
distribution.
Given an unlabelled combinatorial class A, the Boltzmann distribution of
parameter x is such that an object of size n is drawn with the probability ``x^n
/ A(x)`` where ``A(x)`` denotes the ordinary generating function of A. For
labelled classes, this probability is set to ``x^n / (n! * A(x))`` where A(x)
denotes the exponential generating function of A. See [DuFlLoSc04] for details.
By default, the objects produced by the generator are nested tuples of strings
(the atoms). For instance ``('z', ('z', 'e', 'e'), ('z', 'e', 'e'))`` is a
balanced binary tree with 3 internal nodes (z) and 4 leaves (e). To alter this
behaviour and generate other types of objects, you can specify a builder
function for each type of symbol in the grammar. The behaviour of the builders
is that they are applied in bottom up order to the structure "on the fly" during
generation. For instance, in order to generate Dyck words using the grammar for
binary trees, one can use a builder that return ``""`` for each leaf ``"(" +
left child + ")" + right child`` for each node. The builders will receive a
tuple for each product, a string for each atom and builders for unions should be
computed using the ``UnionBuilder`` helper. See the example below for the case
of Dyck words.
EXAMPLES::
sage: from sage.combinat.boltzmann_sampling.grammar import *
sage: from sage.combinat.boltzmann_sampling.generator import *
sage: leaf = Atom("e", size=0)
sage: z = Atom("z")
sage: grammar = Grammar(rules={"B": Union(leaf, Product(z, "B", "B"))})
sage: generator = Generator(grammar)
sage: def leaf_builder(_):
....: return ""
sage: def node_builder(tuple):
....: _, left, right = tuple
....: return "(" + left + ")" + right
sage: generator.set_builder("B", UnionBuilder(leaf_builder, node_builder))
sage: dyck_word, _ = generator.gen("B", (10, 20))
sage: dyck_word # random
"(()((()())))((())(()((()))))"
Note that the builders' mechanism can also be used to compute parameters on the
structure on the fly without building the whole structure such as the height of
the tree.
EXAMPLES::
sage: from sage.combinat.boltzmann_sampling.grammar import *
sage: from sage.combinat.boltzmann_sampling.generator import *
sage: leaf = Atom("e", size=0)
sage: z = Atom("z")
sage: grammar = Grammar(rules={"B": Union(leaf, Product(z, "B", "B"))})
sage: generator = Generator(grammar)
sage: def leaf_height(_):
....: return 0
sage: def node_height(tuple):
....: _, left, right = tuple
....: return 1 + max(left, right)
sage: generator.set_builder("B", UnionBuilder(leaf_height, node_height))
sage: height, _ = generator.gen("B", (10, 20))
sage: height # random
6
REFERENCES::
.. [DuFlLoSc04] Philippe Duchon, Philippe Flajolet, Guy Louchard, and Gilles
Schaeffer. 2004. Boltzmann Samplers for the Random Generation of
Combinatorial Structures. Comb. Probab. Comput. 13, 4-5 (July 2004), 577-625.
DOI=http://dx.doi.org/10.1017/S0963548304006315
"""

from sage.libs.gmp.random cimport gmp_randinit_set, gmp_randinit_default
from sage.libs.gmp.types cimport gmp_randstate_t
from sage.misc.randstate cimport randstate, current_randstate
from .grammar import Atom, Product, Ref, Union

# ---
# Preprocessing
# ---
from .oracle import SimpleOracle

ctypedef enum options:
REF,
Expand All @@ -19,29 +88,48 @@ ctypedef enum options:
FUNCTION,
WRAP_CHOICE

cdef inline wrap_choice(float weight, int id):
return (WRAP_CHOICE, weight, id)
# ---
# Preprocessing
# ---

cdef map_all_names_to_ids_expr(name_to_id, expr):
# For performance reasons, we use integers to identify symbols rather that
# strings during generation. These two functions do the substitutions and
# compute tables for mapping the names to their integer id and the ids to their
# original names.
# All of this is hidden from the user.

cdef _map_all_names_to_ids_expr(name_to_id, expr):
"""Recursively transform an expression into a triple of the form
(RULE_TYPE, weight, args) where:
- RULE_TYPE is of the values of the options enum (see above)
- weight is the value of the generating function of the expression
- args is auxilliary information (the name of an atom, the terms of an
union, ...)
"""
if isinstance(expr, Ref):
return (REF, expr.weight, name_to_id[expr.name])
elif isinstance(expr, Atom):
return (ATOM, expr.weight, (expr.name, expr.size))
elif isinstance(expr, Union):
args = tuple((map_all_names_to_ids_expr(name_to_id, arg) for arg in expr.args))
args = tuple((_map_all_names_to_ids_expr(name_to_id, arg) for arg in expr.args))
return (UNION, expr.weight, args)
elif isinstance(expr, Product):
args = tuple((map_all_names_to_ids_expr(name_to_id, arg) for arg in expr.args))
args = tuple((_map_all_names_to_ids_expr(name_to_id, arg) for arg in expr.args))
return (PRODUCT, expr.weight, args)

cdef map_all_names_to_ids(rules):
cdef _map_all_names_to_ids(rules):
"""Assign an integer (identifier) to each symbol in the grammar and compute
two dictionnaries:
- one that maps the names to their identifiers
- one that maps the identifier to the original names
"""
name_to_id = {}
id_to_name = {}
for i, name in enumerate(rules.keys()):
name_to_id[name] = i
id_to_name[i] = name
rules = [
map_all_names_to_ids_expr(name_to_id, rules[id_to_name[i]])
_map_all_names_to_ids_expr(name_to_id, rules[id_to_name[i]])
for i in range(len(name_to_id))
]
return name_to_id, id_to_name, rules
Expand Down Expand Up @@ -82,7 +170,10 @@ cdef int c_simulate(int id, float weight, int size_max, flat_rules, randstate rs
# Actual generation
# ---

cdef c_generate(int id, float weight, flat_rules, builders, randstate rstate):
cdef inline wrap_choice(float weight, int id):
return (WRAP_CHOICE, weight, id)

cdef c_generate(int id, float weight, rules, builders, randstate rstate):
generated = []
cdef list todo = [(REF, weight, id)]
cdef float r = 0.
Expand All @@ -91,7 +182,7 @@ cdef c_generate(int id, float weight, flat_rules, builders, randstate rstate):
if type == REF:
symbol = args
todo.append((FUNCTION, weight, symbol))
todo.append(flat_rules[symbol])
todo.append(rules[symbol])
elif type == ATOM:
atom_name, __ = args
generated.append(atom_name)
Expand Down Expand Up @@ -120,23 +211,27 @@ cdef c_generate(int id, float weight, flat_rules, builders, randstate rstate):
generated.append(func(x))
elif type == WRAP_CHOICE:
choice = generated.pop()
generated.append((args, choice))
choice_number = args
generated.append((choice_number, choice))

obj, = generated
return obj


cdef c_gen(int id, float weight, flat_rules, int size_min, int size_max, int max_retry, builders):
cdef c_gen(int id, float weight, rules, int size_min, int size_max, int max_retry, builders):
cdef int nb_rejections = 0
cdef int cumulative_rejected_size = 0
cdef int size = -1
# A handle on the random generator
cdef randstate rstate = current_randstate()
# Allocate a gmp_randstate_t
cdef gmp_randstate_t gmp_state
gmp_randinit_default(gmp_state)

while nb_rejections < max_retry:
# save the random generator's state
gmp_randinit_set(gmp_state, rstate.gmp_state)
size = c_simulate(id, weight, size_max, flat_rules, rstate)
size = c_simulate(id, weight, size_max, rules, rstate)
if size <= size_max and size >= size_min:
break
else:
Expand All @@ -146,32 +241,59 @@ cdef c_gen(int id, float weight, flat_rules, int size_min, int size_max, int max
if not(size <= size_max and size >= size_min):
return None

# Reset the random generator to the state it was just before the simulation
gmp_randinit_set(rstate.gmp_state, gmp_state)
obj = c_generate(id, weight, flat_rules, builders, rstate)
obj = c_generate(id, weight, rules, builders, rstate)
statistics = {
"size": size,
"nb_rejections": nb_rejections,
"cumulative_rejected_size": cumulative_rejected_size,
}
return statistics, obj

# ---
# Builders
# ---

cdef inline identity(x):
return x

def UnionBuilder(*builders):
""""Helper for computing the builder of a union out of the builders of
its components. For instance, in order to compute the height of a binary
tree on the fly:
EXAMPLES::
sage: # Grammar: B = Union(leaf, Product(z, B, B))
sage: from sage.combinat.boltzmann_sampling.generator import UnionBuilder
sage: def leaf_builder(_):
....: return 0
sage: def node_builder(tuple):
....: _, left, right = tuple
....: return 1 + max(left, right)
sage: builder = UnionBuilder(leaf_builder, node_builder)
sage: choice_number = 0
sage: builder((choice_number, "leaf"))
0
sage: choice_number = 1
sage: builder((choice_number, ('z', 37, 23)))
38
"""
def build(obj):
index, content = obj
builder = builders[index]
return builder(content)
return build

cdef inline ProductBuilder(builders):
"""Default builder for product: return a tuple."""
def build(terms):
return tuple(builders[i](terms[i]) for i in range(len(terms)))
return build

cdef make_default_builder(rule):
"""Generate the default builders for a rule"""
type, __, args = rule
if type == REF:
return identity
Expand All @@ -184,18 +306,26 @@ cdef make_default_builder(rule):
subbuilders = [make_default_builder(component) for component in args]
return ProductBuilder(subbuilders)


class Generator:
"""High level interface for Boltzmann samplers."""

def __init__(self, grammar, oracle=None):
"""Make a Generator out of a grammar.
INPUT::
- ``grammar`` -- a combinatorial grammar
- ``oracle`` (default: None) -- an oracle for the grammar. If not
supplied, a default generic oracle is automatically generated.
"""
# Load the default oracle if none is supplied
if oracle is None:
from .oracle import OracleSimple
oracle = OracleSimple(grammar, e1=1e-6, e2=1e-6)
oracle = SimpleOracle(grammar, e1=1e-6, e2=1e-6)
self.oracle = oracle
# flatten the grammar for faster access to rules
self.grammar = grammar
self.grammar.annotate(oracle)
name_to_id, id_to_name, flat_rules = map_all_names_to_ids(grammar.rules)
name_to_id, id_to_name, flat_rules = _map_all_names_to_ids(grammar.rules)
self.name_to_id = name_to_id
self.id_to_name = id_to_name
self.flat_rules = flat_rules
Expand All @@ -206,14 +336,40 @@ class Generator:
]

def set_builder(self, non_terminal, func):
"""Set the builder for a non-terminal symbol.
INPUT::
- ``non_terminal`` -- string, the name of the non-terminal symbol
- ``func`` -- function, the builder
"""
symbol_id = self.name_to_id[non_terminal]
self.builders[symbol_id] = func

def get_builder(self, non_terminal):
"""Retrieve the current builder for a non-terminal symbol.
INPUT::
- ``non_terminal`` -- string, the name of the non-terminal symbol
"""
symbol_id = self.name_to_id[non_terminal]
return self.builders[symbol_id]

def gen(self, name, window, max_retry=2000):
"""Generate a term of the grammar in a given size window.
INPUT::
- ``name`` -- string, the name of the symbol of the grammar you want to
generate
- ``window`` -- pair of integers, the size of the generated object will
be greater than the first component of the window and lower than the
second component
- ``max_retry`` (default: 2000) -- integer, maximum number of attempts.
If no object in the size window is found in less that ``max_retry``
attempts, the generator returns None
"""
id = self.name_to_id[name]
weight = self.grammar.rules[name].weight
size_min, size_max = window
Expand Down

0 comments on commit 8b40cc1

Please sign in to comment.