diff --git a/src/sage/combinat/boltzmann_sampling/generator.pyx b/src/sage/combinat/boltzmann_sampling/generator.pyx index 55531de1d43..38bbb394b3f 100644 --- a/src/sage/combinat/boltzmann_sampling/generator.pyx +++ b/src/sage/combinat/boltzmann_sampling/generator.pyx @@ -1,14 +1,83 @@ # coding: utf-8 -# cython: profile=True +r"""Boltzmann generator for Context-free grammars. + +This module provides functions for generating combinatorial objects (i.e. +objects described by a combinatorial specification, see +:ref:`sage.combinat.bolzmann_sampling.grammar`) according to the Boltzmann +distribution. + +Given an unlabelled combinatorial class A, the Boltzmann distribution of +parameter x is such that an object of size n is drawn with the probability ``x^n +/ A(x)`` where ``A(x)`` denotes the ordinary generating function of A. For +labelled classes, this probability is set to ``x^n / (n! * A(x))`` where A(x) +denotes the exponential generating function of A. See [DuFlLoSc04] for details. + +By default, the objects produced by the generator are nested tuples of strings +(the atoms). For instance ``('z', ('z', 'e', 'e'), ('z', 'e', 'e'))`` is a +balanced binary tree with 3 internal nodes (z) and 4 leaves (e). To alter this +behaviour and generate other types of objects, you can specify a builder +function for each type of symbol in the grammar. The behaviour of the builders +is that they are applied in bottom up order to the structure "on the fly" during +generation. For instance, in order to generate Dyck words using the grammar for +binary trees, one can use a builder that return ``""`` for each leaf ``"(" + +left child + ")" + right child`` for each node. The builders will receive a +tuple for each product, a string for each atom and builders for unions should be +computed using the ``UnionBuilder`` helper. See the example below for the case +of Dyck words. + +EXAMPLES:: + + sage: from sage.combinat.boltzmann_sampling.grammar import * + sage: from sage.combinat.boltzmann_sampling.generator import * + sage: leaf = Atom("e", size=0) + sage: z = Atom("z") + sage: grammar = Grammar(rules={"B": Union(leaf, Product(z, "B", "B"))}) + sage: generator = Generator(grammar) + sage: def leaf_builder(_): + ....: return "" + sage: def node_builder(tuple): + ....: _, left, right = tuple + ....: return "(" + left + ")" + right + sage: generator.set_builder("B", UnionBuilder(leaf_builder, node_builder)) + sage: dyck_word, _ = generator.gen("B", (10, 20)) + sage: dyck_word # random + "(()((()())))((())(()((()))))" + +Note that the builders' mechanism can also be used to compute parameters on the +structure on the fly without building the whole structure such as the height of +the tree. + +EXAMPLES:: + + sage: from sage.combinat.boltzmann_sampling.grammar import * + sage: from sage.combinat.boltzmann_sampling.generator import * + sage: leaf = Atom("e", size=0) + sage: z = Atom("z") + sage: grammar = Grammar(rules={"B": Union(leaf, Product(z, "B", "B"))}) + sage: generator = Generator(grammar) + sage: def leaf_height(_): + ....: return 0 + sage: def node_height(tuple): + ....: _, left, right = tuple + ....: return 1 + max(left, right) + sage: generator.set_builder("B", UnionBuilder(leaf_height, node_height)) + sage: height, _ = generator.gen("B", (10, 20)) + sage: height # random + 6 + +REFERENCES:: + +.. [DuFlLoSc04] Philippe Duchon, Philippe Flajolet, Guy Louchard, and Gilles + Schaeffer. 2004. Boltzmann Samplers for the Random Generation of + Combinatorial Structures. Comb. Probab. Comput. 13, 4-5 (July 2004), 577-625. + DOI=http://dx.doi.org/10.1017/S0963548304006315 +""" from sage.libs.gmp.random cimport gmp_randinit_set, gmp_randinit_default from sage.libs.gmp.types cimport gmp_randstate_t from sage.misc.randstate cimport randstate, current_randstate from .grammar import Atom, Product, Ref, Union - -# --- -# Preprocessing -# --- +from .oracle import SimpleOracle ctypedef enum options: REF, @@ -19,29 +88,48 @@ ctypedef enum options: FUNCTION, WRAP_CHOICE -cdef inline wrap_choice(float weight, int id): - return (WRAP_CHOICE, weight, id) +# --- +# Preprocessing +# --- -cdef map_all_names_to_ids_expr(name_to_id, expr): +# For performance reasons, we use integers to identify symbols rather that +# strings during generation. These two functions do the substitutions and +# compute tables for mapping the names to their integer id and the ids to their +# original names. +# All of this is hidden from the user. + +cdef _map_all_names_to_ids_expr(name_to_id, expr): + """Recursively transform an expression into a triple of the form + (RULE_TYPE, weight, args) where: + - RULE_TYPE is of the values of the options enum (see above) + - weight is the value of the generating function of the expression + - args is auxilliary information (the name of an atom, the terms of an + union, ...) + """ if isinstance(expr, Ref): return (REF, expr.weight, name_to_id[expr.name]) elif isinstance(expr, Atom): return (ATOM, expr.weight, (expr.name, expr.size)) elif isinstance(expr, Union): - args = tuple((map_all_names_to_ids_expr(name_to_id, arg) for arg in expr.args)) + args = tuple((_map_all_names_to_ids_expr(name_to_id, arg) for arg in expr.args)) return (UNION, expr.weight, args) elif isinstance(expr, Product): - args = tuple((map_all_names_to_ids_expr(name_to_id, arg) for arg in expr.args)) + args = tuple((_map_all_names_to_ids_expr(name_to_id, arg) for arg in expr.args)) return (PRODUCT, expr.weight, args) -cdef map_all_names_to_ids(rules): +cdef _map_all_names_to_ids(rules): + """Assign an integer (identifier) to each symbol in the grammar and compute + two dictionnaries: + - one that maps the names to their identifiers + - one that maps the identifier to the original names + """ name_to_id = {} id_to_name = {} for i, name in enumerate(rules.keys()): name_to_id[name] = i id_to_name[i] = name rules = [ - map_all_names_to_ids_expr(name_to_id, rules[id_to_name[i]]) + _map_all_names_to_ids_expr(name_to_id, rules[id_to_name[i]]) for i in range(len(name_to_id)) ] return name_to_id, id_to_name, rules @@ -82,7 +170,10 @@ cdef int c_simulate(int id, float weight, int size_max, flat_rules, randstate rs # Actual generation # --- -cdef c_generate(int id, float weight, flat_rules, builders, randstate rstate): +cdef inline wrap_choice(float weight, int id): + return (WRAP_CHOICE, weight, id) + +cdef c_generate(int id, float weight, rules, builders, randstate rstate): generated = [] cdef list todo = [(REF, weight, id)] cdef float r = 0. @@ -91,7 +182,7 @@ cdef c_generate(int id, float weight, flat_rules, builders, randstate rstate): if type == REF: symbol = args todo.append((FUNCTION, weight, symbol)) - todo.append(flat_rules[symbol]) + todo.append(rules[symbol]) elif type == ATOM: atom_name, __ = args generated.append(atom_name) @@ -120,23 +211,27 @@ cdef c_generate(int id, float weight, flat_rules, builders, randstate rstate): generated.append(func(x)) elif type == WRAP_CHOICE: choice = generated.pop() - generated.append((args, choice)) + choice_number = args + generated.append((choice_number, choice)) obj, = generated return obj -cdef c_gen(int id, float weight, flat_rules, int size_min, int size_max, int max_retry, builders): +cdef c_gen(int id, float weight, rules, int size_min, int size_max, int max_retry, builders): cdef int nb_rejections = 0 cdef int cumulative_rejected_size = 0 cdef int size = -1 + # A handle on the random generator cdef randstate rstate = current_randstate() + # Allocate a gmp_randstate_t cdef gmp_randstate_t gmp_state gmp_randinit_default(gmp_state) while nb_rejections < max_retry: + # save the random generator's state gmp_randinit_set(gmp_state, rstate.gmp_state) - size = c_simulate(id, weight, size_max, flat_rules, rstate) + size = c_simulate(id, weight, size_max, rules, rstate) if size <= size_max and size >= size_min: break else: @@ -146,8 +241,9 @@ cdef c_gen(int id, float weight, flat_rules, int size_min, int size_max, int max if not(size <= size_max and size >= size_min): return None + # Reset the random generator to the state it was just before the simulation gmp_randinit_set(rstate.gmp_state, gmp_state) - obj = c_generate(id, weight, flat_rules, builders, rstate) + obj = c_generate(id, weight, rules, builders, rstate) statistics = { "size": size, "nb_rejections": nb_rejections, @@ -155,11 +251,35 @@ cdef c_gen(int id, float weight, flat_rules, int size_min, int size_max, int max } return statistics, obj +# --- +# Builders +# --- cdef inline identity(x): return x def UnionBuilder(*builders): + """"Helper for computing the builder of a union out of the builders of + its components. For instance, in order to compute the height of a binary + tree on the fly: + + EXAMPLES:: + + sage: # Grammar: B = Union(leaf, Product(z, B, B)) + sage: from sage.combinat.boltzmann_sampling.generator import UnionBuilder + sage: def leaf_builder(_): + ....: return 0 + sage: def node_builder(tuple): + ....: _, left, right = tuple + ....: return 1 + max(left, right) + sage: builder = UnionBuilder(leaf_builder, node_builder) + sage: choice_number = 0 + sage: builder((choice_number, "leaf")) + 0 + sage: choice_number = 1 + sage: builder((choice_number, ('z', 37, 23))) + 38 + """ def build(obj): index, content = obj builder = builders[index] @@ -167,11 +287,13 @@ def UnionBuilder(*builders): return build cdef inline ProductBuilder(builders): + """Default builder for product: return a tuple.""" def build(terms): return tuple(builders[i](terms[i]) for i in range(len(terms))) return build cdef make_default_builder(rule): + """Generate the default builders for a rule""" type, __, args = rule if type == REF: return identity @@ -184,18 +306,26 @@ cdef make_default_builder(rule): subbuilders = [make_default_builder(component) for component in args] return ProductBuilder(subbuilders) - class Generator: + """High level interface for Boltzmann samplers.""" + def __init__(self, grammar, oracle=None): + """Make a Generator out of a grammar. + + INPUT:: + + - ``grammar`` -- a combinatorial grammar + - ``oracle`` (default: None) -- an oracle for the grammar. If not + supplied, a default generic oracle is automatically generated. + """ # Load the default oracle if none is supplied if oracle is None: - from .oracle import OracleSimple - oracle = OracleSimple(grammar, e1=1e-6, e2=1e-6) + oracle = SimpleOracle(grammar, e1=1e-6, e2=1e-6) self.oracle = oracle # flatten the grammar for faster access to rules self.grammar = grammar self.grammar.annotate(oracle) - name_to_id, id_to_name, flat_rules = map_all_names_to_ids(grammar.rules) + name_to_id, id_to_name, flat_rules = _map_all_names_to_ids(grammar.rules) self.name_to_id = name_to_id self.id_to_name = id_to_name self.flat_rules = flat_rules @@ -206,14 +336,40 @@ class Generator: ] def set_builder(self, non_terminal, func): + """Set the builder for a non-terminal symbol. + + INPUT:: + + - ``non_terminal`` -- string, the name of the non-terminal symbol + - ``func`` -- function, the builder + """ symbol_id = self.name_to_id[non_terminal] self.builders[symbol_id] = func def get_builder(self, non_terminal): + """Retrieve the current builder for a non-terminal symbol. + + INPUT:: + + - ``non_terminal`` -- string, the name of the non-terminal symbol + """ symbol_id = self.name_to_id[non_terminal] return self.builders[symbol_id] def gen(self, name, window, max_retry=2000): + """Generate a term of the grammar in a given size window. + + INPUT:: + + - ``name`` -- string, the name of the symbol of the grammar you want to + generate + - ``window`` -- pair of integers, the size of the generated object will + be greater than the first component of the window and lower than the + second component + - ``max_retry`` (default: 2000) -- integer, maximum number of attempts. + If no object in the size window is found in less that ``max_retry`` + attempts, the generator returns None + """ id = self.name_to_id[name] weight = self.grammar.rules[name].weight size_min, size_max = window