### Imports

In [None]:
from rayuela.base.semiring import Boolean, Real, Tropical, String, product_semiring_builder
from rayuela.base.symbol import Sym, ε
from rayuela.fsa.fsa import FSA
from rayuela.fsa.fst import FST
from rayuela.fsa.state import State
from rayuela.fsa.transformer import Transformer

In [None]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Example 1

Using the product semiring

In [None]:
# Build the String x Real product semiring class with the library's builder helper.
StringReal = product_semiring_builder(String, Real)

# An element of the product semiring is constructed from one value of each
# component semiring (here: a String value and a Real value).
funny, funnier = (
    StringReal(String(text), Real(value))
    for text, value in (('funny', 12.0), ('funicular', 3.0))
)

# Semiring addition of the two elements ...
semiring_sum = funny + funnier
print(semiring_sum)

# ... followed by semiring multiplication.
semiring_product = funny * funnier
print(semiring_product)

### Encoding a Transducer as an Acceptor

In [None]:
# A transducer is represented here as an acceptor whose weights live in the
# product of the String semiring (output words) and the Real semiring (weights).
StringReal = product_semiring_builder(String, Real)

fst = FSA(StringReal)

# One row per arc: (source, input_symbol, target, output_string, weight).
# Rows are listed in the order in which the arcs are added.
arc_table = [
    (0, 'a', 1, 'x', 0.23),
    (0, 'a', 3, 'zz', 0.42),
    (1, 'c', 1, 'w', 0.46),
    (1, 'b', 1, 'x', 0.33),
    (1, 'd', 2, 'xw', 0.46),
    (2, 'c', 1, 'yy', 0.46),
    (2, 'a', 3, 'x', 0.46),
]
for source, symbol, target, output, weight in arc_table:
    fst.add_arc(
        State(source),
        Sym(symbol),
        State(target),
        StringReal(String(output), Real(weight)),
    )

# The string component of the initial and final weights is the empty string,
# i.e. the string multiplicative unit.
fst.set_I(State(0), StringReal(String(''), Real(0.92)))
fst.set_F(State(3), StringReal(String(''), Real(0.1)))

fst

A simple language model encoded as an FSA

In [None]:
# Word-level language model as an acceptor weighted in the Real semiring.
fsa = FSA(Real)

# One row per arc: (source, word, target, weight), in insertion order.
lm_arcs = [
    (0, 'formal', 1, 0.2),
    (0, 'natural', 1, 0.3),
    (0, 'learning', 2, 0.2),
    (0, 'data', 3, 0.3),
    (1, 'language', 2, 0.6),
    (1, 'languages', 4, 0.4),
    (2, 'is', 5, 1.0),
    (5, 'fun', 6, 1.0),
    (3, 'is', 5, 0.5),
    (3, 'are', 5, 0.5),
    (4, 'are', 5, 1.0),
]
for source, word, target, weight in lm_arcs:
    fsa.add_arc(State(source), Sym(word), State(target), Real(weight))

# State 0 is initial and state 6 is final; no explicit weights are passed.
fsa.set_I(State(0))
fsa.set_F(State(6))

fsa

The WFSA from above encoded as a WFST

In [None]:
StringReal = product_semiring_builder(String, Real)

fst = FSA(StringReal)

# Same topology and weights as the Real-weighted language model, but every arc
# also emits its own input word as the String component of the weight.
# One row per arc: (source, word, target, weight), in insertion order.
identity_arcs = [
    (0, 'formal', 1, 0.2),
    (0, 'natural', 1, 0.3),
    (0, 'learning', 2, 0.2),
    (0, 'data', 3, 0.3),
    (1, 'language', 2, 0.6),
    (1, 'languages', 4, 0.4),
    (2, 'is', 5, 1.0),
    (5, 'fun', 6, 1.0),
    (3, 'is', 5, 0.5),
    (3, 'are', 5, 0.5),
    (4, 'are', 5, 1.0),
]
for source, word, target, weight in identity_arcs:
    fst.add_arc(
        State(source),
        Sym(word),
        State(target),
        StringReal(String(word), Real(weight)),
    )

# State 0 is initial and state 6 is final; no explicit weights are passed.
fst.set_I(State(0))
fst.set_F(State(6))

fst

In [None]:
StringReal = product_semiring_builder(String, Real)

fst = FSA(StringReal)

# Smaller machine in the same encoding: each arc emits its own input word.
# One row per arc: (source, word, target, weight), in insertion order.
small_arcs = [
    (0, 'learning', 2, 0.2),
    (0, 'leaning', 2, 0.2),
    (2, 'is', 5, 1.0),
    (2, 'are', 5, 1.0),
    (5, 'fun', 6, 1.0),
]
for source, word, target, weight in small_arcs:
    fst.add_arc(
        State(source),
        Sym(word),
        State(target),
        StringReal(String(word), Real(weight)),
    )

# State 0 is initial and state 6 is final; no explicit weights are passed.
fst.set_I(State(0))
fst.set_F(State(6))

fst

A WFSA for _transliteration_

In [None]:
StringReal = product_semiring_builder(String, Real)

fst = FSA(StringReal)

# One row per arc: (source, phone, target, output_string, weight), in
# insertion order. The full output word is emitted on the first arc of a path.
# NOTE(review): 'ε' below is a literal one-character String value, not the
# empty string '' used as the string unit elsewhere in this notebook —
# confirm this is intended (e.g. for display purposes).
translit_arcs = [
    (0, 'd', 1, 'data', 0.5),
    (0, 'd', 5, 'dew', 0.5),
    (1, 'ey', 2, 'ε', 0.5),
    (1, 'ae', 2, 'ε', 0.5),
    (2, 't', 3, 'ε', 0.7),
    (2, 'dx', 3, 'ε', 0.3),
    (3, 'ax', 4, 'ε', 1.0),
    (5, 'uw', 6, 'ε', 1.0),
]
for source, phone, target, output, weight in translit_arcs:
    fst.add_arc(
        State(source),
        Sym(phone),
        State(target),
        StringReal(String(output), Real(weight)),
    )

# One initial state and two final states, one per output word.
fst.set_I(State(0))
for final_state in (4, 6):
    fst.set_F(State(final_state))

fst

### Using the FST class directly

In [None]:
# Initialize the transducer directly with the weight semiring we want.
fst = FST(Real)

# With the FST class every arc carries *two* symbols (input and output) and
# the weight is a plain value of the semiring.
# One row per arc: (source, input_symbol, output_symbol, target, weight).
fst_arcs = [
    (0, 'd', 'data', 1, 0.5),
    (0, 'd', 'dew', 5, 0.5),
    (1, 'ey', 'ε', 2, 0.5),
    (1, 'ae', 'ε', 2, 0.5),
    (2, 't', 'ε', 3, 0.7),
    (2, 'dx', 'ε', 3, 0.3),
    (3, 'ax', 'ε', 4, 1.0),
    (5, 'uw', 'ε', 6, 1.0),
]
for source, in_sym, out_sym, target, weight in fst_arcs:
    fst.add_arc(State(source), Sym(in_sym), Sym(out_sym), State(target), Real(weight))

# One initial state and two final states.
fst.set_I(State(0))
for final_state in (4, 6):
    fst.set_F(State(final_state))

fst

In [None]:
# Each item yielded by fst.arcs(State(0)) unpacks into three parts; only the
# first is printed (presumably the input/output symbol pair — confirm against
# the FST implementation).
for label, target, weight in fst.arcs(State(0)):
    print(label)

### Composition

In [None]:
# Mohri(1997), Fig. 2
# Unweighted transducer: every arc carries the Boolean semiring's `one`.
fst1 = FST(Boolean)
one = Boolean.one

# One row per arc: (source, input_symbol, output_symbol, target).
fst1_arcs = [
    (0, 'a', 'a', 1),
    (0, 'b', 'a', 2),
    (1, 'a', 'a', 3),
    (2, 'b', 'b', 3),
]
for source, in_sym, out_sym, target in fst1_arcs:
    fst1.add_arc(State(source), Sym(in_sym), Sym(out_sym), State(target), one)

fst1.set_I(State(0))
fst1.set_F(State(3))

fst1

In [None]:
# Second unweighted transducer; every arc carries the Boolean semiring's `one`.
fst2 = FST(Boolean)
one = Boolean.one

# One row per arc: (source, input_symbol, output_symbol, target).
fst2_arcs = [
    (0, 'a', 'b', 1),
    (1, 'a', 'b', 2),
    (1, 'b', 'a', 2),
]
for source, in_sym, out_sym, target in fst2_arcs:
    fst2.add_arc(State(source), Sym(in_sym), Sym(out_sym), State(target), one)

fst2.set_I(State(0))
fst2.set_F(State(2))

fst2

In [None]:
# naïve composition
# Compose the two Boolean transducers `fst1` and `fst2` built in this section.
# The left operand must be `fst1`: the name `fst` still refers to the
# Real-weighted transliteration transducer from the earlier section, which is
# over a different semiring than `fst2`.
comp1 = fst1.top_compose_brute(fst2)
comp1

In [None]:
# on-the-fly composition
# Compose the two Boolean transducers `fst1` and `fst2` built in this section.
# The left operand must be `fst1`: the name `fst` still refers to the
# Real-weighted transliteration transducer from the earlier section, which is
# over a different semiring than `fst2`.
comp2 = fst1.top_compose(fst2)
comp2

## Example 2: Epsilon Removal


In [None]:
# Real-weighted automaton with a single ε-arc (1 --ε--> 2) to be removed.
fsa = FSA(Real)

# One row per arc: (source, symbol, target, weight), in insertion order.
eps_arcs = [
    (0, Sym('a'), 1, 0.1),
    (1, ε, 2, 0.2),
    (0, Sym('a'), 2, 0.3),
]
for source, symbol, target, weight in eps_arcs:
    fsa.add_arc(State(source), symbol, State(target), Real(weight))

# Initial and final weights are given explicitly here.
fsa.add_I(State(0), Real(0.5))
fsa.add_F(State(2), Real(0.8))

fsa

In [None]:
# Apply the library's epsilon-removal transformation to the automaton built in
# the previous cell; the call's result is shown as the cell output.
Transformer.epsremoval(fsa)

## Example 3: Determinization

Determinization of an unweighted finite state automaton.

In [None]:
# An unweighted FSA is modelled as a WFSA over the Boolean semiring, with
# every arc weighted Boolean(True).
fsa = FSA(Boolean)

# One row per arc: (source, symbol, target), in insertion order.
nfa_arcs = [
    (0, 'a', 0),
    (0, 'a', 1),
    (0, 'a', 2),
    (0, 'b', 2),
    (0, 'b', 3),
    (2, 'a', 1),
    (2, 'b', 3),
    (1, 'b', 1),
    (1, 'b', 3),
]
for source, symbol, target in nfa_arcs:
    fsa.add_arc(State(source), Sym(symbol), State(target), Boolean(True))

# Two initial states (0 and 1) and one final state (3).
for initial_state in (0, 1):
    fsa.set_I(State(initial_state))
fsa.set_F(State(3))

fsa

In [None]:
# Determinize the Boolean-weighted automaton built in the previous cell; the
# call's result is shown as the cell output.
Transformer.determinize(fsa)

## Example 4: Weighted Determinization

Weighted determinization of a weighted finite state automaton.

In [None]:
# Non-deterministic automaton over the Tropical semiring: state 0 has two
# 'a'-arcs, leading to states 1 and 2.
fsa = FSA(Tropical)

# One row per arc: (source, symbol, target, weight), in insertion order.
tropical_arcs = [
    (0, 'a', 1, 1),
    (0, 'a', 2, 2),
    (1, 'b', 1, 3),
    (1, 'c', 3, 5),
    (2, 'b', 2, 3),
    (2, 'd', 3, 6),
]
for source, symbol, target, weight in tropical_arcs:
    fsa.add_arc(State(source), Sym(symbol), State(target), Tropical(weight))

fsa.set_I(State(0))
fsa.set_F(State(3))

fsa

Determinize the automaton above.

The `Transformer` class in `rayuela` contains many transformation operations on a single WFSA, including determinization.

In [None]:
# Determinize the Tropical-weighted automaton built above; the call's result
# is shown as the cell output.
Transformer.determinize(fsa)

## Example 5: The Twins Property

The twins property

A non-subsequential weighted finite state automaton.

In [None]:
# Tropical-weighted automaton in which the 'b' self-loops at states 1 and 2
# carry different weights (3 and 4).
fsa = FSA(Tropical)

# One row per arc: (source, symbol, target, weight), in insertion order.
unequal_loop_arcs = [
    (0, 'a', 1, 1),
    (0, 'a', 2, 2),
    (1, 'b', 1, 3),
    (1, 'c', 3, 5),
    (2, 'b', 2, 4),
    (2, 'd', 3, 6),
]
for source, symbol, target, weight in unequal_loop_arcs:
    fsa.add_arc(State(source), Sym(symbol), State(target), Tropical(weight))

fsa.set_I(State(0))
fsa.set_F(State(3))

fsa

In [None]:
# Run the library's twins check on the automaton above, whose 'b' self-loops
# have weights 3 and 4 (presumably reports whether the twins property holds —
# confirm against the Transformer implementation).
Transformer.twins(fsa)

In [None]:
# Same topology as the previous twins example, but here both 'b' self-loops
# (at states 1 and 2) carry the same weight, 3.
fsa = FSA(Tropical)

# One row per arc: (source, symbol, target, weight), in insertion order.
equal_loop_arcs = [
    (0, 'a', 1, 1),
    (0, 'a', 2, 2),
    (1, 'b', 1, 3),
    (1, 'c', 3, 5),
    (2, 'b', 2, 3),
    (2, 'd', 3, 6),
]
for source, symbol, target, weight in equal_loop_arcs:
    fsa.add_arc(State(source), Sym(symbol), State(target), Tropical(weight))

fsa.set_I(State(0))
fsa.set_F(State(3))

fsa

In [None]:
# Run the library's twins check on the automaton above, whose 'b' self-loops
# both have weight 3 (presumably reports whether the twins property holds —
# confirm against the Transformer implementation).
Transformer.twins(fsa)