In [None]:
%%HTML
<style>
.container { width: 100% }
</style>

# From Regular Expressions to <span style="font-variant:small-caps;">Fsm</span>s

The class `RegExp2NFA` administers two member variables:
- `Sigma` is the <em style="color:blue">alphabet</em>, i.e. the set of characters used.
- `StateCount` is a counter that is needed to create unique state names.

In [None]:
class RegExp2NFA:
    """Translator from regular expressions to finite state machines.

    The individual construction steps are attached to this class as
    methods further down in the notebook.
    """

    def __init__(self, Sigma):
        """Remember the alphabet and start with no states handed out.

        Sigma: the alphabet, i.e. the set of characters used.
        """
        # Sigma is stored as given (no copy); StateCount is the counter
        # from which unique state names are derived.
        self.Sigma, self.StateCount = Sigma, 0

The member function `toNFA` takes an object `self` of class `RegExp2NFA` and a regular expression `r` and returns a finite state machine 
that accepts the same language as described by `r`.  The regular expression is represented in `Python` as follows:
- The regular expression $\emptyset$ is represented as the number `0`.
- The regular expression $\varepsilon$ is represented as the empty string `''`.
- The regular expression $c$ that matches the character $c$ is represented by the character $c$.
- The regular expression $r_1 \cdot r_2$  is represented by the triple $\bigl(\texttt{'cat'}, \texttt{repr}(r_1), \texttt{repr}(r_2)\bigr)$.

  Here, and in the following, for a given regular expression $r$ the expression $\texttt{repr}(r)$ denotes the `Python` representation of the regular 
  expression $r$.
- The regular expression $r_1 + r_2$  is represented by the triple $\bigl(\texttt{'or'}, \texttt{repr}(r_1), \texttt{repr}(r_2)\bigr)$.
- The regular expression $r^*$  is represented by the pair $\bigl(\texttt{'star'}, \texttt{repr}(r)\bigr)$.

In [None]:
def toNFA(self, r):
    """Translate the regular expression r into a finite state machine.

    Encoding of r:
      - 0                        the empty language
      - ''                       the empty word
      - a string of length one   that single character
      - ('cat',  r1, r2)         concatenation of r1 and r2
      - ('or',   r1, r2)         alternative of r1 and r2
      - ('star', r1)             Kleene star of r1

    Composite expressions may be tuples or lists; items beyond the
    required operands are ignored.

    Returns the result of the matching builder method (genEmptyNFA,
    genEpsilonNFA, genCharNFA, catenate, disjunction or kleene), applied
    recursively to the sub-expressions.

    Raises ValueError if r, or one of its sub-expressions, does not follow
    the encoding above.  This includes values that cannot be subscripted
    and composite expressions with missing operands.
    """
    if r == 0:
        return self.genEmptyNFA()
    if isinstance(r, str):
        if r == '':
            return self.genEpsilonNFA()
        if len(r) == 1:
            return self.genCharNFA(r)
        # Longer strings are not regular expressions: fall through to the error.
    elif isinstance(r, (tuple, list)) and len(r) > 0:
        operator, operands = r[0], r[1:]
        # Check the operand count before subscripting so that a malformed
        # expression ends in the ValueError below rather than an IndexError.
        if operator == 'cat' and len(operands) >= 2:
            return self.catenate(self.toNFA(operands[0]), self.toNFA(operands[1]))
        if operator == 'or' and len(operands) >= 2:
            return self.disjunction(self.toNFA(operands[0]), self.toNFA(operands[1]))
        if operator == 'star' and len(operands) >= 1:
            return self.kleene(self.toNFA(operands[0]))
    raise ValueError(f'{r} is not a proper regular expression.')
    
# Install the function as a method of RegExp2NFA, then remove the
# module-level name so it is only reachable through the class.
RegExp2NFA.toNFA = toNFA
del toNFA

The <span style="font-variant:small-caps;">Fsm</span> `genEmptyNFA` is defined as
$$\langle \{ q_0, q_1 \}, \Sigma, \{\}, q_0, \{ q_1 \} \rangle. $$
Note that this <span style="font-variant:small-caps;">Fsm</span> has no transitions at all.

In [None]:
def genEmptyNFA(self):
    """Build a machine with two fresh states and no transitions at all.

    Returns the 5-tuple (states, Sigma, delta, start, accepting), where the
    first fresh state is the start state and the second one is the only
    accepting state.  As delta is empty, the accepting state is unreachable.
    """
    start = self.getNewState()
    accept = self.getNewState()
    return {start, accept}, self.Sigma, dict(), start, {accept}

# Install the function as a method of RegExp2NFA, then remove the
# module-level name so it is only reachable through the class.
RegExp2NFA.genEmptyNFA = genEmptyNFA
del genEmptyNFA

The <span style="font-variant:small-caps;">Fsm</span> `genEpsilonNFA` is defined as
$$\langle \{ q_0, q_1 \}, \Sigma, \{ \langle q_0, \varepsilon\rangle \mapsto \{q_1\} \}, q_0, \{ q_1 \} \rangle.$$

In [None]:
def genEpsilonNFA(self):
    """Build a machine whose only transition is an epsilon-move.

    Two fresh states are created; the empty string '' labels the single
    transition from the start state to the accepting state.

    Returns the 5-tuple (states, Sigma, delta, start, accepting).
    """
    start = self.getNewState()
    accept = self.getNewState()
    transitions = {}
    transitions[start, ''] = {accept}
    return {start, accept}, self.Sigma, transitions, start, {accept}

# Install the function as a method of RegExp2NFA, then remove the
# module-level name so it is only reachable through the class.
RegExp2NFA.genEpsilonNFA = genEpsilonNFA 
del genEpsilonNFA

For a letter $c \in \Sigma$ the <span style="font-variant:small-caps;">Fsm</span> `genCharNFA`$(c)$ is defined as 
$$ A(c) = \langle \{ q_0, q_1 \}, \Sigma, 
                                \{ \langle q_0, c \rangle \mapsto \{q_1\}\}, q_0, \{ q_1 \} \rangle. $$

In [None]:
def genCharNFA(self, c):
    """Build a machine with a single transition labelled c.

    Two fresh states are created; reading c in the start state leads to
    the accepting state.

    Returns the 5-tuple (states, Sigma, delta, start, accepting).
    """
    start = self.getNewState()
    accept = self.getNewState()
    transitions = {}
    transitions[start, c] = {accept}
    return {start, accept}, self.Sigma, transitions, start, {accept}

# Install the function as a method of RegExp2NFA, then remove the
# module-level name so it is only reachable through the class.
RegExp2NFA.genCharNFA = genCharNFA
del genCharNFA

In [None]:
def catenate(self, f1, f2):
    """Chain two machines so that f2 runs after f1.

    f1 and f2 are 5-tuples (states, Sigma, delta, start, accepting).  The
    transition tables are merged into a new dictionary, and an
    epsilon-transition (label '') is added from an accepting state of f1,
    picked with arb, to the start state of f2.

    Returns (states of f1 | states of f2, Sigma of f2, merged delta,
    start of f1, accepting states of f2).  No new states are created.
    """
    # The alphabet of f1 is not needed: the result carries the one from f2.
    M1, _, delta1, q1, A1 = f1
    M2, Sigma, delta2, q3, A2 = f2
    # Every builder in this notebook returns exactly one accepting state,
    # so arb picks that state.
    q2 = arb(A1)
    delta = dict_union(delta1, delta2)
    delta[q2, ''] = {q3}
    return M1 | M2, Sigma, delta, q1, A2

# Install the function as a method of RegExp2NFA, then remove the
# module-level name so it is only reachable through the class.
RegExp2NFA.catenate = catenate
del catenate

In [None]:
def disjunction(self, f1, f2):
    """Combine two machines into one that offers both as alternatives.

    f1 and f2 are 5-tuples (states, Sigma, delta, start, accepting).  A new
    start state and a new accepting state are created.  Epsilon-transitions
    (label '') lead from the new start state to both old start states, and
    from one accepting state of each machine (picked with arb) to the new
    accepting state.

    Returns (all states, Sigma of f2, merged delta, new start state,
    {new accepting state}).
    """
    states1, _, trans1, start1, accepting1 = f1
    states2, Sigma, trans2, start2, accepting2 = f2
    end1, end2 = arb(accepting1), arb(accepting2)
    start = self.getNewState()
    final = self.getNewState()
    delta = dict_union(trans1, trans2)
    delta.update({
        (start, ''): {start1, start2},
        (end1, ''): {final},
        (end2, ''): {final},
    })
    return {start, final} | states1 | states2, Sigma, delta, start, {final}
    
# Install the function as a method of RegExp2NFA, then remove the
# module-level name so it is only reachable through the class.
RegExp2NFA.disjunction = disjunction
del disjunction

In [None]:
def kleene(self, f):
    """Wrap a machine so that it may be run zero or more times.

    f is a 5-tuple (states, Sigma, delta, start, accepting).  A new start
    state and a new accepting state are created.  Epsilon-transitions
    (label '') lead from the new start state, and from an accepting state
    of f (picked with arb), to both the old start state and the new
    accepting state.

    The transition table of f is copied before the new entries are added,
    so the machine passed in is left unchanged.

    Returns (all states, Sigma, extended delta, new start state,
    {new accepting state}).
    """
    M, Sigma, delta0, q1, A = f
    q2 = arb(A)
    q0 = self.getNewState()
    q3 = self.getNewState()
    # Copy rather than alias: writing into delta0 would modify the caller's f.
    delta = dict(delta0)
    delta[q0, ''] = { q1, q3 }
    delta[q2, ''] = { q1, q3 }
    return { q0, q3 } | M, Sigma, delta, q0, { q3 }

# Install the function as a method of RegExp2NFA, then remove the
# module-level name so it is only reachable through the class.
RegExp2NFA.kleene = kleene
del kleene

In [None]:
def getNewState(self):
    """Hand out a state name that has not been used before.

    Increments self.StateCount and returns the new value, so consecutive
    calls yield consecutive numbers.
    """
    fresh = self.StateCount + 1
    self.StateCount = fresh
    return fresh

# Install the function as a method of RegExp2NFA, then remove the
# module-level name so it is only reachable through the class.
RegExp2NFA.getNewState = getNewState
del getNewState

In [None]:
def arb(S):
    """Return some element of the iterable S, or None if S is empty.

    The element returned is the first one that iteration over S produces.
    """
    return next(iter(S), None)

In [None]:
def dict_union(d1, d2):
    """Return a new dictionary holding the entries of d1 and d2.

    Neither argument is modified.  If a key occurs in both dictionaries,
    the value from d2 is used.
    """
    merged = dict(d1)
    merged.update(d2)
    return merged

In [None]:
# Example: both dictionaries contain the key 'c', so the merge has a
# conflict to resolve.
A = { 'a': 1, 'c': 3 }
B = { 'b': 2, 'c': 4 }
# In the displayed result the entry for 'c' carries the value from B.
dict_union(A, B)

In [None]:
# Display A and B again to check that dict_union left its arguments unchanged.
A, B