# NFA and DFA algorithms

Here is a DFA with 2 states, 0 and 1, where 0 is both the start state and a final state. Each edge is a tuple `(source, target, label)`.

In [9]:
# Two-state DFA; each edge is a (source, target, label) triple.
dfa1 = {
    'start': 0,
    'final': {0},
    'edges': {
        (0, 1, 'a'),
        (1, 0, 'b'),
        (1, 0, 'c'),
    },
}


# Algorithm to check if a DFA accepts a string

In [118]:
def edge(dfa,state,c):
    ''' returns the single state reached from state by an edge labelled c,
        or the string 'error' when there is not exactly one such edge '''
    targets = []
    # each edge is a (source, target, label) triple
    for src, dst, label in dfa['edges']:
        if src == state and label == c:
            targets.append(dst)
    return targets[0] if len(targets) == 1 else 'error'

def accepts(dfa,chars):
    ''' returns True if following the edges of the dfa for each character of chars,
        beginning at the start state, ends in a final state; prints every step taken '''
    current = dfa['start']
    for symbol in chars:
        following = edge(dfa,current,symbol)
        print('traverse edge',current,symbol,following)
        # edge() signals "no unique transition" with the string 'error'
        if following=='error':
            return False
        current = following
    return current in dfa['final']
    

In [119]:
# Run dfa1 on the string 'abac'; accepts() prints each edge it traverses.
accepts(dfa1,'abac')

traverse edge 0 a 1
traverse edge 1 b 0
traverse edge 0 a 1
traverse edge 1 c 0


True

# The epsilon closure of a set of nodes

In [165]:
def epsilon_edge(NFA,S):
    ''' returns the set of target states of the edges labelled 'epsilon'
        whose source state is a member of S '''
    targets = set()
    # each edge is a (source, target, label) triple
    for src, dst, label in NFA['edges']:
        if src in S and label == 'epsilon':
            targets.add(dst)
    return targets

def closure(NFA,S):
    ''' returns the set of states that can be reached from a state in S
        using only epsilon edges (S itself included); prints each expansion step '''
    reached = S
    while True:
        frontier = epsilon_edge(NFA,reached)
        print('epsilon edges',reached,'epsilon',reached.union(frontier))
        # stop once a step adds no state that is not already in the set
        if frontier.issubset(reached):
            return reached
        reached = reached.union(frontier)
     


In [46]:
# NFA without epsilon edges; state 1 has two different 'b' edges.
nfa1 = {
    'start': 0,
    'final': {0, 2},
    'edges': {
        (0, 1, 'a'),
        (1, 0, 'b'),
        (1, 0, 'c'),
        (1, 2, 'b'),
        (2, 0, 'c'),
    },
}

In [68]:
# NFA that mixes epsilon edges with character-labelled edges.
nfa2 = {
    'start': 0,
    'final': {0, 2},
    'edges': {
        # epsilon edges
        (0, 1, 'epsilon'),
        (1, 2, 'epsilon'),
        (1, 3, 'epsilon'),
        (2, 3, 'epsilon'),
        (3, 2, 'epsilon'),
        # character edges
        (1, 3, 'a'),
        (2, 2, 'a'),
        (3, 3, 'b'),
        (2, 3, 'c'),
    },
}

In [166]:
# Epsilon closure of {0}, the start state of nfa2; closure() prints each expansion step.
closure(nfa2,{0})

epsilon edges {0} epsilon {0, 1}
epsilon edges {0, 1} epsilon {0, 1, 2, 3}
epsilon edges {0, 1, 2, 3} epsilon {0, 1, 2, 3}


{0, 1, 2, 3}

# NFA accepting a string

In [167]:
def dfa_edge(nfa,s,c):
    ''' returns the epsilon closure of the set of states that are the target
        of an edge labelled c leaving some state in s '''
    moved = set()
    # each edge is a (source, target, label) triple
    for src, dst, label in nfa['edges']:
        if src in s and label == c:
            moved.add(dst)
    return closure(nfa,moved)

def is_final(nfa,state):
    ''' returns True when the set of nfa states shares at least one member with nfa['final'] '''
    return not state.isdisjoint(nfa['final'])

def nfa_accepts(nfa,chars):
    ''' returns True if the nfa accepts the string of chars; simulates the nfa on
        sets of states and prints every dfa_edge step '''
    # begin with everything reachable from the start state by epsilon edges alone
    current = closure(nfa,{nfa['start']})
    for symbol in chars:
        previous = current
        current = dfa_edge(nfa,previous,symbol)
        print('dfa_edge',previous,symbol,current)
    return is_final(nfa,current)

In [163]:
# Show nfa1, then simulate it on 'abcabac'; the closure and dfa_edge steps are printed.
print(nfa1)
nfa_accepts(nfa1,'abcabac')

{'start': 0, 'final': {0, 2}, 'edges': {(0, 1, 'a'), (2, 0, 'c'), (1, 2, 'b'), (1, 0, 'b'), (1, 0, 'c')}}
epsilon edges {0} epsilon {0}
epsilon edges {1} epsilon {1}
dfa_edge {0} a {1}
epsilon edges {0, 2} epsilon {0, 2}
dfa_edge {1} b {0, 2}
epsilon edges {0} epsilon {0}
dfa_edge {0, 2} c {0}
epsilon edges {1} epsilon {1}
dfa_edge {0} a {1}
epsilon edges {0, 2} epsilon {0, 2}
dfa_edge {1} b {0, 2}
epsilon edges {1} epsilon {1}
dfa_edge {0, 2} a {1}
epsilon edges {0} epsilon {0}
dfa_edge {1} c {0}


True

In [164]:
# Simulate nfa2, which contains epsilon edges, on 'abaac'.
nfa_accepts(nfa2,'abaac')

epsilon edges {0} epsilon {0, 1}
epsilon edges {0, 1} epsilon {0, 1, 2, 3}
epsilon edges {0, 1, 2, 3} epsilon {0, 1, 2, 3}
epsilon edges {2, 3} epsilon {2, 3}
dfa_edge {0, 1, 2, 3} a {2, 3}
epsilon edges {3} epsilon {2, 3}
epsilon edges {2, 3} epsilon {2, 3}
dfa_edge {2, 3} b {2, 3}
epsilon edges {2} epsilon {2, 3}
epsilon edges {2, 3} epsilon {2, 3}
dfa_edge {2, 3} a {2, 3}
epsilon edges {2} epsilon {2, 3}
epsilon edges {2, 3} epsilon {2, 3}
dfa_edge {2, 3} a {2, 3}
epsilon edges {3} epsilon {2, 3}
epsilon edges {2, 3} epsilon {2, 3}
dfa_edge {2, 3} c {2, 3}


True

# Homework problem 1: longest match
Define the function `longest_match`, which returns the longest prefix of a string that is accepted by the NFA (or DFA).

# NFA to DFA
To convert an NFA to a DFA, we do a breadth-first search over the sets of NFA states
that are reachable from the closure of the start state via `dfa_edge`; each such set becomes one DFA state.

# Homework problem 2: nfa to dfa
Write the nfa_to_dfa(nfa) function

# NFA with multiple final states
A tokenizer will have different final states for different tokens,
and the final states should be listed in priority order, so that if a string matches two final states,
the first one determines the token.

# Homework problem 3: Tokenizer
Write a tokenizer which accepts a list of token definitions of the form
```
(token_name,  NFA)
```
and generates a DFA for recognizing those tokens!