# Overview

This module converts an NFA into an equivalent regular expression (RE). It does so by eliminating the NFA's states one at a time, building an abstract syntax tree for the RE obtained at each stage. Finally, the abstract syntax tree is rendered into an RE string.

# Top-level functions in this module

```
This module contains the following functions that may be used in other modules to exercise concepts, compose functions, etc.

N  : NFA
D  : DFA
G  : GNFA

def mk_gnfa(N):
def mk_gnfa_from_D(D):
def dfa2nfa(D):
def del_gnfa_states(Gin):
```

In [None]:
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
import sys

# -- Detect if in Own Install or in Colab
try:
    import google.colab
    OWN_INSTALL = False
except Exception:
    # Any ordinary failure while importing google.colab is taken to mean
    # "not running in Colab". Unlike a bare `except:`, this lets
    # KeyboardInterrupt and SystemExit propagate instead of being
    # silently misread as "own install".
    OWN_INSTALL = True
    
if OWN_INSTALL:
    
  #---- Leave these definitions ON if running on laptop
  #---- Else turn OFF by putting them between ''' ... '''

  sys.path[0:0] = ['../../../../..',  '../../../../../3rdparty',  
                   '../../../..',  '../../../../3rdparty',  
                   '../../..',     '../../../3rdparty', 
                   '../..',        '../../3rdparty',
                   '..',           '../3rdparty' ]

else: # In colab
  ! if [ ! -d Jove ]; then git clone https://github.com/anon-Jove/Jove Jove; fi
  sys.path.append('./Jove')
  sys.path.append('./Jove/jove')

# -- common imports --
from jove.Module5_RE  import re2nfa

from jove.Module4_NFA import nfa2dfa

from jove.Module4_NFA import min_dfa_brz

from jove.Module3_DFA import min_dfa

from jove.SystemImports import *    

from jove.DotBashers  import *
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

## Algorithm

The algorithm is spelled out along these major steps:

* If given a DFA, convert it to an NFA, else proceed with given NFA

* Pick a state to eliminate 

    - doing this interactively, state by state, would make for a good interactive animation

* Find replacement paths for the state

* Repeat until every state except the GNFA's initial and final states has been eliminated
 

In [None]:
def opr(E):
    """Return the operator tag stored at position 0 of the RE tree node E.
    """
    operator_tag = E[0]
    return operator_tag

def arg1(E):
    """Return the first operand of a binary RE tree node E, i.e. the
       first component of the operand pair stored at position 1.
    """
    operands = E[1]
    return operands[0]

def arg2(E):
    """Return the second operand of a binary RE tree node E, i.e. the
       second component of the operand pair stored at position 1.
    """
    operands = E[1]
    return operands[1]

def arg(E):
    """Return whatever is stored at position 1 of the RE tree node E
       (the sole operand of a unary node such as a star).
    """
    operand = E[1]
    return operand

In [None]:
def RE2Str(RE):
    """Given an RE as a tree, return the string equivalent of the RE.

       Tree shapes handled:
         str               : a leaf; the empty string (epsilon) is
                             rendered as '""', any other string as itself
         ('*', R)          : rendered as "(R)*"
         ('+', (R1, R2))   : rendered as "(R1 + R2)"
         ('.', (R1, R2))   : rendered as "(R1 R2)"

       Raises ValueError for anything else. Previously an unknown operator
       printed a message and returned None (and a non-str, non-tuple input
       returned None silently), which made the enclosing string
       concatenation fail with an unrelated TypeError.
    """
    if isinstance(RE, str):
        # Epsilon is spelled as an explicit pair of double quotes
        # (it used to be '@', which is no longer used).
        return '""' if RE == "" else RE

    if not isinstance(RE, tuple) or len(RE) < 2:
        raise ValueError("Illegal RE detected in RE2Str: {!r}".format(RE))

    # Same positions the opr/arg/arg1/arg2 accessors read.
    operator_tag, operand = RE[0], RE[1]

    if operator_tag == "*":
        return "(" + RE2Str(operand) + ")*"
    if operator_tag == "+":
        return "(" + RE2Str(operand[0]) + " + " + RE2Str(operand[1]) + ")"
    if operator_tag == ".":
        return "(" + RE2Str(operand[0]) + " " + RE2Str(operand[1]) + ")"

    raise ValueError("Illegal RE detected in RE2Str: {!r}".format(RE))

In [None]:
RE2Str("")

In [None]:
def mk_gnfa(Nin):
    """Input : Nin, an NFA (it is deep-copied, never modified).
       Output: G, a GNFA represented as an NFA-style dict, so a
               (state, label) pair maps to A SET OF STATES.
       Method: Add two fresh states, Real_I and Real_F.
               Real_I jumps on epsilon ("") to every original initial
               state, and every original final state jumps on epsilon
               to Real_F. Real_I / Real_F become the only initial and
               final states.
               An epsilon move that a final state already has is KEPT:
               Real_F is added to its target set. (Previously the
               existing targets were overwritten, which could change
               the language, e.g. when a final state has an epsilon
               move back into the machine.)
               The GNFA's alphabet is kept implicit; "Sigma" is simply
               copied from the NFA.
       Note  : The names Real_I and Real_F are reserved; the input NFA
               is assumed not to use them as state names.
    """
    assert(
        is_consistent_nfa(Nin)
    ), "NFA given to mk_gnfa is not consistent."
    N       = copy.deepcopy(Nin)
    GNFA_Q0 = {"Real_I"} # Name Real_I reserved for GNFA's starting state
    GNFA_F  = {"Real_F"} # Name Real_F reserved for GNFA's final state
    GNFA_Q  = N["Q"] | GNFA_Q0 | GNFA_F
    # Start with NFA's moves in Delta accumulator
    GNFA_Delta = N["Delta"]
    # Add a jump from Real_I to the original initial state(s); use a
    # fresh set so the edge does not share an object with N["Q0"]
    GNFA_Delta[("Real_I", "")] = set(N["Q0"])
    # Add all "original final" to "Real_F" moves, merging with any
    # epsilon move the final state already has. Each edge gets its own
    # set object, so no edge aliases the returned "F" set.
    for f in N["F"]:
        GNFA_Delta[(f, "")] = GNFA_Delta.get((f, ""), set()) | {"Real_F"}
    # Return the GNFA
    return { "Q"     : GNFA_Q,
             "Sigma" : N["Sigma"],
             "Delta" : GNFA_Delta,
             "Q0"    : GNFA_Q0,
             "F"     : GNFA_F }

def mk_gnfa_from_D(D):
    """Build a GNFA from a DFA D: D is first converted to an NFA with
       dfa2nfa, and that NFA is then handed to mk_gnfa.
       D must pass is_partially_consistent_dfa.
    """
    assert is_partially_consistent_dfa(D), \
        "DFA given to mk_gnfa_from_D is not part. consist."
    nfa_of_D = dfa2nfa(D)
    return mk_gnfa(nfa_of_D)

def dfa2nfa(D):
    """Re-express the DFA D as an NFA dict.
       Each DFA move (state, symbol) -> target becomes the NFA move
       (state, symbol) -> {target}, and the single initial state q0
       becomes the singleton set Q0. Q, Sigma and F are passed through
       as the same objects held by D.
       D must pass is_partially_consistent_dfa.
    """
    assert is_partially_consistent_dfa(D), \
        "DFA given to dfa2nfa is not part. consist."
    states   = D["Q"]
    alphabet = D["Sigma"]
    # Wrap every target state in a singleton set
    moves = { move : {target} for (move, target) in D["Delta"].items() }
    initial = { D["q0"] }
    finals  = D["F"]
    return { "Q"     : states,
             "Sigma" : alphabet,
             "Delta" : moves,
             "Q0"    : initial,
             "F"     : finals }

def del_gnfa_states(Gin):
    """Given a GNFA G with no unreachable states, delete all states
       except Real_I and Real_F. Gin itself is not modified (a deep
       copy is worked on).

       Return a triple (Gfinal, dotObj_List, final_re_str), where
         Gfinal       : the final GNFA: states Real_I and Real_F joined
                        by one edge labeled with the final RE tree
         dotObj_List  : a list of Dot objects recording the process of
                        deleting states and forming intermediate REs
         final_re_str : the final RE as a string (ready to be fed to
                        re2nfa for converting back to an NFA)

       Raises ValueError if, after all eliminations, there is no edge
       from Real_I to Real_F (the GNFA accepts nothing). Previously the
       "NOEDGE" sentinel string was fed to form_alt_RE in that case and
       a meaningless RE built from its characters was returned.

       For each surviving pair (p, q), the label of the new path through
       the deleted state is alternated with the labels of existing p->q
       edges, and those existing edges are then retired, so each pair
       ends up with a single combined edge. Edges sharing the same
       (p, label) key but leading to different targets are merged by
       set union rather than overwriting one another.
    """
    G = copy.deepcopy(Gin) # To preserve the given GNFA
    StatesLeft  = G["Q"]
    dotObj_List = [ dotObj_gnfa(G) ] # List of intermediate GNFAs
    while len(StatesLeft) > 2: # Exists one more than Real_I,Real_F
        (qdel, StatesLeft) = choose_state_to_del(G, StatesLeft)
        print("**** Eliminating state " + str(qdel) + " ****")

        New_Edges  = dict() #-- (p, label) -> set of targets reached via
                            #-- the brand new paths supported by qdel
        Superseded = dict() #-- (p, old_label) -> set of q whose old
                            #-- p->q edge is folded into a combined label
        # G is only read inside these loops; all changes to Delta are
        # applied afterwards, so every pair sees the same pre-deletion G.
        for p in StatesLeft:
            for q in StatesLeft:
                new_p_q_label = del_one_gnfa_state(G, p, qdel, q)
                if new_p_q_label == "NOPATH":
                    continue # no path via qdel for THIS p,q pair
                old_p_q_labels = Edges_Exist_Via(G, p, q) # direct p->q edges?
                if old_p_q_labels != "NOEDGE":            # There are.
                    label = form_alt_RE( [new_p_q_label] + old_p_q_labels )
                    for old_label in old_p_q_labels:
                        Superseded.setdefault((p, old_label), set()).add(q)
                else:
                    # Only new_p_q_label needs to be added
                    label = new_p_q_label
                # Union, not overwrite: the same label may lead from p
                # to several different targets.
                New_Edges.setdefault((p, label), set()).add(q)
            #-end for
        #-end for
        G["Q"] = StatesLeft   # Fix G by adjusting its Q

        # Extinguish qdel from Delta by (1) and (2) below, and retire
        # old edges that were folded into a combined label (3)
        Surviving_Edges = dict()
        for ((q, symb), States) in G["Delta"].items():
            if (q != qdel): # (1) Removing all mappings out of qdel
                Surviving_Edges[(q, symb)] = (
                    States
                    - { qdel }                            # (2) Remove from images
                    - Superseded.get((q, symb), set()) )  # (3) Folded-in edges
        G["Delta"] = Surviving_Edges

        # Now bring in the brand new edges, merging with any surviving
        # edge that carries the same (state, label) key
        for (edge, targets) in New_Edges.items():
            G["Delta"][edge] = G["Delta"].get(edge, set()) | targets

        # Stringify the REs in the G to display at the end
        dotObj_List += [ dotObj_gnfa( gnfa_w_REStr(G) ) ]
    #Finish while loop and then return

    #-- Merge edge labels of all paths from Real_I to Real_F into one
    final_edges = Edges_Exist_Via(G, "Real_I", "Real_F")
    if final_edges == "NOEDGE":
        raise ValueError("del_gnfa_states: no edge from Real_I to Real_F "
                         "remains (the GNFA accepts no strings), "
                         "so no RE can be formed.")
    final_re     = form_alt_RE(final_edges)
    final_re_str = RE2Str(final_re)

    #-- Make a relevant GNFA retaining only Real_I, Real_F and one connection
    Gfinal = {"Q"     : {"Real_I", "Real_F"},
              "Sigma" : {final_re},
              "Delta" : { ("Real_I", final_re) : {"Real_F"} },
              "Q0"    : { "Real_I" },
              "F"     : { "Real_F" }
             }

    #-- Return the triple Gfinal, dotObj_List, final_re_str
    return (Gfinal, dotObj_List, final_re_str)

def gnfa_w_REStr(G):
    """Return a copy of the GNFA G whose edge labels (RE trees) have
       been rendered to RE strings with RE2Str. G is left untouched.
    """
    Gstr = copy.deepcopy(G)
    # Re-key every edge of G by the string form of its label
    Gstr["Delta"] = { (src, RE2Str(label)) : targets
                      for ((src, label), targets) in G["Delta"].items() }
    return Gstr

            
def del_one_gnfa_state(G, p, qdel, q):
    """Compute the label of the direct edge p --> q that replaces the
       two-step path p --> qdel --> q. G itself is not modified.
       Return "NOPATH" if G lacks a p --> qdel edge or a qdel --> q edge.
       Otherwise return a single RE: (p to qdel) . (qdel to q), with the
       star of qdel's self-loop in between when qdel has one. Parallel
       edges are first merged with form_alt_RE.
    """
    incoming = Edges_Exist_Via(G, p, qdel)
    outgoing = Edges_Exist_Via(G, qdel, q)

    # Without both halves there is no path through qdel
    if incoming == "NOEDGE" or outgoing == "NOEDGE":
        return "NOPATH"

    enter_RE = form_alt_RE(incoming)
    leave_RE = form_alt_RE(outgoing)

    self_loops = Edges_Exist_Via(G, qdel, qdel)
    if self_loops == "NOEDGE":
        return form_concat_RE(enter_RE, leave_RE)

    # qdel loops on itself: enter, loop any number of times, leave
    loop_star     = form_kleene_RE(form_alt_RE(self_loops))
    loop_and_exit = form_concat_RE(loop_star, leave_RE)
    return form_concat_RE(enter_RE, loop_and_exit)
        
def Edges_Exist_Via(G, p, q):
    """Collect the labels of all direct edges p --label--> q in G.
       Return them as a list (in G["Delta"] iteration order), or the
       string "NOEDGE" when there is no such edge.
    """
    labels = []
    for ((src, label), targets) in G["Delta"].items():
        if src == p and q in targets:
            labels.append(label)
    return labels if labels else "NOEDGE"
   
# Make this interactive later.. menu-selectible
def choose_state_to_del(G, StatesLeft):
    """Pick the first state met while iterating StatesLeft that is
       neither in G["Q0"] nor in G["F"].
       Return the pair (chosen state, set of the other states left).
       Meant to be called only when such a state exists; if none does,
       nothing is returned (i.e. None).
    """
    for candidate in StatesLeft:
        if candidate in G["Q0"] or candidate in G["F"]:
            continue # initial/final states are never deleted
        remaining = set(StatesLeft)
        remaining.discard(candidate)
        return ( candidate, remaining )
    return None


def form_alt_RE(RElist):
    """Given a non-empty RElist, merge them all using a binary
       tree formed with root '+': for [x, y, z] the result is
       ('+', (x, ('+', (y, z)))). A single-element list yields that
       element unchanged.
       Duplicates are removed: an RE that occurs again later in the
       list is dropped, so only its last occurrence is kept.
       (The duplicate case used to crash with a NameError because it
       referred to an undefined name.)

       Raises ValueError if RElist is empty, or is a string such as the
       "NOEDGE" sentinel rather than a list of REs.
    """
    if isinstance(RElist, str) or len(RElist) == 0:
        raise ValueError(
            "form_alt_RE needs a non-empty list of REs, got {!r}".format(RElist))

    # Fold from the right so the tree nests to the right, exactly as the
    # recursive definition does.
    alt_tree = RElist[-1]
    seen_later = [alt_tree]   # REs occurring to the right of the current one
    for item in reversed(RElist[:-1]):
        if item in seen_later:
            continue          # duplicate of a later RE: drop it
        seen_later.append(item)
        alt_tree = ('+', (item, alt_tree))
    return alt_tree
    
def form_concat_RE(re1, re2):
    """Form the concatenation of re1 and re2 as the tree
       ('.', (re1, re2)). An epsilon operand ("") is dropped, so the
       result is just the other operand (and "" when both are "").
    """
    non_eps = [ r for r in (re1, re2) if not (r == "") ]
    if len(non_eps) == 2:
        return ('.', (re1, re2))
    if len(non_eps) == 1:
        return non_eps[0]
    return re2 # both operands are epsilon

def form_kleene_RE(re):
    """Form the star of re as the tree ('*', re).
       The star of epsilon ("") is returned as epsilon itself.
    """
    return re if re == "" else ('*', re)

In [None]:
RE2Str("")

In [None]:
re2nfa( "''*")

In [None]:
# Two-state NFA whose only moves are epsilon moves (label ''), one in each
# direction between St1 and St2. Sigma is empty; St2 is both the initial
# and the only final state.
n0 = {'Delta': {('St1', ''): {'St2'}, ('St2', ''): {'St1'}},
 'F': {'St2'},
 'Q': {'St1', 'St2'},
 'Q0': {'St2'},
 'Sigma': set()}

In [None]:
nfa2dfa(n0)

In [None]:
dotObj_nfa(n0)

In [None]:
n0

In [None]:
dotObj_nfa( re2nfa( "''*") )

In [None]:
dotObj_dfa( nfa2dfa(re2nfa( "''*") ) )

In [None]:
# Two-state NFA over {'0','1'}: I --0--> F and F --1--> I.
# I is the initial state and F the only final state.
NFA6 =  {'Q': {'I', 'F'},
         'Sigma': {'0', '1'},
         'Delta': {  
                   ('I', '0') : {'F'},
                   ('F', '1') : {'I'}
                   },
         'Q0': {'I'}, 
         'F' : {'F'}   
}
# NOTE(review): the consistency check is invoked twice and its result is
# discarded both times.
is_consistent_nfa(NFA6)
is_consistent_nfa(NFA6)
# Wrap NFA6 with the Real_I / Real_F states ...
GN6 = mk_gnfa(NFA6)
# ... then eliminate states; the triple is
# (final GNFA, list of Dot objects, RE string).
(GF6, do6, restr6) = del_gnfa_states(GN6)
#do6[2]

In [None]:
restr6

In [None]:
dotObj_gnfa(GN6)

In [None]:
do6[0]

In [None]:
RE2Str(("*","a"))

In [None]:
RE2Str(("+",("a", "b")))

In [None]:
RE2Str((".",("", "")))

In [None]:
RE2Str((".",("a", "b")))

In [None]:
RE2Str(("+",( ("+",("a", "b")) , (".",("c", ("*","d") )) ) ) )

In [None]:
# Five-state DFA over {'0','1'} with a move defined for every
# (state, symbol) pair. IF is both the initial state (q0) and the only
# final state.
D34bl = { 'Q': {'A', 'IF', 'B', 'A1', 'B1'},
  'Sigma': {'0', '1'},
  'Delta': { ('IF', '0'): 'A',
             ('IF', '1'): 'IF',
             ('A', '0'): 'B1',
             ('A', '1'): 'A1',
             ('A1', '0'): 'B',
             ('A1', '1'): 'A',
             ('B1', '0'): 'IF',
             ('B1', '1'): 'B',
             ('B','0') : 'IF',
             ('B', '1'): 'B1' },
  'q0': 'IF', 
  'F': {'IF'}   
}

# NFA form of D34bl (every DFA target wrapped in a singleton set),
# followed by a consistency check whose value is the cell's output.
N1  = dfa2nfa(D34bl)
is_consistent_nfa(N1)

In [None]:
GN1 = mk_gnfa(N1)
is_consistent_nfa(N1)

In [None]:
# Three-state DFA over {'0','1'}: reading '0' moves IF -> A -> B -> IF,
# reading '1' stays in the current state. IF is both the initial state
# and the only final state.
DFA_fig34 = { 'Q': {'A', 'IF', 'B'},
              'Sigma': {'0', '1'},
              'Delta': { ('IF', '0'): 'A',
                         ('IF', '1'): 'IF',
                         ('A', '0'): 'B',
                         ('A', '1'): 'A',
                         ('B', '0'): 'IF',
                         ('B', '1'): 'B' },
              'q0': 'IF', 
              'F': {'IF'}   
             }

# NFA form of DFA_fig34, kept for later cells
NFA_fig34 = dfa2nfa(DFA_fig34)

# GNFA built from a fresh NFA conversion of DFA_fig34
GN2 = mk_gnfa(dfa2nfa(DFA_fig34))



# Second NFA conversion of D34bl (N1 was the first) and its GNFA
N3  = dfa2nfa(D34bl)
is_consistent_nfa(N3)
GN3 = mk_gnfa(N3)

In [None]:
# Three-state DFA over {'0','1'} in which both symbols take I to A and
# both take F to I, so the I -> A and F -> I pairs each carry two
# differently labeled edges. I is initial; F is the only final state.
DFmulti   = { 'Q': {'I', 'F', 'A'},
              'Sigma': {'0', '1'},
              'Delta': { ('I', '0') :  'A',
                         ('I', '1') :  'A',
                         ('A', '0') :  'A',
                         ('A', '1') :  'F',
                         ('F', '0') :  'I',
                         ('F', '1') :  'I' },
              'q0': 'I', 
              'F': {'F'}   
             }
# GNFA of DFmulti. NOTE(review): a later cell rebinds GN4 to
# mk_gnfa(NFA_fig34), so this value is only current until that cell runs.
GN4 = mk_gnfa(dfa2nfa(DFmulti))

In [None]:
dotObj_nfa(GN1)

In [None]:
(GNf, doL, restr1) = del_gnfa_states(GN1)

In [None]:
form_alt_RE(Edges_Exist_Via(GNf, "Real_I", "Real_F"))

In [None]:
Edges_Exist_Via(GNf, "Real_I", "Real_F")

In [None]:
doL

In [None]:
dotObj_gnfa(GN1)

In [None]:
doL[0]

In [None]:
doL[1]

In [None]:
doL[2]

In [None]:
doL[3]

In [None]:
restr1

In [None]:
Edges_Exist_Via(GN1, "I", "A")

In [None]:
dotObj_gnfa(GN2)

In [None]:
(GNf2, doL2, restr2) = del_gnfa_states(GN2)

In [None]:
doL2[0]

In [None]:
doL2[1]

In [None]:
doL2[2]

In [None]:
doL2[3]

In [None]:
dotObj_gnfa(GN3)

In [None]:
(GNf3, doL3, restr3) = del_gnfa_states(GN3)

In [None]:
GNf3

In [None]:
restr3

In [None]:
Es = Edges_Exist_Via(GNf3, "Real_I", "Real_F")

In [None]:
Es[0]

In [None]:
doL3[0]

In [None]:
doL3[1]

In [None]:
doL3[2]

In [None]:
doL3[3]

In [None]:
doL3[4]

In [None]:
doL3[5]

In [None]:
dotObj_nfa(GN3)

In [None]:
(GNf3, doL3, restr3) = del_gnfa_states(GN3)

In [None]:
restr3

In [None]:
del_one_gnfa_state(GN3, "B", "B1", "B")

In [None]:
del_one_gnfa_state(GN3, "B", "IF", "A")

In [None]:
GN4 = mk_gnfa(NFA_fig34)
dotObj_nfa(GN4)

In [None]:
NFA_fig34

In [None]:
(GNf4, doL4, restr4) = del_gnfa_states(GN4)

In [None]:
doL4[0]

In [None]:
doL4[1]

In [None]:
doL4[2]

In [None]:
doL4[3]

In [None]:
restr4

In [None]:
del_one_gnfa_state(GN4, "B", "IF", "A")

In [None]:
del_one_gnfa_state(GN4, "B", "IF", "Real_I")

In [None]:
# Five-state NFA over {'0','1'}. State I loops on both symbols, may also
# move to S0 on '1', and has an epsilon move (label '') to S0.
# From S0 the machine reads '1' to reach S1, then any symbol to reach S2,
# then any symbol to reach the final state F. I is the initial state.
NFA_fig61ab = \
{ 'Q': {'I', 'S0', 'S1', 'S2', 'F'},
  'Sigma': {'0', '1'},
  'Delta': { # moves out of I
             ('I', '0')  : {'I'},
             ('I', '1')  : {'I', 'S0'},
             ('I', '')   : {'S0'},
             # moves out of S0
             ('S0', '1') : {'S1'},
             # moves out of S1
             ('S1', '0') : {'S2'},
             ('S1', '1') : {'S2'},
             # moves out of S2
             ('S2', '0') : {'F'},
             ('S2', '1') : {'F'},
           },
  'Q0': {'I'}, 
  'F' : {'F'}   
}

In [None]:
GN5 = mk_gnfa(NFA_fig61ab)
dotObj_gnfa(GN5)

In [None]:
(GNf5, doL5, restr5) = del_gnfa_states(GN5)

In [None]:
GNf5

In [None]:
restr5

In [None]:
dotObj_dfa(min_dfa(nfa2dfa(re2nfa(restr5))))

In [None]:
form_alt_RE( Edges_Exist_Via(GNf5, "Real_I", "Real_F") )

In [None]:
restr5

In [None]:
RE2Str(restr5)

In [None]:
Edges_Exist_Via(GNf5, "Real_I", "Real_F")

In [None]:
GNf5

In [None]:
doL5[0]

In [None]:
doL5[1]

In [None]:
doL5[2]

In [None]:
doL5[3]

In [None]:
doL5[4]

In [None]:
doL5[5]

In [None]:
restr5

In [None]:
re2nfa(restr5)

In [None]:
NFA6 =  {'Q': {'I', 'F'},
         'Sigma': {'0', '1'},
         'Delta': {  
                   ('I', '0') : {'F'},
                   ('F', '1') : {'I'}
                   },
         'Q0': {'I'}, 
         'F' : {'F'}   
}
GN6 = mk_gnfa(NFA6)
del_gnfa_states(GN6)

In [None]:
GN6 = mk_gnfa(NFA6)
dotObj_gnfa(GN6)

In [None]:
del_gnfa_states(GN6)

In [None]:
(Gf6, do6, restr6) = del_gnfa_states(GN6)

In [None]:
do6[0]

In [None]:
do6[1]

In [None]:
do6[2]

In [None]:
del_one_gnfa_state(GN6, "I", "F", "I")

In [None]:
del_one_gnfa_state(GN6, "I", "F", "Real_F")

In [None]:
dotObj_dfa(min_dfa(nfa2dfa(re2nfa( "1*01*(01*01*)*" ))))

In [None]:
dotObj_dfa(min_dfa(nfa2dfa(re2nfa( "0*(10*10*)*" ))))

In [None]:
even1sdfa = min_dfa(nfa2dfa(re2nfa( "0*(10*10*)*" )))

In [None]:
dotObj_dfa(min_dfa(nfa2dfa(re2nfa( "0*(10*10*)*  +  1*01*(01*01*)*" ))))

In [None]:
def dfa2re(D):
    """Convert the DFA D to an RE: build its GNFA with mk_gnfa_from_D,
       then eliminate states with del_gnfa_states.
       Returns whatever del_gnfa_states returns, i.e. the triple
       (final GNFA, list of Dot objects, RE string).
    """
    gnfa_of_D = mk_gnfa_from_D(D)
    return del_gnfa_states(gnfa_of_D)

In [None]:
(GNev1, doev1, restrev1) = dfa2re(even1sdfa)

In [None]:
restrev1

In [None]:
minev1 = min_dfa(nfa2dfa(re2nfa( "0*(10*10*)*" )))
(Gev1, doev1, reev1) = del_gnfa_states(mk_gnfa_from_D(minev1))
minev2 = min_dfa(nfa2dfa(re2nfa(reev1)))

In [None]:
dotObj_dfa(minev1)

In [None]:
dotObj_dfa(minev2)

In [None]:
dotObj_dfa(min_dfa(nfa2dfa(re2nfa( "((0+((1((0)*1))+0)))*" ))))

In [None]:
dotObj_nfa(re2nfa("''"))

In [None]:
dotObj_nfa(re2nfa("''*"))

In [None]:
dotObj_nfa(re2nfa("''*"), visible_eps = True)

In [None]:
re2nfa("''*")

In [None]:
min_dfa_brz

In [None]:
dotObj_dfa(min_dfa_brz(nfa2dfa(re2nfa( "((0+((1((0)*1))+0)))* + '' " ))))

In [None]:
dotObj_dfa(min_dfa_brz(nfa2dfa(re2nfa( "((0+((1((0)*1))+0)))* + '' " ))))

In [None]:
"0*(10*10*)*"

In [None]:
ev1od0 = "0*(10*10*)* + 1*01*(01*01*)*"

In [None]:
minb_ev1od0 = min_dfa_brz(nfa2dfa(re2nfa(ev1od0)))

In [None]:
mino_ev1od0 = min_dfa(nfa2dfa(re2nfa(ev1od0)))

In [None]:
dotObj_dfa(minb_ev1od0)

In [None]:
dotObj_dfa(mino_ev1od0)

In [None]:
del_gnfa_states

In [None]:
mk_gnfa_from_D

In [None]:
minb_ev1od0 = min_dfa_brz(nfa2dfa(re2nfa(ev1od0)))
dotObj_dfa(minb_ev1od0)

In [None]:
(Ge1o0, do_e1o0, re_e1o0) = del_gnfa_states(mk_gnfa_from_D(minb_ev1od0))

In [None]:
do_e1o0[0]

In [None]:
do_e1o0[1]

In [None]:
do_e1o0[2]

In [None]:
do_e1o0[3]

In [None]:
do_e1o0[4]

In [None]:
re_e1o0

In [None]:
dotObj_nfa(re2nfa(re_e1o0))

In [None]:
dotObj_dfa(nfa2dfa(re2nfa(re_e1o0)))

In [None]:
dotObj_dfa(min_dfa_brz(nfa2dfa(re2nfa(re_e1o0))))