In [42]:

def save_file(filename, txt):
    """Write ``txt`` to ``filename``, replacing any existing content.

    Args:
        filename: Path of the file to create or overwrite.
        txt: Text to write.
    """
    # The context manager closes the handle even when write() raises.
    with open(filename, "w") as f:
        f.write(txt)

In [43]:
class SPPF:
    """Node of the parse forest attached to parser states.

    ``children`` is a list of alternatives; each alternative is itself a
    list of ``SPPF`` nodes.
    """

    def __init__(self, tag="", children=None):
        self.tag = tag
        self.visited = False
        # A fresh list per instance unless the caller supplies one.
        self.children = [] if children is None else children

    def to_dict(self):
        """Return this node and its descendants as nested dicts and lists."""
        alternatives = []
        for alternative in self.children:
            alternatives.append([node.to_dict() for node in alternative])
        return {"tag": self.tag, "children": alternatives}

class State:
    """One Earley item: a rule ``LHS -> RHS`` with a dot position and an origin.

    Two states compare equal when LHS, RHS, pos and origin match; the
    attached SPPF is ignored by the comparison. Because ``__eq__`` is
    defined without ``__hash__``, instances are unhashable.
    """

    def __init__(self):
        self.LHS = ""
        self.RHS = []
        self.pos = -1  # index of the dot inside RHS; 0 means the beginning
        self.origin = 0
        self.SPPF = SPPF()

    # debug only
    def str(self):
        """Return a readable form such as ``S -> S . S | 0``."""
        res = self.LHS + " -> "
        # Track the dot explicitly: searching the text for "." would be
        # fooled by grammar symbols that themselves contain a dot.
        dot_written = False
        for i, x in enumerate(self.RHS):
            if i == self.pos:
                res += ". "
                dot_written = True
            res += x + " "
        if not dot_written:
            # The dot is not in front of any symbol, so show it at the end.
            res += ". "
        res += "| " + str(self.origin)
        return res

    def __eq__(self, other):
        if not isinstance(other, State):
            # Let Python fall back to its default comparison instead of
            # raising AttributeError on unrelated objects.
            return NotImplemented
        return (
            self.LHS == other.LHS
            and self.RHS == other.RHS
            and self.pos == other.pos
            and self.origin == other.origin
        )

In [70]:
def state_to_cpp(state, SET_ID, POS_IN_SET):
    """Render ``state`` as a C++ ``states[SET_ID].insert({...});`` statement.

    Args:
        state: Object exposing ``LHS``, ``RHS``, ``pos`` and ``origin``.
        SET_ID: Index of the state set the statement inserts into.
        POS_IN_SET: Position of the state inside that set.

    Returns:
        The statement as a string. The SPPF slot is always ``nullptr``.
    """

    def cpp_string(symbol):
        # Backslashes are escaped first so the escapes added for quotes
        # are not escaped a second time.
        return '"' + str(symbol).replace("\\", "\\\\").replace('"', '\\"') + '"'

    # Build the initializer list symbol by symbol. Rewriting the characters
    # of str(list) would corrupt symbols such as "[", "]" or quotes.
    rhs = "{" + ", ".join(cpp_string(symbol) for symbol in state.RHS) + "}"
    # Same padding rule as before: two tabs only for single-symbol rules.
    tab = "\t" if len(state.RHS) != 1 else "\t\t"
    return (
        "states[{set_id}].insert({{{lhs}, {rhs},{tab} {pos}, {origin}, "
        "nullptr, {pos_in_set}}});"
    ).format(
        set_id=SET_ID,
        lhs=cpp_string(state.LHS),
        rhs=rhs,
        tab=tab,
        pos=state.pos,
        origin=state.origin,
        pos_in_set=POS_IN_SET,
    )

In [71]:
def read_rules(string):
    """Parse a grammar written one rule per line as ``HEAD -> SYM SYM ...``.

    Symbols on the right-hand side are separated by whitespace, so a
    symbol may be longer than one character. An empty right-hand side
    yields an empty list. Blank lines are skipped.

    Args:
        string: The grammar text.

    Returns:
        Dict mapping each head to the list of its right-hand sides, each
        a list of symbol strings, in order of appearance.

    Raises:
        ValueError: If a non-blank line does not contain exactly one ``->``.
    """
    res = {}
    for rule in string.splitlines():
        if not rule.strip():
            continue
        head, arrow, body = rule.partition("->")
        if not arrow or "->" in body:
            raise ValueError("expected exactly one '->' in rule: %r" % rule)
        # Split on whitespace; iterating over the string itself would
        # break every symbol into single characters.
        res.setdefault(head.strip(), []).append(body.split())
    return res

In [72]:
def all_combs_of_2(list1, list2):
    """Extend every list in ``list1`` with every element of ``list2``.

    ``list1`` is a list of lists and ``list2`` a flat list. The result
    holds one new list per (prefix, element) pair, prefixes varying slowest.
    """
    return [prefix + [item] for prefix in list1 for item in list2]
    
def all_combs(lists):
    """Return every combination that takes one element from each list.

    Args:
        lists: A list ``[l0, l1, l2, ...]`` of lists.

    Returns:
        A list of lists, ordered so that elements of the first list vary
        slowest. An empty ``lists`` yields ``[[]]`` (the single empty
        combination) rather than raising IndexError.
    """
    import itertools

    return [list(combination) for combination in itertools.product(*lists)]

def unpack(tree):
    """Return every bracketed string rendering of ``tree``.

    A node without children renders as its bare tag. Otherwise each
    alternative contributes one ``tag[child child ...]`` string per
    combination of its children's renderings.
    """
    if len(tree.children) == 0:
        return [tree.tag]

    rendered = []
    for alternative in tree.children:
        # One list of renderings per child of this alternative.
        per_child = [unpack(node) for node in alternative]
        rendered.extend(
            tree.tag + "[" + " ".join(combo) + "]"
            for combo in all_combs(per_child)
        )
    return rendered
        

In [73]:
import copy
class Parser:
    """Earley-style parser that attaches an SPPF node to every state.

    After ``parse`` runs, ``self.sets[k]`` holds the states reached after
    consuming ``k`` input tokens.
    """

    def __init__(self):
        self.sets = []

    def _debug(self, i):
        """Print every state of set ``i`` (debugging aid)."""
        print("-" * 30, i, "-" * 30)
        for s in self.sets[i]:
            # str() only builds the text, so it has to be printed here.
            print("\t", s.str())

    def parse(self, word, rules, head):
        """Parse ``word`` and return the accepting states.

        Args:
            word: Sequence of input tokens.
            rules: Dict mapping a head to its list of right-hand sides.
            head: Start symbol; must be a key of ``rules``.

        Returns:
            The states of the last set that are complete, have LHS
            ``head`` and origin 0. The list is empty when nothing matches.
        """
        self.sets = [[] for _ in range(len(word) + 1)]
        # Seed set 0 with one state per alternative of the start symbol.
        for RHS in rules[head]:
            state = State()
            state.LHS = head
            state.RHS = RHS
            state.pos = 0
            state.origin = 0
            state.SPPF.tag = head
            self.sets[0].append(state)

        for i in range(len(word)):
            # Index-based walk: predict/complete append to the set while
            # it is being processed, and the new states must be visited too.
            j = 0
            while j < len(self.sets[i]):
                state = self.sets[i][j]
                if state.pos != len(state.RHS):
                    if state.RHS[state.pos] == word[i]:
                        self.scanner(state, i)
                    else:
                        self.predict(rules, state, i)
                else:
                    self.complete(state, i)
                j += 1

        # Close the final set. Its index comes from the input length, not
        # from the loop variable, so an empty ``word`` works as well.
        last = len(word)
        j = 0
        while j < len(self.sets[last]):
            s = self.sets[last][j]
            if s.pos == len(s.RHS):
                self.complete(s, last)
            else:
                self.predict(rules, s, last)
            j += 1

        return [
            s for s in self.sets[last]
            if s.LHS == head and s.pos == len(s.RHS) and s.origin == 0
        ]

    @staticmethod
    def _attach_child(node, child):
        """Append ``child`` to every alternative of ``node``, creating the first alternative if none exists."""
        if len(node.children) == 0:
            node.children.append([child])
        else:
            for alternative in node.children:
                alternative.append(child)

    def scanner(self, state, i):
        """Advance ``state`` over the symbol at its dot and add the result to set ``i + 1``."""
        _state = copy.deepcopy(state)
        _state.pos += 1
        self._attach_child(_state.SPPF, SPPF(state.RHS[state.pos], []))

        if _state not in self.sets[i + 1]:
            self.sets[i + 1].append(_state)

    def predict(self, rules, state, i):
        """Add to set ``i`` a fresh state for every rule of the symbol after the dot.

        Nothing is added when that symbol has no rules.
        """
        next_nterm = state.RHS[state.pos]
        for RHS in rules.get(next_nterm, ()):
            _state = State()
            _state.LHS = next_nterm
            _state.RHS = RHS
            _state.pos = 0
            _state.origin = i
            # or should we keep the old sppf?
            _state.SPPF = SPPF(next_nterm, [])
            if _state not in self.sets[i]:
                # No recursive call needed: the caller's loop will reach
                # the new state and predict from it.
                self.sets[i].append(_state)

    def complete(self, state, i):
        """Advance every state of ``state``'s origin set that was waiting for ``state.LHS``.

        The advanced copies go into set ``i``. If an equal state is already
        there, the copy's SPPF alternatives are merged into the existing one.
        """
        for s in self.sets[state.origin]:
            if s.pos < len(s.RHS) and s.RHS[s.pos] == state.LHS:
                _state = copy.deepcopy(s)
                _state.pos += 1
                # The completed state's SPPF is attached by reference, not copied.
                self._attach_child(_state.SPPF, state.SPPF)

                if _state not in self.sets[i]:
                    self.sets[i].append(_state)
                else:
                    # we just add the alternative children
                    pos = self.sets[i].index(_state)
                    self.sets[i][pos].SPPF.children.extend(_state.SPPF.children)
                
        

In [74]:
def test(rules_str, head, input):
    """Parse ``input`` with the grammar in ``rules_str`` and print every state set as C++ code.

    Each set is printed as a ``// state[ k ]`` comment line followed by one
    ``state_to_cpp`` line per state.
    """
    grammar = read_rules(rules_str)
    parser = Parser()
    parser.parse(input, grammar, head)

    for set_index in range(len(parser.sets)):
        print("// state[", set_index, "]")
        for position, item in enumerate(parser.sets[set_index]):
            print(state_to_cpp(item, set_index, position))

    # For debugging: call parser._debug(k) for each set, or run
    # unpack(x.SPPF) on the states returned by parser.parse().

In [75]:
# Grammar S -> S S | b on the tokens b b b; prints each state set as C++ insert statements.
rules = """ S -> S S 
S -> b"""
test(rules, "S", ["b","b","b"])

// state[ 0 ]
states[0].insert({"S", {"S", "S"},	 0, 0, nullptr, 0});
states[0].insert({"S", {"b"},		 0, 0, nullptr, 1});
// state[ 1 ]
states[1].insert({"S", {"b"},		 1, 0, nullptr, 0});
states[1].insert({"S", {"S", "S"},	 1, 0, nullptr, 1});
states[1].insert({"S", {"S", "S"},	 0, 1, nullptr, 2});
states[1].insert({"S", {"b"},		 0, 1, nullptr, 3});
// state[ 2 ]
states[2].insert({"S", {"b"},		 1, 1, nullptr, 0});
states[2].insert({"S", {"S", "S"},	 2, 0, nullptr, 1});
states[2].insert({"S", {"S", "S"},	 1, 1, nullptr, 2});
states[2].insert({"S", {"S", "S"},	 1, 0, nullptr, 3});
states[2].insert({"S", {"S", "S"},	 0, 2, nullptr, 4});
states[2].insert({"S", {"b"},		 0, 2, nullptr, 5});
// state[ 3 ]
states[3].insert({"S", {"b"},		 1, 2, nullptr, 0});
states[3].insert({"S", {"S", "S"},	 2, 1, nullptr, 1});
states[3].insert({"S", {"S", "S"},	 2, 0, nullptr, 2});
states[3].insert({"S", {"S", "S"},	 1, 2, nullptr, 3});
states[3].insert({"S", {"S", "S"},	 1, 1, nullptr, 4});
states[3].insert

In [82]:
# Grammar with the self-referential unit rule S -> S, run on a single token b.
rules = """ S -> S  
S -> b"""
test(rules, "S", ["b"])

// state[ 0 ]
states[0].insert({"S", {"S"},		 0, 0, nullptr, 0});
states[0].insert({"S", {"b"},		 0, 0, nullptr, 1});
// state[ 1 ]
states[1].insert({"S", {"b"},		 1, 0, nullptr, 0});
states[1].insert({"S", {"S"},		 1, 0, nullptr, 1});


In [76]:
# Sum/product expression grammar with start symbol P, run on the tokens x + x * x.
# NOTE: the global name `input` shadows the builtin of the same name from here on.
input = ["x", "+", "x", "*", "x"]
rules = """ P -> S
S -> S + M
S -> M
M -> M * T
M -> T
T -> x """
test(rules, "P", input)

// state[ 0 ]
states[0].insert({"P", {"S"},		 0, 0, nullptr, 0});
states[0].insert({"S", {"S", "+", "M"},	 0, 0, nullptr, 1});
states[0].insert({"S", {"M"},		 0, 0, nullptr, 2});
states[0].insert({"M", {"M", "*", "T"},	 0, 0, nullptr, 3});
states[0].insert({"M", {"T"},		 0, 0, nullptr, 4});
states[0].insert({"T", {"x"},		 0, 0, nullptr, 5});
// state[ 1 ]
states[1].insert({"T", {"x"},		 1, 0, nullptr, 0});
states[1].insert({"M", {"T"},		 1, 0, nullptr, 1});
states[1].insert({"S", {"M"},		 1, 0, nullptr, 2});
states[1].insert({"M", {"M", "*", "T"},	 1, 0, nullptr, 3});
states[1].insert({"P", {"S"},		 1, 0, nullptr, 4});
states[1].insert({"S", {"S", "+", "M"},	 1, 0, nullptr, 5});
// state[ 2 ]
states[2].insert({"S", {"S", "+", "M"},	 2, 0, nullptr, 0});
states[2].insert({"M", {"M", "*", "T"},	 0, 2, nullptr, 1});
states[2].insert({"M", {"T"},		 0, 2, nullptr, 2});
states[2].insert({"T", {"x"},		 0, 2, nullptr, 3});
// state[ 3 ]
states[3].insert({"T", {"x"},		 1, 2, nullptr, 0});
states

In [77]:
# Right-recursive grammar S -> a S | a on four a tokens.
# NOTE: the global name `input` shadows the builtin of the same name.
rules = """ S -> a S
S -> a """
input = ["a","a","a","a"]
test(rules, "S", input)

// state[ 0 ]
states[0].insert({"S", {"a", "S"},	 0, 0, nullptr, 0});
states[0].insert({"S", {"a"},		 0, 0, nullptr, 1});
// state[ 1 ]
states[1].insert({"S", {"a", "S"},	 1, 0, nullptr, 0});
states[1].insert({"S", {"a"},		 1, 0, nullptr, 1});
states[1].insert({"S", {"a", "S"},	 0, 1, nullptr, 2});
states[1].insert({"S", {"a"},		 0, 1, nullptr, 3});
// state[ 2 ]
states[2].insert({"S", {"a", "S"},	 1, 1, nullptr, 0});
states[2].insert({"S", {"a"},		 1, 1, nullptr, 1});
states[2].insert({"S", {"a", "S"},	 0, 2, nullptr, 2});
states[2].insert({"S", {"a"},		 0, 2, nullptr, 3});
states[2].insert({"S", {"a", "S"},	 2, 0, nullptr, 4});
// state[ 3 ]
states[3].insert({"S", {"a", "S"},	 1, 2, nullptr, 0});
states[3].insert({"S", {"a"},		 1, 2, nullptr, 1});
states[3].insert({"S", {"a", "S"},	 0, 3, nullptr, 2});
states[3].insert({"S", {"a"},		 0, 3, nullptr, 3});
states[3].insert({"S", {"a", "S"},	 2, 1, nullptr, 4});
states[3].insert({"S", {"a", "S"},	 2, 0, nullptr, 5});
// state[ 4 ]
states

In [78]:
# Left-recursive grammar S -> S a | a on four a tokens.
# NOTE: the global name `input` shadows the builtin of the same name.
rules = """ S ->  S a
S -> a """
input = ["a","a","a","a"]
test(rules, "S", input)

// state[ 0 ]
states[0].insert({"S", {"S", "a"},	 0, 0, nullptr, 0});
states[0].insert({"S", {"a"},		 0, 0, nullptr, 1});
// state[ 1 ]
states[1].insert({"S", {"a"},		 1, 0, nullptr, 0});
states[1].insert({"S", {"S", "a"},	 1, 0, nullptr, 1});
// state[ 2 ]
states[2].insert({"S", {"S", "a"},	 2, 0, nullptr, 0});
states[2].insert({"S", {"S", "a"},	 1, 0, nullptr, 1});
// state[ 3 ]
states[3].insert({"S", {"S", "a"},	 2, 0, nullptr, 0});
states[3].insert({"S", {"S", "a"},	 1, 0, nullptr, 1});
// state[ 4 ]
states[4].insert({"S", {"S", "a"},	 2, 0, nullptr, 0});
states[4].insert({"S", {"S", "a"},	 1, 0, nullptr, 1});


In [79]:
# Left-recursive grammar whose A matches either one or two a tokens, run on four a tokens.
# NOTE: the global name `input` shadows the builtin of the same name.
rules = """ S ->  S A
S -> A
A -> a 
A -> a a"""
input = ["a","a","a","a"]
test(rules, "S", input)

// state[ 0 ]
states[0].insert({"S", {"S", "A"},	 0, 0, nullptr, 0});
states[0].insert({"S", {"A"},		 0, 0, nullptr, 1});
states[0].insert({"A", {"a"},		 0, 0, nullptr, 2});
states[0].insert({"A", {"a", "a"},	 0, 0, nullptr, 3});
// state[ 1 ]
states[1].insert({"A", {"a"},		 1, 0, nullptr, 0});
states[1].insert({"A", {"a", "a"},	 1, 0, nullptr, 1});
states[1].insert({"S", {"A"},		 1, 0, nullptr, 2});
states[1].insert({"S", {"S", "A"},	 1, 0, nullptr, 3});
states[1].insert({"A", {"a"},		 0, 1, nullptr, 4});
states[1].insert({"A", {"a", "a"},	 0, 1, nullptr, 5});
// state[ 2 ]
states[2].insert({"A", {"a", "a"},	 2, 0, nullptr, 0});
states[2].insert({"A", {"a"},		 1, 1, nullptr, 1});
states[2].insert({"A", {"a", "a"},	 1, 1, nullptr, 2});
states[2].insert({"S", {"A"},		 1, 0, nullptr, 3});
states[2].insert({"S", {"S", "A"},	 2, 0, nullptr, 4});
states[2].insert({"S", {"S", "A"},	 1, 0, nullptr, 5});
states[2].insert({"A", {"a"},		 0, 2, nullptr, 6});
states[2].insert({"A", {"a", "a"},	 0,

In [80]:
# Grammar with an empty alternative for B (the last rule has nothing after the arrow).
# NOTE: the global name `input` shadows the builtin of the same name.
rules = """ S ->  a S B B
S -> a
B -> b
B -> """
input = ["a","a","a","b"]
test(rules, "S", input)

// state[ 0 ]
states[0].insert({"S", {"a", "S", "B", "B"},	 0, 0, nullptr, 0});
states[0].insert({"S", {"a"},		 0, 0, nullptr, 1});
// state[ 1 ]
states[1].insert({"S", {"a", "S", "B", "B"},	 1, 0, nullptr, 0});
states[1].insert({"S", {"a"},		 1, 0, nullptr, 1});
states[1].insert({"S", {"a", "S", "B", "B"},	 0, 1, nullptr, 2});
states[1].insert({"S", {"a"},		 0, 1, nullptr, 3});
// state[ 2 ]
states[2].insert({"S", {"a", "S", "B", "B"},	 1, 1, nullptr, 0});
states[2].insert({"S", {"a"},		 1, 1, nullptr, 1});
states[2].insert({"S", {"a", "S", "B", "B"},	 0, 2, nullptr, 2});
states[2].insert({"S", {"a"},		 0, 2, nullptr, 3});
states[2].insert({"S", {"a", "S", "B", "B"},	 2, 0, nullptr, 4});
states[2].insert({"B", {"b"},		 0, 2, nullptr, 5});
states[2].insert({"B", {},	 0, 2, nullptr, 6});
states[2].insert({"S", {"a", "S", "B", "B"},	 3, 0, nullptr, 7});
states[2].insert({"S", {"a", "S", "B", "B"},	 4, 0, nullptr, 8});
// state[ 3 ]
states[3].insert({"S", {"a", "S", "B", "B"},	 1, 2, null

In [81]:
# Grammar with the unit-rule cycle A -> B, B -> A, run on a single token a.
# NOTE: the global name `input` shadows the builtin of the same name.
rules = """S -> A
A -> B
B -> A
B -> a """
input = ["a"]
test(rules, "S", input)

// state[ 0 ]
states[0].insert({"S", {"A"},		 0, 0, nullptr, 0});
states[0].insert({"A", {"B"},		 0, 0, nullptr, 1});
states[0].insert({"B", {"A"},		 0, 0, nullptr, 2});
states[0].insert({"B", {"a"},		 0, 0, nullptr, 3});
// state[ 1 ]
states[1].insert({"B", {"a"},		 1, 0, nullptr, 0});
states[1].insert({"A", {"B"},		 1, 0, nullptr, 1});
states[1].insert({"S", {"A"},		 1, 0, nullptr, 2});
states[1].insert({"B", {"A"},		 1, 0, nullptr, 3});


In [58]:
# Inspect the alternatives of a node nested inside the first result's SPPF.
# NOTE(review): no visible cell assigns a global `res` (test() keeps its own local), so this
# presumably relies on state left in the kernel by an earlier run. Confirm before re-running.
res[0].SPPF.children[0][0].children[1][0].children[0][0].children

[[<__main__.SPPF at 0x266c0d62e10>], [<__main__.SPPF at 0x266bf078650>]]