In [1]:

def save_file(filename, txt):
    f = open(filename, "w")
    f.write(txt)
    f.close()

In [2]:
class SPPF:
    def __init__(self, tag="", children=None):
        self.tag = tag
        self.visited = False
        self.children = children if children is not None else []
        
    def to_dict(self):
        res = {}
        res["tag"] = self.tag
        res["children"] = [ [ y.to_dict() for y in x] for x in self.children ]
        return res

class State:
    def __init__(self):
        self.LHS = ""
        self.RHS = []
        self.pos = -1; # 0 means the beginning
        self.origin = 0
        self.SPPF = SPPF()

    #debug only
    def str(self):
        res = self.LHS + " -> "
        for i, x in enumerate(self.RHS):
            if i == self.pos:
                res += ". "
            res  += x + " "
        if "." not in res:
            res += ". "
        res += "| " + str(self.origin)
        print( "\t", res )
        return res

    def __eq__(self, other):
        return self.LHS == other.LHS and self.RHS == other.RHS and self.pos == other.pos and self.origin == other.origin

In [3]:
def read_rules(string):
    res = {}
    for rule in string.split("\n"):
        head, body = rule.split("->")
        head = head.strip()
        body = [ x.strip() for x in body]
        body = [ x for x in body if x != ""]
        if head not in res:
            res[head] = []
        res[head].append(body)
    return res

In [4]:
def all_combs_of_2(list1, list2):
    #list1 is a list of lists, list2 is just list
    res = []
    for x in list1:
        for y in list2:
            res.append( x + [y])
    return res
    
def all_combs(lists):
    #function that takes a list L of lists l0,l1,l2... and returns of combination of their elements
    res = [ [x] for x in lists[0]]
    for x in lists[1:]:
        res = all_combs_of_2(res, x)
    return res

def unpack(tree):
    res = []
    if len(tree.children) == 0:
        res = [tree.tag]
    for alt in tree.children:
        children = [ unpack(child) for child in alt] #children is a list of lists
        combinations = all_combs(children)
        for x in combinations:
            res.append (tree.tag + "["+ " ".join(x) + "]")
    return res
        

In [5]:
import copy
class Parser:
    def __init__(self):
        self.sets = []

    def _debug(self, i):
            #debugging 
            print("-" * 30, i, "-"*30)
            for s in self.sets[i]:
                s.str()
                
    def parse(self, word, rules, head):
        self.sets = [ [] for x in range(len(word)+1) ]
        #add the first state
        for RHS in rules[head]:
            state = State()
            state.LHS = head
            state.RHS = RHS
            state.pos = 0
            state.origin = 0
            state.SPPF.tag = head
            self.sets[0].append(state)

        
        for i in range(len(word)):
             
            j = 0
            while j < len(self.sets[i]):
                state = self.sets[i][j]
                if state.pos != len(state.RHS):
                    if state.RHS[state.pos] == word[i]:
                        self.scanner(state, i)
                    else:
                        self.predict(rules, state, i)
                else:
                    self.complete(state, i)
                j += 1
            
        for s in self.sets[i+1]:
            if s.pos == len(s.RHS):
                self.complete(s,i+1)
            else:
                self.predict(rules, s, i+1)

        res = []
        for s in self.sets[i+1]:
            if s.LHS == head and s.pos == len(s.RHS) and s.origin == 0:
                res.append(s)

        return res
            
    def scanner(self, state, i):
        _state = copy.deepcopy(state)
        _state.pos += 1
        leaf = SPPF(state.RHS[state.pos], [])
        if len(_state.SPPF.children) == 0:
            _state.SPPF.children.append([leaf])
        else:
            for child in _state.SPPF.children:
                child.append(leaf)
        
        if _state not in self.sets[i+1]:
            self.sets[i+1].append(_state)

    def predict(self, rules, state, i):
        next_nterm = state.RHS[state.pos]
        for head, RHSs in rules.items():
            if head == next_nterm:
                for RHS in RHSs:
                    _state = State()
                    _state.LHS = head
                    _state.RHS = RHS
                    _state.pos = 0
                    _state.origin = i
                    #or should we keep the old sppf?
                    _state.SPPF = SPPF(head, [])
                    if _state not in self.sets[i]:
                        self.sets[i].append(_state)
                        #no need to call for predict recurently, the new state will be noticed and predicted in the main loop

    def complete(self, state, i):
        for s in self.sets[state.origin]:
            if s.pos < len(s.RHS) and s.RHS[s.pos] == state.LHS:
                _state = copy.deepcopy(s)
                _state.pos += 1

                
                
                leaf = state.SPPF
                if len(_state.SPPF.children) == 0:
                    _state.SPPF.children.append([leaf])
                else:
                    for child in _state.SPPF.children:
                        child.append(leaf)

                if _state not in self.sets[i]:
                    self.sets[i].append(_state)
                else:
                    #we just add the alternative children
                    #print("the evil state is ccaused by: ", state.str())
                    pos = self.sets[i].index(_state)
                    self.sets[i][pos].SPPF.children.extend(_state.SPPF.children)
                
        

In [22]:
def test(rules_str, head, input):
    rules = read_rules(rules_str)
    p = Parser()
    res = p.parse(input, rules, head)
    for i in range(len(p.sets)):
        p._debug(i)
    print("-"*63)
    print ([unpack(x.SPPF) for x in res])

In [23]:
rules = """ S -> S S 
S -> b"""
test(rules, "S", ["b","b","b"])

------------------------------ 0 ------------------------------
	 S -> . S S | 0
	 S -> . b | 0
------------------------------ 1 ------------------------------
	 S -> b . | 0
	 S -> S . S | 0
	 S -> . S S | 1
	 S -> . b | 1
------------------------------ 2 ------------------------------
	 S -> b . | 1
	 S -> S S . | 0
	 S -> S . S | 1
	 S -> S . S | 0
	 S -> . S S | 2
	 S -> . b | 2
------------------------------ 3 ------------------------------
	 S -> b . | 2
	 S -> S S . | 1
	 S -> S S . | 0
	 S -> S . S | 2
	 S -> S . S | 1
	 S -> S . S | 0
	 S -> . S S | 3
	 S -> . b | 3
---------------------------------------------------------------
[['S[S[S[b] S[b]] S[b]]', 'S[S[b] S[S[b] S[b]]]']]


In [24]:
input = ["x", "+", "x", "*", "x"]
rules = """ P -> S
S -> S + M
S -> M
M -> M * T
M -> T
T -> x """
test(rules, "P", input)

------------------------------ 0 ------------------------------
	 P -> . S | 0
	 S -> . S + M | 0
	 S -> . M | 0
	 M -> . M * T | 0
	 M -> . T | 0
	 T -> . x | 0
------------------------------ 1 ------------------------------
	 T -> x . | 0
	 M -> T . | 0
	 S -> M . | 0
	 M -> M . * T | 0
	 P -> S . | 0
	 S -> S . + M | 0
------------------------------ 2 ------------------------------
	 S -> S + . M | 0
	 M -> . M * T | 2
	 M -> . T | 2
	 T -> . x | 2
------------------------------ 3 ------------------------------
	 T -> x . | 2
	 M -> T . | 2
	 S -> S + M . | 0
	 M -> M . * T | 2
	 P -> S . | 0
	 S -> S . + M | 0
------------------------------ 4 ------------------------------
	 M -> M * . T | 2
	 T -> . x | 4
------------------------------ 5 ------------------------------
	 T -> x . | 4
	 M -> M * T . | 2
	 S -> S + M . | 0
	 M -> M . * T | 2
	 P -> S . | 0
	 S -> S . + M | 0
---------------------------------------------------------------
[['P[S[S[M[T[x]]] + M[M[T[x]] * T[x]]]]']]


In [25]:
rules = """ S -> a S
S -> a """
input = ["a","a","a","a"]
test(rules, "S", input)

------------------------------ 0 ------------------------------
	 S -> . a S | 0
	 S -> . a | 0
------------------------------ 1 ------------------------------
	 S -> a . S | 0
	 S -> a . | 0
	 S -> . a S | 1
	 S -> . a | 1
------------------------------ 2 ------------------------------
	 S -> a . S | 1
	 S -> a . | 1
	 S -> . a S | 2
	 S -> . a | 2
	 S -> a S . | 0
------------------------------ 3 ------------------------------
	 S -> a . S | 2
	 S -> a . | 2
	 S -> . a S | 3
	 S -> . a | 3
	 S -> a S . | 1
	 S -> a S . | 0
------------------------------ 4 ------------------------------
	 S -> a . S | 3
	 S -> a . | 3
	 S -> . a S | 4
	 S -> . a | 4
	 S -> a S . | 2
	 S -> a S . | 1
	 S -> a S . | 0
---------------------------------------------------------------
[['S[a S[a S[a S[a]]]]']]


In [26]:
rules = """ S ->  S a
S -> a """
input = ["a","a","a","a"]
test(rules, "S", input)

------------------------------ 0 ------------------------------
	 S -> . S a | 0
	 S -> . a | 0
------------------------------ 1 ------------------------------
	 S -> a . | 0
	 S -> S . a | 0
------------------------------ 2 ------------------------------
	 S -> S a . | 0
	 S -> S . a | 0
------------------------------ 3 ------------------------------
	 S -> S a . | 0
	 S -> S . a | 0
------------------------------ 4 ------------------------------
	 S -> S a . | 0
	 S -> S . a | 0
---------------------------------------------------------------
[['S[S[S[S[a] a] a] a]']]


In [27]:
rules = """ S ->  S A
S -> A
A -> a 
A -> a a"""
input = ["a","a","a","a"]
test(rules, "S", input)

------------------------------ 0 ------------------------------
	 S -> . S A | 0
	 S -> . A | 0
	 A -> . a | 0
	 A -> . a a | 0
------------------------------ 1 ------------------------------
	 A -> a . | 0
	 A -> a . a | 0
	 S -> A . | 0
	 S -> S . A | 0
	 A -> . a | 1
	 A -> . a a | 1
------------------------------ 2 ------------------------------
	 A -> a a . | 0
	 A -> a . | 1
	 A -> a . a | 1
	 S -> A . | 0
	 S -> S A . | 0
	 S -> S . A | 0
	 A -> . a | 2
	 A -> . a a | 2
------------------------------ 3 ------------------------------
	 A -> a a . | 1
	 A -> a . | 2
	 A -> a . a | 2
	 S -> S A . | 0
	 S -> S . A | 0
	 A -> . a | 3
	 A -> . a a | 3
------------------------------ 4 ------------------------------
	 A -> a a . | 2
	 A -> a . | 3
	 A -> a . a | 3
	 S -> S A . | 0
	 S -> S . A | 0
	 A -> . a | 4
	 A -> . a a | 4
---------------------------------------------------------------
[['S[S[A[a a]] A[a a]]', 'S[S[S[A[a]] A[a]] A[a a]]', 'S[S[S[A[a]] A[a a]] A[a]]', 'S[S[S[A[a a]

In [28]:
rules = """ S ->  a S B B
S -> a
B -> b
B -> """
input = ["a","a","a","b"]
test(rules, "S", input)

------------------------------ 0 ------------------------------
	 S -> . a S B B | 0
	 S -> . a | 0
------------------------------ 1 ------------------------------
	 S -> a . S B B | 0
	 S -> a . | 0
	 S -> . a S B B | 1
	 S -> . a | 1
------------------------------ 2 ------------------------------
	 S -> a . S B B | 1
	 S -> a . | 1
	 S -> . a S B B | 2
	 S -> . a | 2
	 S -> a S . B B | 0
	 B -> . b | 2
	 B -> . | 2
	 S -> a S B . B | 0
	 S -> a S B B . | 0
------------------------------ 3 ------------------------------
	 S -> a . S B B | 2
	 S -> a . | 2
	 S -> . a S B B | 3
	 S -> . a | 3
	 S -> a S . B B | 1
	 B -> . b | 3
	 B -> . | 3
	 S -> a S B . B | 1
	 S -> a S B B . | 1
	 S -> a S . B B | 0
------------------------------ 4 ------------------------------
	 B -> b . | 3
	 S -> a S B . B | 1
	 S -> a S B B . | 1
	 S -> a S B . B | 0
	 B -> . b | 4
	 B -> . | 4
	 S -> a S . B B | 0
	 S -> a S B B . | 0
---------------------------------------------------------------
[['S[a S[a S[

In [29]:
rules = """S -> A
A -> B
B -> A
B -> a """
input = ["a"]
test(rules, "S", input)

------------------------------ 0 ------------------------------
	 S -> . A | 0
	 A -> . B | 0
	 B -> . A | 0
	 B -> . a | 0
------------------------------ 1 ------------------------------
	 B -> a . | 0
	 A -> B . | 0
	 S -> A . | 0
	 B -> A . | 0
---------------------------------------------------------------


RecursionError: maximum recursion depth exceeded while calling a Python object

In [58]:
res[0].SPPF.children[0][0].children[1][0].children[0][0].children

[[<__main__.SPPF at 0x266c0d62e10>], [<__main__.SPPF at 0x266bf078650>]]