In [16]:
import re
import pandas as pd
import numpy as np
import itertools
import copy

# Input

In [17]:
# Example grammar 1, written in the textual form that Grammar.read_input parses:
# a header G=(Vn, Vt, P, S), the Vn and Vt symbol sets, and one production per line.
# NOTE: the following cell rebinds `inp`, so when the notebook is run top to
# bottom this grammar is overwritten before anything reads it.
inp = """G=(Vn, Vt, P, S)
        Vn={S, A, B, C, D}
        Vt={a, b, c, d, e}
        P={
        S->C
        C->BcA
        A->b
        A->dD
        D->Be
        B->a
        B->Bba
        }
         """

In [18]:
# Example grammar 2, in the same textual form. This assignment replaces the
# previous value of `inp`, so it is the grammar the cells below operate on.
inp = """G=(Vn, Vt, P, S)
        Vn={S, B, D, A}
        Vt={a, b, c, d}
        P={
        S->dB
        B->D
        B->DcB
        D->bA
        A->a
        A->aA
        }
         """

# Grammar Class

In [19]:
class Grammar():
    """Container for a grammar G = (Vn, Vt, P, S) parsed from text.

    Attributes:
        S:  start symbol (str; "" until read_input finds one).
        Vn: list of non-terminal symbols, in input order.
        Vt: list of terminal symbols, in input order.
        P:  dict mapping each left-hand side to the list of its right-hand
            sides, in input order.
    """

    def __init__(self):
        self.S = ""
        self.Vn = []
        self.Vt = []
        self.P = {}

    def read_input(self, inp):
        """Parse a grammar description and store it on this instance.

        Expected layout (whitespace and line breaks are free)::

            G=(Vn, Vt, P, S)
            Vn={S, A}
            Vt={a, b}
            P={
            S->aA
            A->b
            }

        The start symbol is the fourth name of the ``G=(...)`` header. Inside
        ``P={...}`` the word tokens are paired up as (left-hand side,
        right-hand side); whatever separates them (``->``) is ignored.

        Each section is located on its own, so a grammar symbol that happens
        to be called ``G``, ``Vn``, ``Vt`` or ``P`` is not mistaken for a
        section header. All attributes are reset first, which makes repeated
        calls replace the grammar instead of accumulating productions.

        Args:
            inp: the grammar description as a single string.

        Raises:
            ValueError: if the production section holds an odd number of
                symbols, i.e. a left-hand side without a right-hand side.
        """
        def words(text):
            # Symbols are runs of word characters; punctuation only separates.
            return re.findall(r"\w+", text)

        def braces(name):
            # Symbols listed inside `name = { ... }`, or [] if the section is absent.
            found = re.search(r"\b" + re.escape(name) + r"\s*=?\s*\{([^}]*)\}", inp)
            return words(found.group(1)) if found else []

        # Start from a clean slate so re-reading does not duplicate productions.
        self.S, self.Vn, self.Vt, self.P = "", [], [], {}

        header = re.search(r"\bG\s*=?\s*\(([^)]*)\)", inp)
        if header:
            names = words(header.group(1))
            if len(names) >= 4:
                self.S = names[3]

        self.Vn = braces("Vn")
        self.Vt = braces("Vt")

        tokens = braces("P")
        if len(tokens) % 2:
            raise ValueError(
                "malformed production list: %r has no right-hand side" % tokens[-1]
            )
        for lhs, rhs in zip(tokens[0::2], tokens[1::2]):
            self.P.setdefault(lhs, []).append(rhs)

In [20]:
# Build a fresh Grammar and fill it from the description currently bound to `inp`.
g = Grammar()
g.read_input(inp)

In [21]:
# Display the parsed productions: left-hand side -> list of right-hand sides.
g.P

{'S': ['dB'], 'B': ['D', 'DcB'], 'D': ['bA'], 'A': ['a', 'aA']}

# FirstLast Class

In [22]:
class FirstLast():
    """Leftmost ("first") and rightmost ("last") symbol sets of a grammar.

    For every left-hand side in ``grammar.P`` the set starts with the first
    (resp. last) symbol of each of its right-hand sides and is then closed
    transitively: whenever the set contains a non-terminal, that
    non-terminal's own set is merged in. Non-terminals stay in the result.

    Attributes:
        first: dict mapping left-hand side -> sorted list of symbols.
        last:  dict mapping left-hand side -> sorted list of symbols.
    """

    def __init__(self):
        self.first = {}
        self.last = {}

    @staticmethod
    def _closure(grammar, position):
        """Return the closed symbol sets for one end of the right-hand sides.

        Args:
            grammar: object exposing ``P`` (dict of right-hand-side lists)
                and ``Vn`` (collection of non-terminals).
            position: 0 to look at the first symbol of each right-hand side,
                -1 to look at the last one.

        Returns:
            dict mapping every left-hand side of ``grammar.P`` to a sorted
            list (sorted so that the result is reproducible between runs).
        """
        sets = {
            head: {body[position] for body in bodies if body}
            for head, bodies in grammar.P.items()
        }

        changed = True
        while changed:
            changed = False
            for symbols in sets.values():
                # Iterate over a snapshot because `symbols` grows in the loop.
                for symbol in list(symbols):
                    if symbol not in grammar.Vn:
                        continue
                    # A non-terminal without productions contributes nothing.
                    extra = sets.get(symbol, set()) - symbols
                    if extra:
                        symbols |= extra
                        changed = True

        return {head: sorted(symbols) for head, symbols in sets.items()}

    def comp_first(self, grammar):
        """Compute the leftmost-symbol sets of ``grammar`` into ``self.first``."""
        self.first = self._closure(grammar, 0)

    def comp_last(self, grammar):
        """Compute the rightmost-symbol sets of ``grammar`` into ``self.last``."""
        self.last = self._closure(grammar, -1)

# PrecedenceMatrix Class

In [23]:
class PrecedenceMatrix():
    """Precedence-relation table between the symbols of a grammar.

    ``matrix`` is a square pandas DataFrame indexed (rows and columns) by the
    non-terminals, the terminals and the end marker "$", in that order. A cell
    holds "<", "=" or ">" for the relation row -> column, or NaN when the two
    symbols are unrelated.
    """

    def __init__(self):
        self.matrix = []

    @staticmethod
    def _symbols(grammar):
        """Row/column labels: Vn, then Vt, then the end marker "$"."""
        return grammar.Vn + grammar.Vt + ["$"]

    def init_matrix(self, grammar):
        """Reset ``self.matrix`` to an all-NaN table for ``grammar``'s symbols.

        The table is created with object dtype because the cells will hold
        strings; writing strings into float64 columns relies on silent
        upcasting, which pandas has deprecated.
        """
        nodes = self._symbols(grammar)
        cells = np.full((len(nodes), len(nodes)), np.nan, dtype=object)
        self.matrix = pd.DataFrame(cells, columns=nodes, index=nodes, dtype=object)

    def comp_matrix(self, grammar, first, last):
        """Fill ``self.matrix`` with the relations derived from the productions.

        For each pair of adjacent symbols ``left right`` in a right-hand side:

        * if ``right`` is a non-terminal, ``left <`` every symbol of
          ``first[right]``;
        * if ``left`` is a non-terminal and ``right`` a terminal, every symbol
          of ``last[left]`` is ``> right``;
        * if both are non-terminals, every symbol of ``last[left]`` is ``>``
          every terminal of ``first[right]``;
        * finally ``left = right``.

        Relations are written in that order and a later write replaces an
        earlier one for the same cell; conflicts are not reported.
        Afterwards "$" is made ``<`` every symbol occurring in any first set,
        and every symbol occurring in any last set is made ``>`` "$".

        Args:
            grammar: object exposing ``Vn``, ``Vt`` and ``P``.
            first: dict of leftmost-symbol sets per non-terminal.
            last: dict of rightmost-symbol sets per non-terminal.
        """
        self.init_matrix(grammar)
        matrix = self.matrix
        nonterminals = set(grammar.Vn)
        terminals = set(grammar.Vt)

        for bodies in grammar.P.values():
            for body in bodies:
                for left, right in zip(body, body[1:]):
                    # .get: a non-terminal without productions has no sets.
                    if right in nonterminals:
                        for symbol in first.get(right, ()):
                            matrix.loc[left, symbol] = "<"
                    if left in nonterminals:
                        if right in terminals:
                            for tail in last.get(left, ()):
                                matrix.loc[tail, right] = ">"
                        elif right in nonterminals:
                            leading_terminals = [
                                symbol for symbol in first.get(right, ())
                                if symbol in terminals
                            ]
                            for tail in last.get(left, ()):
                                for symbol in leading_terminals:
                                    matrix.loc[tail, symbol] = ">"
                    # Written last so "=" wins over "<" / ">" for this cell.
                    matrix.loc[left, right] = "="

        # Relations with the end marker; the flattened sets are built once.
        first_symbols = {symbol for symbols in first.values() for symbol in symbols}
        last_symbols = {symbol for symbols in last.values() for symbol in symbols}
        for symbol in self._symbols(grammar):
            if symbol in first_symbols:
                matrix.loc["$", symbol] = "<"
            if symbol in last_symbols:
                matrix.loc[symbol, "$"] = ">"

        self.matrix = matrix

# Parse Class

In [24]:
class Parse():
    """Bottom-up recogniser driven by a precedence matrix.

    The working string interleaves symbols with the relation between each
    neighbouring pair, e.g. ``$<d<b<a>c<b$``. A *handle* is the text between
    the first ``>`` and the closest ``<`` to its left; it is repeatedly
    replaced by the left-hand side of the production it matches until the
    string is ``$<S>$`` (S being the start symbol) or no step is possible.
    """

    def __init__(self):
        pass

    @staticmethod
    def _relation(matrix, left, right):
        """Return the relation stored for (left, right), or None if there is none."""
        if left not in matrix.index or right not in matrix.columns:
            return None
        value = matrix.loc[left, right]
        # Empty cells hold NaN (a float); only strings are real relations.
        return value if isinstance(value, str) else None

    @staticmethod
    def _start_symbol(grammar):
        """Start symbol of ``grammar``; falls back to "S" when it is not set."""
        return getattr(grammar, "S", "") or "S"

    def init_input(self, inp_string, matrix):
        """Return ``$inp_string$`` with the relation inserted between each pair.

        Args:
            inp_string: the sentence to parse, one character per symbol.
            matrix: precedence table (DataFrame) indexed by symbol.

        Raises:
            ValueError: if two neighbouring symbols have no relation in
                ``matrix`` (including symbols unknown to the table).
        """
        padded = "$" + inp_string + "$"
        pieces = ["$"]
        for left, right in zip(padded, padded[1:]):
            relation = self._relation(matrix, left, right)
            if relation is None:
                raise ValueError(
                    "no precedence relation between %r and %r" % (left, right)
                )
            pieces.append(relation + right)
        return "".join(pieces)

    def find_sequence(self, string):
        """Locate the leftmost handle of ``string``.

        Returns:
            ``(symbols, j, i)`` where ``symbols`` is the handle split on "=",
            ``j`` is the index of the opening "<" and ``i`` is the index of
            the symbol that follows the closing ">" (so ``string[:j]`` and
            ``string[i:]`` are the untouched left and right context).
            ``([], -1, -1)`` is returned when the string contains no handle.
        """
        close = string.find(">")
        while close != -1:
            # Index 0 is the "$" marker and is never an opening bracket.
            opening = string.rfind("<", 1, close)
            if opening != -1:
                return re.split("=", string[opening + 1:close]), opening, close + 1
            close = string.find(">", close + 1)
        return [], -1, -1

    def replace_sequence(self, string, sequence, j, i, matrix, grammar):
        """Reduce the handle ``sequence`` found at ``string[j:i]``.

        Args:
            string: current working string.
            sequence, j, i: result of :meth:`find_sequence` for ``string``.
            matrix: precedence table (DataFrame) indexed by symbol.
            grammar: object exposing ``P`` and, optionally, ``S``.

        Returns:
            The new working string, ``$<S>$`` (with the start symbol) when the
            whole input has been reduced, or the literal ``"ERROR"`` when the
            handle cannot be reduced.
        """
        if j < 1 or i <= j or i >= len(string):
            return "ERROR"

        P = grammar.P
        handle = "".join(sequence)
        start = self._start_symbol(grammar)

        # Whole input reduced to a right-hand side of the start symbol: accept.
        if handle in P.get(start, []) and string[:j] == "$" and string[i:] == "$":
            return "$<" + start + ">$"

        left, right = string[j - 1], string[i]
        heads = [head for head, bodies in P.items() if handle in bodies]
        if heads:
            # Use the first left-hand side that fits between both neighbours.
            for head in heads:
                before = self._relation(matrix, left, head)
                after = self._relation(matrix, head, right)
                if before is not None and after is not None:
                    return string[:j] + before + head + after + string[i:]
        else:
            # NOTE(review): recovery heuristic kept from the original code:
            # substitute a symbol that is "=" to the left neighbour. The
            # original indexed the DataFrame without .loc and always raised
            # KeyError here; confirm this substitution is really wanted.
            for column in matrix.columns:
                if self._relation(matrix, left, column) == "=":
                    after = self._relation(matrix, column, right)
                    if after is not None:
                        return string[:j] + "=" + column + after + string[i:]

        return "ERROR"

    def parse_input(self, inp_string, matrix, grammar):
        """Parse ``inp_string`` and print the working string after each step.

        Printing stops after ``$<S>$`` (accepted) or after ``ERROR``
        (rejected). ``ERROR`` is also printed when the input cannot even be
        annotated with relations, or when a step reproduces a string that was
        already seen, which would otherwise loop forever.
        """
        accepted = "$<" + self._start_symbol(grammar) + ">$"
        try:
            current = self.init_input(inp_string, matrix)
        except ValueError:
            print("ERROR")
            return

        seen = {current}
        while True:
            seq, j, i = self.find_sequence(current)
            current = self.replace_sequence(current, seq, j, i, matrix, grammar)
            print(current)
            if current == accepted or current == "ERROR":
                break
            if current in seen:
                print("ERROR")
                break
            seen.add(current)
            

# Executing Pipeline

In [25]:
# Compute, for every left-hand side of `g`, the sets built from the first and
# from the last symbol of its right-hand sides; results land in fl.first / fl.last.
fl = FirstLast()
fl.comp_first(g)
fl.comp_last(g)

In [26]:
# Display the leftmost-symbol sets computed by comp_first.
fl.first

{'S': ['d'], 'B': ['D', 'b'], 'D': ['b'], 'A': ['a']}

In [27]:
# Display the rightmost-symbol sets computed by comp_last.
fl.last

{'S': ['B', 'D', 'a', 'A'],
 'B': ['B', 'D', 'a', 'A'],
 'D': ['a', 'A'],
 'A': ['a', 'A']}

In [28]:
# Build the precedence matrix of `g` from the first/last sets computed above.
pm = PrecedenceMatrix()
pm.comp_matrix(g, fl.first, fl.last)

In [29]:
# Display the table: rows and columns are Vn + Vt + "$"; cells are "<", "=", ">" or NaN.
pm.matrix

Unnamed: 0,S,B,D,A,a,b,c,d,$
S,,,,,,,,,
B,,,,,,,,,>
D,,,,,,,=,,>
A,,,,,,,>,,>
a,,,,=,<,,>,,>
b,,,,=,<,,,,
c,,=,<,,,<,,,
d,,=,<,,,<,,,
$,,,<,,<,<,,<,


In [30]:
# Parse a sample sentence; parse_input prints the working string after every
# reduction and stops once it equals "$<S>$".
Parse().parse_input("dbacbaa", pm.matrix, g)

$<d<b=A>c<b<a<a>$
$<d<D=c<b<a<a>$
$<d<D=c<b<a=A>$
$<d<D=c<b=A>$
$<d<D=c<D>$
$<d<D=c=B>$
$<d=B>$
$<S>$
