In [None]:
from IPython.core.display import HTML
# Read the stylesheet text and wrap it in an IPython HTML object.
with open('../../style.css', 'r') as file:
    css = file.read()
HTML(css)

# Implementing an SLR-Table-Generator

## A Grammar for Grammars

As the goal is to implement an *SLR-table-generator*, we first need to implement a parser for context free grammars.
The file `simple.g` contains an example grammar that describes arithmetic expressions.

In [None]:
!cat simple.g

We use <span style="font-variant:small-caps;">Antlr</span> to develop a parser for context free grammars.  The pure grammar used to parse context free grammars is stored in the file `Pure.g4`.

In [None]:
!cat Pure.g4

The annotated grammar is stored in the file `Grammar.g4`. 

In [None]:
!cat -n Grammar.g4

We start by generating both scanner and parser.  

In [None]:
!antlr4 -Dlanguage=Python3 Grammar.g4

In [None]:
from GrammarLexer  import GrammarLexer
from GrammarParser import GrammarParser
import antlr4

The function `parse_grammar` takes a `filename` as its argument and returns the grammar that is stored in the specified file.  The grammar is represented as a list of rules.  Each rule is represented as a tuple.  The example below will clarify this structure.

In [None]:
def parse_grammar(filename):
    """Parse the grammar stored in the file `filename`.

    The file is fed through the Antlr-generated `GrammarLexer` and
    `GrammarParser`.  The value returned is the attribute `g` of the result
    of the parser rule `start`.
    """
    chars  = antlr4.FileStream(filename)
    tokens = antlr4.CommonTokenStream(GrammarLexer(chars))
    tree   = GrammarParser(tokens).start()
    return tree.g

In [None]:
# Parse the example grammar and display the value returned by `parse_grammar`.
grammar_arith = parse_grammar('simple.g')
grammar_arith

## SLR-Table-Generation

In [None]:
class MarkedRule():
    """A grammar rule `variable → alpha • beta` carrying a position marker.

    `mVariable` is the left hand side of the rule, `mAlpha` holds the symbols
    in front of the dot and `mBeta` holds the symbols behind the dot.
    """

    def __init__(self, variable, alpha, beta):
        self.mVariable, self.mAlpha, self.mBeta = variable, alpha, beta

    def __eq__(self, other):
        # An object of any other type never compares equal to a marked rule.
        if not isinstance(other, MarkedRule):
            return False
        mine   = (self.mVariable,  self.mAlpha,  self.mBeta)
        theirs = (other.mVariable, other.mAlpha, other.mBeta)
        return mine == theirs

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Equal marked rules have the same printed form, so hashing that
        # form is consistent with __eq__.
        return hash(repr(self))

    def __repr__(self):
        front = ' '.join(self.mAlpha)
        back  = ' '.join(self.mBeta)
        return f'{self.mVariable} → {front} • {back}'

Given a *marked rule* `self`, the function `is_complete` checks whether the marked rule `self` has the form
$$A \rightarrow \alpha \bullet,$$
i.e. whether the $\bullet$ is at the end of the grammar rule.

In [None]:
def is_complete(self):
    """Check whether the dot is at the end of the marked rule `self`.

    Returns `True` if nothing follows the dot, i.e. if `mBeta` is empty.
    The length is tested instead of comparing against `()` so that an empty
    list is recognised in the same way as an empty tuple.
    """
    return len(self.mBeta) == 0

MarkedRule.is_complete = is_complete
del is_complete

In [None]:
def symbol_after_dot(self):
    """Return the first symbol behind the dot, or `None` if there is none."""
    return self.mBeta[0] if len(self.mBeta) > 0 else None

MarkedRule.symbol_after_dot = symbol_after_dot
del symbol_after_dot

Given a marked rule, this function returns the name of the variable following the dot.  If there is no variable following the dot, the function returns `None`.  The function can distinguish variables from token names because variable names consist only of lower case letters.

In [None]:
def next_var(self):
    """Return the variable directly behind the dot.

    The result is `None` if nothing follows the dot or if the symbol
    following the dot is not a variable according to `is_var`.
    """
    if len(self.mBeta) == 0:
        return None
    candidate = self.mBeta[0]
    return candidate if is_var(candidate) else None

MarkedRule.next_var = next_var
del next_var

The function `move_dot(self)` moves the $\bullet$ in the *marked rule* `self`, where `self` has the form 
$$A \rightarrow \alpha \bullet \beta$$
over the next variable, token, or literal in $\beta$.  It assumes that $\beta$ is not empty.

In [None]:
def move_dot(self):
    """Return a new marked rule with the dot moved over one symbol.

    The first symbol of `mBeta` is appended to `mAlpha`; the remaining
    symbols form the new `mBeta`.  `mBeta` must not be empty.
    """
    symbol = self.mBeta[0]
    before = self.mAlpha + (symbol,)
    after  = self.mBeta[1:]
    return MarkedRule(self.mVariable, before, after)

MarkedRule.move_dot = move_dot
del move_dot

In [None]:
def to_rule(self):
    """Return the `GrammarRule` obtained by dropping the dot from `self`."""
    body = self.mAlpha + self.mBeta
    return GrammarRule(self.mVariable, body)

MarkedRule.to_rule = to_rule
del to_rule

In [None]:
def is_var(name):
    """Check whether the string `name` is the name of a syntactical variable.

    A name counts as a variable if it does not start with a single quote
    and `str.islower` holds for it.  The empty string is not a variable;
    `startswith` is used instead of indexing so that the empty string yields
    `False` rather than raising an `IndexError`.
    """
    return not name.startswith("'") and name.islower()

In [None]:
def collect_variables(Rules):
    """Return the set of all symbols occurring in `Rules` that are variables.

    Every rule is iterated symbol by symbol and each symbol is classified
    with `is_var`.
    """
    return { symbol for rule in Rules for symbol in rule if is_var(symbol) }

In [None]:
def collect_tokens(Rules):
    """Return the set of all symbols occurring in `Rules` that are not variables.

    Every rule is iterated symbol by symbol and each symbol is classified
    with `is_var`.
    """
    return { symbol for rule in Rules for symbol in rule if not is_var(symbol) }

In [None]:
def initialize_dictionary(Variables):
    """Return a dictionary that maps every element of `Variables` to an empty set."""
    table = {}
    for name in Variables:
        # Every key receives its own set object.
        table[name] = set()
    return table

In [None]:
class GrammarRule:
    """A grammar rule `variable → body`.

    `mVariable` is the left hand side and `mBody` is the sequence of symbols
    on the right hand side.  Two rules are equal if they have the same
    variable and the same body, so that tuples such as `('reduce', rule)`
    compare by value rather than by object identity.
    """

    def __init__(self, variable, body):
        self.mVariable = variable
        self.mBody     = body

    def __eq__(self, other):
        if not isinstance(other, GrammarRule):
            return NotImplemented
        return self.mVariable == other.mVariable and \
               tuple(self.mBody) == tuple(other.mBody)

    def __hash__(self):
        # The body is converted to a tuple so that rules whose body is a list
        # remain hashable.
        return hash((self.mVariable, tuple(self.mBody)))

    def __repr__(self):
        return f'{self.mVariable} → {" ".join(self.mBody)}'

The class `Grammar` represents a context free grammar.  It stores a list of the rules of the grammar.
Each grammar rule of the form
$$ a \rightarrow \beta $$
is stored as the tuple $(a,) + \beta$.  The start symbol is assumed to be the variable on the left hand side of
the first rule. To distinguish syntactical variables from tokens, variables contain only lower case letters,
while tokens either contain only upper case letters or they start and end with a single quote character "`'`".

In [None]:
class Grammar():
    """A context free grammar together with the data computed from it.

    `Rules` is a sequence of rules, each given as a tuple whose first element
    is the head and whose remaining elements form the body.  The head of the
    first rule is taken as the start symbol.  The grammar is augmented with
    the variable 'ŝ', the token '$' and the rule `ŝ → start $`.  The
    constructor finishes by calling `compute_tables`.
    """

    def __init__(self, Rules):
        # Store every rule as a pair (head, body) where body is a tuple.
        self.mRules = []
        for head, *body in Rules:
            self.mRules.append((head, tuple(body)))
        self.mStart      = Rules[0][0]
        self.mVariables  = collect_variables(Rules) | { 'ŝ' }
        self.mTokens     = collect_tokens(Rules) | { '$' }
        self.mStates     = set()
        self.mStateNames = {}
        self.mConflicts  = False
        # Augment the grammar with the rule for the new start symbol.
        self.mRules.append(('ŝ', (self.mStart, '$')))
        self.compute_tables()

In [None]:
def compute_tables(self):
    """Initialise the First and Follow dictionaries and run all computations.

    The steps are executed in the fixed order First sets, Follow sets,
    states, action table, goto table.
    """
    variables    = self.mVariables
    self.mFirst  = initialize_dictionary(variables)
    self.mFollow = initialize_dictionary(variables)
    steps = ('compute_first',
             'compute_follow',
             'all_states',
             'compute_action_table',
             'compute_goto_table')
    for step in steps:
        getattr(self, step)()
    
Grammar.compute_tables = compute_tables
del compute_tables

In [None]:
def compute_first(self):
    """Compute the First sets of all variables by fixpoint iteration.

    For every rule the result of `first_list` applied to its body is added
    to `mFirst` of the head.  The passes over the rules are repeated until a
    pass adds nothing.  Afterwards the First sets are printed.
    """
    while True:
        grew = False
        for head, body in self.mRules:
            found = self.first_list(body)
            known = self.mFirst[head]
            if not found.issubset(known):
                known.update(found)
                grew = True
        if not grew:
            break
    print('First sets:')
    for v in self.mVariables:
        print(f'First({v}) = {self.mFirst[v]}')
        
Grammar.compute_first = compute_first
del compute_first

In [None]:
def first_list(self, alpha):
    """Return the First set of the symbol sequence `alpha`.

    The empty string `''` is used as the marker for the empty word.  For an
    empty sequence the result is `{ '' }`.  If the first symbol is not a
    variable, the result contains just this symbol.  Otherwise the First set
    stored for the leading variable is combined via `eps_append` with the
    First set of the remaining symbols.
    """
    if len(alpha) == 0:
        return { '' }
    head = alpha[0]
    if not is_var(head):
        return { head }
    return eps_append(self.mFirst[head], self.first_list(alpha[1:]))
    
Grammar.first_list = first_list
del first_list

In [None]:
def eps_append(S, T):
    """Combine the sets `S` and `T`, where `''` marks the empty word.

    If `S` does not contain `''`, `S` itself is returned.  Otherwise the
    result is the union of `S` and `T`, and `''` is part of the result only
    if `T` contains it as well.
    """
    if '' not in S:
        return S
    if '' in T:
        return S | T
    return (S - { '' }) | T

In [None]:
def compute_follow(self):
    """Compute the Follow sets of all variables by fixpoint iteration.

    The Follow set of the start symbol is initialised with `{ '$' }`.  For
    every occurrence of a variable in a rule body, the First set of the
    symbols behind it, combined via `eps_append` with the Follow set of the
    rule head, is added to the Follow set of that variable.  The passes are
    repeated until a pass adds nothing.  Afterwards the Follow sets are
    printed.
    """
    self.mFollow[self.mStart] = { '$' }
    while True:
        grew = False
        for head, body in self.mRules:
            for pos, symbol in enumerate(body):
                if not is_var(symbol):
                    continue
                rest     = self.first_list(body[pos+1:])
                addition = eps_append(rest, self.mFollow[head])
                if not addition.issubset(self.mFollow[symbol]):
                    self.mFollow[symbol].update(addition)
                    grew = True
        if not grew:
            break
    print('Follow sets (note that "$" denotes the end of file):')
    for v in self.mVariables:
        print(f'Follow({v}) = {self.mFollow[v]}')
        
Grammar.compute_follow = compute_follow
del compute_follow

In [None]:
def cmp_closure(self, Marked_Rules):
    """Return the closure of the set `Marked_Rules` as a frozenset.

    Whenever a marked rule in the set has a variable directly behind the
    dot, a marked rule with the dot at the very beginning is added for every
    grammar rule that has this variable as its head.  This is repeated until
    no new marked rules appear.

    The argument is copied first, so the set passed in by the caller is
    never modified.
    """
    All_Rules = set(Marked_Rules)
    New_Rules = set(All_Rules)
    while New_Rules:
        More_Rules = set()
        for rule in New_Rules:
            c = rule.next_var()
            if c is None:
                continue
            for head, alpha in self.mRules:
                if c == head:
                    More_Rules.add(MarkedRule(head, (), alpha))
        # Only rules that have not been seen before need to be expanded
        # in the next round.
        New_Rules  = More_Rules - All_Rules
        All_Rules |= New_Rules
    return frozenset(All_Rules)

Grammar.cmp_closure = cmp_closure
del cmp_closure

In [None]:
def goto(self, Marked_Rules, x):
    """Return the closure of the marked rules reached by moving the dot over `x`.

    Every marked rule in `Marked_Rules` whose symbol behind the dot equals
    `x` contributes the marked rule with the dot moved by one position.  The
    resulting set is passed to `cmp_closure`.
    """
    shifted = { rule.move_dot()
                for rule in Marked_Rules
                if rule.symbol_after_dot() == x
              }
    return self.cmp_closure(shifted)

Grammar.goto = goto
del goto

In [None]:
def all_states(self):
    """Compute all states reachable from the start state and name them.

    The start state is the closure of the marked rule `ŝ → • start $`.
    New states are obtained by applying `goto` to a state and the symbol
    behind the dot of each of its incomplete marked rules.  This is repeated
    until no further states are found.  Finally every state receives a name
    of the form `s<number>`, which is stored in `mStateNames` and printed.
    """
    start        = self.cmp_closure({ MarkedRule('ŝ', (), (self.mStart, '$')) })
    self.mStates = { start }
    frontier     = self.mStates
    while True:
        reached = set()
        for state in frontier:
            for mr in state:
                if mr.is_complete():
                    continue
                reached.add(self.goto(state, mr.symbol_after_dot()))
        if reached <= self.mStates:
            break
        frontier = reached - self.mStates
        self.mStates |= frontier
    print("All SLR-states:")
    for counter, state in enumerate(self.mStates):
        self.mStateNames[state] = f's{counter}'
        print(f's{counter} = {set(state)}')

Grammar.all_states = all_states
del all_states

In [None]:
def compute_action_table(self):
    """Print the action table and record whether it contains conflicts.

    For every state and every token, a shift action is printed if `goto`
    yields a non-empty state.  For every complete marked rule in the state,
    a reduce action is printed for each token in the Follow set of the
    rule's variable.  If a token already has a different action in the same
    state, `mConflicts` is set to `True` and both actions are printed.
    """
    print('\nAction Table:')
    for state in self.mStates:
        # The name is needed by both loops below, so it is looked up once
        # per state instead of inside the token loop.
        stateName   = self.mStateNames[state]
        actionTable = {}
        # compute shift actions
        for token in self.mTokens:
            newState = self.goto(state, token)
            if newState:
                newName = self.mStateNames[newState]
                actionTable[token] = ('shift', newName)
                print(f'action("{stateName}", {token}) = ("shift", {newName})')
        # compute reduce actions
        for mr in state:
            if not mr.is_complete():
                continue
            action2 = ('reduce', mr.to_rule())
            for token in self.mFollow[mr.mVariable]:
                action1 = actionTable.get(token)
                if action1 is None:
                    actionTable[token] = action2
                    print(f'action("{stateName}", {token}) = {action2}')
                elif action1 != action2:
                    self.mConflicts = True
                    print('')
                    print(f'conflict in state {stateName}:')
                    print(f'{stateName} = {set(state)}')
                    print(f'action("{stateName}", {token}) = {action1}')
                    print(f'action("{stateName}", {token}) = {action2}')
                    print('')

Grammar.compute_action_table = compute_action_table
del compute_action_table

In [None]:
def compute_goto_table(self):
    """Print the goto table.

    For every state and every variable, an entry is printed if `goto`
    yields a non-empty state.
    """
    print('\nGoto Table:')
    for state in self.mStates:
        for var in self.mVariables:
            target = self.goto(state, var)
            if target == set():
                continue
            sourceName = self.mStateNames[state]
            targetName = self.mStateNames[target]
            print(f'goto({sourceName}, {var}) = {targetName}')

Grammar.compute_goto_table = compute_goto_table
del compute_goto_table

In [None]:
# Constructing the Grammar calls `compute_tables` from its constructor.
g = Grammar(grammar_arith)

The command below cleans the directory.  If you are running Windows, you have to replace `rm` with `del`.

In [None]:
!rm GrammarLexer.* GrammarParser.* Grammar.tokens GrammarListener.py Grammar.interp
!rm -r __pycache__

In [None]:
!ls