In [None]:
from IPython.display import HTML
# Read the stylesheet inside a context manager so that the file handle is
# closed right away instead of being left to the garbage collector.
with open('../style.css') as style_file:
    css = style_file.read()
# The last expression of the cell is rendered by the notebook.
HTML(css)

In [None]:
%load_ext nb_mypy

# An Interpreter for a Simple Programming Language

In this notebook we develop an interpreter for a small programming language.

In [None]:
import ply.lex as lex

In [None]:
# Token types produced by the scanner.  Every type that a scanner rule can
# assign, including the keyword types stored in `Keywords`, has to be listed
# here.  Single-character operators are declared separately via `literals`.
tokens = [ 'NUMBER',     # integer or floating point literal
           'STRING',     # string literal enclosed in double quotes
           'IDENTIFIER', # r'[a-zA-Z][a-zA-Z0-9_]*'
           'ASSIGN',     # r':='
           'AND',        # r'&&'
           'OR',         # r'\|\|'
           'EQ',         # r'=='
           'NE',         # r'!='
           'LE',         # r'<='
           'GE',         # r'>='
           'IF',         # keyword, see below
           'WHILE',      # keyword, see below
           'FOR',        # keyword, see below
           'FUNCTION',   # keyword, see below
           'RETURN',     # keyword, see below
          ]

We allow both *single-line comments* and *multi-line comments*.
- The regular expression `/\*(.|\n)*?\*/` recognizes multi-line comments.
  Multi-line comments start with the string `/*` and end with the string `*/`.
  Note the use of the *non-greedy* quantifier `*?`.  If we have code like
  ```
  /* blah */ a := 1; /* blub */
  ```
  the greedy quantifier would recognize the whole line as one comment. 
- The regular expression `//.*` recognizes single-line comments.
  A single line comment starts with the string `//` and extends to the end of the line.

In [None]:
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/|//.*'
    # The raw string above is the regular expression of this rule; PLY reads
    # it from the docstring, so it must stay the first statement.
    # A multi-line comment may span several lines: keep the line counter in
    # step.  Nothing is returned, hence the comment is discarded.
    newlines = t.value.count('\n')
    t.lexer.lineno += newlines

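The token `NUMBER` specifies a number: either a natural number or a floating point number with an optional fractional part and an optional exponent.  Its value is converted to `int` or `float`, respectively.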

In [None]:
def t_NUMBER(t):
    r'(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?'
    # The raw string above is the regular expression of this rule; PLY reads
    # it from the docstring, so it must stay the first statement.
    # The exponent accepts any digit sequence, so literals such as 1e0 or
    # 1e-05 (the way Python itself prints small floats) are scanned as one
    # number instead of being split into a number and an identifier.
    text = t.value
    # A fractional part or an exponent makes the literal a float.
    is_float = any(marker in text for marker in '.eE')
    t.value = float(text) if is_float else int(text)
    return t

In [None]:
def t_STRING(t):
    r'"(?:[^"\\]|\\[\s\S])*"'
    # The raw string above is the regular expression of this rule; PLY reads
    # it from the docstring, so it must stay the first statement.
    # A string is a double quote, followed by any mix of
    #   - characters other than a double quote or a backslash, and
    #   - a backslash together with the character it escapes (so \" does not
    #     end the string),
    # followed by the closing double quote.  Each character can be matched in
    # only one way, so an unterminated string is rejected in linear time
    # rather than by exponential backtracking.
    # The token value keeps the enclosing quotes.
    return t

In [None]:
# Regular expressions of the multi-character operators.  The name after the
# prefix `t_` is the token type the expression belongs to.
t_ASSIGN = r':='
t_AND    = r'&&'
t_OR     = r'\|\|'
t_EQ     = r'=='
t_NE     = r'!='
t_LE     = r'<='
t_GE     = r'>='

The keywords `if`, `while`, `for`, `function`, and `return` have to be dealt with separately as they are syntactically identical to identifiers. The dictionary `Keywords` shown below maps every keyword to its token type.

In [None]:
# Maps every reserved word to its token type.  Each token type used here has
# to be declared in the list `tokens`.
# NOTE(review): the spelling `function` is assumed from the grammar's FUNCTION
# token -- confirm against the programs in Examples/.
Keywords = { 'if'      : 'IF',
             'while'   : 'WHILE',
             'for'     : 'FOR',
             'function': 'FUNCTION',
             'return'  : 'RETURN'
           }

When an identifier is read, we first have to check whether the identifier is one of our keywords.  If so, we assign the corresponding token type that is stored in the dictionary `Keywords`.  Otherwise, the token type is set to `IDENTIFIER`.

In [None]:
def t_IDENTIFIER(t):
    r'[a-zA-Z][a-zA-Z0-9_]*'
    # The raw string above is the regular expression of this rule; PLY reads
    # it from the docstring, so it must stay the first statement.
    # Reserved words look like identifiers, so the token type is decided
    # after the match by a lookup in `Keywords`.
    if t.value in Keywords:
        t.type = Keywords[t.value]
    else:
        t.type = 'IDENTIFIER'
    return t

Operators consisting of a single character do not need an associated token type.
They are declared via the keyword `literals`.

In [None]:
# Single-character operators and delimiters.  They are declared as literals
# and therefore need no token type of their own.
literals = ['+', '-', '*', '/', '%', '(', ')', '{', '}', ';', '<', '>', ',', '!']

White space, i.e. *space characters*, *tabulators*, and *carriage returns* are ignored. 

In [None]:
# Characters the scanner skips: space, tabulator, and carriage return.
t_ignore  = ' \t\r'

Syntactically, newline characters are ignored. However, we still need to keep track of them in order to know the current line number, which is used for error messages.

In [None]:
def t_newline(t):
    r'\n'
    # The raw string above is the regular expression of this rule; PLY reads
    # it from the docstring, so it must stay the first statement.
    # A newline carries no syntactic meaning: bump the line counter used in
    # error messages and produce no token.
    t.lexer.lineno = t.lexer.lineno + 1

Given a `token`, the function `find_column` returns the column where `token` starts.  This is possible because every token contains a reference to the current lexer as `token.lexer`, and this lexer in turn stores the string that is given to it in the attribute `lexer.lexdata`.  Furthermore, `token.lexpos` is the number of characters that precede `token`.

In [None]:
def find_column(token):
    """Return the column (counted from 1) at which `token` starts.

    `token.lexer.lexdata` is the complete text given to the scanner and
    `token.lexpos` is the number of characters preceding the token.
    """
    preceding = token.lexer.lexdata[:token.lexpos]
    # Index of the newline that ends the previous line; -1 on the first line,
    # which makes the first column come out as 1.
    last_newline = preceding.rfind('\n')
    return token.lexpos - last_newline

The function `t_error` is called for any token `t` that cannot be scanned by the lexer.  In this case, `t.value[0]` is the first character that is not recognized by the scanner.  This character is discarded.  After that, scanning proceeds as if nothing has happened.

In [None]:
def t_error(t):
    # Called when no scanner rule matches.  `t.value[0]` is the offending
    # character: report it with its position, then skip it so that scanning
    # resumes with the next character.
    position = find_column(t)
    offending = t.value[0]
    print(f"Illegal character '{offending}' in line {t.lineno}, column {position}.")
    t.lexer.skip(1)

In [None]:
# NOTE(review): presumably a workaround so that PLY finds a `__file__` entry
# when it is run from a notebook, where this name is not defined -- confirm.
__file__ = 'main'

In [None]:
# Generate the scanner from the token declarations and `t_*` rules above.
lexer = lex.lex()

In [None]:
def test_scanner(file_name):
    """Print the contents of the file `file_name` followed by its tokens."""
    with open(file_name, 'r') as source:
        text = source.read()
    print(text)
    lexer.input(text)
    # Line numbers have to start from 1 again for every scan.
    lexer.lineno = 1
    # Iterating over the lexer performs the actual scanning.
    for token in lexer:
        print(token)

In [None]:
test_scanner('Examples/sum-for.sl')

In [None]:
import ply.yacc as yacc

Below is the grammar for our language:
```
program
    : 𝜆  
    | stmnt program
    ;
stmnt 
    : FUNCTION IDENTIFIER '(' id_list ')' stmnt              # new
    | IF '(' bool_expr ')' stmnt                 
    | WHILE '(' bool_expr ')' stmnt
    | FOR '(' assign ';' bool_expr ';' assign ')' stmnt      # new
    | '{' program '}' 
    | assign ';'                                             # new
    | expr ';'
    | RETURN expr ';'                                        # new
    ;
assign                                                       # new
    : IDENTIFIER ':=' expr 
    ;
id_list
    : 𝜆 
    | ne_id_list
    ;
ne_id_list
    : IDENTIFIER
    | IDENTIFIER ',' ne_id_list
    ;
bool_expr                                                     # new
    : bool_expr OR conjunction                                # new
    | conjunction
    ;
conjunction                                                   # new
    : conjunction AND negation                                # new
    | negation                                                # new
    ;
negation                                                      # new
    : '!' negation                                            # new
    | bool_atom                                               # new
    ; 
bool_atom
    : '(' bool_expr ')'                                       # new
    | expr '==' expr     
    | expr '!=' expr     
    | expr '<=' expr     
    | expr '>=' expr     
    | expr '<'  expr      
    | expr '>'  expr     
    ;
expr: expr '+' product                 
    | expr '-' product
    | product
    ;          
product
    : product '*' factor               
    | product '/' factor
    | factor
    ;
factor
    : '(' expr ')' 
    | NUMBER
    | STRING
    | IDENTIFIER
    | IDENTIFIER '(' expr_list ')'
    ;
expr_list
    : 𝜆 
    | ne_expr_list
    ;
ne_expr_list
    : expr
    | expr ',' ne_expr_list
    ;
```

The *start variable* of our grammar is `program`.

In [None]:
# Start variable of the grammar.
start = 'program'

An example program that conforms to this grammar is stored in the file `sum-for.sl` in the directory `Examples`.

In [None]:
!cat Examples/sum-for.sl

A program is a list of statements.

In [None]:
def p_program_empty(p):
    "program : "
    # The string above is the grammar rule (PLY reads it from the docstring).
    # An empty program is a statement list that holds nothing but the tag '.'.
    empty_list = ('.',)
    p[0] = empty_list

def p_program_more(p):
    "program : stmnt program"
    # Put the statement in front of the remaining statements.  The tag '.' of
    # the remaining list is dropped so that the result stays one flat tuple.
    first, rest = p[1], p[2]
    p[0] = ('.', first, *rest[1:])

In [None]:
def p_stmnt_function(p):
    "stmnt : FUNCTION IDENTIFIER '(' id_list ')' stmnt"
    # The string above is the grammar rule (PLY reads it from the docstring).
    # Node: ('def', function name, parameter list, body).
    name, parameters, body = p[2], p[4], p[6]
    p[0] = ('def', name, parameters, body)

def p_stmnt_if(p):
    "stmnt : IF '(' bool_expr ')' stmnt"
    # Node: ('if', condition, statement).
    condition, body = p[3], p[5]
    p[0] = ('if', condition, body)

def p_stmnt_while(p):
    "stmnt : WHILE '(' bool_expr ')' stmnt"
    # Node: ('while', condition, statement).
    condition, body = p[3], p[5]
    p[0] = ('while', condition, body)

def p_stmnt_for(p):
    "stmnt : FOR '(' assign ';' bool_expr ';' assign ')' stmnt "
    # The string above is the grammar rule (PLY reads it from the docstring).
    # Node: ('for', initialization, condition, update, statement), where the
    # initialization and the update are assignment nodes.
    p[0] = ('for', p[3], p[5], p[7], p[9])
    
def p_stmnt_block(p):
    "stmnt : '{' program '}'"
    # The string above is the grammar rule (PLY reads it from the docstring).
    # A block is represented by the statement list it encloses.
    enclosed = p[2]
    p[0] = enclosed

def p_stmnt_assign(p):
    "stmnt : assign ';'"
    # The assignment node is passed on unchanged.
    assignment = p[1]
    p[0] = assignment

def p_stmnt_expr(p):
    "stmnt : expr ';'"
    # Node: ('expr', expression) marks an expression used as a statement.
    expression = p[1]
    p[0] = ('expr', expression)

def p_stmnt_return(p):
    "stmnt : RETURN expr ';'"
    # The string above is the grammar rule (PLY reads it from the docstring).
    # Node: ('return', expression).
    p[0] = ('return', p[2])

In [None]:
def p_assign(p):
    "assign : IDENTIFIER ASSIGN expr"
    # The string above is the grammar rule (PLY reads it from the docstring).
    # Node: (':=', variable name, expression).
    target, value = p[1], p[3]
    p[0] = (':=', target, value)

In [None]:
def p_id_list_empty(p):
    "id_list : "
    # The string above is the grammar rule (PLY reads it from the docstring).
    # No parameters: a list that holds nothing but the tag '.'.
    no_names = ('.',)
    p[0] = no_names

def p_id_list_ne(p):
    "id_list : ne_id_list"
    # A non-empty list is passed on unchanged.
    names = p[1]
    p[0] = names

def p_ne_id_list_one(p):
    "ne_id_list : IDENTIFIER"
    # A list with a single name.
    name = p[1]
    p[0] = ('.', name)

def p_ne_id_list_more(p):
    "ne_id_list : IDENTIFIER ',' ne_id_list"
    # Put the name in front of the remaining names; the tag '.' of the
    # remaining list is dropped to keep the result flat.
    name, others = p[1], p[3]
    p[0] = ('.', name, *others[1:])

In [None]:
def p_bool_expr_or(p):
    "bool_expr : bool_expr OR conjunction"
    # The string above is the grammar rule (PLY reads it from the docstring).
    # Node: ('||', left operand, right operand), the form that `evaluate`
    # handles for a disjunction.
    p[0] = ('||', p[1], p[3])

def p_bool_expr_conj(p):
    "bool_expr : conjunction"
    # A single conjunction is passed on unchanged.
    p[0] = p[1]

In [None]:
def p_conjunction_and(p):
    "conjunction : conjunction AND negation"
    # The string above is the grammar rule (PLY reads it from the docstring).
    # Node: ('&&', left operand, right operand), the form that `evaluate`
    # handles for a conjunction.
    p[0] = ('&&', p[1], p[3])

def p_conjunction_neg(p):
    "conjunction : negation"
    # A single negation is passed on unchanged.
    p[0] = p[1]

In [None]:
def p_negation_not(p):
    "negation : '!' negation"
    # The string above is the grammar rule (PLY reads it from the docstring).
    # Node: ('!', operand), the form that `evaluate` handles for a negation.
    p[0] = ('!', p[2])

def p_negation_atom(p):
    "negation : bool_atom"
    # An atom without negation is passed on unchanged.
    p[0] = p[1]

In [None]:
def p_bool_atom_paren(p):
    "bool_atom : '(' bool_expr ')'"
    # The string above is the grammar rule (PLY reads it from the docstring).
    # Parentheses only group: the node of the enclosed expression is reused.
    p[0] = p[2]
    
def p_bool_atom_eq(p):
    "bool_atom : expr EQ expr"
    # The string above is the grammar rule (PLY reads it from the docstring).
    # Every comparison yields the node (operator, left operand, right operand).
    left, right = p[1], p[3]
    p[0] = ('==', left, right)

def p_bool_atom_ne(p):
    "bool_atom : expr NE expr"
    left, right = p[1], p[3]
    p[0] = ('!=', left, right)

def p_bool_atom_le(p):
    "bool_atom : expr LE expr"
    left, right = p[1], p[3]
    p[0] = ('<=', left, right)

def p_bool_atom_ge(p):
    "bool_atom : expr GE expr"
    left, right = p[1], p[3]
    p[0] = ('>=', left, right)

def p_bool_atom_lt(p):
    "bool_atom : expr '<' expr"
    left, right = p[1], p[3]
    p[0] = ('<', left, right)

def p_bool_atom_gt(p):
    "bool_atom : expr '>' expr"
    left, right = p[1], p[3]
    p[0] = ('>', left, right)

In [None]:
def p_expr_plus(p):
    "expr : expr '+' product"
    # The string above is the grammar rule (PLY reads it from the docstring).
    # Binary operators yield the node (operator, left operand, right operand).
    left, right = p[1], p[3]
    p[0] = ('+', left, right)

def p_expr_minus(p):
    "expr : expr '-' product"
    left, right = p[1], p[3]
    p[0] = ('-', left, right)

def p_expr_product(p):
    "expr : product"
    # Passed on unchanged.
    inner = p[1]
    p[0] = inner

def p_product_times(p):
    "product : product '*' factor"
    left, right = p[1], p[3]
    p[0] = ('*', left, right)

def p_product_divide(p):
    "product : product '/' factor"
    left, right = p[1], p[3]
    p[0] = ('/', left, right)

def p_product_factor(p):
    "product : factor"
    # Passed on unchanged.
    inner = p[1]
    p[0] = inner

def p_factor_paren(p):
    "factor : '(' expr ')'"
    # Parentheses only group: the node of the enclosed expression is reused.
    enclosed = p[2]
    p[0] = enclosed

def p_factor_number(p):
    "factor : NUMBER"
    # A number is represented by its value.
    number = p[1]
    p[0] = number

def p_factor_string(p):
    "factor : STRING"
    # A string literal is represented by the token value.
    literal = p[1]
    p[0] = literal

def p_factor_identifier(p):
    "factor : IDENTIFIER"
    # A variable is represented by its name.
    name = p[1]
    p[0] = name

def p_factor_fct_call(p):
    "factor : IDENTIFIER '(' expr_list ')'"
    # Node: ('call', function name, argument 1, ..., argument n).  The tag
    # '.' of the argument list is dropped.
    name, arguments = p[1], p[3]
    p[0] = ('call', name, *arguments[1:])

In [None]:
def p_expr_list_empty(p):
    "expr_list : "
    # The string above is the grammar rule (PLY reads it from the docstring).
    # No arguments: a list that holds nothing but the tag '.'.
    no_arguments = ('.',)
    p[0] = no_arguments

def p_expr_list_more(p):
    "expr_list : ne_expr_list"
    # A non-empty list is passed on unchanged.
    arguments = p[1]
    p[0] = arguments

def p_ne_expr_list_one(p):
    "ne_expr_list : expr"
    # A list with a single expression.
    expression = p[1]
    p[0] = ('.', expression)

def p_ne_expr_list_more(p):
    "ne_expr_list : expr ',' ne_expr_list"
    # Put the expression in front of the remaining ones; the tag '.' of the
    # remaining list is dropped to keep the result flat.
    expression, others = p[1], p[3]
    p[0] = ('.', expression, *others[1:])

In [None]:
def p_error(t):
    # Called by the parser on a syntax error.  `t` is the offending token, or
    # None if the input ended too early.
    if t is None:
        # There is no token, hence no position to report.  The column must
        # not be computed in this case: `find_column` reads attributes of the
        # token and would fail on None.
        print('Syntax error at end of input.')
        return
    column = find_column(t)
    print(f'Syntax error at token "{t.value}" in line {t.lineno}, column {column}.')

In [None]:
# Generate the parser from the grammar rules (the `p_*` functions) above.
# NOTE(review): `write_tables=False` presumably keeps PLY from caching the
# parse table in a file, and `debug=True` presumably makes it write a
# description of the generated parser -- confirm against the PLY documentation.
parser = yacc.yacc(write_tables=False, debug=True)

The parser shown above will transform the program `sum.sl` into the *nested tuple* stored in the file `sum.ast`.

In [None]:
%run AST2Dot.ipynb

The function `parse` takes a `file_name` as its sole argument.  The file is read and parsed. 
The resulting parse tree is visualized using `graphviz`.  It is important to reset the
attribute `lineno` of the scanner, for otherwise error messages will not have the correct line numbers.

In [None]:
def parse(file_name):
    """Parse the file `file_name`, print the resulting nested tuple, and
    return the result of `tuple2dot` for it.
    """
    # Without this reset, error messages would continue the line count of the
    # previous run.
    lexer.lineno = 1
    with open(file_name, 'r') as source:
        text = source.read()
    tree = yacc.parse(text)
    print(tree)
    return tuple2dot(tree)

In [None]:
!cat Examples/factorial.sl

In [None]:
parse('Examples/factorial.sl')

In [None]:
from typing import TypeVar
# `NestedTuple` is first bound to a type variable so that the definition on
# the following line can mention the name on its own right-hand side.
NestedTuple = TypeVar('NestedTuple')
# Type of the syntax trees: an int, a str, or a tuple of such trees.
NestedTuple = int | str | tuple[NestedTuple, ...]
# Type of the numeric values the interpreter computes with.
Number      = int | float

The class `ReturnValue` encapsulates a value that is to be returned.

In [None]:
class ReturnValue(Exception):
    """Exception that carries the value of a `return` statement.

    The value is available as the attribute `mValue`.
    """
    def __init__(self, value):
        # Hand the value to `Exception` as well, so that it shows up in
        # `args` and in the traceback if the exception is never caught.
        super().__init__(value)
        self.mValue = value

Below, we can see how we can catch an exception in Python.

In [None]:
# Demonstration: the value stored in the exception is available again in the
# handler.
try:
    raise ReturnValue(42)
except ReturnValue as result:
    print(result.mValue)

The type checker needs some forward declarations.

In [None]:
# Forward declaration for the type checker only; it does nothing and is
# replaced by the real definition of `execute` further down.
def execute(stmnt: NestedTuple, Values: dict[str, Number], Definitions: dict[str, NestedTuple]) -> None:
    return None # type: ignore

In [None]:
# Forward declaration for the type checker only; it does nothing and is
# replaced by the real definition of `evaluate` further down.
def evaluate(expr: NestedTuple, Values: dict[str, Number], Definitions: dict[str, NestedTuple]) -> Number:
    return None # type: ignore

The function `execute_tuple` takes three arguments:
- `Statement_List` is a list of statements,
- `Values` is a dictionary assigning floating point values to variable names.
- `Definitions` is a dictionary assigning function definitions to function names.
The function executes the statements in `Statement_List`.  If an assignment statement is executed, the dictionary `Values` is updated.

In [None]:
def execute_tuple(Statement_List: list[NestedTuple], 
                  Values: dict[str, Number], 
                  Definitions: dict[str, NestedTuple]) -> None:
    """Execute the statements in `Statement_List` one after the other.

    `Values` maps variable names to values and `Definitions` maps function
    names to function definitions; both are handed to `execute` for every
    statement.
    """
    for statement in Statement_List:
        execute(statement, Values, Definitions)

The function `execute` takes three arguments:
- `stmnt` is a statement,
- `Values` is a dictionary assigning values to variable names.
- `Definitions` is a dictionary mapping function names to their definitions.

The function executes the statement `stmnt`.  If an assignment statement is executed,
the dictionary `Values` is updated.

In [None]:
def execute(stmnt: NestedTuple, Values: dict[str, Number], Definitions: dict[str, NestedTuple]) -> None:
    match stmnt:
        case '.', *SL:
            execute_tuple(SL, Values, Definitions)
        case # function definions
             # your code here # type: ignore
        case ':=', var, value:
            Values[var] = evaluate(value, Values, Definitions)
        case 'expr', expr:
            evaluate(expr, Values, Definitions)
        case 'if', test, *SL:
            if evaluate(test, Values, Definitions):
                execute_tuple(SL, Values, Definitions)
        case 'while', test, *SL:
            while evaluate(test, Values, Definitions):
                execute_tuple(SL, Values, Definitions)
        case # for statement 
            # your code here
        case # return statement
            # your code here, try to raise an exception
        case 'call', _, *AL: # function calls are evaluated, not executed
            evaluate(stmnt, Values, Definitions)
        case _:
            assert False, f'{stmnt} unexpected'

In [None]:
import math

The function `evaluate` takes three arguments:
- `expr` is a logical expression or an arithmetic expression,
- `Values` is a dictionary assigning numeric values to variable names.
- `Definitions` is a dictionary assigning function definitions to function names.

The function evaluates the given expression and returns this value.

In [None]:
def evaluate(expr: NestedTuple, Values: dict[str, Number], Definitions: dict[str, NestedTuple]) -> Number:
    match expr:
        case int(number):
            return number
        case float(number):
            return number
        case str(name):
            if name[0] == '"' and name[-1] == '"':
                return name[1:-1] # type: ignore
            else:
                return Values[name] 
        case '||', lhs, rhs:
            return evaluate(lhs, Values, Definitions) or evaluate(rhs, Values, Definitions)
        case '&&', lhs, rhs:
            return evaluate(lhs, Values, Definitions) and evaluate(rhs, Values, Definitions)
        case '!', lhs:
            return not evaluate(lhs, Values, Definitions)
        case '==', lhs, rhs:
            return evaluate(lhs, Values, Definitions) == evaluate(rhs, Values, Definitions)
        case '!=', lhs, rhs:
            return evaluate(lhs, Values, Definitions) != evaluate(rhs, Values, Definitions)
        case '<', lhs, rhs:
            return evaluate(lhs, Values, Definitions) < evaluate(rhs, Values, Definitions)
        case '>', lhs, rhs:
            return evaluate(lhs, Values, Definitions) > evaluate(rhs, Values, Definitions)
        case '<=', lhs, rhs:
            return evaluate(lhs, Values, Definitions) <= evaluate(rhs, Values, Definitions)
        case '>=', lhs, rhs:
            return evaluate(lhs, Values, Definitions) >= evaluate(rhs, Values, Definitions)
        case '+', lhs, rhs:
            return evaluate(lhs, Values, Definitions) + evaluate(rhs, Values, Definitions)
        case '-', lhs, rhs:
            return evaluate(lhs, Values, Definitions) - evaluate(rhs, Values, Definitions)
        case '*', lhs, rhs:
            return evaluate(lhs, Values, Definitions) * evaluate(rhs, Values, Definitions)
        case '/', lhs, rhs:
            l = evaluate(lhs, Values, Definitions)
            r = evaluate(rhs, Values, Definitions) 
            if isinstance(l, float) or isinstance(r, float):
                return l / r
            else:
                return l // r
        case 'call', fct_name, *AL:
            if fct_name in Definitions:
                # your code here
                # lots of it!
                # remember that the function returns its value by raising an exception!
            
            
            
            
            else:
                match fct_name:
                    case 'read':
                        s = input('Please enter a number: ')
                        if '.' in s or 'e' in s or 'E' in s:
                            return float(s)
                        else:
                            return int(s)
                    case 'print':
                        print(evaluate(AL[0], Values, Definitions)) 
                    case 'sqrt':
                        return math.sqrt(evaluate(AL[0], Values, Definitions))
                    case 'exp':
                        return math.exp(evaluate(AL[0], Values, Definitions))
                    case 'ln':
                        return math.log(evaluate(AL[0], Values, Definitions))
                    case 'sin':
                        return math.sin(evaluate(AL[0], Values, Definitions))    
                    case 'cos':
                        return math.cos(evaluate(AL[0], Values, Definitions))    
                    case 'tan':
                        return math.tan(evaluate(AL[0], Values, Definitions))    
                    case 'arctan':
                        return math.atan(evaluate(AL[0], Values, Definitions))                        
                    case _:
                        assert False, f'function name {fct_name} is unknown'
        case _:
            assert False, f'{expr} unexpected' 
    return None # type: ignore

In [None]:
!cat Examples/factorial.sl

In [None]:
def main(file):
    """Print, parse, visualize, and execute the program stored in `file`."""
    with open(file, 'r') as source:
        text = source.read()
    print(text)
    # Line numbers in error messages have to start from 1 for every program.
    lexer.lineno = 1
    tree = yacc.parse(text)
    print(tree)
    display(tuple2dot(tree))
    # Execution starts with no variables and no function definitions.
    execute(tree, {}, {})

In [None]:
main('Examples/factorial.sl')

In [None]:
main('Examples/solve.sl')

In [None]:
main('Examples/pi.sl')

In [None]:
main('Examples/sum-for.sl')

In [None]:
main('Examples/e.sl')

In [None]:
math.exp(1)