In [8]:
import os
import re
from typing import NamedTuple
from dataclasses import dataclass

In [9]:
# Read the whole sample program into memory and echo it so the reader can
# see the source that the following cells tokenize.
with open("./sample.mb") as f:
    sample = f.read()
print(sample)

PRINT "HELLO THERE!";
LET A = 10;
LET B = 5;
LET C = -3;
LET Z = (A*B) + C;
PRINT "ANSWER IS {Z}";



In [47]:
@dataclass
class Token:
    """A single lexical token: a category tag plus the matched source text."""
    # Category tag; this notebook emits "KEYWORD", "IDENTIFIER", "OPERATOR",
    # "SEPARATOR" and "LITERAL".
    type: str
    # The token text (for string literals, the text without the quotes).
    value: str
        
@dataclass
class PrintNode:
    """AST statement node for a PRINT statement."""
    # Unannotated, so the dataclass treats it as a plain class attribute
    # (not an __init__ field); used to identify the node kind.
    name = 'PrintNode'
    # Leaf nodes of the statement, in source order.
    children: list

@dataclass
class LetNode:
    """AST statement node for a LET assignment."""
    # Unannotated, so the dataclass treats it as a plain class attribute
    # (not an __init__ field); used to identify the node kind.
    name = "LetNode"
    # Leaf nodes of the statement, in source order.
    children: list

@dataclass
class OperatorNode:
    """AST leaf node wrapping an operator token."""
    # Plain class attribute (not a dataclass field) identifying the node kind.
    name = 'OperatorNode'
    # The operator text, e.g. '=' or '+'.
    value: str

@dataclass
class SeparatorNode:
    """AST leaf node wrapping a separator token."""
    # Plain class attribute (not a dataclass field) identifying the node kind.
    name = 'SeparatorNode'
    # The separator text, e.g. '(' or '{'.
    value: str

@dataclass
class LiteralNode:
    """AST leaf node wrapping a literal token (string text or a number)."""
    # Plain class attribute (not a dataclass field) identifying the node kind.
    name = 'LiteralNode'
    # The literal text exactly as stored in the token.
    value: str
    
@dataclass
class IdentifierNode:
    """AST leaf node wrapping an identifier (variable name) token."""
    # Plain class attribute (not a dataclass field) identifying the node kind.
    name = 'IdentifierNode'
    # The variable name.
    value: str

In [48]:
def _split_statements(source):
    """Split program text into statements on ';' terminators.

    A ';' inside a double-quoted string is part of the string and does not end
    the statement. Text after the final terminator (normally just trailing
    newlines) is discarded.

    Returns:
        list of statement texts, without their terminating ';'.

    Raises:
        Exception: if a double-quoted string is never closed.
    """
    statements = []
    current = []
    in_string = False
    for char in source:
        if char == '"':
            in_string = not in_string
        if char == ';' and not in_string:
            statements.append(''.join(current))
            current = []
        else:
            current.append(char)
    if in_string:
        raise Exception('Unterminated string literal: missing closing "')
    return statements


# Vocabulary of the language.
keywords = ["LET", "PRINT", "AND", "OR", "NOT", "IF", "ELSE", "GOTO"]
separator = ['[', ']', '(', ')', '{', '}', ';', ',', '"']
operator = ['+', '-', '*', '/', '%', '=', '!=', '>', '<', '==', '<=', '>=']

# Lexer state shared by the parse_* helpers below.
tokens = []        # output token stream
variables = set()  # names introduced by LET so far
count = 0
# One entry per ';'-terminated statement of the sample program.
lines = _split_statements(sample)

def parse_string(string):
    """Emit one LITERAL token holding `string` minus its first and last character.

    The caller passes the text including its surrounding quotes; the slice
    removes them. The token is appended to the module-level `tokens` list.
    """
    unquoted = string[1:-1]
    literal = Token("LITERAL", unquoted)
    tokens.append(literal)

def parse_expression(expression):
    """Tokenize an arithmetic expression and append the tokens to `tokens`.

    The expression is first cut into lexemes (maximal runs of word characters
    and dots, two-character comparison operators, or any other single
    non-blank character), so multi-digit numbers, decimals, multi-letter
    variable names and operators such as '==' or '<=' each become ONE token.
    Whitespace between lexemes is ignored.

    Args:
        expression: the expression text, e.g. "(A*B)+C".

    Raises:
        Exception: "Variable <name> not defined" for an alphabetic name that
            has not been introduced by LET; "Something wrong in the
            expression" for any other unrecognised lexeme.
    """
    # Order matters: two-character operators must be tried before the
    # single-character fallback, otherwise '==' would be read as '=' '='.
    lexemes = re.findall(r'[\w.]+|[!=<>]=|\S', expression)
    for e in lexemes:
        if e in separator:
            tokens.append(Token("SEPARATOR", e))
        elif e in operator:
            tokens.append(Token("OPERATOR", e))
        elif e.isalpha():
            if e not in variables:
                raise Exception(f"Variable {e} not defined")
            tokens.append(Token('IDENTIFIER', e))
        elif re.fullmatch(r'\d+(?:\.\d+)?', e):
            # Integer or decimal literal. A leading '-' is tokenized
            # separately as an OPERATOR.
            tokens.append(Token("LITERAL", e))
        else:
            # Anything else (e.g. '2A', '1.2.3', a lone '.') is rejected
            # rather than silently dropped.
            raise Exception("Something wrong in the expression")


def parse_print(string):
    """Tokenize the argument of a PRINT statement into `tokens`.

    `string` must be a double-quoted string. Without braces it becomes a
    single LITERAL token (via parse_string). With braces, the text is cut
    into LITERAL tokens for the plain parts and, for every `{...}`
    placeholder, a '{' SEPARATOR, the tokens of the enclosed expression
    (via parse_expression) and a '}' SEPARATOR.

    Raises:
        AssertionError: if the argument is not wrapped in double quotes.
        Exception: if braces are nested or unbalanced (plus anything raised
            by parse_expression for the placeholder contents).
    """
    # The length check keeps an empty argument from raising IndexError and
    # rejects a lone '"' that would otherwise count as both quotes.
    assert len(string) >= 2 and string[0] == string[-1] == '"', "Error in PRINT. Missing \""
    if not(('{' in string) or ('}' in string)):
        parse_string(string)
        return

    # strings with vars in a PRINT statement
    body = string[1:-1]
    start = 0              # index where the current literal/expression run begins
    in_placeholder = False
    for i, char in enumerate(body):
        if char == "{":
            if in_placeholder:
                raise Exception("Error in PRINT. Nested '{'")
            if i > start:
                tokens.append(Token("LITERAL", body[start:i]))
            start = i + 1
            tokens.append(Token("SEPARATOR", "{"))
            in_placeholder = True
        elif char == "}":
            if not in_placeholder:
                raise Exception("Error in PRINT. Unmatched '}'")
            parse_expression(body[start:i])
            tokens.append(Token("SEPARATOR", "}"))
            start = i + 1
            in_placeholder = False
    if in_placeholder:
        raise Exception("Error in PRINT. Missing '}'")
    # Keep the text that follows the last placeholder (it used to be lost).
    if start < len(body):
        tokens.append(Token("LITERAL", body[start:]))
        
def parse_let(expression):
    """Tokenize the body of a LET statement (`NAME=VALUE`, blanks removed).

    Appends an IDENTIFIER token for the target, an '=' OPERATOR token and
    then either a single LITERAL token (when VALUE is a plain number) or the
    tokens produced by parse_expression. The target name is added to
    `variables` once the right-hand side has been accepted.

    Raises:
        AssertionError: if the target name is not purely alphabetic.
        Exception: if the '=' or the value is missing (plus anything raised
            by parse_expression).
    """
    # Split on the FIRST '=' only, so comparison operators in the value
    # ('==', '<=', '>=', '!=') stay intact.
    name, equals, value = expression.partition('=')
    assert name.isalpha(), "Invalid Variable name"
    if not equals or not value:
        raise Exception("Error in LET. Expected <name> = <value>")

    tokens.append(Token('IDENTIFIER', name))
    tokens.append(Token('OPERATOR', '='))
    if value.lstrip('-').isnumeric():
        tokens.append(Token("LITERAL", value))
    else:
        try:
            # Only the parse result matters; testing the float's truthiness
            # would drop zero-valued literals such as '0.0'.
            float(value)
        except ValueError:
            parse_expression(value)
        else:
            tokens.append(Token("LITERAL", value))
    # Register the name last so `LET A = A` is rejected by parse_expression
    # when A has not been defined by an earlier statement.
    variables.add(name)

# Tokenize every statement: emit its KEYWORD token, delegate the rest to the
# matching parse_* helper, then close the statement with a ';' SEPARATOR.
for line in lines:
    # Statements may span or be preceded by line breaks and indentation.
    line = line.replace('\r', '').replace('\n', '').strip()
    # Split off the keyword on any run of whitespace; at most two chunks.
    chunks = line.split(None, 1)
    if chunks:
        argument = chunks[1] if len(chunks) > 1 else ''
        if chunks[0] == "PRINT":
            tokens.append(Token("KEYWORD", chunks[0]))
            parse_print(argument)
        elif chunks[0] == "LET":
            tokens.append(Token("KEYWORD", chunks[0]))
            # LET bodies carry no significant whitespace.
            parse_let(''.join(argument.split()))
        else:
            # Dropping an unknown statement would silently change the program.
            raise Exception(f"Unsupported statement: {chunks[0]}")
    tokens.append(Token("SEPARATOR", ';'))

In [49]:
# Display the token stream built by the tokenizer cell.
tokens

[Token(type='KEYWORD', value='PRINT'),
 Token(type='LITERAL', value='HELLO THERE!'),
 Token(type='SEPARATOR', value=';'),
 Token(type='KEYWORD', value='LET'),
 Token(type='IDENTIFIER', value='A'),
 Token(type='OPERATOR', value='='),
 Token(type='LITERAL', value='10'),
 Token(type='SEPARATOR', value=';'),
 Token(type='KEYWORD', value='LET'),
 Token(type='IDENTIFIER', value='B'),
 Token(type='OPERATOR', value='='),
 Token(type='LITERAL', value='5'),
 Token(type='SEPARATOR', value=';'),
 Token(type='KEYWORD', value='LET'),
 Token(type='IDENTIFIER', value='C'),
 Token(type='OPERATOR', value='='),
 Token(type='LITERAL', value='-3'),
 Token(type='SEPARATOR', value=';'),
 Token(type='KEYWORD', value='LET'),
 Token(type='IDENTIFIER', value='Z'),
 Token(type='OPERATOR', value='='),
 Token(type='SEPARATOR', value='('),
 Token(type='IDENTIFIER', value='A'),
 Token(type='OPERATOR', value='*'),
 Token(type='IDENTIFIER', value='B'),
 Token(type='SEPARATOR', value=')'),
 Token(type='OPERATOR', value=

In [54]:
# Build a flat AST: one statement node per PRINT/LET keyword, whose children
# are the leaf nodes for every token up to the statement's ';' terminator.

# Leaf node class for each token type that may appear inside a statement.
leaf_nodes = {
    'LITERAL': LiteralNode,
    'IDENTIFIER': IdentifierNode,
    'OPERATOR': OperatorNode,   # also needed in PRINT, e.g. "{A+B}"
    'SEPARATOR': SeparatorNode,
}
# Statement node class for each supported keyword.
statement_nodes = {'PRINT': PrintNode, 'LET': LetNode}

ast = []
i = 0
while i < len(tokens):
    if tokens[i].type == 'KEYWORD' and tokens[i].value in statement_nodes:
        statement = statement_nodes[tokens[i].value]([])
        ast.append(statement)
        i += 1
        # Consume tokens up to the terminator. The type check keeps a string
        # literal whose text is ';' from ending the statement early.
        while i < len(tokens) and not (tokens[i].type == 'SEPARATOR' and tokens[i].value == ';'):
            leaf = leaf_nodes.get(tokens[i].type)
            if leaf is not None:
                statement.children.append(leaf(value=tokens[i].value))
            i += 1
        if i >= len(tokens):
            raise Exception(f"Missing ';' after {statement.name} statement")
    # Step over the terminator (or any token outside a statement).
    i += 1

In [55]:
# Display the list of statement nodes built by the AST cell.
ast

[PrintNode(children=[LiteralNode(value='HELLO THERE!')]),
 LetNode(children=[IdentifierNode(value='A'), OperatorNode(value='='), LiteralNode(value='10')]),
 LetNode(children=[IdentifierNode(value='B'), OperatorNode(value='='), LiteralNode(value='5')]),
 LetNode(children=[IdentifierNode(value='C'), OperatorNode(value='='), LiteralNode(value='-3')]),
 LetNode(children=[IdentifierNode(value='Z'), OperatorNode(value='='), SeparatorNode(value='('), IdentifierNode(value='A'), OperatorNode(value='*'), IdentifierNode(value='B'), SeparatorNode(value=')'), OperatorNode(value='+'), IdentifierNode(value='C')]),
 PrintNode(children=[LiteralNode(value='ANSWER IS '), SeparatorNode(value='{'), IdentifierNode(value='Z'), SeparatorNode(value='}')])]

In [56]:
# Pretty-print the AST: the statement kind on one line, then its children on
# an indented branch line, separated by single spaces.
for node in ast:
    branch = ' '.join(['\t└── ', *map(str, node.children)])
    print(node.name)
    print(branch)

PrintNode
	└──  LiteralNode(value='HELLO THERE!')
LetNode
	└──  IdentifierNode(value='A') OperatorNode(value='=') LiteralNode(value='10')
LetNode
	└──  IdentifierNode(value='B') OperatorNode(value='=') LiteralNode(value='5')
LetNode
	└──  IdentifierNode(value='C') OperatorNode(value='=') LiteralNode(value='-3')
LetNode
	└──  IdentifierNode(value='Z') OperatorNode(value='=') SeparatorNode(value='(') IdentifierNode(value='A') OperatorNode(value='*') IdentifierNode(value='B') SeparatorNode(value=')') OperatorNode(value='+') IdentifierNode(value='C')
PrintNode
	└──  LiteralNode(value='ANSWER IS ') SeparatorNode(value='{') IdentifierNode(value='Z') SeparatorNode(value='}')


In [59]:
def _escape_print_literal(text):
    """Make literal text inert inside a double-quoted Python f-string.

    Backslashes and double quotes are backslash-escaped and braces are
    doubled, so the text can neither terminate the generated string nor open
    a replacement field.
    """
    return (
        text.replace('\\', '\\\\')
        .replace('"', '\\"')
        .replace('{', '{{')
        .replace('}', '}}')
    )


# Translate the AST into Python source, one line per statement.
# TODO: keep the quotes in the generated tokens; add a syntax check on the AST.
generated_lines = []
for node in ast:
    if node.name == 'PrintNode':
        # Only literal text is escaped; separator/identifier/operator nodes
        # form the f-string placeholders and are emitted verbatim.
        body = ''.join(
            _escape_print_literal(child.value) if child.name == 'LiteralNode' else child.value
            for child in node.children
        )
        generated_lines.append(f'print(f"{body}")')
    elif node.name == 'LetNode':
        generated_lines.append(''.join(child.value for child in node.children))
generated_code = ''.join(code_line + '\n' for code_line in generated_lines)

In [61]:
# Show the generated Python source before running it.
print(generated_code)

print(f"HELLO THERE!")
A=10
B=5
C=-3
Z=(A*B)+C
print(f"ANSWER IS {Z}")



In [62]:
# Run the generated program.
# NOTE(review): exec() is called without a namespace argument, so the code
# runs in the notebook's own globals and every variable the program assigns
# becomes a notebook global. Only execute code generated from trusted sources.
exec(generated_code)

HELLO THERE!
ANSWER IS 47
