In [75]:
tests = [
    "add 1 to 2",
    "let x be 2\nlet y be 5\nadd x to y",
    "let x be 3",
    "does x = y ?",
    "let x be 'foo'\nlet y be 'foo'\ndoes x = y ?",
    "send 'hello world' to out",
]

In [90]:
from collections import defaultdict, namedtuple
from enum import Enum
from sys import stdout

class TokenType(Enum):
    IDENTIFIER = 1
    NUMERAL = 2
    STRING = 3
    KEYWORD = 4
    OPERATOR = 5
    SEPARATOR = 6
    COMMENT = 7

Token = namedtuple('Token', ['value', 'type'])
    
keywords = set([
    'let',
    'does',
    'send',
    'to',
    'be',
    'add',
])
operators = set([
    '=',
    '?',
])

memory = defaultdict(lambda:0)
memory['out'] = stdout
    
def token_type(t):
    if t[0].isdigit():
        return TokenType.NUMERAL
    elif t[0] == ',':
        return TokenType.SEPARATOR
    elif t in keywords:
        return TokenType.KEYWORD
    elif t in operators:
        return TokenType.OPERATOR
    return TokenType.IDENTIFIER

def tokenize(s):
    current_token = ''
    reading_string = False
    reading_comment = False
    out = []
    for char in s:
        if char == ' ' and not reading_string and not reading_comment:
            if len(current_token) > 0:
                out.append(Token(current_token, token_type(current_token)))
                current_token = ''
        elif char == '\'' and not reading_comment:
            if reading_string:
                out.append(Token(current_token, TokenType.STRING))
                current_token = ''
            reading_string = not reading_string
        elif char == '#':
            out.append(Token(current_token, token_type(current_token)))
            current_token = ''
            reading_comment = not reading_comment
        else:
            current_token += char
    if len(current_token) > 0:
        if reading_comment:
            out.append(Token(current_token, TokenType.COMMENT))
        else:
            out.append(Token(current_token, token_type(current_token)))
    return out

def get_value(t):
    if t.type == TokenType.NUMERAL:
        if '.' in t.value:
            return float(t.value)
        else:
            return int(t.value)
    elif t.type == TokenType.STRING:
        return t.value
    elif t.type == TokenType.IDENTIFIER:
        return memory[t.value]
    return t.value

def get_operation(t):
    if t.type != TokenType.OPERATOR:
        raise Exception('Not a valid operator')
    if t.value == '=':
        return lambda a, b: a == b
    raise NotImplementedError
    
def send(address, msg):
    address.write(msg)
    
def cmpl(program):
    lines = program.split('\n')
    results = []
    for line in lines:
        tokens = tokenize(line)
        if tokens[0].type != TokenType.KEYWORD:
            raise Exception('Not a valid expression (for now)')
        if tokens[0].value == 'add':
            a = get_value(tokens[1])
            b = None
            if tokens[2].type != TokenType.KEYWORD:
                b = get_value(tokens[2]) 
            else:
                b = get_value(tokens[3])
            results.append(a + b)
        elif tokens[0].value == 'let':
            identifier = tokens[1].value
            value = get_value(tokens[3])
            memory[identifier] = value
            results.append('Value %s stored in variable %s' % (str(value), identifier))
        elif tokens[0].value == 'does':
            a = get_value(tokens[1])
            op = get_operation(tokens[2])
            b = get_value(tokens[1])
            results.append(op(a, b))
        elif tokens[0].value == 'send':
            msg = get_value(tokens[1])
            address_idx = [i+1 for i in range(len(tokens)) if tokens[i].value == 'to'][0]
            address = get_value(tokens[address_idx])
            send(address, msg)
#         results.append(tokens)
    return results


# temporary rule: everything is an expression.
# the first token in every expression is going to be a keyword that determines what 
# the expression is supposed to do

In [91]:
[cmpl(t) for t in tests]

hello world

[[3],
 ['Value 2 stored in variable x', 'Value 5 stored in variable y', 7],
 ['Value 3 stored in variable x'],
 [True],
 ['Value foo stored in variable x', 'Value foo stored in variable y', True],
 []]

In [68]:
for t in tests:
    for l in t.split('\n'):
        print(l)
        print(tokenize(l))
    print()

add 1 to 2
add
1
to
2
[Token(value='add', type=<TokenType.KEYWORD: 4>), Token(value='1', type=<TokenType.NUMERAL: 2>), Token(value='to', type=<TokenType.KEYWORD: 4>), Token(value='2', type=<TokenType.NUMERAL: 2>)]

let x be 2
let
x
be
2
[Token(value='let', type=<TokenType.KEYWORD: 4>), Token(value='x', type=<TokenType.IDENTIFIER: 1>), Token(value='be', type=<TokenType.KEYWORD: 4>), Token(value='2', type=<TokenType.NUMERAL: 2>)]
let y be 5
let
y
be
5
[Token(value='let', type=<TokenType.KEYWORD: 4>), Token(value='y', type=<TokenType.IDENTIFIER: 1>), Token(value='be', type=<TokenType.KEYWORD: 4>), Token(value='5', type=<TokenType.NUMERAL: 2>)]
add x to y
add
x
to
y
[Token(value='add', type=<TokenType.KEYWORD: 4>), Token(value='x', type=<TokenType.IDENTIFIER: 1>), Token(value='to', type=<TokenType.KEYWORD: 4>), Token(value='y', type=<TokenType.IDENTIFIER: 1>)]

let x be 3
let
x
be
3
[Token(value='let', type=<TokenType.KEYWORD: 4>), Token(value='x', type=<TokenType.IDENTIFIER: 1>), Token(va