In [5]:
# Sample JSON document (a nested glossary entry) used as input for the
# parsy-based JSON parser defined in the next cell.
json_text = '''{
    "glossary": {
        "title": "example glossary",
		"GlossDiv": {
            "title": "S",
			"GlossList": {
                "GlossEntry": {
                    "ID": "SGML",
					"SortAs": "SGML",
					"GlossTerm": "Standard Generalized Markup Language",
					"Acronym": "SGML",
					"Abbrev": "ISO 8879:1986",
					"GlossDef": {
                        "para": "A meta-markup language, used to create markup languages such as DocBook.",
						"GlossSeeAlso": ["GML", "XML"]
                    },
					"GlossSee": "markup"
                }
            }
        }
    }
}'''

In [6]:
from sys import stdin

from parsy import generate, regex, string

# Optional run of whitespace. Every token swallows the whitespace that follows
# it, so the grammar rules never have to mention spacing explicitly.
whitespace = regex(r'\s*')


def lexeme(p):
    """Return parser `p` followed by trailing whitespace, which is discarded."""
    return p << whitespace


# Structural punctuation.
lbrace = lexeme(string('{'))
rbrace = lexeme(string('}'))
lbrack = lexeme(string('['))
rbrack = lexeme(string(']'))
colon  = lexeme(string(':'))
comma  = lexeme(string(','))

# Keyword literals, mapped to their Python equivalents.
true   = lexeme(string('true')).result(True)
false  = lexeme(string('false')).result(False)
null   = lexeme(string('null')).result(None)

# JSON number syntax; every number (including integers) is converted to float.
number_pattern = r'-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?'
number = lexeme(regex(number_pattern)).map(float)

# A run of ordinary string characters (anything except a quote or backslash).
string_part = regex(r'[^"\\]+')


def _unicode_escape(s):
    """Convert a matched 'uXXXX' escape body into the character it denotes."""
    return chr(int(s[1:], 16))


# What may follow a backslash inside a string, and the character it stands for.
escaped_char = (
    string('\\')
    | string('/')
    | string('"')
    | string('b').result('\b')
    | string('f').result('\f')
    | string('n').result('\n')
    | string('r').result('\r')
    | string('t').result('\t')
    | regex(r'u[0-9a-fA-F]{4}').map(_unicode_escape)
)
string_esc = string('\\') >> escaped_char

# A double-quoted string: plain runs and escapes are joined into one str.
string_body = (string_part | string_esc).many().concat()
quoted = lexeme(string('"') >> string_body << string('"'))


# `array` and `value` refer to each other, so `array` is written in `generate`
# form: `value` is only looked up when the parser actually runs.
@generate
def array():
    """Parse a bracketed, comma-separated list of values into a Python list."""
    items = yield lbrack >> value.sep_by(comma) << rbrack
    return items


@generate
def object_pair():
    """Parse one `"key": value` member and return it as a (key, value) tuple."""
    name = yield quoted << colon
    member = yield value
    return (name, member)


# An object is a brace-delimited list of members collected into a dict.
object_members = object_pair.sep_by(comma).map(dict)
json_object = lbrace >> object_members << rbrace

# Any JSON value; alternatives are tried left to right.
value = quoted | number | json_object | array | true | false | null

# A whole document: optional leading whitespace, then a single value.
json = whitespace >> value


parsed_document = json.parse(json_text)
print(repr(parsed_document))

{'glossary': {'title': 'example glossary', 'GlossDiv': {'title': 'S', 'GlossList': {'GlossEntry': {'ID': 'SGML', 'SortAs': 'SGML', 'GlossTerm': 'Standard Generalized Markup Language', 'Acronym': 'SGML', 'Abbrev': 'ISO 8879:1986', 'GlossDef': {'para': 'A meta-markup language, used to create markup languages such as DocBook.', 'GlossSeeAlso': ['GML', 'XML']}, 'GlossSee': 'markup'}}}}}


In [9]:
from sly import Lexer


class LangLexer(Lexer):

    # SLY lexer for the toy language: identifiers and keywords, numeric and
    # string literals, multi-character operators and single-character literals.

    # main tokens - ID, keywords, data types, some funcs, like TYPEOF

    tokens = {ID, INT, FLOAT, ASSIGN, STRING, LET,
              IF, ELSE, EQEQ, SEP, NOTEQ, LESS,
              GREATER, LESSEQ, GREATEREQ, NIL, WHILE,
              FOR, FN, RETURN, LAMBDA, ARROW, TRUE, FALSE,
              AND, OR, SHR, SHL, INC, DEC, PLUSASGN,
              MINUSASGN, STARASGN, SLASHASGN, MODULOASGN,
              ANDASGN, ORASGN, XORASGN, SHLASGN, SHRASGN,
              IMPORT, STRUCT, INT_TYPE, FLOAT_TYPE, BOOL_TYPE,
              LIST_TYPE, DICT_TYPE, STRING_TYPE, TYPEOF,
              LEFTARROW, PIPE, CLASS, DOUBLECOLON}

    # ignore spaces, tabs and `//` line comments

    ignore = ' \t'
    ignore_comment_slash = r'//.*'

    # one-symbol literals

    literals = {'=', '+', '-', '/', '*',
                '(', ')', ',', '{', '}',
                '%', '[', ']', '!', '&',
                '|', '^', '?', ':', '~',
                '.'}

    # Multi-character and assignment operators. Rules are tried in the order
    # they are written, so every longer operator is listed before the shorter
    # operator that is its prefix ('<<=' before '<<' before '<', ...).

    INC = r'\+\+'
    DEC = r'--'
    PIPE = r'\|>'
    PLUSASGN = r'\+='
    MINUSASGN = r'-='
    STARASGN = r'\*='
    SLASHASGN = r'/='
    MODULOASGN = r'%='
    ANDASGN = r'&='
    ORASGN = r'\|='
    # '^' must be escaped: unescaped it is the start-of-string anchor, so the
    # rule would only ever match a '=' at offset 0 of the input.
    XORASGN = r'\^='
    SHLASGN = r'<<='
    SHRASGN = r'>>='
    ARROW = r'=>'
    LESSEQ = r'<='
    GREATEREQ = r'>='
    LEFTARROW = r'<-'
    SHR = r'>>'
    SHL = r'<<'
    LESS = r'<'
    GREATER = r'>'
    NOTEQ = r'!='
    EQEQ = r'=='
    ASSIGN = r'='
    SEP = r';'
    DOUBLECOLON = r'::'

    # Identifiers; the remaps below turn reserved words into keyword tokens.

    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    ID['let'] = LET
    ID['if'] = IF
    ID['else'] = ELSE
    ID['nil'] = NIL
    ID['while'] = WHILE
    ID['for'] = FOR
    ID['fn'] = FN
    ID['return'] = RETURN
    ID['lambda'] = LAMBDA
    ID['true'] = TRUE
    ID['false'] = FALSE
    ID['and'] = AND
    ID['or'] = OR
    ID['import'] = IMPORT
    ID['struct'] = STRUCT
    ID['int'] = INT_TYPE
    ID['float'] = FLOAT_TYPE
    ID['string'] = STRING_TYPE
    ID['bool'] = BOOL_TYPE
    ID['list'] = LIST_TYPE
    ID['dict'] = DICT_TYPE
    ID['typeof'] = TYPEOF
    ID['class'] = CLASS

    # FLOAT is declared before INT so that '1.5' is not split into INT '.' INT.
    @_(r'\d+\.\d+')
    def FLOAT(self, t):
        """Convert the matched text of a floating-point literal to `float`."""
        t.value = float(t.value)
        return t

    @_(r'\d+')
    def INT(self, t):
        """Convert the matched text of an integer literal to `int`."""
        t.value = int(t.value)
        return t

    @_(r'\".*?(?<!\\)(\\\\)*\"')
    def STRING(self, t):
        """Strip the surrounding quotes and decode backslash escapes.

        Recognised escapes are n, t, r, a, b, v, the double quote and the
        backslash itself; any other backslash sequence is kept verbatim.

        The text is decoded in a single left-to-right pass so that an escaped
        backslash is consumed as a unit. Decoding with successive
        `str.replace` calls would turn the four source characters `\\\\n`
        (escaped backslash, then the letter n) into backslash + newline.
        """
        import re  # local import: the cell-level imports are outside this class

        escapes = {
            'n': '\n',
            't': '\t',
            'r': '\r',
            'a': '\a',
            'b': '\b',
            'v': '\v',
            '"': '"',
            '\\': '\\',
        }
        t.value = re.sub(
            r'\\(.)',
            lambda m: escapes.get(m.group(1), m.group(0)),
            t.value[1:-1],
        )
        return t

    @_(r'\n+')
    def ignore_newline(self, t):
        """Discard newlines while keeping `lineno` in step with the input."""
        self.lineno += len(t.value)

    def error(self, t):
        """Report an unrecognised character and resume at the next one."""
        print("Illegal character '%s' on line %d" % (t.value[0], self.lineno))
        self.index += 1

In [11]:
# Module-level lexer instance; `simple_eval` below tokenizes its input with it.
lex = LangLexer()


from parsy import digit, generate, match_item, regex, string, success, test_item


# Parsy-based tokenizer kept only as a bare string literal (i.e. disabled):
# the string is evaluated and discarded, so no `lexer` function is defined.
# NOTE(review): presumably superseded by `LangLexer` - confirm before deleting.
'''def lexer(code):
    
    whitespace = regex(r'\s*')
    integer = digit.at_least(1).concat().map(int)
    float_ = (
        digit.many() + string('.').result(['.']) + digit.many()
    ).concat().map(float)
    parser = whitespace >> ((
        float_ | integer  | regex(r'[()*/+-]')
    ) << whitespace).many()
    return parser.parse(code)'''


def eval_tokens(tokens):
    """Evaluate a list of arithmetic tokens and return the numeric result.

    `tokens` is a sequence whose items are numbers (int/float) and the
    one-character strings '(', ')', '+', '-', '*' and '/'. Multiplication and
    division bind tighter than addition and subtraction, operators of equal
    precedence associate to the left, and a number may carry a leading sign.
    The parse fails (parsy raises) if the tokens are not a complete expression.
    """
    open_paren = match_item('(')
    close_paren = match_item(')')
    add_or_sub = match_item('+') | match_item('-')
    mul_or_div = match_item('*') | match_item('/')
    # Always succeeds with '' - used to detect "no further operator".
    no_operator = success('')

    @generate
    def number():
        sign = yield match_item('+') | match_item('-') | success('+')
        value = yield test_item(
            lambda x: isinstance(x, (int, float)), 'number')
        if sign == '+':
            return value
        return -value

    @generate
    def multiplicative():
        total = yield simple
        operator = yield mul_or_div | no_operator
        while operator:
            rhs = yield simple
            if operator == '*':
                total *= rhs
            elif operator == '/':
                total /= rhs
            operator = yield mul_or_div | no_operator
        return total

    @generate
    def additive():
        total = yield multiplicative
        operator = yield add_or_sub | no_operator
        while operator:
            rhs = yield multiplicative
            if operator == '+':
                total += rhs
            elif operator == '-':
                total -= rhs
            operator = yield add_or_sub | no_operator
        return total

    expr = additive
    # A "simple" operand is a parenthesised expression or a signed number.
    simple = (open_paren >> expr << close_paren) | number

    return expr.parse(tokens)


def simple_eval(expr):
    """Tokenize the string `expr` with the module-level `lex` and evaluate it."""
    token_values = [tok.value for tok in lex.tokenize(expr)]
    return eval_tokens(token_values)


if __name__ == '__main__':
    # Read one expression interactively and print its value.
    user_expression = input()
    print(simple_eval(user_expression))

123
123


In [19]:
class CsParser:
    """Evaluates arithmetic expressions from lexer tokens using parsy."""

    def eval_tokens(self, tokens):
        """Evaluate a list of arithmetic tokens and return the numeric result.

        `tokens` holds numbers (int/float) and the one-character strings
        '(', ')', '+', '-', '*' and '/'. '*' and '/' bind tighter than '+' and
        '-', operators of equal precedence associate to the left, and a number
        may carry a leading sign.
        """
        open_paren = match_item('(')
        close_paren = match_item(')')
        add_or_sub = match_item('+') | match_item('-')
        mul_or_div = match_item('*') | match_item('/')
        # Always succeeds with '' - used to detect "no further operator".
        no_operator = success('')

        @generate
        def number():
            sign = yield match_item('+') | match_item('-') | success('+')
            value = yield test_item(
                lambda x: isinstance(x, (int, float)), 'number')
            if sign == '+':
                return value
            return -value

        @generate
        def multiplicative():
            total = yield simple
            operator = yield mul_or_div | no_operator
            while operator:
                rhs = yield simple
                if operator == '*':
                    total *= rhs
                elif operator == '/':
                    total /= rhs
                operator = yield mul_or_div | no_operator
            return total

        @generate
        def additive():
            total = yield multiplicative
            operator = yield add_or_sub | no_operator
            while operator:
                rhs = yield multiplicative
                if operator == '+':
                    total += rhs
                elif operator == '-':
                    total -= rhs
                operator = yield add_or_sub | no_operator
            return total

        expr = additive | multiplicative
        # A "simple" operand is a parenthesised expression or a signed number.
        simple = (open_paren >> expr << close_paren) | number

        return expr.parse(tokens)


    def simple_eval(self, expr, lex):
        """Tokenize `expr` with `lex`, print the token values, and evaluate them."""
        toks = [tok.value for tok in lex.tokenize(expr)]
        print(toks)
        return self.eval_tokens(toks)

In [20]:
# Demo: tokenize and evaluate a parenthesised expression with CsParser.
lex = LangLexer()
expression = '(1 + 2) / (3 + 7)'
cs_parser = CsParser()
# Last expression of the cell, so its return value is shown as the cell output.
cs_parser.simple_eval(expression, lex)

['(', 1, '+', 2, ')', '/', '(', 3, '+', 7, ')']


0.3

In [14]:
class ExpParser:
    """Parses arithmetic lexer tokens into a nested-tuple expression tree."""

    def eval_tokens(self, tokens):
        """Build an expression tree from a list of arithmetic tokens.

        `tokens` holds numbers (int/float) and the one-character strings
        '(', ')', '+', '-', '*' and '/'.

        Returns a bare number for a single operand, otherwise a tuple
        `(operator, left, right)` whose operands are themselves numbers or
        tuples. '*' and '/' bind tighter than '+' and '-', and operators of
        equal precedence associate to the left. A leading sign is folded into
        the number itself. For example the tokens of `(1 + 2) / (3 + 7)` give
        `('/', ('+', 1, 2), ('+', 3, 7))`.

        The parse fails (parsy raises) if the tokens are not a complete
        expression.
        """
        lparen = match_item('(')
        rparen = match_item(')')

        @generate
        def additive():
            res = yield multiplicative
            sign = match_item('+') | match_item('-')
            while True:
                operation = yield sign | success('')
                if not operation:
                    break
                operand = yield multiplicative
                # Keep folding to the left so `1 - 2 + 3` becomes
                # ('+', ('-', 1, 2), 3); the node is tagged with the operator
                # that was actually matched.
                res = (operation, res, operand)
            # Also reached when no operator follows: the lone operand is the
            # result (falling off the end would make the parser yield None).
            return res

        @generate
        def multiplicative():
            res = yield simple
            op = match_item('*') | match_item('/')
            while True:
                operation = yield op | success('')
                if not operation:
                    break
                operand = yield simple
                res = (operation, res, operand)
            return res

        @generate
        def number():
            sign = yield match_item('+') | match_item('-') | success('+')
            value = yield test_item(
                lambda x: isinstance(x, (int, float)), 'number')
            return value if sign == '+' else -value

        expr = additive
        # A "simple" operand is a parenthesised expression or a signed number.
        simple = (lparen >> expr << rparen) | number

        return expr.parse(tokens)


    def simple_eval(self, expr, lex):
        """Tokenize `expr` with `lex`, print the token values, and parse them."""
        text = lex.tokenize(expr)
        toks = [token.value for token in text]
        print(toks)
        return self.eval_tokens(toks)

In [18]:
# Demo: run the same expression through ExpParser.
lex = LangLexer()
expression = '(1 + 2) / (3 + 7)'
exp_parser = ExpParser()
# The result is stored in `a` rather than displayed (the cell ends with an
# assignment), so only the token list printed by simple_eval is shown.
a = exp_parser.simple_eval(expression, lex)

['(', 1, '+', 2, ')', '/', '(', 3, '+', 7, ')']


In [37]:
import attr