In [1]:
from parsec import *

In [2]:
possible_chars = letter() | space() |  one_of('/.,:"[]') | digit()
parser =  many(many(possible_chars) + string("<") >> mark(many(possible_chars)) << string(">"))

In [10]:
ex_str = "/"
parser.parse(ex_str)

[]

In [11]:
mystr = """
<kv>
  key1: "string"
  key2: 1.00005
  key3: [1,2,3]
</kv>
<csv>
date,windspeed,direction
20190805,22,NNW
20190805,23,NW
20190805,20,NE
</csv>"""

In [15]:
from parsec import *

spaces = regex(r'\s*', re.MULTILINE)
name = regex(r'[_a-zA-Z][_a-zA-Z0-9]*')

tag_start = spaces >> string('<') >> name << string('>') << spaces
tag_stop = spaces >> string('</') >> name << string('>') << spaces

@generate
def header_kv():
    key = yield spaces >> name << spaces
    yield string(':')
    value = yield spaces >> regex('[^\n]+')
    return {key: value}

@generate
def header():
    tag_name = yield tag_start
    values = yield sepBy(header_kv, string('\n'))
    tag_name_end = yield tag_stop
    assert tag_name == tag_name_end
    return {
        'type': 'tag',
        'name': tag_name,
        'values': values
    }

'''@generate
def body():
    tag_name = yield tag_start
    values = yield sepBy(sepBy1(regex(r'[^\n<,]+'), string(',')), string('\n'))
    tag_name_end = yield tag_stop
    assert tag_name == tag_name_end
    return {
        'type': 'tag',
        'name': tag_name,
        'values': values
    }'''

parser = header #+ body

In [17]:
parser.parse(mystr)

{'type': 'tag',
 'name': 'kv',
 'values': [{'key1': '"string"'}, {'key2': '1.00005'}, {'key3': '[1,2,3]'}]}

In [1]:
import re

from parsec import *
from parsec import 

# ignore 
whitespace = regex(r'\s+', re.MULTILINE)
comment = regex(r';.*')
ignore = many((whitespace | comment))


lexeme = lambda p: p << ignore  

lparen = lexeme(string('('))
rparen = lexeme(string(')'))
number = lexeme(regex(r'\d+')).parsecmap(int)
symbol = lexeme(regex(r'[\d\w_-]+'))
true = lexeme(string('#t')).result(True)

false = lexeme(string('#f')).result(False)

op = lexeme(regex(r'[\+\-*/]'))


atom = op | number | symbol | (true ^ false)


@generate('a form')
def form():
    '''Parse expression within a pair of parenthesis.'''
    yield lparen
    es = yield many(expr)
    yield rparen
    return es


@generate
def quote():
    '''Parse expression after a quote symbol.'''
    yield string("'")
    e = yield expr
    return ['quote', e]

expr = atom | form | quote

program = ignore >> many(expr)

SyntaxError: invalid syntax (<ipython-input-1-0d2114b58a94>, line 4)

In [28]:
program.parse('20+10;')

[20, '+', 10]

In [30]:
import parsec as psc
digit = psc.regex("[0-9]")

def number():
  @psc.Parser
  def number_parser(text, index):
    res = (digit + number())(text, index)
    return res if res.status else digit(text, index)
  return number_parser

_ = number().parse("42423")
print(_)

('4', ('2', ('4', ('2', '3'))))


In [2]:
from parsita import *

In [53]:
# Just a calculator parser

from parsy import digit, generate, match_item, regex, string, success, test_item


def lexer(code):
    
    whitespace = regex(r'\s*')
    semicolon = regex(r';')
    integer = digit.at_least(1).concat().map(int)
    var = regex(r'[a-zA-Z_][a-zA-Z0-9_]*')
    float_ = (
        digit.many() + string('.').result(['.']) + digit.many()
    ).concat().map(float)
    parser = whitespace >> ((
        float_ | integer  | regex(r'[()*/+-]') | var
    ) << whitespace).many()
    return parser.parse(code)


def eval_tokens(tokens):
    # This function parses and evaluates at the same time.

    lparen = match_item('(')
    rparen = match_item(')')
    env = {}
    
    
    @generate
    def assign():
        res = yield varss
        sign = match_item('=')
        while True:
            operation = yield sign | success('')
            if not operation:
                break
                
            operand = yield simple
            if operation == '=':
                env[res] = operand
            

    @generate
    def additive():
        res = yield multiplicative
        sign = match_item('+') | match_item('-')
        while True:
            operation = yield sign | success('')
            if not operation:
                break
            operand = yield multiplicative
            if operation == '+':
                res += operand
            elif operation == '-':
                res -= operand
        return res

    @generate
    def multiplicative():
        res = yield simple
        op = match_item('*') | match_item('/')
        while True:
            operation = yield op | success('')
            if not operation:
                break
            operand = yield simple
            if operation == '*':
                res *= operand
            elif operation == '/':
                res /= operand
        return res

    @generate
    def number():
        sign = yield match_item('+') | match_item('-') | success('+')
        value = yield test_item(
            lambda x: isinstance(x, (int, float)), 'number')
        return value if sign == '+' else -value
    
    @generate
    def var():
        value = yield test_item(lambda x: isinstance(x, (str)), 'var')
        return value

    expr = additive | multiplicative | assign
    simple = (lparen >> expr << rparen) | number | var
    varss = var

    return expr.parse(tokens)


def simple_eval(expr):
    return eval_tokens(lexer(expr))


if __name__ == '__main__':
    print(simple_eval(input()))

print(1+2)


ParseError: expected one of '*', '+', '-', '/', 'EOF' at 1

In [21]:
from sys import stdin

from parsy import generate, regex, string

whitespace = regex(r'\s*')
lexeme = lambda p: p << whitespace
lbrace = lexeme(string('{'))
rbrace = lexeme(string('}'))
lbrack = lexeme(string('['))
rbrack = lexeme(string(']'))
colon  = lexeme(string(':'))
comma  = lexeme(string(','))
true   = lexeme(string('true')).result(True)
false  = lexeme(string('false')).result(False)
null   = lexeme(string('null')).result(None)
number = lexeme(
    regex(r'-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?')
).map(float)
string_part = regex(r'[^"\\]+')
string_esc = string('\\') >> (
    string('\\')
    | string('/')
    | string('"')
    | string('b').result('\b')
    | string('f').result('\f')
    | string('n').result('\n')
    | string('r').result('\r')
    | string('t').result('\t')
    | regex(r'u[0-9a-fA-F]{4}').map(lambda s: chr(int(s[1:], 16)))
)
quoted = lexeme(string('"') >> (string_part | string_esc).many().concat() << string('"'))


# Circular dependency between array and value means we use `generate` form here
@generate
def array():
    yield lbrack
    elements = yield value.sep_by(comma)
    yield rbrack
    return elements


@generate
def object_pair():
    key = yield quoted
    yield colon
    val = yield value
    return (key, val)


json_object = lbrace >> object_pair.sep_by(comma).map(dict) << rbrace
value = quoted | number | json_object | array | true | false | null
json = whitespace >> value

if __name__ == '__main__':
    mystr = '{"text" : "Hello world"}'
    print(repr(json.parse(mystr)))

{'text': 'Hello world'}


In [3]:
from sly import Lexer


class LangLexer(Lexer):

    # lexer

    # main tokens - ID, keywords, data types, some funcs, like TYPEOF

    tokens = {ID, INT, FLOAT, ASSIGN, STRING, LET,
              IF, ELSE, EQEQ, SEP, NOTEQ, LESS,
              GREATER, LESSEQ, GREATEREQ, NIL, WHILE,
              FOR, FN, RETURN, LAMBDA, ARROW, TRUE, FALSE,
              AND, OR, SHR, SHL, INC, DEC, PLUSASGN,
              MINUSASGN, STARASGN, SLASHASGN, MODULOASGN,
              ANDASGN, ORASGN, XORASGN, SHLASGN, SHRASGN,
              IMPORT, STRUCT, INT_TYPE, FLOAT_TYPE, BOOL_TYPE,
              LIST_TYPE, DICT_TYPE, STRING_TYPE, TYPEOF,
              LEFTARROW, PIPE, CLASS, DOUBLECOLON}

    # ignore tabs and comments

    ignore = ' \t'
    ignore_comment_slash = r'//.*'

    # one-symbol literals

    literals = {'=', '+', '-', '/', '*',
                '(', ')', ',', '{', '}',
                '%', '[', ']', '!', '&',
                '|', '^', '?', ':', '~',
                '.'}

    # assign of logical and assign operators

    INC = r'\+\+'
    DEC = r'--'
    PIPE = r'\|>'
    PLUSASGN = r'\+='
    MINUSASGN = r'-='
    STARASGN = r'\*='
    SLASHASGN = r'/='
    MODULOASGN = r'%='
    ANDASGN = r'&='
    ORASGN = r'\|='
    XORASGN = r'^='
    SHLASGN = r'<<='
    SHRASGN = r'>>='
    ARROW = r'=>'
    LESSEQ = r'<='
    GREATEREQ = r'>='
    LEFTARROW = r'<-'
    SHR = r'>>'
    SHL = r'<<'
    LESS = r'<'
    GREATER = r'>'
    NOTEQ = r'!='
    EQEQ = r'=='
    ASSIGN = r'='
    SEP = r';'
    DOUBLECOLON = r'::'

    # keywords

    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    ID['let'] = LET
    ID['if'] = IF
    ID['else'] = ELSE
    ID['nil'] = NIL
    ID['while'] = WHILE
    ID['for'] = FOR
    ID['fn'] = FN
    ID['return'] = RETURN
    ID['lambda'] = LAMBDA
    ID['true'] = TRUE
    ID['false'] = FALSE
    ID['and'] = AND
    ID['or'] = OR
    ID['import'] = IMPORT
    ID['struct'] = STRUCT
    ID['int'] = INT_TYPE
    ID['float'] = FLOAT_TYPE
    ID['string'] = STRING_TYPE
    ID['bool'] = BOOL_TYPE
    ID['list'] = LIST_TYPE
    ID['dict'] = DICT_TYPE
    ID['typeof'] = TYPEOF
    ID['class'] = CLASS

    @_(r'\d+\.\d+')
    def FLOAT(self, t):
        # float numbers
        t.value = float(t.value)
        return t

    @_(r'\d+')
    def INT(self, t):
        # integer numbers
        t.value = int(t.value)
        return t

    @_(r'\".*?(?<!\\)(\\\\)*\"')
    def STRING(self, t):
        # parsing the string type and cleaning it
        t.value = t.value[1:-1]
        t.value = t.value.replace(r"\n", "\n")
        t.value = t.value.replace(r"\t", "\t")
        t.value = t.value.replace(r"\\", "\\")
        t.value = t.value.replace(r"\"", "\"")
        t.value = t.value.replace(r"\a", "\a")
        t.value = t.value.replace(r"\b", "\b")
        t.value = t.value.replace(r"\r", "\r")
        t.value = t.value.replace(r"\t", "\t")
        t.value = t.value.replace(r"\v", "\v")
        return t

    @_(r'\n+')
    def ignore_newline(self, t):
        self.lineno += len(t.value)

    def error(self, t):
        print("Illegal character '%s' on line %d" % (t.value[0], self.lineno))
        self.index += 1

In [17]:
from parsy import digit, generate, match_item, regex, string, success, test_item


def lexer(code):
    whitespace = regex(r'\s*')
    semicolon = regex(r';')
    integer = digit.at_least(1).concat().map(int)
    float_ = (
        digit.many() + string('.').result(['.']) + digit.many()
    ).concat().map(float)
    parser = whitespace >> ((
        float_ | integer  | regex(r'[()*/+-]')
    ) << whitespace).many()
    return parser.parse(code)


def eval_tokens(tokens):
    # This function parses and evaluates at the same time.

    lparen = match_item('(')
    rparen = match_item(')')

    @generate
    def additive():
        res = yield multiplicative
        print(multiplicative)
        sign = match_item('+') | match_item('-')
        while True:
            operation = yield sign | success('')
            if not operation:
                break
            operand = yield multiplicative
            if operation == '+':
                return ('+', res, operand)
            elif operation == '-':
                return ('-', res, operand)

    @generate
    def multiplicative():
        res = yield simple
        op = match_item('*') | match_item('/')
        while True:
            operation = yield op | success('')
            if not operation:
                break
            operand = yield simple
            if operation == '*':
                return ('*', res, operand)
            elif operation == '-':
                return ('/', res, operand)

    @generate
    def number():
        sign = yield match_item('+') | match_item('-') | success('+')
        value = yield test_item(
            lambda x: isinstance(x, (int, float)), 'number')
        return value if sign == '+' else -value

    expr = additive
    simple = (lparen >> expr << rparen) | number

    return expr.parse(tokens)


def simple_eval(expr):
    print(lexer(expr))
    return eval_tokens(lexer(expr))


if __name__ == '__main__':
    print(simple_eval(input()))

1 + 2
[1, '+', 2]
<parsy.Parser object at 0x7f1cf1ebac50>
('+', None, None)


In [12]:
lex = LangLexer()
expr = "(2-3)*14;"

toks = lex.tokenize(expr)

class CombParser:
    def parse_input_tokens(self,tokens):
        list_of_tokens = [tok.value for tok in tokens]
        print(list_of_tokens)
    
a = CombParser()
a.parse_input_tokens(toks)

['(', 2, '-', 3, ')', '*', 14, ';']
