In [1]:
# write raw rbnf source code.
from rbnf.easy import build_parser, Language, build_language
from typing import NamedTuple, List

# Grammar for a tiny Lisp, written in rbnf's own grammar syntax.
# A raw string is used so the regex escapes (\s, \(, \)) reach rbnf unchanged
# without tripping Python's invalid-escape-sequence warning.
# Note: the `term` regex matches exactly one non-paren, non-space character.
my_lisp_definition = r"""
ignore [space]
space := R'\s'
term  := R'[^\(\)\s]'
sexpr ::= '(' [sexpr as head sexpr* as tail] ')' | term as atom
          rewrite
            if atom:
                return atom 
            if not head:
                return ()
            return SExpr(head, tail)
"""


# Node built by the grammar's rewrite step for a non-empty list:
# `head` holds the first parsed element, `tail` the list of remaining ones.
SExpr = NamedTuple('SExpr', [('head', 'SExpr'), ('tail', List['SExpr'])])


# Create the language object that will hold the lexers and parsers.
lisp = Language('lisp')

# Make SExpr available under that name in the language namespace, since the
# grammar's rewrite clause calls SExpr(head, tail).
lisp.namespace['SExpr'] = SExpr

# Load the grammar text into `lisp`; '<tutorials>' is passed as the third
# argument (presumably a source/file label -- confirm against rbnf docs).
build_language(my_lisp_definition, lisp, '<tutorials>')

# Obtain a callable that parses raw text with this language.
parse = build_parser(lisp)

# Parse a sample expression and display the rewritten result.
sample_source = "(+ 1 (* 2 3))"
parse(sample_source).result


SExpr(head=Tokenizer(name='term', value='+', lineno=0, colno=1), tail=[Tokenizer(name='term', value='1', lineno=0, colno=3), SExpr(head=Tokenizer(name='term', value='*', lineno=0, colno=6), tail=[Tokenizer(name='term', value='2', lineno=0, colno=8), Tokenizer(name='term', value='3', lineno=0, colno=10)])])

In [3]:
# Use Python to write rbnf
from rbnf.easy import Parser, Lexer, Language, auto_context, C, build_parser
from rbnf.core.State import State
from rbnf.core.Tokenizer import Tokenizer
from typing import NamedTuple, List, Optional

# Start a fresh language object for the pure-Python definition below.
lisp = Language('lisp')
# Tell the language to ignore tokens named 'space'
# (presumably they are dropped after lexing -- confirm against rbnf docs).
lisp.ignore('space')


# Lexer rule named `space`: a single whitespace character.
@lisp
class space(Lexer):
    @staticmethod
    def regex():
        # Raw string: `\s` is a regex escape, not a Python string escape.
        return r'\s'


# Lexer rule named `term`: one character that is neither a parenthesis
# nor whitespace.
@lisp
class term(Lexer):
    @staticmethod
    def regex():
        # Raw string keeps the regex escapes (\(, \), \s) intact without
        # triggering Python's invalid-escape-sequence warning.
        return r'[^\(\)\s]'


# Parser rule named `sexpr`, registered on `lisp` through the decorator.
@lisp
class sexpr(Parser):
    @staticmethod
    def bnf():
        # Two alternatives:
        #   1. '(' followed by an optional group -- one sexpr captured as "head"
        #      plus any number of further sexprs captured as "tail" -- then ')';
        #   2. a single `term` token captured as "term".
        # Python precedence: `@` binds tighter than `+`, which binds tighter than `|`.
        return C('(') + (sexpr @ "head" + sexpr.unlimited @ "tail").optional + C(")") | term @ "term"
    @staticmethod
    @auto_context
    def rewrite(state: State):
        # The bare annotations name the captures declared in bnf();
        # presumably @auto_context binds them from `state` -- TODO confirm.
        head: Optional[SExpr]
        tail: Optional[List[SExpr]]
        term: Optional[Tokenizer]
        # An atom is returned as its token, unchanged.
        if term:
            return term
        # NOTE(review): there is no special case for an empty list "()"; in that
        # case head/tail are presumably unset/None -- confirm the intended result.
        return SExpr(head, tail)


# Node produced by sexpr.rewrite for a parenthesised form:
# `head` holds the first parsed element, `tail` the list of remaining ones.
SExpr = NamedTuple('SExpr', [('head', 'SExpr'), ('tail', List['SExpr'])])


# Finalise the language now that every lexer and parser has been registered.
lisp.build()

# Build a text -> result callable that uses the `sexpr` rule as its parser.
parse = build_parser(lisp, use_parser='sexpr')

# Parse a sample expression and display the rewritten result.
sample_source = "(+ 1 (* 2 3))"
parse(sample_source).result

SExpr(head=Tokenizer(name='term', value='+', lineno=0, colno=1), tail=[Tokenizer(name='term', value='1', lineno=0, colno=3), SExpr(head=Tokenizer(name='term', value='*', lineno=0, colno=6), tail=[Tokenizer(name='term', value='2', lineno=0, colno=8), Tokenizer(name='term', value='3', lineno=0, colno=10)])])

In [4]:

# About literal parsers
from rbnf.core.Tokenizer import Tokenizer
from rbnf.core.CachingPool import ConstStrPool
from rbnf.core.State import State 
from rbnf.easy import N, NC, C, R, V 

# Strings passed through ConstStrPool.cast_to_const; these are the "constant"
# versions of the name and value used by the token fixtures below.
const_value = ConstStrPool.cast_to_const('<value>')
name = ConstStrPool.cast_to_const('<name>')

# '<...>x'[:-1] builds, at run time, a string equal to '<name>' / '<value>'
# that is presumably a different object from the pooled constant -- this lets
# the demos below contrast equality-based and identity-based matching.
#   tokens0: non-constant name, constant value
#   tokens1: constant name,     non-constant value
#   tokens2: constant name,     constant value
tokens0 = [Tokenizer('<name>x'[:-1], const_value, lineno=0, colno=0)]
tokens1 = [Tokenizer(name, '<value>x'[:-1], lineno=0, colno=0)]
tokens2 = [Tokenizer(name, const_value, lineno=0, colno=0)]


def make_state():
    """Return a brand-new State so each match call starts from scratch."""
    return State({}, '<playground>')

In [21]:
# N(...) builds a literal parser keyed on the token *name*.
name_parser = N('<name>')

# Run the parser against each fixture with a fresh state and print the outcome.
for label, tokens in (
    ('parse tokens0: ', tokens0),
    ('parse tokens1:', tokens1),
    ('parse tokens2:', tokens2),
):
    print(label, name_parser.match(tokens, make_state()))


parse tokens0:  [31mUnmatched[39m
parse tokens1: [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m
parse tokens2: [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m


In [20]:
# V(...) builds a literal parser keyed on the token *value*.
value_parser = V('<value>')

# Run the parser against each fixture with a fresh state and print the outcome.
for label, tokens in (
    ('parse tokens0: ', tokens0),
    ('parse tokens1:', tokens1),
    ('parse tokens2:', tokens2),
):
    print(label, value_parser.match(tokens, make_state()))

parse tokens0:  [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m
parse tokens1: [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m
parse tokens2: [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m


In [19]:
# C(...) builds a literal parser for a *constant* value string.
value_parser = C('<value>')

# Run the parser against each fixture with a fresh state and print the outcome.
for label, tokens in (
    ('parse tokens0: ', tokens0),
    ('parse tokens1:', tokens1),
    ('parse tokens2:', tokens2),
):
    print(label, value_parser.match(tokens, make_state()))

parse tokens0:  [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m
parse tokens1: [31mUnmatched[39m
parse tokens2: [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m


In [18]:
# R(...) builds a parser from a regular expression. A raw string is used so
# that `\S` is a regex escape rather than an invalid Python string escape.
regex_parser = R(r"\S+")
# Using a regex parser at parse time is not recommended.
# Lexers already use regexes to tokenize the raw input into tokenizers
#   with constant names, which can be compared by memory address.
print('parse tokens0: ', regex_parser.match(tokens0, make_state()))
print('parse tokens1:', regex_parser.match(tokens1, make_state()))
print('parse tokens2:', regex_parser.match(tokens2, make_state()))


parse tokens0:  [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m
parse tokens1: [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m
parse tokens2: [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m


In [22]:
# NC(...) builds a literal parser from both the constant name and constant value.
name_and_value = NC(name, const_value)

# Run the parser against each fixture with a fresh state and print the outcome.
for label, tokens in (
    ('parse tokens0: ', tokens0),
    ('parse tokens1:', tokens1),
    ('parse tokens2:', tokens2),
):
    print(label, name_and_value.match(tokens, make_state()))


parse tokens0:  [31mUnmatched[39m
parse tokens1: [31mUnmatched[39m
parse tokens2: [32mTokenizer(name='<name>', value='<value>', lineno=0, colno=0)[39m


In [23]:
# TO BE CONTINUED