In [1]:
%pip install sly

Note: you may need to restart the kernel to use updated packages.


In [4]:
from sly import Lexer
from sly import Parser


In [261]:
class CalcLexer(Lexer):
    # Set of token names. This is always required
    tokens = {
        'ID', 'EQUALS', 'LPAREN', 'RPAREN', 'COMMA', 'PLUS', 'DOT','LBRACKET', 'RBRACKET', 'TERNARY', 'NEW_LINE', 'COLON','AMPERSAND', 'EXCLAMATION', 'LBRACE', 'RBRACE'
    }
    
    # String containing ignored characters between tokens
    ignore = ' \t'

    COMMAND_ID = r'c\d+'
    OUTPUT_ID = r'y\d+'
    PARAMETER_Id=r'p\d+'
    ASYNC_OP_ID=r'a\d+' 
    # Regular expression rules for tokens
    ID      = r'[a-zA-Z_][a-zA-Z0-9_]*'
    EQUALS  = r'='
    LPAREN  = r'\('
    RPAREN  = r'\)'
    COMMA   = r','
    PLUS    = r'\+'
    DOT     = r'\.'
    LBRACKET = r'\['
    RBRACKET = r'\]'
    LBRACE = r'\{'
    RBRACE = r'\}'
    TERNARY  = r'\?'
    NEW_LINE = r'\n'
    COLON = r':'
    AMPERSAND = r'&'
    EXCLAMATION=r'!'
    

In [283]:
class Node:
    def __init__(self, type_, children=None, value=None):
        self.type = type_
        self.children = children
        self.value = value


In [275]:

class CalcParser(Parser):
    tokens = CalcLexer.tokens

    precedence = (
        ('left', 'PLUS'),
    )

    def __init__(self):
        self.names = {}


    @_('body')
    def start(self, p):
        return p.body
    
    @_('mapping_list NEW_LINE expression_list')
    def body(self,p):
        return Node('body', children=[p.mapping_list, p.expression_list])
    
    @_('mapping')
    def mapping_list(self, p):
        return Node('MappingList', children=[p.mapping])
    
    @_('mapping_list NEW_LINE mapping')
    def mapping_list(self, p):
        p.mapping_list.children.append(p.mapping)
        return p.mapping_list

    
    @_('AMPERSAND ID EQUALS ID')
    def mapping(self,p):
        return Node('Mapping', value=[p.ID0,p.ID1])
    
    @_('AMPERSAND ID EQUALS ID COLON ID')
    def mapping(self,p):
        return Node('Mapping', value=[p.ID0,p.ID1,p.ID2])
    
   
    
    
    @_('expression')
    def expression_list(self, p):
        return Node('ExpressionList', children=[p.expression])

    @_('expression_list add expression')
    def expression_list(self, p):
        p.expression_list.children.append(p.expression)
        return p.expression_list
    
    @_('yes_block_producer')
    def expression(self, p):
        return p.yes_block
    
    @_('no_block_producer')
    def expression(self, p):
        return p.no_block
    
    @_('yes_block')
    def expression(self, p):
        return p.yes_block
    
    @_('no_block')
    def expression(self, p):
        return p.no_block
    
    @_('block')
    def expression(self, p):
        return p.block

    @_('parallel_expr')
    def expression(self, p):
        return p.parallel_expr

    @_('PLUS')
    def add(self, p):
        return Node('plus')

    # The plus_new_line rule simply includes the PLUS token
    @_('PLUS NEW_LINE')
    def add(self, p):
        return Node('plus')
    
    @_('block block_producer block')
    def yes_block_producer(self, p):
        return Node('YesProducerBlock', value=p.block1, children=[p.block0, p.block_producer])
    
    @_('block block_producer block')
    def no_block_producer(self, p):
        return Node('NoProducerBlock', value=p.block1, children=[p.block0, p.block_producer])
    
    
    @_('block block')
    def conditonal_block(self, p):
        return Node('YesBlock', value=p.block1, children=[p.block0])
    
    
    @_('LBRACE RBRACE')
    def block_producer(self,p):
        return Node('BlockProducer', children=[])
    
    @_('LBRACE expression_list RBRACE')
    def block_producer(self,p):
        return Node('BlockProducer', children=[p.expression_list])
    
    @_('LBRACKET expression_list RBRACKET')
    def block(self,p):
        return Node('Block', children=[p.expression_list])
    
    @_('LBRACKET RBRACKET')
    def block(self,p):
        return Node('Block', children=[])
    

    @_('command')
    def expression(self, p):
        return p.command

    @_('LPAREN expression_list RPAREN')
    def parallel_expr(self, p):
        return Node('ParallelBlock', children=[p.expression_list])
    
    @_('ID LPAREN param_list RPAREN')
    def command(self, p):
        return Node('Command', children=[Node('Id',value=p.ID), p.param_list])

    @_('ID LPAREN RPAREN')
    def command(self, p):
        return Node('Command', children=[Node('Id',value=p.ID)])

    @_('param')
    def param_list(self, p):
        return Node('ParamList',children=[p.param])

    @_('param_list COMMA param')
    def param_list(self, p):
        p.param_list.children.append(p.param)
        return p.param_list

    @_('ID EQUALS ID')
    def param(self, p):
        return Node('Param', children=[Node('Id',value=p.ID0), Node('ParamValue', value=[p.ID1])])

    @_('ID EQUALS param_value')
    def param(self, p):
        return Node('Param', children=[Node('Id',value=p.ID), p.param_value ])

    @_('ID DOT ID')
    def param_value(self, p):
        return Node('ParamValue', value=[p.ID0,p.ID1])
    
    

def tree_to_dict(node):
    if not node:
        return None

    if node.children:
        children_dict = [tree_to_dict(child) for child in node.children]
    else:
        children_dict = None

    if isinstance(node.value, Node):
        value_dict = tree_to_dict(node.value)
    else:
        value_dict = node.value

    return {
        'type': node.type,
        'children': children_dict,
        'value': value_dict
    }

def print_tree(node, indent=0):
    if not node:
        return
    print('  ' * indent + str(node.type))
    if node.children:
        for child in node.children:
            print_tree(child, indent + 1)
    elif node.value is not None:
        print('  ' * (indent + 1) + str(node.value))
        if isinstance(node.value, Node):
            print_tree(node.value, indent + 1)


if __name__ == '__main__':
    lexer = CalcLexer()
    parser = CalcParser()
    #CommandPlan
    data = """&c1=userdata:get_content
(c1(p1=i1)) + (c2(p1=i2,p8=o1.m1) +
[c5(p6=e6)+c6(p7=e7)]o4.m2 +
[c5(p6=e6)+c6(p7=e7)]o4.m3 +
[c8(p1=i8)+c9(p3=i10)]{c1(p1=i3)}o4.m2 +
c3(p1=i2,p8=i3) + c4(p1=i2,p8=i4))"""
    print(data)
    result = parser.parse(lexer.tokenize(data))
    print(tree_to_dict(result))

    
    
    



&c1=userdata:get_content
(c1(p1=i1)) + (c2(p1=i2,p8=o1.m1) +
[c5(p6=e6)+c6(p7=e7)]o4.m2 +
[c5(p6=e6)+c6(p7=e7)]o4.m3 +
[c8(p1=i8)+c9(p3=i10)]{c1(p1=i3)}o4.m2 +
c3(p1=i2,p8=i3) + c4(p1=i2,p8=i4))




AttributeError: No symbol param_value. Must be one of {LBRACE, expression_list, RBRACE}.

In [230]:
"""&c1=userdata:get_content
    &p1=file
    &sl1=sys:l
    &i1=crendentials
    &c2=userdata:append
    &p2=file
    &i2=backup_credentials
    &m1=content
    &m2=word_count
    &c3=userdata:append
    &p8=content
    &i3=Test
    &i4=Hello"""

'&c1=userdata:get_content\n    &p1=file\n    &sl1=sys:l\n    &i1=crendentials\n    &c2=userdata:append\n    &p2=file\n    &i2=backup_credentials\n    &m1=content\n    &m2=word_count\n    &c3=userdata:append\n    &p8=content\n    &i3=Test\n    &i4=Hello'

In [None]:
class ABCLexer(Lexer):
    # Set of token names. This is always required
    tokens = {
        'COMMAND_ID','OUTPUT_ID','PARAMETER_ID','ASYNC_OP_ID','RULE_ID','INPUT_ID',
        'EQUALS', 'LPAREN', 'RPAREN', 'COMMA', 'PLUS', 'DOT','LBRACKET', 'RBRACKET', 'TERNARY', 'NEW_LINE', 'COLON','AMPERSAND', 'EXCLAMATION', 'LBRACE', 'RBRACE'
    }
    
    # String containing ignored characters between tokens
    ignore = ' \t'
    ignore_newline = r'\n+'

    COMMAND_ID = r'c\d+'
    OUTPUT_ID = r'y\d+'
    PARAMETER_ID=r'p\d+'
    ASYNC_OP_ID=r'a\d+'
    RULE_ID=r'r\d+'
    INPUT_ID=r'x\d+'
     
    # Regular expression rules for tokens
    EQUALS  = r'='
    LPAREN  = r'\('
    RPAREN  = r'\)'
    COMMA   = r','
    PLUS    = r'\+'
    DOT     = r'\.'
    LBRACKET = r'\['
    RBRACKET = r'\]'
    LBRACE = r'\{'
    RBRACE = r'\}'
    TERNARY  = r'\?'
    NEW_LINE = r'\n'
    COLON = r':'
    AMPERSAND = r'&'
    EXCLAMATION=r'!'
    
    @_(r'\n+')
    def newline(self, t):
        self.lineno += t.value.count('\n')


In [None]:
a1 - ai
b1 - block memory


h1([r1c1(){0 10 ["sara" "mena" ]}v1 + r2c1(){0 10 ["sara" "mena" ]}v2 + g1r2c2h1]a1){}  +  h2([r1p1{}])  

In [287]:
from sly import Lexer, Parser

class MyLexer(Lexer):
    tokens = {CHAR_SEQUENCE, NUMBER, PARAMETER, LPAREN, RPAREN}
    ignore = ' \t\n'

    CHAR_SEQUENCE = r'[a-zA-Z]+'
    NUMBER = r'\d+'
    PARAMETER = r'\([a-zA-Z]+\=\w+(?: [a-zA-Z]+\=\w+)*\)'
    LPAREN = r'\('
    RPAREN = r'\)'


class MyParser(Parser):
    tokens = MyLexer.tokens

    
    @_('PARAMETER')
    def statement(self, p):
        parameter_str = p.PARAMETER[1:-1]  # Remove parentheses from the parameter string
        parameters = parameter_str.split()  # Split the parameter string by spaces
        parameter_list = [param.split('=') for param in parameters]  # Split each parameter by '='
        return f"PARAMETERS: {parameter_list}"
    
    @_('CHAR_SEQUENCE')
    def chars(self, p):
        return f"CHAR_SEQUENCE: {p.CHAR_SEQUENCE}"

    @_('NUMBER')
    def id(self, p):
        return f"NUMBER: {p.NUMBER}"

lexer = MyLexer()
parser = MyParser()

text = "r1c10(p1=b1 p2=y1p2 p3=b3)"
tokens = lexer.tokenize(text)
print([val for val in tokens])




LexError: Illegal character '=' at index 3