In [1]:

"""

(if (condition)
  "true" (yyy)
  "false" (xxx)
)

(struct
  (var u32 M)
)

"""

# Source text of the example program fed to tokenize() and build_ast() below.
# It is written as parenthesised S-expressions and defines a single `matmul`
# entry. Double-quoted passages inside the program are skipped by tokenize(),
# so they serve as in-program comments.
SAIR = """
"Define types."
(type u32 (uint 32))
(type f32 (float 32))



(entry matmul

"Declare variables."
(var M u32)
(var K u32)
(var N u32)
(var A (tensor f32) M K)
(var B (tensor f32) K N)
(var C (tensor f32) M N)


"Declare loop control variables."
(var m u32)
(var k u32)
(var n u32)
(var temp f32)

"Inject variables from arguments."
(let M (arg 0))
(let K (arg 1))
(let N (arg 2))
(let A (arg 3))
(let B (arg 4))
(let C (arg 5))

"Loop and MatMul."
(loop (range m 0 M)
  (loop (range n 0 N)
    (let temp 0.0)
    (loop (range k 0 K)
      (let temp (+ (* (load A m k) (load B k n)) temp))
    )
    (store temp C m n)
  )
)

)
"""

In [2]:
def tokenize(sair):
    """Split SAIR source text into a flat list of token strings.

    "(" and ")" are each emitted as a single-character token. Any other run
    of characters bounded by whitespace, a parenthesis, a double quote, or
    the start/end of the input is emitted as one token. Text enclosed in
    double quotes is treated as a comment and discarded; an unterminated
    quote discards everything up to the end of the input.

    Args:
        sair: Source text as a string.

    Returns:
        List of token strings in source order.
    """
    tokens = []
    beg = 0  # index where the token currently being accumulated starts
    in_comment = False
    for i, c in enumerate(sair):
        if c == "\"":
            # A quote toggles comment mode and also terminates any token.
            in_comment = not in_comment
            is_paren = False
        elif in_comment:
            # Comment text is skipped one character at a time.
            is_paren = False
        else:
            is_paren = c == "(" or c == ")"
            if not is_paren and c not in " \r\n\t":
                continue  # ordinary character: keep accumulating the token

        # c is a delimiter (or comment text): flush the pending token, if any.
        if i != beg:
            tokens.append(sair[beg:i])
        beg = i + 1
        if is_paren:
            tokens.append(c)

    # Flush a token that runs up to the end of the input; without this the
    # final token is lost whenever the text does not end with a delimiter.
    if beg < len(sair):
        tokens.append(sair[beg:])

    return tokens

# Print the flat token list produced for the SAIR program defined above.
print(tokenize(SAIR))

['(', 'type', 'u32', '(', 'uint', '32', ')', ')', '(', 'type', 'f32', '(', 'float', '32', ')', ')', '(', 'entry', 'matmul', '(', 'var', 'M', 'u32', ')', '(', 'var', 'K', 'u32', ')', '(', 'var', 'N', 'u32', ')', '(', 'var', 'A', '(', 'tensor', 'f32', ')', 'M', 'K', ')', '(', 'var', 'B', '(', 'tensor', 'f32', ')', 'K', 'N', ')', '(', 'var', 'C', '(', 'tensor', 'f32', ')', 'M', 'N', ')', '(', 'var', 'm', 'u32', ')', '(', 'var', 'k', 'u32', ')', '(', 'var', 'n', 'u32', ')', '(', 'var', 'temp', 'f32', ')', '(', 'let', 'M', '(', 'arg', '0', ')', ')', '(', 'let', 'K', '(', 'arg', '1', ')', ')', '(', 'let', 'N', '(', 'arg', '2', ')', ')', '(', 'let', 'A', '(', 'arg', '3', ')', ')', '(', 'let', 'B', '(', 'arg', '4', ')', ')', '(', 'let', 'C', '(', 'arg', '5', ')', ')', '(', 'loop', '(', 'range', 'm', '0', 'M', ')', '(', 'loop', '(', 'range', 'n', '0', 'N', ')', '(', 'let', 'temp', '0.0', ')', '(', 'loop', '(', 'range', 'k', '0', 'K', ')', '(', 'let', 'temp', '(', '+', '(', '*', '(', 'load', 'A'

In [3]:
def build_ast(tokens):
    """Assemble a flat token sequence into a nested-list syntax tree.

    Each "(" opens a new list inside the innermost open list, each ")"
    closes the innermost open list, and every other token is appended
    unchanged to the innermost open list.

    Args:
        tokens: Iterable of token strings, e.g. the result of ``tokenize``.

    Returns:
        A ``(ncall, ast)`` tuple where ``ncall`` is the number of ")" tokens
        consumed (one per closed list) and ``ast`` is the list of top-level
        items.

    Raises:
        ValueError: If a ")" has no matching "(", or if one or more "(" are
            still open when the tokens run out.
    """
    ast = []
    # open_lists[-1] is always the list currently being filled. Holding
    # references avoids re-walking the tree from the root for every token.
    open_lists = [ast]
    ncall = 0
    for position, token in enumerate(tokens):
        if token == "(":
            child = []
            open_lists[-1].append(child)
            open_lists.append(child)
        elif token == ")":
            # The root list is never closed by a token.
            if len(open_lists) == 1:
                raise ValueError(
                    "unmatched ')' at token index {}".format(position)
                )
            open_lists.pop()
            ncall += 1
        else:
            open_lists[-1].append(token)

    unclosed = len(open_lists) - 1
    if unclosed:
        raise ValueError(
            "{} unclosed '(' at end of input".format(unclosed)
        )
    return (ncall, ast)

# Parse the SAIR program; as the cell's last expression, the (ncall, ast) tuple is displayed.
build_ast(tokenize(SAIR))

(43,
 [['type', 'u32', ['uint', '32']],
  ['type', 'f32', ['float', '32']],
  ['entry',
   'matmul',
   ['var', 'M', 'u32'],
   ['var', 'K', 'u32'],
   ['var', 'N', 'u32'],
   ['var', 'A', ['tensor', 'f32'], 'M', 'K'],
   ['var', 'B', ['tensor', 'f32'], 'K', 'N'],
   ['var', 'C', ['tensor', 'f32'], 'M', 'N'],
   ['var', 'm', 'u32'],
   ['var', 'k', 'u32'],
   ['var', 'n', 'u32'],
   ['var', 'temp', 'f32'],
   ['let', 'M', ['arg', '0']],
   ['let', 'K', ['arg', '1']],
   ['let', 'N', ['arg', '2']],
   ['let', 'A', ['arg', '3']],
   ['let', 'B', ['arg', '4']],
   ['let', 'C', ['arg', '5']],
   ['loop',
    ['range', 'm', '0', 'M'],
    ['loop',
     ['range', 'n', '0', 'N'],
     ['let', 'temp', '0.0'],
     ['loop',
      ['range', 'k', '0', 'K'],
      ['let',
       'temp',
       ['+',
        ['*', ['load', 'A', 'm', 'k'], ['load', 'B', 'k', 'n']],
        'temp']]],
     ['store', 'temp', 'C', 'm', 'n']]]]])