In [1]:
import sys
from antlr4 import *
from antlr4.tree.Tree import *
from CustomLexer import CustomLexer
from PythonQLParser import PythonQLParser
from functools import reduce

In [22]:
# Input selection: test1.pql example. `f` is the path handed to FileStream(f)
# in the parsing cell; every selection cell rebinds the same name, so only the
# most recently executed one takes effect.
f = "../../examples/test1.pql"

In [53]:
# Input selection: test2.pql example. Rebinds `f`, the path that the parsing
# cell passes to FileStream(f).
f = "../../examples/test2.pql"

In [88]:
# Input selection: test3.pql example. Rebinds `f`, the path that the parsing
# cell passes to FileStream(f).
f = "../../examples/test3.pql"

In [28]:
# Input selection: test4.pql example. Rebinds `f`, the path that the parsing
# cell passes to FileStream(f).
f = "../../examples/test4.pql"

In [70]:
# Input selection: indent.py example. Rebinds `f`, the path that the parsing
# cell passes to FileStream(f).
f = "../../examples/indent.py"

In [29]:
# Build the parsing pipeline for the file currently selected in `f`:
# character stream -> CustomLexer -> token stream -> PythonQLParser.
inputStream = FileStream(f)
lexer = CustomLexer(inputStream)
stream = CommonTokenStream(lexer)
# NOTE: `parser` is also read as a module-level global by the helper functions
# in this notebook (rule names and RULE_* indices), so it must be defined
# before any of them is called.
parser = PythonQLParser(stream)
# Parse starting at the `file_input` rule; `tree` is the resulting parse tree.
tree = parser.file_input()

In [4]:
def printSubTree(tree,indent=''):
    """Print an indented outline of the parse tree rooted at `tree`.

    Terminal nodes are shown as their quoted text, rule nodes as the rule
    name looked up in the global `parser.ruleNames`. Every node is followed
    by a blank line (the format string ends in a newline and `print` adds
    another one).

    Args:
        tree: a terminal node or a rule node of the parse tree.
        indent: prefix printed before the node; grows by two spaces per level.
    """
    if isinstance(tree,TerminalNodeImpl):
        print ("%s'%s'\n" % (indent,tree.getText()))
        return

    print ("%s%s\n" % (indent,parser.ruleNames[tree.getRuleIndex()]))
    # A rule node that has no children may carry `children = None`
    # (get_all_terminals guards against the same case), so fall back to an
    # empty sequence instead of raising TypeError.
    for c in tree.children or []:
        printSubTree(c,indent+"  ")

In [5]:
class MyToken(TerminalNodeImpl):
    """Synthetic terminal node that carries literal text.

    Instances are inserted into the rewritten token list next to the real
    parse-tree terminals. Because the class derives from TerminalNodeImpl,
    the `isinstance(..., TerminalNodeImpl)` checks in this notebook treat it
    as a leaf.
    """
    def __init__(self,text):
        # There is no lexer token behind a synthetic node, so initialise the
        # base class with `None`. This makes the inherited attributes exist
        # (instead of raising AttributeError when they are read).
        super().__init__(None)
        self.text = text
    def getText(self):
        """Return the literal text this node was created with."""
        return self.text

In [6]:
def get_text(terms):
    """Return a list with the `getText()` result of every node in `terms`."""
    texts = []
    for node in terms:
        texts.append(node.getText())
    return texts

In [7]:
def isPathExpression(tree):
    """Tell whether `tree` is a `test` rule node with more than one child.

    Terminal nodes are never path expressions. The rule index is compared
    with the global `parser.RULE_test`.
    """
    if not isinstance(tree,TerminalNodeImpl):
        is_test_rule = tree.getRuleIndex()==parser.RULE_test
        return is_test_rule and len(tree.children)>1
    return False

In [8]:
def isQuery(tree):
    """Tell whether `tree` is a `query_expression` rule node.

    Terminal nodes always yield False.
    """
    return (not isinstance(tree,TerminalNodeImpl)
            and tree.getRuleIndex()==parser.RULE_query_expression)

In [9]:
def get_all_terminals(tree):
    """Flatten `tree` into the list of terminal nodes of the rewritten program.

    * A terminal node yields a one-element list containing itself.
    * A path expression is replaced by the tokens produced by
      get_path_expression_terminals.
    * A query expression is replaced by the tokens produced by
      get_query_terminals.
    * Any other rule node yields the concatenation of its children's
      terminals, in order; a node without children yields an empty list.

    Returns:
        A new list of terminal nodes (real parse-tree terminals and
        synthetic tokens).
    """
    if isinstance(tree,TerminalNodeImpl):
        return [tree]
    if isPathExpression(tree):
        return get_path_expression_terminals(tree)
    if isQuery(tree):
        return get_query_terminals(tree)

    # Extend one accumulator instead of folding with `x + y`, which copied
    # the growing list once per child.
    terminals = []
    for child in tree.children or []:
        terminals.extend(get_all_terminals(child))
    return terminals

In [10]:
def getText(tree):
    """Return the concatenated source text of all terminals below `tree`.

    A terminal node returns its own text. A rule node returns the texts of
    its children joined without any separator; a rule node without children
    returns the empty string.
    """
    if isinstance(tree,TerminalNodeImpl):
        return tree.getText()
    # `children` may be None for a node that has no children
    # (get_all_terminals guards against the same case).
    return "".join(getText(c) for c in tree.children or [])

In [11]:
def isChildStep(tree):
    """Tell whether `tree` is a `path_step` whose first child is a `child_path_step`."""
    if tree.getRuleIndex()!=parser.RULE_path_step:
        return False
    first = tree.children[0]
    return first.getRuleIndex()==parser.RULE_child_path_step

In [12]:
def isDescStep(tree):
    """Tell whether `tree` is a `path_step` whose first child is a `desc_path_step`."""
    if tree.getRuleIndex()!=parser.RULE_path_step:
        return False
    first = tree.children[0]
    return first.getRuleIndex()==parser.RULE_desc_path_step

In [13]:
def isPredStep(tree):
    """Tell whether `tree` is a `path_step` whose first child is a `pred_path_step`."""
    if tree.getRuleIndex()!=parser.RULE_path_step:
        return False
    first = tree.children[0]
    return first.getRuleIndex()==parser.RULE_pred_path_step

In [14]:
def mk_tok(items):
    """Turn `items` into a flat list of token nodes.

    * A non-list argument is wrapped in a single MyToken.
    * For a list argument each element is handled by type: a string becomes
      a MyToken, a nested list is spliced in as-is (one level only, its
      elements are not converted), and anything else is kept unchanged.

    Returns:
        A new list.
    """
    if not isinstance(items,list):
        return [MyToken(items)]

    tokens = []
    for item in items:
        if isinstance(item,str):
            tokens.append(MyToken(item))
        elif isinstance(item,list):
            tokens.extend(item)
        else:
            tokens.append(item)
    return tokens

In [15]:
def get_path_expression_terminals(tree):
    """Rewrite a path expression node into nested path-function call tokens.

    The first child is the base expression; every following child is
    inspected in order and wraps the tokens accumulated so far:

    * child step      -> child_path ( <tokens> )
    * descendant step -> desc_path ( <tokens> )
    * predicate step  -> pred_path ( <tokens> , <condition> ), where the
      condition is the step's terminals without the first and last one
      (presumably the enclosing brackets -- confirm against the grammar),
      surrounded by triple-quote tokens.

    Children that match none of the three step kinds leave the tokens
    unchanged.
    """
    tokens = get_all_terminals(tree.children[0])

    for step in tree.children[1:]:
        if isChildStep(step):
            tokens = mk_tok([ "child_path", "(", tokens, ")" ])
            continue
        if isDescStep(step):
            tokens = mk_tok([ "desc_path", "(", tokens, ")"])
            continue
        if isPredStep(step):
            inner = get_all_terminals(step)[1:-1]
            predicate = mk_tok([ '"""', inner, '"""'])
            tokens = mk_tok([ "pred_path", "(", tokens, ",", predicate, ")"])

    return tokens

In [16]:
def ruleType(tree,t):
    """Tell whether `tree` is a rule node whose rule index equals `t`.

    Terminal nodes always yield False.
    """
    return (not isinstance(tree,TerminalNodeImpl)) and tree.getRuleIndex()==t

In [17]:
def process_select_clause(tree):
    """Translate a select clause into the tokens of a list of (value, alias) tuples.

    The select variables are the `selectvar` children of the clause's second
    child. Each one becomes the token sequence `( value , alias )`:

    * a star selection yields `( '*' , None )`;
    * an expression without alias yields the expression terminals wrapped in
      triple-quote tokens, with `None` as alias;
    * an aliased expression (three children: value, separator, alias) yields
      the value and the alias, each wrapped in triple-quote tokens.

    Returns:
        The tokens `[ entry , entry , ... ]`.
    """
    sel_vars = [t for t in tree.children[1].children if ruleType(t,parser.RULE_selectvar)]
    entries = []
    for sv in sel_vars:
        v = sv.children[0]
        if v.getRuleIndex()==parser.RULE_selectvar_star:
            # The separating comma was missing here, which produced the
            # invalid tuple `( '*' None )`.
            entries.append( mk_tok(["(", "'*'", ",", "None", ")"]) )
        elif len(v.children)==1:
            value_toks = mk_tok(['"""',get_all_terminals(v),'"""'])
            entries.append(mk_tok(["(", value_toks, ",", "None",")"]))
        else:
            # Only the value sub-tree belongs in the value string; taking the
            # terminals of the whole `v` would also pull in the separator and
            # the alias.
            value = v.children[0]
            alias = v.children[2]
            value_toks = mk_tok(['"""',get_all_terminals(value),'"""'])
            alias_toks = mk_tok(['"""',get_all_terminals(alias),'"""'])
            entries.append(mk_tok(["(", value_toks, ",", alias_toks,")"]))

    # Join the entries with comma tokens; unlike reduce() without an initial
    # value this also works when there are no entries.
    joined = []
    for entry in entries:
        if joined:
            joined += mk_tok([","])
        joined += entry
    return mk_tok(["[", joined, "]"])

In [18]:
def process_from_clause(tree):
    """Translate a from clause into the tokens of a list of 3-tuples.

    Every `from_clause_entry` child contributes
    `( "variable" , "access" , <expression> )`: the text of its first and
    second child as double-quoted string tokens, and the terminals of its
    third child wrapped in triple-quote tokens.

    Returns:
        The tokens `[ entry , entry , ... ]`.
    """
    entries = []
    for entry in tree.children:
        if not ruleType(entry,parser.RULE_from_clause_entry):
            continue
        var_tok = '"%s"' % getText(entry.children[0])
        access_tok = '"%s"' % getText(entry.children[1])
        expr_toks = get_all_terminals(entry.children[2])
        entries.append( mk_tok(["(", var_tok, ",", access_tok, ",", '"""', expr_toks, '"""',")"]) )

    comma_joined = reduce(lambda acc,nxt: acc + mk_tok([","]) + nxt, entries)
    return mk_tok(["[", comma_joined, "]"])

In [19]:
def process_orderby_clause(tree):
    """Translate an order-by clause into the tokens of a list of 2-tuples.

    The order list is the clause's third child. Every `orderlist_el` in it
    contributes `( <expression> , "direction" )`: the terminals of its first
    child wrapped in triple-quote tokens, and the text of its second child
    as a double-quoted string token ("asc" when the element has a single
    child).

    Returns:
        The tokens `[ entry , entry , ... ]`.
    """
    orderlist = tree.children[2]
    entries = []
    for element in orderlist.children:
        if not ruleType(element,parser.RULE_orderlist_el):
            continue
        if len(element.children)==1:
            direction = "asc"
        else:
            direction = getText(element.children[1])
        direction_tok = '"'+direction+'"'
        expr_toks = get_all_terminals(element.children[0])
        entries.append(mk_tok(["(", '"""',expr_toks,'"""',",", direction_tok, ")"]))

    comma_joined = reduce(lambda acc,nxt: acc + mk_tok([","]) + nxt, entries)
    return mk_tok(["[",comma_joined,"]"])

In [20]:
def process_groupby_clause(tree):
    """Translate a group-by clause into the tokens of a list of quoted names.

    The group-by list is the clause's third child; every `group_by_var` in
    it contributes one double-quoted string token holding its text.

    Returns:
        The tokens `[ "name" , "name" , ... ]`.
    """
    groupby_list = tree.children[2]
    group_vars = [e for e in groupby_list.children if ruleType(e,parser.RULE_group_by_var)]

    joined = []
    for var in group_vars:
        if joined:
            joined += mk_tok([","])
        # Emit the quoted name as ONE token, as process_from_clause does for
        # its variable. Three separate tokens (quote, name, quote) are
        # rendered by printProgram with spaces in between, i.e. as the
        # string literal `" name "`.
        joined += mk_tok('"'+getText(var)+'"')
    return mk_tok(["[",joined,"]"])

In [32]:
def process_where_clause(tree):
    """Return the terminals of the clause's second child wrapped in triple-quote tokens."""
    condition = get_all_terminals(tree.children[1])
    return mk_tok(['"""', condition, '"""'])

In [26]:
def get_query_terminals(tree):
    """Rewrite a query expression node into the tokens of a `PyQuery(...)` call.

    The select and from clauses are taken by position (first and second
    child). The order-by, group-by and where clauses are optional and are
    located by rule type among the children.

    Returns:
        The tokens of
        `PyQuery ( select , from , groupby , orderby , where )`, where a
        missing group-by / order-by clause is emitted as `[]` and a missing
        where clause as `None`.
    """
    children = tree.children
    select_cl = children[0]
    from_cl = children[1]

    def find_clause(rule):
        # ruleType() returns False for terminal children instead of calling
        # getRuleIndex() on them.
        return next((c for c in children if ruleType(c,rule)),None)

    orderby_cl = find_clause(parser.RULE_order_by_clause)
    groupby_cl = find_clause(parser.RULE_group_by_clause)
    where_cl = find_clause(parser.RULE_where_clause)

    select_tokens = process_select_clause(select_cl)
    from_tokens = process_from_clause(from_cl)
    # Compare with None explicitly rather than relying on the truthiness of
    # a parse-tree node.
    orderby_tokens = mk_tok(["[]"]) if orderby_cl is None else process_orderby_clause(orderby_cl)
    groupby_tokens = mk_tok(["[]"]) if groupby_cl is None else process_groupby_clause(groupby_cl)
    where_tokens = mk_tok(["None"]) if where_cl is None else process_where_clause(where_cl)
    return mk_tok(["PyQuery", "(",
                    select_tokens,",",
                    from_tokens,",",
                    groupby_tokens,",",
                    orderby_tokens,",",
                    where_tokens,")"])

In [23]:
def makeIndent(i):
    """Return the leading whitespace for indent level `i` (four spaces per level)."""
    half_step = "  "
    return (half_step + half_step) * i

def all_ws(t):
    """Return True when every element of `t` equals a single space.

    An empty `t` yields True.
    """
    for ch in t:
        if ch != ' ':
            return False
    return True

def printProgram(tokens):
    """Print a sequence of token strings as indented program text.

    Tokens of one line are collected in a buffer, each followed by a single
    space; a ' ' or '\\n' token prints the buffer and starts a new line.
    While the buffer is empty, tokens adjust the indent level instead:

    * ' '             -> level decreases by one;
    * '\\n'            -> level decreases by one, but not below zero;
    * only spaces     -> level becomes half the token length;
    * anything else   -> starts a new line at the current level.

    A line that is still in the buffer when the tokens run out is not
    printed.
    """
    level = 0
    line = ""
    for tok in tokens:
        if line != "":
            # A line is being collected: either finish it or append to it.
            if tok in (' ', '\n'):
                print(line)
                line = ""
            else:
                line += tok + " "
            continue

        if tok == ' ':
            level -= 1
        elif tok == '\n':
            level = max(level - 1, 0)
        elif all_ws(tok):
            level = len(tok)//2
        else:
            line = makeIndent(level) + tok + " "

In [83]:
# Flatten the parsed tree into its (rewritten) terminal nodes. The bare
# expression makes the notebook display the list's repr as the cell output.
get_all_terminals(tree)

[<antlr4.tree.Tree.TerminalNodeImpl at 0x104483cf8>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x104487710>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x104483080>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x1044be198>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x1044beef0>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x1044be908>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x1044be550>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x1044be400>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x104496080>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x104496860>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x1044967f0>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x1044961d0>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x1044962b0>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x104496518>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x104496a58>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x1044969b0>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x1044875c0>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x104487358>,
 <antlr4.tree.Tree.TerminalNodeImpl at 0x106a6

In [31]:
# Same flattening, but shown as the text of each terminal so the rewritten
# token sequence is readable.
get_text(get_all_terminals(tree))

['x',
 '=',
 '[',
 '1',
 ',',
 '2',
 ',',
 '3',
 ',',
 '4',
 ',',
 '5',
 ']',
 '\n',
 'y',
 '=',
 '[',
 '6',
 ',',
 '7',
 ',',
 '8',
 ',',
 '9',
 ',',
 '10',
 ']',
 '\n',
 '(',
 'PyQuery',
 '(',
 '[',
 '(',
 '"""',
 'z',
 '"""',
 ',',
 'None',
 ')',
 ']',
 ',',
 '[',
 '(',
 '"z"',
 ',',
 '"in"',
 ',',
 '"""',
 'x',
 '"""',
 ')',
 ',',
 '(',
 '"w"',
 ',',
 '"in"',
 ',',
 '"""',
 'y',
 '"""',
 ')',
 ']',
 ',',
 '[]',
 ',',
 '[]',
 ',',
 '"""',
 'z',
 '%',
 '2',
 '==',
 '0',
 '""""',
 ')',
 ')',
 '\n',
 '<EOF>']

In [33]:
# Render the rewritten token texts as program lines.
printProgram(get_text(get_all_terminals(tree)))

x = [ 1 , 2 , 3 , 4 , 5 ] 
y = [ 6 , 7 , 8 , 9 , 10 ] 
( PyQuery ( [ ( """ z """ , None ) ] , [ ( "z" , "in" , """ x """ ) , ( "w" , "in" , """ y """ ) ] , [] , [] , """ z % 2 == 0 """ ) ) 
