Skip to content

Commit

Permalink
Implemented the spath command.
Browse files Browse the repository at this point in the history
  • Loading branch information
salspaugh committed Apr 2, 2014
1 parent 9133896 commit 8511b66
Show file tree
Hide file tree
Showing 7 changed files with 330 additions and 2 deletions.
231 changes: 231 additions & 0 deletions splparser/lexers/spathlexer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
#!/usr/bin/env python

import ply.lex
from ply.lex import TOKEN
import re

from splparser.regexes.searchregexes import *
from splparser.exceptions import SPLSyntaxError

# Token type names this lexer may emit.
# NOTE(review): t_MACRO (defined further down) produces tokens of type
# 'MACRO', which is not listed here -- confirm whether it should be added.
tokens = [
'WILDCARD',
'EQ',
'IPV4ADDR', 'IPV6ADDR',
'EMAIL','HOSTNAME', 'URL', 'PATH', 'US_PHONE',
'WORD',
'INT', 'BIN', 'OCT', 'HEX', 'FLOAT',
'ID',
'NBSTR', # non-breaking string
'LITERAL', # in quotes
'SPATH_OPT',
'INTERNAL_FIELD',
'DEFAULT_FIELD',
'DEFAULT_DATETIME_FIELD'
]

# Exact-match keywords mapped to their token type (see type_if_reserved).
reserved = {
'spath' : 'SPATH',
}

# Every reserved keyword's token type is also a valid token.
tokens = tokens + list(reserved.values())

# Characters skipped between tokens: the space character only.
t_ignore = ' '

# String-defined rule for the '=' sign.
t_EQ = r'='

# !!! The order in which the rule functions below are defined determines
# matching. The first to match is used. Take CARE when reordering.

# Extra lexer state; 'inclusive' means its rules are added to the default
# (INITIAL) rules rather than replacing them.
states = (
('ipunchecked', 'inclusive'),
)

def is_ipv4addr(addr):
    """Return True if *addr* parses as an IPv4 address, otherwise False.

    Before validation every ``*`` is replaced by ``0``, surrounding
    whitespace and double quotes are stripped, and the text is cut at the
    first ``:`` and then at the first ``/`` (so a trailing port or prefix
    length is ignored).
    """
    import socket

    candidate = addr.replace('*', '0').strip().strip('"')
    # A separator found at index 0 is deliberately left in place (find() > 0),
    # which makes the final parse fail and the function return False.
    for separator in (':', '/'):
        cut = candidate.find(separator)
        if cut > 0:
            candidate = candidate[:cut]
    try:
        socket.inet_pton(socket.AF_INET, candidate.strip())
    except socket.error:
        return False
    else:
        return True

def is_ipv6addr(addr):
    """Return True if *addr* parses as an IPv6 address, otherwise False.

    Before validation every ``*`` is replaced by ``0``, surrounding
    whitespace, double quotes and ``[`` characters are stripped, and the
    text is cut at the first ``]`` and then at the first ``/`` (so a
    bracketed address with a port, or a prefix length, is accepted).
    """
    import socket

    candidate = addr.replace('*', '0').strip().strip('"').strip('[')
    # A terminator found at index 0 is left in place (find() > 0), which
    # makes the final parse fail and the function return False.
    for terminator in (']', '/'):
        cut = candidate.find(terminator)
        if cut > 0:
            candidate = candidate[:cut]
    try:
        socket.inet_pton(socket.AF_INET6, candidate.strip())
    except socket.error:
        return False
    else:
        return True

def type_if_reserved(t, default):
    """Return the token type name to assign to token *t*.

    ``t.value`` is tested, in order, against the ``spath_opt``,
    ``internal_field``, ``default_field`` and ``default_datetime_field``
    patterns; the first match decides the type. If none match, the value is
    looked up in ``reserved`` and *default* is returned when it is absent.

    Always returns a plain string (or *default* as given).
    """
    value = t.value
    if re.match(spath_opt, value):
        return 'SPATH_OPT'
    if re.match(internal_field, value):
        return 'INTERNAL_FIELD'
    if re.match(default_field, value):
        # Must be a bare string: a stray trailing comma here would return the
        # tuple ('DEFAULT_FIELD',), which is not a declared token type.
        return 'DEFAULT_FIELD'
    if re.match(default_datetime_field, value):
        return 'DEFAULT_DATETIME_FIELD'
    return reserved.get(value, default)

def t_MACRO(t):
    r"""(`[^`]*`)"""
    # The docstring above is the PLY regex for this rule (a backtick-quoted
    # span), so it must not be reworded. The token is returned unchanged.
    return t

@TOKEN(ipv4_addr)
def t_ipunchecked_IPV4ADDR(t):
    # Active only in the 'ipunchecked' state: keep the match if it really is
    # an IPv4 address; otherwise rewind and let the INITIAL rules lex it.
    if not is_ipv4addr(t.value):
        t.lexer.lexpos -= len(t.value)  # un-consume the matched text
        t.lexer.begin('INITIAL')
        return None  # no token produced for this match
    return t

@TOKEN(ipv6_addr)
def t_ipunchecked_IPV6ADDR(t):
    # Active only in the 'ipunchecked' state: keep the match if it really is
    # an IPv6 address; otherwise rewind and let the INITIAL rules lex it.
    if not is_ipv6addr(t.value):
        t.lexer.lexpos -= len(t.value)  # un-consume the matched text
        t.lexer.begin('INITIAL')
        return None  # no token produced for this match
    return t

@TOKEN(internal_field)
def t_INTERNAL_FIELD(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(default_field)
def t_DEFAULT_FIELD(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(default_datetime_field)
def t_DEFAULT_DATETIME_FIELD(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(wildcard)
def t_WILDCARD(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(literal)
def t_LITERAL(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(bin)
def t_BIN(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(oct)
def t_OCT(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(hex)
def t_HEX(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(float)
def t_FLOAT(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(word)
def t_WORD(t):
    # Re-type the match if its text is reserved (falling back to WORD), then
    # switch the lexer to the 'ipunchecked' state.
    resolved = type_if_reserved(t, 'WORD')
    t.type = resolved
    t.lexer.begin('ipunchecked')
    return t

@TOKEN(int)
def t_INT(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(id)
def t_ID(t):
    # Re-type the match if its text is reserved (falling back to ID), then
    # switch the lexer to the 'ipunchecked' state.
    resolved = type_if_reserved(t, 'ID')
    t.type = resolved
    t.lexer.begin('ipunchecked')
    return t

@TOKEN(email)
def t_EMAIL(t):
    # Re-type the match if its text is reserved (falling back to EMAIL), then
    # switch the lexer to the 'ipunchecked' state.
    resolved = type_if_reserved(t, 'EMAIL')
    t.type = resolved
    t.lexer.begin('ipunchecked')
    return t

@TOKEN(hostname)
def t_HOSTNAME(t):
    # Re-type the match if its text is reserved (falling back to HOSTNAME),
    # then switch the lexer to the 'ipunchecked' state.
    resolved = type_if_reserved(t, 'HOSTNAME')
    t.type = resolved
    t.lexer.begin('ipunchecked')
    return t

@TOKEN(path)
def t_PATH(t):
    # Re-type the match if its text is reserved (falling back to PATH), then
    # switch the lexer to the 'ipunchecked' state.
    resolved = type_if_reserved(t, 'PATH')
    t.type = resolved
    t.lexer.begin('ipunchecked')
    return t

@TOKEN(url)
def t_URL(t):
    # Re-type the match if its text is reserved (falling back to URL), then
    # switch the lexer to the 'ipunchecked' state.
    resolved = type_if_reserved(t, 'URL')
    t.type = resolved
    t.lexer.begin('ipunchecked')
    return t

@TOKEN(us_phone)
def t_US_PHONE(t):
    # Emit the token and switch the lexer to the 'ipunchecked' state.
    lexer = t.lexer
    lexer.begin('ipunchecked')
    return t

@TOKEN(nbstr)
def t_NBSTR(t):
    # Non-breaking string. Re-type the match if its text is reserved (falling
    # back to NBSTR), then switch the lexer to the 'ipunchecked' state.
    resolved = type_if_reserved(t, 'NBSTR')
    t.type = resolved
    t.lexer.begin('ipunchecked')
    return t

def t_error(t):
    # Called when no rule matches at the current position. Steps past the
    # offending character, resets the lexer to the 'ipunchecked' state, then
    # raises SPLSyntaxError naming that character.
    badchar = t.value[0]
    t.lexer.skip(1)
    t.lexer.begin('ipunchecked')
    # This module is the spath lexer, so the message names it as such.
    raise SPLSyntaxError("Illegal character in spath lexer '%s'" % badchar)

def lex():
    """Build and return a PLY lexer from the rules defined in this module."""
    # ply.lex.lex() discovers the rules from its caller's namespace, so the
    # call has to be made from inside this module.
    spath_lexer = ply.lex.lex()
    return spath_lexer

def tokenize(data, debug=False, debuglog=None):
    """Lex *data* with a freshly built lexer and return its tokens as a list.

    *debug* and *debuglog* are forwarded to ``ply.lex.lex``. Lexing starts in
    the 'ipunchecked' state.
    """
    # NOTE: ply.lex.lex() reads the caller's globals *and* locals to find the
    # lexer specification, so no local named like a PLY setting (e.g.
    # ``tokens``) may be bound before this call.
    lexer = ply.lex.lex(debug=debug, debuglog=debuglog)
    lexer.input(data)
    lexer.begin('ipunchecked')
    # lexer.token() returns None once the input is exhausted.
    return list(iter(lexer.token, None))

if __name__ == "__main__":
    import sys
    # Tokenize the command-line arguments (joined by spaces) and print the
    # resulting token list. The parenthesised single-argument form behaves the
    # same under Python 2's print statement and Python 3's print function.
    print(tokenize(' '.join(sys.argv[1:])))
2 changes: 2 additions & 0 deletions splparser/regexes/searchregexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@

# Presumably the option keywords of the xpath command -- verify against its lexer.
xpath_opt = r'(?:outfield|field|default)' + end_of_token

# Option keywords of the spath command: input, output, path.
spath_opt = r'(?:input|output|path)' + end_of_token

# NOTE(review): unlike the two patterns above, the next two are not suffixed
# with end_of_token -- confirm that is intended.
xmlkv_opt = r'(?:maxinputs|id)'

extractkv_opt = r'(?:auto|clean_keys|kvdelim|limit|maxchars|mv_add|pairdelim|reload|segment)'
Expand Down
5 changes: 3 additions & 2 deletions splparser/rules/commandrules.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,9 +508,10 @@ def p_cmdexpr_sort(p):
"""cmdexpr : SORT arglist
| SORT MACRO"""

@notimplemented
@splcommandrule
def p_cmdexpr_spath(p):
"""cmdexpr : SPATH arglist
"""cmdexpr : SPATH
| SPATH arglist
| SPATH MACRO"""

@splcommandrule
Expand Down
51 changes: 51 additions & 0 deletions splparser/rules/spathrules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python

import ply.yacc
import logging

from splparser.parsetree import *
from splparser.exceptions import SPLSyntaxError

from splparser.rules.common.valuerules import *

from splparser.lexers.spathlexer import tokens

# Start symbol of the spath grammar (the name ply.yacc looks up).
start = 'cmdexpr'

def p_cmdexpr(p):
    """cmdexpr : spathcmd"""
    # The docstring is the grammar rule. The spathcmd result is passed
    # through unchanged as the value of cmdexpr.
    p[0] = p[1]

def p_cmdexpr_spath(p):
    """spathcmd : SPATH
                | SPATH spatharglist"""
    # Build the COMMAND node; when an argument list was parsed, adopt its
    # children directly so the temporary list node does not appear in the tree.
    command = ParseTreeNode('COMMAND', raw='spath')
    has_args = len(p) > 2
    if has_args:
        command.add_children(p[2].children)
    p[0] = command

def p_spatharglist_spathargs(p):
    """spatharglist : spatharg
                    | spatharg spatharglist"""
    # Collect arguments into a flat temporary list node: the first argument,
    # followed by the children of the (right-recursive) remainder, if any.
    arglist = ParseTreeNode('_SPATHARGLIST')
    arglist.add_child(p[1])
    has_rest = len(p) > 2
    if has_rest:
        arglist.add_children(p[2].children)
    p[0] = arglist

def p_spatharg_opt(p):
    """spatharg : SPATH_OPT EQ value"""
    # Build EQ('assign') with two children: OPTION(<name>) and the value node.
    assignment = ParseTreeNode('EQ', raw='assign')
    option = ParseTreeNode('OPTION', raw=p[1])
    operand = p[3]
    # The operand of input= / output= is re-tagged with the FIELD role.
    if option.raw in ("input", "output"):
        operand.role = 'FIELD'
    option.values.append(operand)
    assignment.add_children([option, operand])
    p[0] = assignment

def p_spatharg_path(p):
    """spatharg : value"""
    # A bare value is treated as an implicit path=<value> assignment, giving
    # it the same EQ -> [OPTION, value] shape as an explicit option.
    assignment = ParseTreeNode('EQ', raw='assign')
    option = ParseTreeNode('OPTION', raw='path')
    operand = p[1]
    option.values.append(operand)
    assignment.add_children([option, operand])
    p[0] = assignment

def p_error(p):
    # Parser error hook: any syntax error is reported as an SPLSyntaxError.
    # The offending token *p* is not included in the message.
    message = "Syntax error in spath parser input!"
    raise SPLSyntaxError(message)
Empty file.
4 changes: 4 additions & 0 deletions test/splparser/rules/spathrules/test_spathrules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env python

import doctest
# Run the doctest transcript stored in test_spathrules.txt.
# NOTE(review): the result returned by doctest.testfile() is discarded here --
# confirm the test runner detects failures from the printed report.
doctest.testfile('test_spathrules.txt')
39 changes: 39 additions & 0 deletions test/splparser/rules/spathrules/test_spathrules.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

Below are clips from an interactive session in which I test various aspects of the parser.

>>> import splparser.parser

>>> splparser.parse('spath output=repository path=repository.url').print_tree()
('ROOT')
('STAGE')
('COMMAND': 'spath')
('EQ': 'assign')
('OPTION': 'output')
('FIELD': 'repository')
('EQ': 'assign')
('OPTION': 'path')
('VALUE': 'repository.url')

>>> splparser.parse('spath input=commit_author path=commits.author.name').print_tree()
('ROOT')
('STAGE')
('COMMAND': 'spath')
('EQ': 'assign')
('OPTION': 'input')
('FIELD': 'commit_author')
('EQ': 'assign')
('OPTION': 'path')
('VALUE': 'commits.author.name')

>>> splparser.parse('spath path=vendorProductSet.product.desc.locDesc{4}{@locale}').print_tree()
('ROOT')
('STAGE')
('COMMAND': 'spath')
('EQ': 'assign')
('OPTION': 'path')
('VALUE': 'vendorProductSet.product.desc.locDesc{4}{@locale}')

>>> splparser.parse('spath').print_tree()
('ROOT')
('STAGE')
('COMMAND': 'spath')

0 comments on commit 8511b66

Please sign in to comment.