Skip to content

Commit 8511b66

Browse files
committed
Implemented the spath command.
1 parent 9133896 commit 8511b66

File tree

7 files changed

+330
-2
lines changed

7 files changed

+330
-2
lines changed

splparser/lexers/spathlexer.py

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
#!/usr/bin/env python
2+
3+
import ply.lex
4+
from ply.lex import TOKEN
5+
import re
6+
7+
from splparser.regexes.searchregexes import *
8+
from splparser.exceptions import SPLSyntaxError
9+
10+
# Token types this lexer can emit. ply.lex reads this module-level list.
tokens = [
    'WILDCARD',
    'EQ',
    'IPV4ADDR', 'IPV6ADDR',
    'EMAIL','HOSTNAME', 'URL', 'PATH', 'US_PHONE',
    'WORD',
    'INT', 'BIN', 'OCT', 'HEX', 'FLOAT',
    'ID',
    'NBSTR', # non-breaking string
    'LITERAL', # in quotes
    'SPATH_OPT',
    'INTERNAL_FIELD',
    'DEFAULT_FIELD',
    'DEFAULT_DATETIME_FIELD'
]

# Keywords that get their own token type; looked up by type_if_reserved().
reserved = {
    'spath' : 'SPATH',
}

# Every reserved word's token type must also be declared as a token.
tokens = tokens + list(reserved.values())

# Characters skipped between tokens (spaces only).
t_ignore = ' '

t_EQ = r'='

# !!! The order in which these functions are defined determines matching. The
# first to match is used. Take CARE when reordering.

# 'ipunchecked' is an inclusive state: the t_ipunchecked_* address rules are
# only active in it, on top of the ordinary rules defined below.
states = (
    ('ipunchecked', 'inclusive'),
)
42+
43+
def is_ipv4addr(addr):
    """Return True if ``addr`` holds a valid IPv4 address, else False.

    The candidate is normalised before validation: ``*`` wildcards are
    replaced by ``0``, surrounding whitespace and double quotes are removed,
    and a trailing ``:port`` and/or ``/prefix`` suffix is cut off.
    """
    import socket

    candidate = addr.replace('*', '0').strip().strip('"')
    # Drop the port first, then the prefix length. A separator at index 0 is
    # left in place, so such input simply fails validation below.
    for separator in (':', '/'):
        cut = candidate.find(separator)
        if cut > 0:
            candidate = candidate[:cut]
    candidate = candidate.strip()
    try:
        socket.inet_pton(socket.AF_INET, candidate)
    except (socket.error, ValueError):
        # socket.error: not a valid address. ValueError: Python 3 rejects
        # text with an embedded NUL before it even parses the address.
        return False
    return True
60+
61+
def is_ipv6addr(addr):
    """Return True if ``addr`` holds a valid IPv6 address, else False.

    The candidate is normalised before validation: ``*`` wildcards are
    replaced by ``0``, surrounding whitespace, double quotes and ``[`` are
    removed, and anything from a closing ``]`` (bracketed form followed by a
    port) or from a ``/`` (prefix length) onwards is cut off.
    """
    import socket

    candidate = addr.replace('*', '0').strip().strip('"').strip('[')
    # Cut at the closing bracket first, then at the prefix length. A
    # separator at index 0 is left in place and fails validation below.
    for separator in (']', '/'):
        cut = candidate.find(separator)
        if cut > 0:
            candidate = candidate[:cut]
    candidate = candidate.strip()
    try:
        socket.inet_pton(socket.AF_INET6, candidate)
    except (socket.error, ValueError):
        # socket.error: not a valid address. ValueError: Python 3 rejects
        # text with an embedded NUL before it even parses the address.
        return False
    return True
79+
80+
def type_if_reserved(t, default):
    """Return the token type to use for token ``t``.

    The token's text is checked, in order, against the spath option,
    internal field, default field and default datetime field patterns; the
    first one that matches decides the type. Otherwise the text is looked up
    in ``reserved``, and ``default`` is returned when it is not a reserved
    word.

    The result is always a plain string. The DEFAULT_FIELD branch previously
    ended in a stray comma and so returned a 1-tuple, which is not a valid
    token type.
    """
    # Order is significant: the first matching pattern wins.
    special_patterns = (
        (spath_opt, 'SPATH_OPT'),
        (internal_field, 'INTERNAL_FIELD'),
        (default_field, 'DEFAULT_FIELD'),
        (default_datetime_field, 'DEFAULT_DATETIME_FIELD'),
    )
    for pattern, token_type in special_patterns:
        if re.match(pattern, t.value):
            return token_type
    return reserved.get(t.value, default)
91+
92+
# Matches a backtick-delimited macro reference (two backticks with any
# non-backtick text between them). The docstring below is the rule's regex.
# NOTE(review): the resulting token type 'MACRO' is not listed in this
# module's `tokens` -- confirm whether it should be declared there.
def t_MACRO(t):
    r"""(`[^`]*`)"""
    return t
95+
96+
@TOKEN(ipv4_addr)
def t_ipunchecked_IPV4ADDR(t):
    # Only active in the 'ipunchecked' state. The regex match alone is not
    # trusted: the text must also pass is_ipv4addr() to become a token.
    if is_ipv4addr(t.value):
        return t
    # Not a real address: rewind over the matched text and switch to the
    # INITIAL state so the same input is re-lexed without this rule.
    t.lexer.lexpos -= len(t.value)
    t.lexer.begin('INITIAL')
    return
103+
104+
@TOKEN(ipv6_addr)
def t_ipunchecked_IPV6ADDR(t):
    # Only active in the 'ipunchecked' state. The regex match alone is not
    # trusted: the text must also pass is_ipv6addr() to become a token.
    if is_ipv6addr(t.value):
        return t
    # Not a real address: rewind over the matched text and switch to the
    # INITIAL state so the same input is re-lexed without this rule.
    t.lexer.lexpos -= len(t.value)
    t.lexer.begin('INITIAL')
    return
111+
112+
@TOKEN(internal_field)
def t_INTERNAL_FIELD(t):
    # Emit the token and (re-)enter 'ipunchecked' so the IP-address rules are
    # tried again on whatever follows.
    t.lexer.begin('ipunchecked')
    return(t)
116+
117+
@TOKEN(default_field)
def t_DEFAULT_FIELD(t):
    # Emit the token and (re-)enter 'ipunchecked' so the IP-address rules are
    # tried again on whatever follows.
    t.lexer.begin('ipunchecked')
    return(t)
121+
122+
@TOKEN(default_datetime_field)
def t_DEFAULT_DATETIME_FIELD(t):
    # Emit the token and (re-)enter 'ipunchecked' so the IP-address rules are
    # tried again on whatever follows.
    t.lexer.begin('ipunchecked')
    return(t)
126+
127+
@TOKEN(wildcard)
def t_WILDCARD(t):
    # Emit the token and (re-)enter 'ipunchecked' so the IP-address rules are
    # tried again on whatever follows.
    t.lexer.begin('ipunchecked')
    return t
131+
132+
@TOKEN(literal)
def t_LITERAL(t):
    # Quoted literal (see the 'LITERAL' entry in `tokens`). Emit it and
    # (re-)enter 'ipunchecked' for the input that follows.
    t.lexer.begin('ipunchecked')
    return(t)
136+
137+
# NOTE: `bin` here is the regex string from the searchregexes star-import,
# which shadows the builtin of the same name.
@TOKEN(bin)
def t_BIN(t):
    # Emit the token and (re-)enter 'ipunchecked' for the input that follows.
    t.lexer.begin('ipunchecked')
    return t
141+
142+
# NOTE: `oct` here is the regex string from the searchregexes star-import,
# which shadows the builtin of the same name.
@TOKEN(oct)
def t_OCT(t):
    # Emit the token and (re-)enter 'ipunchecked' for the input that follows.
    t.lexer.begin('ipunchecked')
    return t
146+
147+
# NOTE: `hex` here is the regex string from the searchregexes star-import,
# which shadows the builtin of the same name.
@TOKEN(hex)
def t_HEX(t):
    # Emit the token and (re-)enter 'ipunchecked' for the input that follows.
    t.lexer.begin('ipunchecked')
    return t
151+
152+
# NOTE: `float` here is the regex string from the searchregexes star-import,
# which shadows the builtin of the same name.
@TOKEN(float)
def t_FLOAT(t):
    # Emit the token and (re-)enter 'ipunchecked' for the input that follows.
    t.lexer.begin('ipunchecked')
    return t
156+
157+
@TOKEN(word)
def t_WORD(t):
    # A word may really be an spath option, a known field name or the
    # reserved word 'spath'; type_if_reserved() picks the final token type.
    t.type = type_if_reserved(t, 'WORD')
    t.lexer.begin('ipunchecked')
    return t
162+
163+
# NOTE: `int` here is the regex string from the searchregexes star-import,
# which shadows the builtin of the same name.
@TOKEN(int)
def t_INT(t):
    # Emit the token and (re-)enter 'ipunchecked' for the input that follows.
    t.lexer.begin('ipunchecked')
    return t
167+
168+
# NOTE: `id` here is the regex string from the searchregexes star-import,
# which shadows the builtin of the same name.
@TOKEN(id)
def t_ID(t):
    # type_if_reserved() may reclassify the match as an option, a known
    # field or a reserved word; otherwise it stays an ID.
    t.type = type_if_reserved(t, 'ID')
    t.lexer.begin('ipunchecked')
    return t
173+
174+
@TOKEN(email)
def t_EMAIL(t):
    # type_if_reserved() may reclassify the match as an option, a known
    # field or a reserved word; otherwise it stays an EMAIL.
    t.type = type_if_reserved(t, 'EMAIL')
    t.lexer.begin('ipunchecked')
    return t
179+
180+
@TOKEN(hostname)
def t_HOSTNAME(t):
    # type_if_reserved() may reclassify the match as an option, a known
    # field or a reserved word; otherwise it stays a HOSTNAME.
    t.type = type_if_reserved(t, 'HOSTNAME')
    t.lexer.begin('ipunchecked')
    return(t)
185+
186+
@TOKEN(path)
def t_PATH(t):
    # type_if_reserved() may reclassify the match as an option, a known
    # field or a reserved word; otherwise it stays a PATH.
    t.type = type_if_reserved(t, 'PATH')
    t.lexer.begin('ipunchecked')
    return(t)
191+
192+
@TOKEN(url)
def t_URL(t):
    # type_if_reserved() may reclassify the match as an option, a known
    # field or a reserved word; otherwise it stays a URL.
    t.type = type_if_reserved(t, 'URL')
    t.lexer.begin('ipunchecked')
    return(t)
197+
198+
@TOKEN(us_phone)
def t_US_PHONE(t):
    # Emit the token and (re-)enter 'ipunchecked' for the input that follows.
    t.lexer.begin('ipunchecked')
    return(t)
202+
203+
@TOKEN(nbstr)
def t_NBSTR(t): # non-breaking string
    # Defined last among the token rules, so it is tried after all of them.
    # type_if_reserved() may still reclassify the match.
    t.type = type_if_reserved(t, 'NBSTR')
    t.lexer.begin('ipunchecked')
    return t
208+
209+
# Lexer error hook: called when no rule matches the input at the current
# position. Always raises SPLSyntaxError naming the offending character.
def t_error(t):
    badchar = t.value[0]
    # Step past the bad character and restore the normal start state before
    # raising, so the lexer object is left in a consistent state.
    t.lexer.skip(1)
    t.lexer.begin('ipunchecked')
    # The message previously said "xpath lexer", a copy-paste leftover.
    raise SPLSyntaxError("Illegal character in spath lexer '%s'" % badchar)
214+
215+
def lex():
    """Build and return a ply lexer from the rules in this module."""
    # Called with no arguments, ply.lex.lex() collects the rules from the
    # namespace of its caller, so this wrapper must stay in this module.
    return ply.lex.lex()
217+
218+
def tokenize(data, debug=False, debuglog=None):
    """Lex ``data`` with this module's rules and return the list of tokens.

    ``debug`` and ``debuglog`` are passed straight through to ply.lex.lex().
    Lexing starts in the 'ipunchecked' state.
    """
    lexer = ply.lex.lex(debug=debug, debuglog=debuglog)
    lexer.input(data)
    lexer.begin('ipunchecked')
    # lexer.token() returns None once the input is exhausted, which ends the
    # iteration.
    return list(iter(lexer.token, None))
228+
229+
# Script entry point: tokenize the command-line arguments, joined by single
# spaces, and print the resulting token list.
if __name__ == "__main__":
    import sys
    # The parenthesised single-argument form prints the same on Python 2 and
    # is also valid Python 3, unlike the bare print statement.
    print(tokenize(' '.join(sys.argv[1:])))

splparser/regexes/searchregexes.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636

3737
xpath_opt = r'(?:outfield|field|default)' + end_of_token
3838

39+
spath_opt = r'(?:input|output|path)' + end_of_token
40+
3941
xmlkv_opt = r'(?:maxinputs|id)'
4042

4143
extractkv_opt = r'(?:auto|clean_keys|kvdelim|limit|maxchars|mv_add|pairdelim|reload|segment)'

splparser/rules/commandrules.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -508,9 +508,10 @@ def p_cmdexpr_sort(p):
508508
"""cmdexpr : SORT arglist
509509
| SORT MACRO"""
510510

511-
@notimplemented
511+
@splcommandrule
512512
def p_cmdexpr_spath(p):
513-
"""cmdexpr : SPATH arglist
513+
"""cmdexpr : SPATH
514+
| SPATH arglist
514515
| SPATH MACRO"""
515516

516517
@splcommandrule

splparser/rules/spathrules.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/usr/bin/env python
2+
3+
import ply.yacc
4+
import logging
5+
6+
from splparser.parsetree import *
7+
from splparser.exceptions import SPLSyntaxError
8+
9+
from splparser.rules.common.valuerules import *
10+
11+
from splparser.lexers.spathlexer import tokens
12+
13+
start = 'cmdexpr'
14+
15+
# Start rule (see `start = 'cmdexpr'`): a command expression is exactly one
# spath command, whose node is passed up unchanged.
def p_cmdexpr(p):
    """cmdexpr : spathcmd"""
    p[0] = p[1]
18+
19+
# Builds the COMMAND node for `spath`, with or without arguments. Despite
# the function name, the production defined here is `spathcmd`.
def p_cmdexpr_spath(p):
    """spathcmd : SPATH
                | SPATH spatharglist"""
    p[0] = ParseTreeNode('COMMAND', raw='spath')
    if len(p) > 2:
        # Arguments present: adopt the argument list's children directly
        # rather than attaching the _SPATHARGLIST node itself.
        p[0].add_children(p[2].children)
25+
26+
# Collects one or more spath arguments, in source order, as the children of
# a temporary _SPATHARGLIST node.
def p_spatharglist_spathargs(p):
    """spatharglist : spatharg
                    | spatharg spatharglist"""
    p[0] = ParseTreeNode('_SPATHARGLIST')
    p[0].add_child(p[1])
    if len(p) > 2:
        # Right-recursive case: append the arguments already gathered from
        # the rest of the list after the current one.
        p[0].add_children(p[2].children)
33+
34+
# Explicit `option=value` argument: produces an EQ node whose children are
# the OPTION node and the value node.
def p_spatharg_opt(p):
    """spatharg : SPATH_OPT EQ value"""
    p[0] = ParseTreeNode('EQ', raw='assign')
    opt = ParseTreeNode('OPTION', raw=p[1])
    if opt.raw in ["input", "output"]:
        # The values of `input` and `output` are tagged with the FIELD role;
        # the value of any other option (i.e. `path`) keeps its role.
        p[3].role = 'FIELD'
    # The value is both recorded on the option node and added as its sibling.
    opt.values.append(p[3])
    p[0].add_children([opt, p[3]])
42+
43+
# Bare value with no `option=` prefix: treated as the value of an implicit
# `path` option, giving the same EQ/OPTION/value shape as an explicit one.
def p_spatharg_path(p):
    """spatharg : value"""
    p[0] = ParseTreeNode('EQ', raw='assign')
    opt = ParseTreeNode('OPTION', raw='path')
    opt.values.append(p[1])
    p[0].add_children([opt, p[1]])
49+
50+
# Parser error hook: always raises SPLSyntaxError. The argument `p` is not
# used, so the message does not identify the failing token.
def p_error(p):
    raise SPLSyntaxError("Syntax error in spath parser input!")

test/splparser/rules/spathrules/__init__.py

Whitespace-only changes.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/usr/bin/env python
2+
3+
import doctest
4+
doctest.testfile('test_spathrules.txt')
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
2+
Below are clips from an interactive session in which I test various aspects of the parser.
3+
4+
>>> import splparser.parser
5+
6+
>>> splparser.parse('spath output=repository path=repository.url').print_tree()
7+
('ROOT')
8+
('STAGE')
9+
('COMMAND': 'spath')
10+
('EQ': 'assign')
11+
('OPTION': 'output')
12+
('FIELD': 'repository')
13+
('EQ': 'assign')
14+
('OPTION': 'path')
15+
('VALUE': 'repository.url')
16+
17+
>>> splparser.parse('spath input=commit_author path=commits.author.name').print_tree()
18+
('ROOT')
19+
('STAGE')
20+
('COMMAND': 'spath')
21+
('EQ': 'assign')
22+
('OPTION': 'input')
23+
('FIELD': 'commit_author')
24+
('EQ': 'assign')
25+
('OPTION': 'path')
26+
('VALUE': 'commits.author.name')
27+
28+
>>> splparser.parse('spath path=vendorProductSet.product.desc.locDesc{4}{@locale}').print_tree()
29+
('ROOT')
30+
('STAGE')
31+
('COMMAND': 'spath')
32+
('EQ': 'assign')
33+
('OPTION': 'path')
34+
('VALUE': 'vendorProductSet.product.desc.locDesc{4}{@locale}')
35+
36+
>>> splparser.parse('spath').print_tree()
37+
('ROOT')
38+
('STAGE')
39+
('COMMAND': 'spath')

0 commit comments

Comments
 (0)