-
Notifications
You must be signed in to change notification settings - Fork 405
/
rfc4515_parser.erl
125 lines (93 loc) · 5.06 KB
/
rfc4515_parser.erl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
%% Parsing functions for RFC4515 (String Representation of LDAP Search Filters)
%%
%% TODO: extensibleMatch features not implemented.
%%
%% Author : Pablo Polvorin <ppolv@yahoo.com.ar>
-module(rfc4515_parser).
-author('ppolv@yahoo.com.ar').
-export([tokenize/1,filter/1,filter_to_string/1]).
%% tokenize/1
%% Tokenize the input string into a token list. Generated tokens:
%% '(' , ')' , '=' , '<=' , '>=' , '~=' , '*' , '&' , '|' , '!' , {text,Value}
%% E.g.:
%% rfc4515_parser:tokenize("(&(!(prpr=a*s*sse*y))(pr~=sss))").
%% --> ['(', '&','(', '!', '(', {text,"prpr"}, '=', {text,"a"}, '*', {text,"s"}, '*',
%% {text,"sse"},'*', {text,"y"}, ')', ')', '(', {text,"pr"}, '~=', {text,"sss"}, ')', ')']
%%
%% Entry point of the tokenizer. The input is flattened first because
%% characters such as & coming from XML attributes end up as deep lists,
%% e.g. [[38]]. Scanning then starts with an empty text accumulator and an
%% empty token list.
tokenize(Input) ->
    Flat = lists:flatten(Input),
    tokenizer(Flat, [], []).
%% tokenizer(Input, Current, Tokens)
%%   Input   - characters still to be scanned
%%   Current - characters of the text token being collected, newest first
%%   Tokens  - tokens emitted so far, newest first
%% Every operator character flushes the pending text through add_current/2
%% and then emits its own token; any other character is pushed onto Current.
%% Returns the token list in input order.
tokenizer([$( | Rest], Current, Tokens) -> tokenizer(Rest, [], ['(' | add_current(Current, Tokens)]);
tokenizer([$) | Rest], Current, Tokens) -> tokenizer(Rest, [], [')' | add_current(Current, Tokens)]);
tokenizer([$& | Rest], Current, Tokens) -> tokenizer(Rest, [], ['&' | add_current(Current, Tokens)]);
tokenizer([$| | Rest], Current, Tokens) -> tokenizer(Rest, [], ['|' | add_current(Current, Tokens)]);
tokenizer([$! | Rest], Current, Tokens) -> tokenizer(Rest, [], ['!' | add_current(Current, Tokens)]);
tokenizer([$= | Rest], Current, Tokens) -> tokenizer(Rest, [], ['=' | add_current(Current, Tokens)]);
tokenizer([$* | Rest], Current, Tokens) -> tokenizer(Rest, [], ['*' | add_current(Current, Tokens)]);
tokenizer([$>, $= | Rest], Current, Tokens) -> tokenizer(Rest, [], ['>=' | add_current(Current, Tokens)]);
tokenizer([$<, $= | Rest], Current, Tokens) -> tokenizer(Rest, [], ['<=' | add_current(Current, Tokens)]);
tokenizer([$~, $= | Rest], Current, Tokens) -> tokenizer(Rest, [], ['~=' | add_current(Current, Tokens)]);
%% An encoded value starts with a backslash '\' followed by the two
%% hexadecimal digits of the encoded character. The decoded character is
%% pushed onto the text accumulator, so an escaped operator such as \28
%% never becomes a token of its own. A backslash followed by fewer than
%% two characters fails the match below and crashes with badmatch.
tokenizer([$\\ | Rest], Current, Tokens) ->
    [H1, H2 | Rest2] = Rest,
    tokenizer(Rest2, [decode([H1, H2]) | Current], Tokens);
tokenizer([C | Rest], Current, Tokens) -> tokenizer(Rest, [C | Current], Tokens);
%% End of input. Blanks left in the accumulator (e.g. trailing spaces after
%% the last ')') are ignored, the same way add_current/2 drops blank-only
%% text between tokens. Non-blank leftover text is still an error and
%% crashes with badmatch.
tokenizer([], Current, Tokens) ->
    [] = string:strip(Current),
    lists:reverse(Tokens).
%% Flush the pending text accumulator onto the token list.
%% Current holds the collected characters in reverse order. Blanks are
%% stripped from both ends; if nothing is left, Tokens is returned as is,
%% otherwise a {text, String} token (put back into reading order) is
%% prepended to Tokens.
add_current(Current, Tokens) ->
    push_text(string:strip(Current), Tokens).

%% Prepend a text token unless the stripped text is empty.
push_text([], Tokens) -> Tokens;
push_text(Reversed, Tokens) -> [{text, lists:reverse(Reversed)} | Tokens].
%% Convert a string of hexadecimal digits into the integer it denotes,
%% by prefixing it with "16#" and reading it as a based integer.
%% Crashes with badmatch if the digits cannot be read completely.
decode(HexDigits) ->
    Based = "16#" ++ HexDigits,
    {ok, [Char], []} = io_lib:fread("~#", Based),
    Char.
%% filter/1
%% parse a token list into an AST-like structure.
%% E.g.:
%% rfc4515_parser:filter(rfc4515_parser:tokenize("(&(!(prpr=a*s*sse*y))(pr~=sss))")).
%% --> {{'and',[{'not',{substring,"prpr",
%% [{initial,"a"},
%% {any,"s"},
%% {any,"sse"},
%% {final,"y"}]}},
%% {aprox,"pr","sss"}]},
%% []}
%% Parse one parenthesised filter: '(' followed by a filter component and a
%% closing ')'. Returns {Ast, Remaining}, where Remaining is the tokens left
%% after the closing ')'. Crashes with badmatch when the component is not
%% followed by ')'.
filter(['(' | Tokens]) ->
    {Ast, [')' | Remaining]} = filtercomp(Tokens),
    {Ast, Remaining}.
%% Parse the content of a filter (what follows the opening parenthesis):
%%   '&' filterlist -> {'and', Filters}
%%   '|' filterlist -> {'or', Filters}
%%   '!' filter     -> {'not', Filter}
%%   anything else  -> a simple item (see item/1)
%% Returns {Ast, Remaining}.
filtercomp(['&' | Tokens]) -> tag_result('and', filterlist(Tokens));
filtercomp(['|' | Tokens]) -> tag_result('or', filterlist(Tokens));
filtercomp(['!' | Tokens]) -> tag_result('not', filter(Tokens));
filtercomp(Tokens) -> item(Tokens).

%% Wrap the parsed part of a {Parsed, Remaining} result in a tagged tuple.
tag_result(Tag, {Parsed, Remaining}) -> {{Tag, Parsed}, Remaining}.
%% Parse a sequence of filters up to (but not including) the ')' that
%% closes the enclosing filter. Returns {Filters, Remaining}, with Filters
%% in input order and Remaining starting at that ')'.
filterlist(Tokens) -> filterlist(Tokens, []).

%% Acc collects the parsed filters newest first; a leading ')' marks the
%% end of the filter list.
filterlist([')' | _] = Tokens, Acc) ->
    {lists:reverse(Acc), Tokens};
filterlist(Tokens, Acc) ->
    {Filter, Rest} = filter(Tokens),
    filterlist(Rest, [Filter | Acc]).
%% Parse a simple item. It must start with a text token, which is taken as
%% the attribute description; the operator and value are handled by item2/2.
item([{text, Attr} | Rest]) ->
    item2(Rest, Attr).
%% Dispatch on the operator that follows the attribute description:
%%   '~=' Value -> {aprox, Attr, Value}
%%   '>=' Value -> {get, Attr, Value}
%%   '<=' Value -> {'let', Attr, Value}
%%   '='        -> presence, equality or substring match, decided by item3/2
%% Returns {Ast, Remaining}.
item2(['~=', {text, Value} | Rest], Attr) ->
    {{aprox, Attr, Value}, Rest};
item2(['>=', {text, Value} | Rest], Attr) ->
    {{get, Attr, Value}, Rest};
item2(['<=', {text, Value} | Rest], Attr) ->
    {{'let', Attr, Value}, Rest};
item2(['=' | Rest], Attr) ->
    item3(Rest, Attr).
%% Decide what follows "Attr=":
%%   ')' right away   -> equality with an empty value, e.g. (description=)
%%   '*' then ')'     -> presence test, e.g. (description=*)
%%   text then ')'    -> equality, e.g. (description=some description)
%%   anything else    -> substring match, parsed by substring/1
%% In every case the closing ')' is left in the remaining tokens.
item3([')' | _] = Tokens, Attr) ->
    {{eq, Attr, ""}, Tokens};
item3(['*' | [')' | _] = Rest], Attr) ->
    {{present, Attr}, Rest};
item3([{text, Value} | [')' | _] = Rest], Attr) ->
    {{eq, Attr, Value}, Rest};
item3(Tokens, Attr) ->
    {Parts, Rest} = substring(Tokens),
    {{substring, Attr, Parts}, Rest}.
%% Parse the value of a substring match. Text followed by '*' becomes the
%% {initial, Text} part; a leading '*' means there is no initial part.
%% The remaining parts are collected by any/2.
substring([{text, Initial}, '*' | Rest]) ->
    any(Rest, [{initial, Initial}]);
substring(['*' | Rest]) ->
    any(Rest, []).
%% Collect the parts of a substring match that follow the first '*'.
%% Acc holds the parts found so far, newest first.
%%   text then '*' -> {any, Text}, keep going
%%   text then ')' -> {final, Text}, done
%%   ')'           -> done, no final part
%% Returns {Parts, Remaining}, with Parts in input order and Remaining
%% starting at the closing ')'.
any([{text, Value}, '*' | Rest], Acc) ->
    any(Rest, [{any, Value} | Acc]);
any([{text, Value} | [')' | _] = Rest], Acc) ->
    {lists:reverse([{final, Value} | Acc]), Rest};
any([')' | _] = Tokens, Acc) ->
    {lists:reverse(Acc), Tokens}.
%% Render a filter AST (as produced by filter/1) back into its string form.
%% The result is the character data returned by io_lib:format/2, which may
%% be a deep list. Values are written as they are; no escaping is applied.
filter_to_string({'and', Filters}) ->
    Rendered = lists:map(fun filter_to_string/1, Filters),
    io_lib:format("(& ~s)", [Rendered]);
filter_to_string({'or', Filters}) ->
    Rendered = lists:map(fun filter_to_string/1, Filters),
    io_lib:format("(| ~s)", [Rendered]);
filter_to_string({'not', Filter}) ->
    Rendered = filter_to_string(Filter),
    io_lib:format("(! ~s)", [Rendered]);
filter_to_string({present, Attr}) ->
    io_lib:format("(~s=*)", [Attr]);
filter_to_string({substring, Attr, Subs}) ->
    Pattern = print_substrings(Subs),
    io_lib:format("(~s=~s)", [Attr, Pattern]);
filter_to_string({aprox, Attr, Value}) ->
    io_lib:format("(~s~~=~s)", [Attr, Value]);
filter_to_string({'let', Attr, Value}) ->
    io_lib:format("(~s<=~s)", [Attr, Value]);
filter_to_string({get, Attr, Value}) ->
    io_lib:format("(~s>=~s)", [Attr, Value]);
filter_to_string({eq, Attr, Value}) ->
    io_lib:format("(~s=~s)", [Attr, Value]).
%% Render the parts of a substring match ({initial,V}, {any,V}, {final,V})
%% as the value side of a filter, e.g. "a*s*sse*y".
%% Each part after the initial one is written with a leading '*'. When the
%% list has no {final,V} part, the pattern must still end with '*':
%% otherwise [{initial,"a"}] would print as "a" (an equality match) and
%% [{any,"a"}] as "*a" (which reads back as a final part).
%% Returns character data (possibly a deep list).
print_substrings(Subs) ->
    Printed = lists:map(fun print_substring/1, Subs),
    case lists:keymember(final, 1, Subs) of
        true  -> Printed;
        false -> [Printed, $*]
    end.

%% Render a single substring part; see print_substrings/1 for how the
%% closing '*' is handled.
print_substring({initial, V}) -> io_lib:format("~s", [V]);
print_substring({final, V}) -> io_lib:format("*~s", [V]);
print_substring({any, V}) -> io_lib:format("*~s", [V]).