-
Notifications
You must be signed in to change notification settings - Fork 0
/
api_test.clj
201 lines (194 loc) · 16.5 KB
/
api_test.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
;
; Copyright © 2023 Peter Monks
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
; http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; SPDX-License-Identifier: Apache-2.0
;
(ns rencg.api-test
(:require [clojure.test :refer [deftest testing is]]
[rencg.api :refer [re-named-groups re-matches-ncg re-find-ncg re-seq-ncg]]))
; Shared regex fixture used by the tests in this namespace.
; It is case-insensitive ((?i)) and declares two named-capturing groups:
;   name    - the literal text "Apache"
;   version - an optional run of digits, optionally followed by "." and more digits
; Between the two it allows optional " Software", " License"/" Licenses" (optionally
; followed by a comma or hyphen) and " V"/" Version" segments, then optional whitespace.
(def apache-re #"(?i)(?<name>Apache)(\s+Software)?(\s+License(s)?(\s*[,-])?)?(\s+V(ersion)?)?\s*(?<version>\d+(\.\d+)?)?")
; Checks re-named-groups against a nil regex, regexes without any named-capturing
; groups, and regexes with one or more (possibly nested) named-capturing groups.
; Note: clojure.test/are is written fully qualified because the ns form only refers
; deftest, testing and is. Each row of an are table expands to one is assertion.
(deftest re-named-groups-tests
  (testing "Nil, empty or blank regexes"
    (is (nil? (re-named-groups nil)))
    (clojure.test/are [blank-re] (= #{} (re-named-groups blank-re))
      #""
      #" "
      #"\n\t\r"))
  (testing "Regexes with no named-capturing groups"
    (clojure.test/are [groupless-re] (= #{} (re-named-groups groupless-re))
      #".*"
      #"(.*)"))
  (testing "Regexes with named-capturing groups"
    ; Each row is: expected set of group names, then the regex to inspect
    (clojure.test/are [expected-names named-re] (= expected-names (re-named-groups named-re))
      #{"namedGroup"}
      #"(?<namedGroup>.*)"

      #{"givenName" "familyName"}
      #"(?<givenName>.*)\s+(?<familyName>.*)"

      #{"name" "version"}
      apache-re

      ; Nested named groups
      #{"outer" "inner"}
      #"(?<outer>foo(?<inner>bar)?)"

      ; Nested named groups, but in different groups
      #{"outer" "inner"}
      #"(?<outer>foo)(\s+blah(?<inner>\s+bar)?)?")))
; Checks re-matches-ncg for nil arguments, non-matches, matches without
; named-capturing groups, matches with named-capturing groups, and the 3-argument
; form that takes a pre-computed set of group names.
; Note: clojure.test/are is written fully qualified because the ns form only refers
; deftest, testing and is. Each row of an are table expands to one is assertion.
(deftest re-matches-ncg-tests
  (testing "Nil regexes and/or input strings"
    ; Not a fan of throwing exceptions in these cases, but for better or worse this behaviour is compatible with clojure.core/re-matches
    (clojure.test/are [pattern input] (thrown? java.lang.NullPointerException (re-matches-ncg pattern input))
      nil   nil
      #".*" nil
      nil   ""))
  (testing "Non-matches that don't have named-capturing groups"
    (clojure.test/are [pattern input] (nil? (re-matches-ncg pattern input))
      #"foo"   ""
      #"foo"   "bar"
      #"(foo)" ""))
  (testing "Non-matches that do have named-capturing groups"
    (clojure.test/are [pattern input] (nil? (re-matches-ncg pattern input))
      #"(?<foo>foo)" ""
      apache-re      "Mozilla"))
  (testing "Matches that don't have named-capturing groups"
    (clojure.test/are [expected pattern input] (= expected (re-matches-ncg pattern input))
      {:start 0 :end 0} #".*"  ""
      {:start 0 :end 3} #"foo" "foo"))
  (testing "Matches that do have named-capturing groups, but they don't have values in the matched text"
    (is (= {:start 0 :end 3} (re-matches-ncg #"(?<foo>foo)?.*" "bar"))))
  (testing "Matches that do have named-capturing groups, and some or all of them have values"
    ; Each row is: expected result map, regex, input string
    (clojure.test/are [expected pattern input] (= expected (re-matches-ncg pattern input))
      {:start 0 :end 3 "foo" "foo"}                       #"(?<foo>foo)"    "foo"
      {:start 0 :end 6 "content" "foobar"}                #"(?<content>.*)" "foobar"
      {:start 0 :end 6 "name" "Apache"}                   apache-re         "Apache"
      {:start 0 :end 6 "name" "apache"}                   apache-re         "apache"
      {:start 0 :end 10 "name" "Apache" "version" "2.0"}  apache-re         "Apache 2.0"
      {:start 0 :end 8 "name" "Apache" "version" "1"}     apache-re         "Apache 1"
      {:start 0 :end 33 "name" "Apache" "version" "2"}    apache-re         "Apache Software License Version 2"))
  (testing "Matches with pre-computed ncgs"
    (let [apache-ncgs (re-named-groups apache-re)]
      ; Note: these cases are nonsensical since the names in ncgs don't correlate to the regexes, but we test these cases anyway to ensure reasonable behaviour
      (clojure.test/are [pattern input] (nil? (re-matches-ncg pattern input apache-ncgs))
        #"foo"         ""
        #"(?<foo>foo)" "")
      (is (= {:start 0 :end 3} (re-matches-ncg #"foo" "foo" apache-ncgs)))
      ; These cases make more sense
      (is (= {:start 0 :end 3 "foo" "foo"} (re-matches-ncg #"(?<foo>foo)" "foo" #{"foo"})))
      (is (nil? (re-matches-ncg apache-re "Mozilla" apache-ncgs)))
      (clojure.test/are [expected input] (= expected (re-matches-ncg apache-re input apache-ncgs))
        {:start 0 :end 6 "name" "Apache"}                   "Apache"
        {:start 0 :end 6 "name" "apache"}                   "apache"
        {:start 0 :end 10 "name" "Apache" "version" "2.0"}  "Apache 2.0"
        {:start 0 :end 8 "name" "Apache" "version" "1"}     "Apache 1"
        {:start 0 :end 33 "name" "Apache" "version" "2"}    "Apache Software License Version 2"))))
; Checks re-find-ncg for nil arguments, non-finds, finds with and without
; named-capturing groups (including finds that start part-way through the input),
; repeated finds against a single reused matcher, and the form that takes a
; pre-computed set of group names.
; Note: clojure.test/are is written fully qualified because the ns form only refers
; deftest, testing and is. Each row of an are table expands to one is assertion.
(deftest re-find-ncg-tests
  (testing "Nil regexes and/or input strings"
    ; Not a fan of throwing exceptions in these cases, but for better or worse this behaviour is compatible with clojure.core/re-find
    (clojure.test/are [pattern input] (thrown? java.lang.NullPointerException (re-find-ncg pattern input))
      nil   nil
      #".*" nil
      nil   ""))
  (testing "Non-finds that don't have named-capturing groups"
    (clojure.test/are [pattern input] (nil? (re-find-ncg pattern input))
      #"foo"   ""
      #"foo"   "bar"
      #"(foo)" ""))
  (testing "Non-finds that do have named-capturing groups"
    (clojure.test/are [pattern input] (nil? (re-find-ncg pattern input))
      #"(?<foo>foo)" ""
      apache-re      "Mozilla"))
  (testing "Finds that don't have named-capturing groups"
    (clojure.test/are [expected pattern input] (= expected (re-find-ncg pattern input))
      {:start 0 :end 0} #".*"  ""
      {:start 0 :end 3} #"foo" "foo"))
  (testing "Finds that do have named-capturing groups, but they don't have values in the matched text"
    (is (= {:start 0 :end 3} (re-find-ncg #"(?<foo>foo)?.*" "bar"))))
  (testing "Finds that do have named-capturing groups, and some or all of them have values"
    ; Each row is: expected result map, regex, input string
    (clojure.test/are [expected pattern input] (= expected (re-find-ncg pattern input))
      {:start 0 :end 3 "foo" "foo"}                       #"(?<foo>foo)"    "foo"
      {:start 7 :end 10 "foo" "foo"}                      #"(?<foo>foo)"    "prefix foo suffix"
      {:start 0 :end 6 "content" "foobar"}                #"(?<content>.*)" "foobar"
      {:start 0 :end 6 "name" "Apache"}                   apache-re         "Apache"
      {:start 0 :end 6 "name" "apache"}                   apache-re         "apache"
      {:start 0 :end 10 "name" "Apache" "version" "2.0"}  apache-re         "Apache 2.0"
      {:start 0 :end 8 "name" "Apache" "version" "1"}     apache-re         "Apache 1"
      {:start 0 :end 33 "name" "Apache" "version" "2"}    apache-re         "Apache Software License Version 2"
      {:start 7 :end 40 "name" "Apache" "version" "2"}    apache-re         "prefix Apache Software License Version 2 suffix"))
  (testing "Repeated finds, reusing the same matcher"
    ; The matcher is stateful, so the assertions below must run in exactly this order
    (let [foo-re      #"(?<foo>foo)"
          haystack    "foofoofoo"
          foo-ncgs    (re-named-groups foo-re)
          foo-matcher (re-matcher foo-re haystack)]
      (is (= {:start 0 :end 3 "foo" "foo"} (re-find-ncg foo-matcher foo-ncgs)))   ; First foo in haystack
      (is (= {:start 3 :end 6 "foo" "foo"} (re-find-ncg foo-matcher foo-ncgs)))   ; Second foo in haystack
      (is (= {:start 6 :end 9 "foo" "foo"} (re-find-ncg foo-matcher foo-ncgs)))   ; Third foo
      (is (nil? (re-find-ncg foo-matcher foo-ncgs))))                             ; No more foos in haystack
    (let [foo-re      #"(?<foo>foo)"
          haystack    "prefix foo interstitial text foo suffix"
          foo-ncgs    (re-named-groups foo-re)
          foo-matcher (re-matcher foo-re haystack)]
      (is (= {:start 7 :end 10 "foo" "foo"} (re-find-ncg foo-matcher foo-ncgs)))  ; First foo in haystack
      (is (= {:start 29 :end 32 "foo" "foo"} (re-find-ncg foo-matcher foo-ncgs))) ; Second foo in haystack
      (is (nil? (re-find-ncg foo-matcher foo-ncgs)))))                            ; No more foos in haystack
  (testing "Finds with pre-computed ncgs"
    (let [apache-ncgs (re-named-groups apache-re)]
      ; Note: these cases are nonsensical since the names in ncgs don't correlate to the regexes, but we test these cases anyway to ensure reasonable behaviour
      (clojure.test/are [pattern input] (nil? (re-find-ncg pattern input apache-ncgs))
        #"foo"         ""
        #"(?<foo>foo)" ""
        #"(?<foo>foo)" "bar")
      (clojure.test/are [expected input] (= expected (re-find-ncg #"foo" input apache-ncgs))
        {:start 0 :end 3}  "foo"
        {:start 7 :end 10} "prefix foo suffix")
      ; These cases make more sense
      (is (= {:start 0 :end 3 "foo" "foo"} (re-find-ncg #"(?<foo>foo)" "foo" #{"foo"})))
      (is (nil? (re-find-ncg apache-re "Mozilla" apache-ncgs)))
      (clojure.test/are [expected input] (= expected (re-find-ncg apache-re input apache-ncgs))
        {:start 0 :end 6 "name" "Apache"}                   "Apache"
        {:start 0 :end 6 "name" "apache"}                   "apache"
        {:start 0 :end 10 "name" "Apache" "version" "2.0"}  "Apache 2.0"
        {:start 0 :end 8 "name" "Apache" "version" "1"}     "Apache 1"
        {:start 0 :end 33 "name" "Apache" "version" "2"}    "Apache Software License Version 2"
        {:start 7 :end 40 "name" "Apache" "version" "2"}    "prefix Apache Software License Version 2 suffix"))))
; Checks re-seq-ncg for nil arguments, inputs that produce no matches, inputs that
; produce one or several matches (with and without named-capturing groups), and the
; 3-argument form that takes a pre-computed set of group names.
; Note: clojure.test/are is written fully qualified because the ns form only refers
; deftest, testing and is. Each row of an are table expands to one is assertion.
(deftest re-seq-ncg-test
  (testing "Nil regexes and/or input strings"
    ; Not a fan of throwing exceptions in these cases, but for better or worse this behaviour is compatible with clojure.core/re-seq
    (clojure.test/are [pattern input] (thrown? java.lang.NullPointerException (re-seq-ncg pattern input))
      nil   nil
      #".*" nil
      nil   ""))
  (testing "Non-matching seqs that don't have named-capturing groups"
    (clojure.test/are [pattern input] (nil? (re-seq-ncg pattern input))
      #"foo"   ""
      #"foo"   "bar"
      #"(foo)" ""))
  (testing "Non-matching seqs that do have named-capturing groups"
    (clojure.test/are [pattern input] (nil? (re-seq-ncg pattern input))
      #"(?<foo>foo)" ""
      apache-re      "Mozilla"))
  (testing "Matching seqs that don't have named-capturing groups"
    (clojure.test/are [expected pattern input] (= expected (re-seq-ncg pattern input))
      '({:start 0 :end 0})                    #".*"  ""
      '({:start 0 :end 3})                    #"foo" "foo"
      '({:start 0 :end 3} {:start 3 :end 6})  #"foo" "foofoo"))
  (testing "Matching seqs that do have named-capturing groups, but they don't have values in the matched text"
    ; Note: .* matches twice here - compare to (re-seq #".*" "bar")
    (is (= '({:start 0 :end 3} {:start 3 :end 3}) (re-seq-ncg #"(?<foo>foo)?.*" "bar"))))
  (testing "Matching seqs that do have named-capturing groups, and some or all of them have values"
    ; Each row is: expected seq of result maps, then regex, then input string
    (clojure.test/are [expected pattern input] (= expected (re-seq-ncg pattern input))
      '({:start 0 :end 3 "foo" "foo"})
      #"(?<foo>foo)" "foo"

      '({:start 7 :end 10 "foo" "foo"})
      #"(?<foo>foo)" "prefix foo suffix"

      ; Note: .* matches twice here - compare to (re-seq #".*" "foobar")
      '({:start 0 :end 6 "content" "foobar"} {:start 6 :end 6 "content" ""})
      #"(?<content>.*)" "foobar"

      '({:start 0 :end 6 "name" "Apache"})
      apache-re "Apache"

      '({:start 0 :end 6 "name" "apache"})
      apache-re "apache"

      '({:start 0 :end 10 "name" "Apache" "version" "2.0"})
      apache-re "Apache 2.0"

      '({:start 0 :end 8 "name" "Apache" "version" "1"})
      apache-re "Apache 1"

      '({:start 0 :end 33 "name" "Apache" "version" "2"})
      apache-re "Apache Software License Version 2"

      '({:start 7 :end 40 "name" "Apache" "version" "2"})
      apache-re "prefix Apache Software License Version 2 suffix"))
  (testing "Matching seqs with multiple matches"
    (clojure.test/are [expected pattern input] (= expected (re-seq-ncg pattern input))
      '({:start 0 :end 3 "foo" "foo"} {:start 3 :end 6 "foo" "foo"})
      #"(?<foo>foo)" "foofoo"

      '({:start 7 :end 10 "foo" "foo"} {:start 29 :end 32 "foo" "foo"})
      #"(?<foo>foo)" "prefix foo interstitial text foo suffix"

      '({:start 0 :end 6 "name" "Apache"} {:start 6 :end 12 "name" "apache"})
      apache-re "Apacheapache"

      '({:start 0 :end 10 "name" "apache" "version" "2.0"} {:start 11 :end 21 "name" "Apache" "version" "2.0"})
      apache-re "apache 2.0 Apache 2.0"

      '({:start 7 :end 15 "name" "Apache" "version" "1"} {:start 34 :end 69 "name" "Apache" "version" "2.0"})
      apache-re "prefix Apache 1 interstitial text Apache Software License Version 2.0 suffix"))
  (testing "Matching seqs with pre-computed ncgs"
    (let [apache-ncgs (re-named-groups apache-re)]
      ; Note: these cases are nonsensical since the names in ncgs don't correlate to the regexes, but we test these cases anyway to ensure reasonable behaviour
      (clojure.test/are [pattern input] (nil? (re-seq-ncg pattern input apache-ncgs))
        #"foo"         ""
        #"(?<foo>foo)" ""
        #"(?<foo>foo)" "bar")
      (clojure.test/are [expected input] (= expected (re-seq-ncg #"foo" input apache-ncgs))
        '({:start 0 :end 3})  "foo"
        '({:start 7 :end 10}) "prefix foo suffix")
      ; These cases make more sense
      (is (= '({:start 0 :end 3 "foo" "foo"}) (re-seq-ncg #"(?<foo>foo)" "foo" #{"foo"})))
      (is (nil? (re-seq-ncg apache-re "Mozilla" apache-ncgs)))
      (clojure.test/are [expected input] (= expected (re-seq-ncg apache-re input apache-ncgs))
        '({:start 0 :end 6 "name" "Apache"})                   "Apache"
        '({:start 0 :end 6 "name" "apache"})                   "apache"
        '({:start 0 :end 10 "name" "Apache" "version" "2.0"})  "Apache 2.0"
        '({:start 0 :end 8 "name" "Apache" "version" "1"})     "Apache 1"
        '({:start 0 :end 33 "name" "Apache" "version" "2"})    "Apache Software License Version 2"
        '({:start 7 :end 40 "name" "Apache" "version" "2"})    "prefix Apache Software License Version 2 suffix"))))