forked from shauvik/htmlparser
/
PageTests.java
411 lines (390 loc) · 13.6 KB
/
PageTests.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
// HTMLParser Library - A java-based parser for HTML
// http://htmlparser.org
// Copyright (C) 2006 Derrick Oswald
//
// Revision Control Information
//
// $URL$
// $Author$
// $Date$
// $Revision$
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the Common Public License; either
// version 1.0 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// Common Public License for more details.
//
// You should have received a copy of the Common Public License
// along with this library; if not, the license is available from
// the Open Source Initiative (OSI) website:
// http://opensource.org/licenses/cpl1.0.php
package org.htmlparser.tests.lexerTests;
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;
import org.htmlparser.lexer.Page;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.ParserException;
/**
 * Tests for {@link Page}: rejection of <code>null</code> constructor
 * arguments, construction from URL connections, and resolution of relative
 * references against a fixed base URI using the examples from Appendix C of
 * RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
 */
public class PageTests extends ParserTestCase {

    /**
     * The default charset.
     * This should be <code>ISO-8859-1</code>,
     * see RFC 2616 (http://www.ietf.org/rfc/rfc2616.txt?number=2616) section 3.7.1
     * Another alias is "8859_1".
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * Base URI that every relative reference in the numbered tests is
     * resolved against.
     */
    static final String BASEURI = "http://a/b/c/d;p?q";

    /**
     * Shared page whose base URL is {@link #BASEURI}; used by the numbered
     * absolute URL tests.
     */
    public static Page mPage;

    static {
        // Publish the marker property first, then build the shared page.
        System.setProperty("org.htmlparser.tests.lexerTests.PageTests", "PageTests");
        mPage = new Page();
        mPage.setBaseUrl(BASEURI);
    }

    /**
     * Creates the test case.
     * @param name The name handed on to the {@link ParserTestCase} constructor.
     */
    public PageTests(String name) {
        super(name);
    }

    /**
     * Resolves a relative reference against the shared page and compares the
     * outcome with the expected absolute URL.
     * @param message The message reported if the comparison fails.
     * @param expected The absolute URL the reference should resolve to.
     * @param relative The reference passed to <code>getAbsoluteURL</code>.
     */
    private void checkResolution(String message, String expected, String relative) {
        assertEquals(message, expected, mPage.getAbsoluteURL(relative));
    }

    /**
     * Both the URLConnection and the String constructor must reject
     * <code>null</code> with an IllegalArgumentException.
     */
    public void testNull() throws ParserException {
        try {
            new Page((URLConnection) null);
            // Only reached when the constructor accepted the null argument.
            assertTrue("null value in constructor", false);
        } catch (IllegalArgumentException expected) {
            // the constructor refused the null connection, as it should
        }

        try {
            new Page((String) null);
            // Only reached when the constructor accepted the null argument.
            assertTrue("null value in constructor", false);
        } catch (IllegalArgumentException expected) {
            // the constructor refused the null text, as it should
        }
    }

    /**
     * Builds a page from a connection opened on a real web address.
     * The test passes as long as no exception escapes.
     */
    public void testURLConnection() throws ParserException, IOException {
        final URL site = new URL("http://www.ibm.com/jp/");
        new Page(site.openConnection());
    }

    /**
     * Builds a page from a connection to a non-existent host.
     * A ParserException is the anticipated result and is absorbed.
     * NOTE(review): nothing fails when construction unexpectedly succeeds,
     * so this only verifies that no other exception type escapes.
     */
    public void testBadURLConnection() throws IOException {
        final URL bogus = new URL("http://www.bigbogosity.org/");
        try {
            new Page(bogus.openConnection());
        } catch (ParserException expected) {
            // this is the anticipated reaction to an unreachable host
        }
    }

    //
    // Tests from Appendix C, Examples of Resolving Relative URI References,
    // RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax,
    // T. Berners-Lee et al.
    // http://www.ietf.org/rfc/rfc2396.txt
    //
    // Within an object with a well-defined base URI of
    //     http://a/b/c/d;p?q
    // the relative URI would be resolved as follows:
    //
    // C.1. Normal Examples
    //
    //     g:h      =  g:h
    //     g        =  http://a/b/c/g
    //     ./g      =  http://a/b/c/g
    //     g/       =  http://a/b/c/g/
    //     /g       =  http://a/g
    //     //g      =  http://g
    //     ?y       =  http://a/b/c/?y
    //     g?y      =  http://a/b/c/g?y
    //     #s       =  (current document)#s
    //     g#s      =  http://a/b/c/g#s
    //     g?y#s    =  http://a/b/c/g?y#s
    //     ;x       =  http://a/b/c/;x
    //     g;x      =  http://a/b/c/g;x
    //     g;x?y#s  =  http://a/b/c/g;x?y#s
    //     .        =  http://a/b/c/
    //     ./       =  http://a/b/c/
    //     ..       =  http://a/b/
    //     ../      =  http://a/b/
    //     ../g     =  http://a/b/g
    //     ../..    =  http://a/
    //     ../../   =  http://a/
    //     ../../g  =  http://a/g
    //
    // NOTE(review): the test numbers do not line up one-to-one with the table.
    // test1 uses "https:h" where the RFC lists "g:h", test9 and test10 repeat
    // that same check, and the "#s" and "../../" rows have no test of their own.
    //

    /** Reference carrying its own scheme ("https:h") comes back unchanged. */
    public void test1() {
        checkResolution("test1 failed", "https:h", "https:h");
    }

    /** g = http://a/b/c/g */
    public void test2() {
        checkResolution("test2 failed", "http://a/b/c/g", "g");
    }

    /** ./g = http://a/b/c/g */
    public void test3() {
        checkResolution("test3 failed", "http://a/b/c/g", "./g");
    }

    /** g/ = http://a/b/c/g/ */
    public void test4() {
        checkResolution("test4 failed", "http://a/b/c/g/", "g/");
    }

    /** /g = http://a/g */
    public void test5() {
        checkResolution("test5 failed", "http://a/g", "/g");
    }

    /** //g = http://g */
    public void test6() {
        checkResolution("test6 failed", "http://g", "//g");
    }

    /**
     * ?y resolves to http://a/b/c/?y when the strict flag is set, and to
     * http://a/b/c/d;p?y through the single-argument call.
     */
    public void test7() {
        assertEquals("test7 strict failed", "http://a/b/c/?y", mPage.getAbsoluteURL("?y", true));
        checkResolution("test7 non-strict failed", "http://a/b/c/d;p?y", "?y");
    }

    /** g?y = http://a/b/c/g?y */
    public void test8() {
        checkResolution("test8 failed", "http://a/b/c/g?y", "g?y");
    }

    /** Same check as test1, reported under the test9 label. */
    public void test9() {
        checkResolution("test9 failed", "https:h", "https:h");
    }

    /** Same check as test1, reported under the test10 label. */
    public void test10() {
        checkResolution("test10 failed", "https:h", "https:h");
    }

    // #s = (current document)#s  -- not exercised by any test here

    /** g#s = http://a/b/c/g#s */
    public void test11() {
        checkResolution("test11 failed", "http://a/b/c/g#s", "g#s");
    }

    /** g?y#s = http://a/b/c/g?y#s */
    public void test12() {
        checkResolution("test12 failed", "http://a/b/c/g?y#s", "g?y#s");
    }

    /** ;x = http://a/b/c/;x */
    public void test13() {
        checkResolution("test13 failed", "http://a/b/c/;x", ";x");
    }

    /** g;x = http://a/b/c/g;x */
    public void test14() {
        checkResolution("test14 failed", "http://a/b/c/g;x", "g;x");
    }

    /** g;x?y#s = http://a/b/c/g;x?y#s */
    public void test15() {
        checkResolution("test15 failed", "http://a/b/c/g;x?y#s", "g;x?y#s");
    }

    /** . = http://a/b/c/ */
    public void test16() {
        checkResolution("test16 failed", "http://a/b/c/", ".");
    }

    /** ./ = http://a/b/c/ */
    public void test17() {
        checkResolution("test17 failed", "http://a/b/c/", "./");
    }

    /** .. = http://a/b/ */
    public void test18() {
        checkResolution("test18 failed", "http://a/b/", "..");
    }

    /** ../ = http://a/b/ */
    public void test19() {
        checkResolution("test19 failed", "http://a/b/", "../");
    }

    /** ../g = http://a/b/g */
    public void test20() {
        checkResolution("test20 failed", "http://a/b/g", "../g");
    }

    /** ../.. = http://a/ */
    public void test21() {
        checkResolution("test21 failed", "http://a/", "../..");
    }

    /** ../../g = http://a/g */
    public void test22() {
        checkResolution("test22 failed", "http://a/g", "../../g");
    }

    //
    // C.2. Abnormal Examples
    //
    // Although the following abnormal examples are unlikely to occur in
    // normal practice, all URI parsers should be capable of resolving them
    // consistently. Each example uses the same base as above.
    //
    // An empty reference refers to the start of the current document.
    //
    //     <>  =  (current document)
    //
    // Parsers must be careful in handling the case where there are more
    // relative path ".." segments than there are hierarchical levels in the
    // base URI's path. Note that the ".." syntax cannot be used to change
    // the authority component of a URI.
    //
    //     ../../../g     =  http://a/../g
    //     ../../../../g  =  http://a/../../g
    //
    // In practice, some implementations strip leading relative symbolic
    // elements (".", "..") after applying a relative URI calculation, based
    // on the theory that compensating for obvious author errors is better
    // than allowing the request to fail. Thus, the above two references
    // will be interpreted as "http://a/g" by some implementations.
    //
    // Similarly, parsers must avoid treating "." and ".." as special when
    // they are not complete components of a relative path.
    //
    //     /./g   =  http://a/./g
    //     /../g  =  http://a/../g
    //     g.     =  http://a/b/c/g.
    //     .g     =  http://a/b/c/.g
    //     g..    =  http://a/b/c/g..
    //     ..g    =  http://a/b/c/..g
    //
    // Less likely are cases where the relative URI uses unnecessary or
    // nonsensical forms of the "." and ".." complete path segments.
    //
    //     ./../g      =  http://a/b/g
    //     ./g/.       =  http://a/b/c/g/
    //     g/./h       =  http://a/b/c/g/h
    //     g/../h      =  http://a/b/c/h
    //     g;x=1/./y   =  http://a/b/c/g;x=1/y
    //     g;x=1/../y  =  http://a/b/c/y
    //
    // All client applications remove the query component from the base URI
    // before resolving relative URI. However, some applications fail to
    // separate the reference's query and/or fragment components from a
    // relative path before merging it with the base path. This error is
    // rarely noticed, since typical usage of a fragment never includes the
    // hierarchy ("/") character, and the query component is not normally
    // used within relative references.
    //
    //     g?y/./x   =  http://a/b/c/g?y/./x
    //     g?y/../x  =  http://a/b/c/g?y/../x
    //     g#s/./x   =  http://a/b/c/g#s/./x
    //     g#s/../x  =  http://a/b/c/g#s/../x
    //
    // Some parsers allow the scheme name to be present in a relative URI if
    // it is the same as the base URI scheme. This is considered to be a
    // loophole in prior specifications of partial URI [RFC1630]. Its use
    // should be avoided.
    //
    //     http:g  =  http:g          ; for validating parsers
    //             |  http://a/b/c/g  ; for backwards compatibility
    //

    // Disabled variants of test23 and test24 expecting the literal RFC results:
    //     ../../../g     ->  "http://a/../g"
    //     ../../../../g  ->  "http://a/../../g"
    // The active tests below expect the surplus ".." segments to be dropped.

    /** ../../../g is expected to collapse to http://a/g */
    public void test23() {
        checkResolution("test23 failed", "http://a/g", "../../../g");
    }

    /** ../../../../g is expected to collapse to http://a/g */
    public void test24() {
        checkResolution("test24 failed", "http://a/g", "../../../../g");
    }

    /** /./g = http://a/./g */
    public void test25() {
        checkResolution("test25 failed", "http://a/./g", "/./g");
    }

    /** /../g = http://a/../g */
    public void test26() {
        checkResolution("test26 failed", "http://a/../g", "/../g");
    }

    /** g. = http://a/b/c/g. */
    public void test27() {
        checkResolution("test27 failed", "http://a/b/c/g.", "g.");
    }

    /** .g = http://a/b/c/.g */
    public void test28() {
        checkResolution("test28 failed", "http://a/b/c/.g", ".g");
    }

    /** g.. = http://a/b/c/g.. */
    public void test29() {
        checkResolution("test29 failed", "http://a/b/c/g..", "g..");
    }

    /** ..g = http://a/b/c/..g */
    public void test30() {
        checkResolution("test30 failed", "http://a/b/c/..g", "..g");
    }

    /** ./../g = http://a/b/g */
    public void test31() {
        checkResolution("test31 failed", "http://a/b/g", "./../g");
    }

    /** ./g/. = http://a/b/c/g/ */
    public void test32() {
        checkResolution("test32 failed", "http://a/b/c/g/", "./g/.");
    }

    /** g/./h = http://a/b/c/g/h */
    public void test33() {
        checkResolution("test33 failed", "http://a/b/c/g/h", "g/./h");
    }

    /** g/../h = http://a/b/c/h */
    public void test34() {
        checkResolution("test34 failed", "http://a/b/c/h", "g/../h");
    }

    /** g;x=1/./y = http://a/b/c/g;x=1/y */
    public void test35() {
        checkResolution("test35 failed", "http://a/b/c/g;x=1/y", "g;x=1/./y");
    }

    /** g;x=1/../y = http://a/b/c/y */
    public void test36() {
        checkResolution("test36 failed", "http://a/b/c/y", "g;x=1/../y");
    }

    /** g?y/./x = http://a/b/c/g?y/./x */
    public void test37() {
        checkResolution("test37 failed", "http://a/b/c/g?y/./x", "g?y/./x");
    }

    /** g?y/../x = http://a/b/c/g?y/../x */
    public void test38() {
        checkResolution("test38 failed", "http://a/b/c/g?y/../x", "g?y/../x");
    }

    /** g#s/./x = http://a/b/c/g#s/./x */
    public void test39() {
        checkResolution("test39 failed", "http://a/b/c/g#s/./x", "g#s/./x");
    }

    /** g#s/../x = http://a/b/c/g#s/../x */
    public void test40() {
        checkResolution("test40 failed", "http://a/b/c/g#s/../x", "g#s/../x");
    }

    // Disabled variant of test41 expecting the validating-parser result:
    //     http:g  ->  "http:g"
    // The active test below expects the backwards-compatible result instead.

    /** http:g is expected to resolve to http://a/b/c/g */
    public void test41() {
        checkResolution("test41 failed", "http://a/b/c/g", "http:g");
    }
}