@@ -4,49 +4,64 @@ class RFC3986_Parser # :nodoc:
4
4
# Matches an absolute URI ("scheme:hier-part[?query][#fragment]"), anchored
# to the whole string with \A and \z. Named groups expose the components:
# scheme, userinfo, host, port, path-abempty / path-absolute / path-rootless /
# path-empty, query and fragment.
#
# This regexp is modified so that host never matches the empty string:
# reg-name requires at least one character and the host group as a whole is
# optional, so a missing host yields a nil capture instead of "".
#
# NOTE: the literal has no /x flag, so it must not contain any whitespace;
# a stray space would be matched literally and break every lookup.
RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
6
6
# Matches a relative reference (a URI reference without a scheme), anchored
# to the whole string with \A and \z. Named groups expose the components:
# userinfo, host, port, path-abempty / path-absolute / path-noscheme /
# path-empty, query and fragment.
#
# As in RFC3986_URI, reg-name requires at least one character and the host
# group is optional, so host never captures the empty string.
#
# The IPv6address / IPvFuture alternatives are wrapped in (?:...) between
# the literal brackets so that both "[" and "]" are required for either
# form; without that group the alternation would accept "[<ipv6>" with no
# closing bracket or "<ipvfuture>]" with no opening one.
#
# NOTE: the literal has no /x flag, so it must not contain any whitespace.
RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/
7
# Hash of regexps keyed by component symbol (e.g. :SCHEME, :USERINFO);
# assigned once by #initialize and frozen.
attr_reader :regexp

# Builds the regexp table from #default_regexp, freezes every value and then
# the Hash itself, and stores the result in @regexp.
def initialize
  table = default_regexp
  table.each_value { |pattern| pattern.freeze }
  @regexp = table.freeze
end
7
12
8
13
def split ( uri ) #:nodoc:
9
14
begin
10
15
uri = uri . to_str
11
16
rescue NoMethodError
12
17
raise InvalidURIError , "bad URI(is not URI?): #{ uri } "
13
18
end
14
- unless uri . ascii_only?
19
+ uri . ascii_only? or
15
20
raise InvalidURIError , "URI must be ascii only #{ uri . dump } "
16
- end
17
21
if m = RFC3986_URI . match ( uri )
18
- ary = [ ]
19
- ary << m [ "scheme" ]
20
- if m [ "path-rootless" ] # opaque
21
- ary << nil # userinfo
22
- ary << nil # host
23
- ary << nil # port
24
- ary << nil # registry
25
- ary << nil # path
26
- ary << m [ "path-rootless" ]
27
- ary [ -1 ] << '?' << m [ "query" ] if m [ "query" ]
28
- ary << nil # query
29
- ary << m [ "fragment" ]
22
+ query = m [ "query" . freeze ]
23
+ scheme = m [ "scheme" . freeze ]
24
+ opaque = m [ "path-rootless" . freeze ]
25
+ if opaque
26
+ opaque << "?#{ query } " if query
27
+ [ scheme ,
28
+ nil , # userinfo
29
+ nil , # host
30
+ nil , # port
31
+ nil , # registry
32
+ nil , # path
33
+ opaque ,
34
+ nil , # query
35
+ m [ "fragment" . freeze ]
36
+ ]
30
37
else # normal
31
- ary << m [ "userinfo" ]
32
- ary << m [ "host" ]
33
- ary << m [ "port" ]
34
- ary << nil # registry
35
- ary << ( m [ "path-abempty" ] || m [ "path-absolute" ] || m [ "path-empty" ] )
36
- ary << nil # opaque
37
- ary << m [ "query" ]
38
- ary << m [ "fragment" ]
38
+ [ scheme ,
39
+ m [ "userinfo" . freeze ] ,
40
+ m [ "host" . freeze ] ,
41
+ m [ "port" . freeze ] ,
42
+ nil , # registry
43
+ ( m [ "path-abempty" . freeze ] ||
44
+ m [ "path-absolute" . freeze ] ||
45
+ m [ "path-empty" . freeze ] ) ,
46
+ nil , # opaque
47
+ query ,
48
+ m [ "fragment" . freeze ]
49
+ ]
39
50
end
40
51
elsif m = RFC3986_relative_ref . match ( uri )
41
- ary = [ nil ]
42
- ary << m [ "userinfo" ]
43
- ary << m [ "host" ]
44
- ary << m [ "port" ]
45
- ary << nil # registry
46
- ary << ( m [ "path-abempty" ] || m [ "path-absolute" ] || m [ "path-noscheme" ] || m [ "path-empty" ] )
47
- ary << nil # opaque
48
- ary << m [ "query" ]
49
- ary << m [ "fragment" ]
52
+ [ nil , # scheme
53
+ m [ "userinfo" . freeze ] ,
54
+ m [ "host" . freeze ] ,
55
+ m [ "port" . freeze ] ,
56
+ nil , # registry,
57
+ ( m [ "path-abempty" . freeze ] ||
58
+ m [ "path-absolute" . freeze ] ||
59
+ m [ "path-noscheme" . freeze ] ||
60
+ m [ "path-empty" . freeze ] ) ,
61
+ nil , # opaque
62
+ m [ "query" . freeze ] ,
63
+ m [ "fragment" . freeze ]
64
+ ]
50
65
else
51
66
raise InvalidURIError , "bad URI(is not URI?): #{ uri } "
52
67
end
@@ -55,11 +70,11 @@ def split(uri) #:nodoc:
55
70
def parse ( uri ) # :nodoc:
56
71
scheme , userinfo , host , port ,
57
72
registry , path , opaque , query , fragment = self . split ( uri )
58
-
59
- if scheme && URI . scheme_list . include? ( scheme . upcase )
60
- URI . scheme_list [ scheme . upcase ] . new ( scheme , userinfo , host , port ,
61
- registry , path , opaque , query ,
62
- fragment , self )
73
+ scheme_list = URI . scheme_list
74
+ if scheme && scheme_list . include? ( uc = scheme . upcase )
75
+ scheme_list [ uc ] . new ( scheme , userinfo , host , port ,
76
+ registry , path , opaque , query ,
77
+ fragment , self )
63
78
else
64
79
Generic . new ( scheme , userinfo , host , port ,
65
80
registry , path , opaque , query ,
@@ -78,7 +93,9 @@ def inspect
78
93
@@to_s . bind ( self ) . call
79
94
end
80
95
81
- def regexp
96
+ private
97
+
98
+ def default_regexp # :nodoc:
82
99
{
83
100
SCHEME : /\A [A-Za-z][A-Za-z0-9+\- .]*\z / ,
84
101
USERINFO : /\A (?:%\h \h |[!$&-.0-;=A-Z_a-z~])*\z / ,
@@ -92,8 +109,6 @@ def regexp
92
109
}
93
110
end
94
111
95
- private
96
-
97
112
def convert_to_uri ( uri )
98
113
if uri . is_a? ( URI ::Generic )
99
114
uri
0 commit comments