|
1 |
| -# frozen_string_literal: false |
| 1 | +# frozen_string_literal: true |
2 | 2 | module URI
|
3 | 3 | class RFC3986_Parser # :nodoc:
|
4 | 4 | # URI defined in RFC3986
|
5 |
| - RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*+):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*+)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h++\.[!$&-.0-;=A-Z_a-z~]++))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*+))(?::(?<port>\d*+))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*+))*+)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])++)(?:\/\g<segment>)*+)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*+)|(?<path-empty>))(?:\?(?<query>[^#]*+))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*+))?)\z/ |
6 |
| - RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*+)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h++\.[!$&-.0-;=A-Z_a-z~]++))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])++))?(?::(?<port>\d*+))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*+))*+)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])++)(?:\/\g<segment>)*+)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])++)(?:\/\g<segment>)*+)|(?<path-empty>))(?:\?(?<query>[^#]*+))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*+))?)\z/ |
# Grammar fragments for RFC 3986, composed below into the two top-level
# matchers RFC3986_URI and RFC3986_relative_ref.

# host = IP-literal / IPv4address / reg-name.
# Written in extended (x) mode so every IPv6address alternative gets its own
# line; the named groups (ls32, dec-octet, IPv4address) are reused through
# \g<...> subexpression calls.
HOST = %r[
  (?<IP-literal>\[(?:
      (?<IPv6address>
        (?:\h{1,4}:){6}
        (?<ls32>\h{1,4}:\h{1,4}
        | (?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)
            \.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>)
        )
      | ::(?:\h{1,4}:){5}\g<ls32>
      | \h{1,4}?::(?:\h{1,4}:){4}\g<ls32>
      | (?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>
      | (?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>
      | (?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>
      | (?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>
      | (?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}
      | (?:(?:\h{1,4}:){,6}\h{1,4})?::
      )
    | (?<IPvFuture>v\h++\.[!$&-.0-9:;=A-Z_a-z~]++)
    )\])
  | \g<IPv4address>
  | (?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*+)
]x

# userinfo: percent-escapes or the listed characters (":" included).
USERINFO = /(?:%\h\h|[!$&-.0-9:;=A-Z_a-z~])*+/

# authority = [ userinfo "@" ] host [ ":" port ]
# HOST's source is stripped of spaces and newlines before being embedded.
AUTHORITY = %r[
  (?:(?<userinfo>#{USERINFO.source})@)?
  (?<host>#{HOST.source.delete(" \n")})
  (?::(?<port>\d*+))?
]x

# The following are kept as source strings so they can be interpolated into
# larger patterns without an extra (?-mix:...) wrapper.
SCHEME = %r[[A-Za-z][+\-.0-9A-Za-z]*+].source
# One path character; note that "/" and ":" are both members.
SEG = %r[(?:%\h\h|[!$&-.0-9:;=@A-Z_a-z~/])].source
# One path character for the first segment of a scheme-less relative path:
# SEG without ":" (it would read as a scheme delimiter) and without "/".
SEG_NC = %r[(?:%\h\h|[!$&-.0-9;=@A-Z_a-z~])].source
FRAGMENT = %r[(?:%\h\h|[!$&-.0-9:;=@A-Z_a-z~/?])*+].source

# URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
#
# (?<seg>...){0} only defines the group for later \g<seg> calls; it matches
# nothing itself.
#
# Because SEG contains "/", the path alternatives need explicit guards:
# * path-absolute is "/" optionally followed by a run that does not itself
#   start with "/", so a string starting with "//" cannot fall through to it
#   when the authority branch fails;
# * path-rootless must not start with "/" -- a (?!/) negative lookahead.
#   The previous spelling (?!=/) was a typo that rejected a leading "=/"
#   instead and let a leading "/" through.
RFC3986_URI = %r[\A
(?<seg>#{SEG}){0}
(?<URI>
  (?<scheme>#{SCHEME}):
  (?<hier-part>//
    (?<authority>#{AUTHORITY})
    (?<path-abempty>(?:/\g<seg>*+)?)
  | (?<path-absolute>/(?:(?!/)\g<seg>++)?)
  | (?<path-rootless>(?!/)\g<seg>++)
  | (?<path-empty>)
  )
  (?:\?(?<query>[^\#]*+))?
  (?:\#(?<fragment>#{FRAGMENT}))?
)\z]x

# relative-ref = relative-part [ "?" query ] [ "#" fragment ]
#
# Same structure as RFC3986_URI minus the scheme.  path-noscheme starts with a
# non-empty run of SEG_NC characters, so its first segment can contain neither
# ":" nor "/"; any further segments (after a "/") use the full SEG class.
RFC3986_relative_ref = %r[\A
(?<seg>#{SEG}){0}
(?<relative-ref>
  (?<relative-part>//
    (?<authority>#{AUTHORITY})
    (?<path-abempty>(?:/\g<seg>*+)?)
  | (?<path-absolute>/(?:(?!/)\g<seg>++)?)
  | (?<path-noscheme>#{SEG_NC}++(?:/\g<seg>*+)?)
  | (?<path-empty>)
  )
  (?:\?(?<query>[^#]*+))?
  (?:\#(?<fragment>#{FRAGMENT}))?
)\z]x
7 | 67 | attr_reader :regexp
|
8 | 68 |
|
9 | 69 | def initialize
|
@@ -92,14 +152,14 @@ def inspect
|
92 | 152 |
|
# Returns a Hash mapping URI component names to the regexp used to validate
# that component on its own.  Every pattern is anchored with \A ... \z so it
# must cover the whole candidate string.
#
# Most entries interpolate the class-level grammar constants; the /o flag
# makes that interpolation happen only once.
def default_regexp # :nodoc:
  {
    SCHEME: %r[\A#{SCHEME}\z]o,
    USERINFO: %r[\A#{USERINFO}\z]o,
    HOST: %r[\A#{HOST}\z]o,
    ABS_PATH: %r[\A/#{SEG}*+\z]o,
    # A relative path is non-empty and must not begin with "/".  SEG itself
    # contains "/", so the guard has to be the negative lookahead (?!/);
    # the earlier (?!=/) spelling instead rejected a leading "=/" and
    # accepted a leading "/".
    REL_PATH: %r[\A(?!/)#{SEG}++\z]o,
    QUERY: %r[\A(?:%\h\h|[!$&-.0-9:;=@A-Z_a-z~/?])*+\z],
    FRAGMENT: %r[\A#{FRAGMENT}\z]o,
    # Empty, or anything that does not start with "/".
    OPAQUE: %r[\A(?:[^/].*)?\z],
    # Optional digits, optionally surrounded by tab / LF / FF / CR / space.
    PORT: /\A[\x09\x0a\x0c\x0d ]*\d*[\x09\x0a\x0c\x0d ]*\z/,
  }
end
|
|
0 commit comments