Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes needed for gecko-integration #201

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

@@ -28,6 +28,7 @@ rustc-serialize = "0.3"
[features]
query_encoding = ["encoding"]
heap_size = ["heapsize", "heapsize_plugin"]
only_percent_decode_hostname_valid = []

[dependencies]
idna = { version = "0.1.0", path = "./idna" }
@@ -12,7 +12,7 @@ use std::io;
use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs};
use std::vec;
use parser::{ParseResult, ParseError};
use percent_encoding::percent_decode;
use percent_encoding::percent_decode_hostname;
use idna;

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
@@ -77,10 +77,10 @@ impl Host<String> {
}
return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6)
}
let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
let domain = percent_decode_hostname(input.as_bytes()).decode_utf8_lossy();
let domain = try!(idna::domain_to_ascii(&domain));
if domain.find(|c| matches!(c,
'\0' | '\t' | '\n' | '\r' | ' ' | '#' | '%' | '/' | ':' | '?' | '@' | '[' | '\\' | ']'
'\0' | '\t' | '\n' | '\r' | ' ' | '#' | '/' | ':' | '?' | '@' | '[' | '\\' | ']'
)).is_some() {
return Err(ParseError::InvalidDomainCharacter)
}
@@ -110,6 +110,14 @@ define_encode_set! {
}
}

define_encode_set! {
/// This encode set is used to decide which characters are allowed in the hostname.
pub HOSTNAME_ENCODE_SET = [SIMPLE_ENCODE_SET] | {

This comment has been minimized.

@SimonSapin

SimonSapin May 28, 2016

Member

Encode sets are normally used for percent encoding, not decoding. So I’d rather not expose this in the public API. In after_percent_sign you could "inline" !HOSTNAME_ENCODE_SET.contains(c) to !(matches(c, ' ' | '!' | '"' | /* ...*/ '~') || c < '\u{20}' || c > '\u{7E}'), or invert the logic and use matches!(c, 'a'...'z' | 'A'...'Z' | '0'...'9' | '.' | '-' | '_' | '*'). (I think that’s the complementary set, but I’m not sure * should be there.)

' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '+', ',', '/', ':', ';',
'<', '=', '>', '?', '@', '[', '\\', ']', '^', '`', '{', '|', '}', '~'
}
}

/// Return the percent-encoding of the given bytes.
///
/// This is unconditional, unlike `percent_encode()` which uses an encode set.
@@ -244,26 +252,44 @@ impl<'a, E: EncodeSet> From<PercentEncode<'a, E>> for Cow<'a, str> {
#[inline]
pub fn percent_decode<'a>(input: &'a [u8]) -> PercentDecode<'a> {
PercentDecode {
bytes: input.iter()
bytes: input.iter(),
only_hostname_valid: false,
}
}

#[cfg(feature = "only_percent_decode_hostname_valid")]
pub fn percent_decode_hostname<'a>(input: &'a [u8]) -> PercentDecode<'a> {
PercentDecode {
bytes: input.iter(),
only_hostname_valid: true,
}
}

#[cfg(not(feature = "only_percent_decode_hostname_valid"))]
pub fn percent_decode_hostname<'a>(input: &'a [u8]) -> PercentDecode<'a> {
percent_decode(input)
}

/// The return type of `percent_decode()`.
#[derive(Clone)]
pub struct PercentDecode<'a> {
bytes: slice::Iter<'a, u8>,
only_hostname_valid: bool,
}

fn after_percent_sign(iter: &mut slice::Iter<u8>) -> Option<u8> {
fn after_percent_sign(iter: &mut slice::Iter<u8>, only_hostname_valid: bool) -> Option<u8> {
let initial_iter = iter.clone();
let h = iter.next().and_then(|&b| (b as char).to_digit(16));
let l = iter.next().and_then(|&b| (b as char).to_digit(16));
if let (Some(h), Some(l)) = (h, l) {
Some(h as u8 * 0x10 + l as u8)
} else {
*iter = initial_iter;
None
let c = h as u8 * 0x10 + l as u8;
if !only_hostname_valid ||
!HOSTNAME_ENCODE_SET.contains(c) {
return Some(c);
}
}
*iter = initial_iter;
return None
}

impl<'a> Iterator for PercentDecode<'a> {
@@ -272,7 +298,7 @@ impl<'a> Iterator for PercentDecode<'a> {
fn next(&mut self) -> Option<u8> {
self.bytes.next().map(|&byte| {
if byte == b'%' {
after_percent_sign(&mut self.bytes).unwrap_or(byte)
after_percent_sign(&mut self.bytes, self.only_hostname_valid).unwrap_or(byte)
} else {
byte
}
@@ -299,13 +325,14 @@ impl<'a> PercentDecode<'a> {
pub fn if_any(&self) -> Option<Vec<u8>> {
let mut bytes_iter = self.bytes.clone();
while bytes_iter.find(|&&b| b == b'%').is_some() {
if let Some(decoded_byte) = after_percent_sign(&mut bytes_iter) {
if let Some(decoded_byte) = after_percent_sign(&mut bytes_iter, self.only_hostname_valid) {
let initial_bytes = self.bytes.as_slice();
let unchanged_bytes_len = initial_bytes.len() - bytes_iter.len() - 3;
let mut decoded = initial_bytes[..unchanged_bytes_len].to_owned();
decoded.push(decoded_byte);
decoded.extend(PercentDecode {
bytes: bytes_iter
bytes: bytes_iter,
only_hostname_valid: self.only_hostname_valid,
});
return Some(decoded)
}
@@ -8,6 +8,7 @@

use std::ops::{Range, RangeFrom, RangeTo, RangeFull, Index};
use Url;
use std::mem;

impl Index<RangeFull> for Url {
type Output = str;
@@ -77,6 +78,7 @@ impl Index<Range<Position>> for Url {
/// `BeforeScheme` and `AfterFragment` are always the start and end of the entire URL,
/// so `&url[BeforeScheme..X]` is the same as `&url[..X]`
/// and `&url[X..AfterFragment]` is the same as `&url[X..]`.
#[repr(u32)]

This comment has been minimized.

@SimonSapin

SimonSapin May 28, 2016

Member

Why u32? u8 is more than large enough for 16 variants.

#[derive(Copy, Clone, Debug)]
pub enum Position {
BeforeScheme,
@@ -97,6 +99,14 @@ pub enum Position {
AfterFragment
}

impl From<u32> for Position {
fn from(f: u32) -> Self {
unsafe {
mem::transmute(f)

This comment has been minimized.

@SimonSapin

SimonSapin May 28, 2016

Member

I think this transmute is unsound. This method itself is not unsafe, and there’s nothing stopping users from calling it with a value that doesn’t have a corresponding enum variant. This should return an option, and avoid transmute. That code would be tedious and error-prone to write by hand, but it can be generated with a macro:

macro_rules! c_like_enum {
    ( $type_name: ident: $( $variant: ident, )+ ) => {
        c_like_enum! {
            $type_name
            [ $($variant)* ]
            [ ]
            next = 0
        }
    };

    (
        $type_name: ident
        [ $next_variant: ident $( $variant: ident )* ]
        [ $($assigned_variant: ident = $value: expr),* ]
        next = $next_value: expr
    ) => {
        c_like_enum! {
            $type_name
            [ $($variant)* ]
            [ $($assigned_variant = $value,)* $next_variant = $next_value ]
            next = $next_value + 1
        }
    };

    (
        $type_name: ident
        [ ]
        [ $($variant: ident = $value: expr),+ ]
        next = $next_value: expr
    ) => {
        #[derive(Copy, Clone, Debug)]
        #[repr(u8)]
        pub enum $type_name {
            $($variant = $value,)+
        }

        impl $type_name {
            #[allow(non_upper_case_globals)]
            pub fn from_u8(value: u8) -> Option<Self> {
                $(
                    const $variant: u8 = $value;
                )+
                match value {
                    $(
                        $variant => Some($type_name::$variant),
                    )+
                    _ => None
                }
            }
        }
    };
}

c_like_enum! { Position:
    BeforeScheme,
    AfterScheme,
    BeforeUsername,
    AfterUsername,
    BeforePassword,
    AfterPassword,
    BeforeHost,
    AfterHost,
    BeforePort,
    AfterPort,
    BeforePath,
    AfterPath,
    BeforeQuery,
    AfterQuery,
    BeforeFragment,
    AfterFragment,
}
}
}
}

impl Url {
#[inline]
fn index(&self, position: Position) -> usize {
@@ -268,3 +268,9 @@ fn issue_197() {
assert_eq!(url, Url::parse("file:///").unwrap());
url.path_segments_mut().unwrap().pop_if_empty();
}

#[test]
#[cfg(feature = "only_percent_decode_hostname_valid")]
fn test_percent_encoded_hostname() {
assert_eq!(Url::parse("http://example.com%0a%23.google.com/").unwrap().domain(), Some("example.com%0a%23.google.com"));
}
@@ -3514,11 +3514,6 @@
"base": "http://other.com/",
"failure": true
},
{
"input": "http://hello%00",
"base": "http://other.com/",
"failure": true
},
"Escaped numbers should be treated like IP addresses if they are.",
{
"input": "http://%30%78%63%30%2e%30%32%35%30.01",
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.