Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Yay! New language/Cargo features! #52

Merged
merged 7 commits into from
Nov 21, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
language: rust

script:
- cargo test --features query_encoding
- cargo test

after_success: |
[ $TRAVIS_BRANCH = master ] &&
[ $TRAVIS_PULL_REQUEST = false ] &&
cargo doc &&
cargo doc --features query_encoding &&
echo '<meta http-equiv=refresh content=0;url=url/index.html>' > target/doc/index.html &&
cp github.png target/doc/ &&
sudo pip install ghp-import &&
Expand Down
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ name = "url"
version = "0.1.0"
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]

[features]

query_encoding = ["encoding"]

[dependencies.encoding]

git = "https://github.com/lifthrasiir/rust-encoding"
optional = true
103 changes: 103 additions & 0 deletions src/encoding.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright 2013-2014 Simon Sapin.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.


//! Abstraction that conditionally compiles either to rust-encoding,
//! or to only support UTF-8.

#[cfg(feature = "query_encoding")]
extern crate encoding;

#[cfg(feature = "query_encoding")]
use self::encoding::types::{DecoderTrap, EncoderTrap};

#[cfg(feature = "query_encoding")]
use self::encoding::label::encoding_from_whatwg_label;

#[cfg(feature = "query_encoding")]
pub use self::encoding::types::EncodingRef;


/// Encoding selection used by `decode` and `encode`.
///
/// This definition is compiled only when the `query_encoding` Cargo feature
/// is enabled. It stores an optional rust-encoding `EncodingRef`, where the
/// absence of a stored encoding stands for UTF-8.
#[cfg(feature = "query_encoding")]
pub struct EncodingOverride {
/// `None` means UTF-8.
encoding: Option<EncodingRef>
}

// Implementation used when the `query_encoding` Cargo feature is enabled:
// non-UTF-8 work is delegated to rust-encoding, UTF-8 is handled directly.
#[cfg(feature = "query_encoding")]
impl EncodingOverride {
/// Build an override from an optional encoding.
///
/// `Some(encoding)` goes through `from_encoding`; `None` yields `utf8()`.
pub fn from_opt_encoding(encoding: Option<EncodingRef>) -> EncodingOverride {
encoding.map(EncodingOverride::from_encoding).unwrap_or_else(EncodingOverride::utf8)
}

/// Build an override from a rust-encoding `EncodingRef`.
///
/// An encoding whose `name()` is `"utf-8"` is stored as `None`,
/// so that `is_utf8` and the UTF-8 branches of `decode`/`encode` apply to it.
pub fn from_encoding(encoding: EncodingRef) -> EncodingOverride {
EncodingOverride {
encoding: if encoding.name() == "utf-8" { None } else { Some(encoding) }
}
}

/// Return the UTF-8 override (no encoding stored).
pub fn utf8() -> EncodingOverride {
EncodingOverride { encoding: None }
}

/// Look up an encoding from a label given as bytes.
///
/// Returns `None` when `label` is not valid UTF-8 or when
/// `encoding_from_whatwg_label` does not recognize it.
pub fn lookup(label: &[u8]) -> Option<EncodingOverride> {
::std::str::from_utf8(label.as_slice())
.and_then(encoding_from_whatwg_label)
.map(EncodingOverride::from_encoding)
}

/// Return `true` if no encoding is stored, i.e. this override means UTF-8.
pub fn is_utf8(&self) -> bool {
self.encoding.is_none()
}

/// Decode `input` into a `String`.
///
/// With a stored encoding, decoding is delegated to it with
/// `DecoderTrap::Replace`; otherwise `input` is decoded as UTF-8 lossily.
pub fn decode(&self, input: &[u8]) -> String {
match self.encoding {
// NOTE(review): `unwrap()` panics if `decode` returns an error; presumably
// the `Replace` trap never reports one -- confirm against rust-encoding.
Some(encoding) => encoding.decode(input, DecoderTrap::Replace).unwrap(),
None => String::from_utf8_lossy(input).into_string(),
}
}

/// Encode the string in `pair` and return the resulting bytes.
///
/// `pair` holds the input string and a scratch buffer. With a stored encoding,
/// the encoded bytes are written into the scratch buffer and a slice of that
/// buffer is returned. For UTF-8, the input's own bytes are returned and the
/// scratch buffer is left untouched.
pub fn encode<'a>(&self, pair: &'a mut (&str, Vec<u8>)) -> &'a [u8] {
let &(ref input, ref mut tmp) = pair;
match self.encoding {
Some(encoding) => {
// NOTE(review): presumably `NcrEscape` substitutes unencodable characters
// rather than failing, which would make this `unwrap()` safe -- confirm.
*tmp = encoding.encode(input.as_slice(), EncoderTrap::NcrEscape).unwrap();
tmp.as_slice()
},
None => input.as_bytes() // UTF-8
}
}
}


/// Stand-in for the encoding override when the `query_encoding` Cargo feature
/// is disabled.
///
/// It carries no data: without the feature, this type's methods only handle UTF-8.
#[cfg(not(feature = "query_encoding"))]
pub struct EncodingOverride;

// UTF-8-only implementation, compiled when the `query_encoding` Cargo feature
// is disabled. It has the same method names as the feature-enabled version,
// minus the constructors that take an `EncodingRef`.
#[cfg(not(feature = "query_encoding"))]
impl EncodingOverride {
/// Return the UTF-8 override (the only value of this type).
pub fn utf8() -> EncodingOverride {
EncodingOverride
}

/// Always returns `None`: no label lookup is available without the feature.
pub fn lookup(_label: &[u8]) -> Option<EncodingOverride> {
None
}

/// Always `true`: this implementation only handles UTF-8.
pub fn is_utf8(&self) -> bool {
true
}

/// Decode `input` as UTF-8 lossily into a `String`.
pub fn decode(&self, input: &[u8]) -> String {
String::from_utf8_lossy(input).into_string()
}

/// Return the bytes of the string in `pair`.
///
/// The second element of `pair` (the scratch buffer) is ignored here.
pub fn encode<'a>(&self, pair: &'a mut (&str, Vec<u8>)) -> &'a [u8] {
let &(ref query, _) = pair;
query.as_bytes()
}
}
114 changes: 59 additions & 55 deletions src/form_urlencoded.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,114 +13,118 @@
//! Converts between a string (such as a URL’s query string)
//! and a sequence of (name, value) pairs.

use std::str;

use encoding;
use encoding::EncodingRef;
use encoding::all::UTF_8;
use encoding::label::encoding_from_whatwg_label;

use encoding::EncodingOverride;
use percent_encoding::{percent_encode_to, percent_decode, FORM_URLENCODED_ENCODE_SET};


/// Convert a string in the `application/x-www-form-urlencoded` format
/// Convert a byte string in the `application/x-www-form-urlencoded` format
/// into a vector of (name, value) pairs.
///
/// Use `parse(input.as_bytes())` to parse a `&str` string.
#[inline]
pub fn parse_str(input: &str) -> Vec<(String, String)> {
parse_bytes(input.as_bytes(), None, false, false).unwrap()
pub fn parse(input: &[u8]) -> Vec<(String, String)> {
parse_internal(input, EncodingOverride::utf8(), false).unwrap()
}


/// Convert a byte string in the `application/x-www-form-urlencoded` format
/// into a vector of (name, value) pairs.
///
/// Use `parse(input.as_bytes())` to parse a `&str` string.
///
/// This function is only available if the `query_encoding` Cargo feature is enabled.
///
/// Arguments:
///
/// * `encoding_override`: The character encoding each name and value is decoded as
/// after percent-decoding. Defaults to UTF-8.
/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`.
/// * `isindex`: The *isindex flag*. If in doubt, set to `false`.
pub fn parse_bytes(input: &[u8], encoding_override: Option<EncodingRef>,
mut use_charset: bool, mut isindex: bool) -> Option<Vec<(String, String)>> {
let mut encoding_override = encoding_override.unwrap_or(UTF_8 as EncodingRef);
#[cfg(feature = "query_encoding")]
#[inline]
pub fn parse_with_encoding(input: &[u8], encoding_override: Option<::encoding::EncodingRef>,
use_charset: bool)
-> Option<Vec<(String, String)>> {
parse_internal(input, EncodingOverride::from_opt_encoding(encoding_override), use_charset)
}


fn parse_internal(input: &[u8], mut encoding_override: EncodingOverride, mut use_charset: bool)
-> Option<Vec<(String, String)>> {
let mut pairs = Vec::new();
for piece in input.split(|&b| b == b'&') {
if piece.is_empty() {
if isindex {
pairs.push((Vec::new(), Vec::new()))
}
} else {
if !piece.is_empty() {
let (name, value) = match piece.position_elem(&b'=') {
Some(position) => (piece.slice_to(position), piece.slice_from(position + 1)),
None => if isindex { ([].as_slice(), piece) } else { (piece, [].as_slice()) }
None => (piece, [].as_slice())
};

#[inline]
fn replace_plus(input: &[u8]) -> Vec<u8> {
input.iter().map(|&b| if b == b'+' { b' ' } else { b }).collect()
}

let name = replace_plus(name);
let value = replace_plus(value);
if use_charset && name.as_slice() == b"_charset_" {
// Non-UTF8 here is ok, encoding_from_whatwg_label only matches in the ASCII range.
match encoding_from_whatwg_label(unsafe { str::raw::from_utf8(value.as_slice()) }) {
match EncodingOverride::lookup(value.as_slice()) {
Some(encoding) => encoding_override = encoding,
None => (),
}
use_charset = false;
}
pairs.push((name, value));
}
isindex = false;
}
if encoding_override.name() != "utf-8" && !input.is_ascii() {
if !(encoding_override.is_utf8() || input.is_ascii()) {
return None
}

#[inline]
fn replace_plus(input: &[u8]) -> Vec<u8> {
input.iter().map(|&b| if b == b'+' { b' ' } else { b }).collect()
}

#[inline]
fn decode(input: Vec<u8>, encoding_override: EncodingRef) -> String {
encoding_override.decode(
percent_decode(input.as_slice()).as_slice(),
encoding::DecoderTrap::Replace).unwrap()
}

Some(pairs.into_iter().map(
|(name, value)| (decode(name, encoding_override), decode(value, encoding_override))
).collect())
Some(pairs.into_iter().map(|(name, value)| (
encoding_override.decode(percent_decode(name.as_slice()).as_slice()),
encoding_override.decode(percent_decode(value.as_slice()).as_slice())
)).collect())
}


/// Convert a slice of owned (name, value) pairs
/// into a string in the `application/x-www-form-urlencoded` format.
#[inline]
pub fn serialize_owned(pairs: &[(String, String)]) -> String {
serialize(pairs.iter().map(|&(ref n, ref v)| (n.as_slice(), v.as_slice())), None)
serialize(pairs.iter().map(|&(ref n, ref v)| (n.as_slice(), v.as_slice())))
}


/// Convert an iterator of (name, value) pairs
/// into a string in the `application/x-www-form-urlencoded` format.
///
/// This is a thin wrapper: it forwards `pairs` to `serialize_internal`
/// with `EncodingOverride::utf8()` as the encoding override.
#[inline]
pub fn serialize<'a, I>(pairs: I) -> String where I: Iterator<(&'a str, &'a str)> {
serialize_internal(pairs, EncodingOverride::utf8())
}

/// Convert an iterator of (name, value) pairs
/// into a string in the `application/x-www-form-urlencoded` format.
///
/// This function is only available if the `query_encoding` Cargo feature is enabled.
///
/// Arguments:
///
/// * `encoding_override`: The character encoding each name and value is encoded as
/// before percent-encoding. Defaults to UTF-8.
pub fn serialize<'a, I: Iterator<(&'a str, &'a str)>>(
mut pairs: I, encoding_override: Option<EncodingRef>)
-> String {
#[cfg(feature = "query_encoding")]
#[inline]
pub fn serialize_with_encoding<'a, I>(pairs: I, encoding_override: Option<::encoding::EncodingRef>)
-> String
where I: Iterator<(&'a str, &'a str)> {
serialize_internal(pairs, EncodingOverride::from_opt_encoding(encoding_override))
}

fn serialize_internal<'a, I>(mut pairs: I, encoding_override: EncodingOverride) -> String
where I: Iterator<(&'a str, &'a str)> {
#[inline]
fn byte_serialize(input: &str, output: &mut String,
encoding_override: Option<EncodingRef>) {
let keep_alive;
let input = match encoding_override {
None => input.as_bytes(), // "Encode" to UTF-8
Some(encoding) => {
keep_alive = encoding.encode(input, encoding::EncoderTrap::NcrEscape).unwrap();
keep_alive.as_slice()
}
};

for &byte in input.iter() {
encoding_override: EncodingOverride) {
let mut pair = (input, vec![]);
for &byte in encoding_override.encode(&mut pair).iter() {
if byte == b' ' {
output.push_str("+")
} else {
Expand Down Expand Up @@ -151,5 +155,5 @@ fn test_form_urlencoded() {
];
let encoded = serialize_owned(pairs.as_slice());
assert_eq!(encoded.as_slice(), "foo=%C3%A9%26&bar=&foo=%23");
assert_eq!(parse_str(encoded.as_slice()), pairs.as_slice().to_vec());
assert_eq!(parse(encoded.as_bytes()), pairs.as_slice().to_vec());
}
Loading