Skip to content

Commit

Permalink
Auto merge of #49698 - SimonSapin:unicode-for-everyone, r=alexcrichton
Browse files Browse the repository at this point in the history
Merge the std_unicode crate into the core crate

[The standard library facade](#27783) has historically contained a number of crates with different roles, but that number has decreased over time. `rand` and `libc` have moved to crates.io, and [`collections` was merged into `alloc`](#42648). Today we have `core` that applies everywhere, `std` that expects a full operating system, and `alloc` in-between that only requires a memory allocator (which can be provided by users)… and `std_unicode`, which doesn’t really have a reason to be separate anymore. It contains functionality based on Unicode data tables that can be large, but as long as relevant functions are not called the tables should be removed from binaries by linkers.

This deprecates the unstable `std_unicode` crate and moves all of its contents into `core`, replacing them with `pub use` reexports. The crate can be removed later. This also removes the `CharExt` trait (replaced with inherent methods in libcore) and the `UnicodeStr` trait (merged into `StrExt`). These traits were both unstable and not intended to be used or named directly.

A number of items are newly available in libcore and instantly stable there, but only if they were already stable in libstd.

Fixes #49319.
  • Loading branch information
bors committed Apr 12, 2018
2 parents e28ef22 + ef41788 commit d26f9e4
Show file tree
Hide file tree
Showing 42 changed files with 1,398 additions and 1,537 deletions.
1 change: 0 additions & 1 deletion src/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion src/ci/docker/wasm32-unknown/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,3 @@ ENV SCRIPT python2.7 /checkout/x.py test --target $TARGETS \
src/test/mir-opt \
src/test/codegen-units \
src/libcore \
src/libstd_unicode/ \
2 changes: 1 addition & 1 deletion src/doc/unstable-book/src/language-features/lang-items.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ the source code.
- `usize`: `libcore/num/mod.rs`
- `f32`: `libstd/f32.rs`
- `f64`: `libstd/f64.rs`
- `char`: `libstd_unicode/char.rs`
- `char`: `libcore/char.rs`
- `slice`: `liballoc/slice.rs`
- `str`: `liballoc/str.rs`
- `const_ptr`: `libcore/ptr.rs`
Expand Down
1 change: 0 additions & 1 deletion src/liballoc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ path = "lib.rs"

[dependencies]
core = { path = "../libcore" }
std_unicode = { path = "../libstd_unicode" }
compiler_builtins = { path = "../rustc/compiler_builtins_shim" }

[dev-dependencies]
Expand Down
4 changes: 1 addition & 3 deletions src/liballoc/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
#![feature(trusted_len)]
#![feature(try_reserve)]
#![feature(unboxed_closures)]
#![feature(unicode)]
#![feature(unicode_internals)]
#![feature(unsize)]
#![feature(allocator_internals)]
#![feature(on_unimplemented)]
Expand All @@ -135,8 +135,6 @@ extern crate test;
#[cfg(test)]
extern crate rand;

extern crate std_unicode;

// Module with internal macros used by other modules (needs to be included before other modules).
#[macro_use]
mod macros;
Expand Down
44 changes: 30 additions & 14 deletions src/liballoc/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,10 @@ use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use core::mem;
use core::ptr;
use core::iter::FusedIterator;
use std_unicode::str::{UnicodeStr, Utf16Encoder};

use vec_deque::VecDeque;
use borrow::{Borrow, ToOwned};
use string::String;
use std_unicode;
use vec::Vec;
use slice::{SliceConcatExt, SliceIndex};
use boxed::Box;
Expand All @@ -75,7 +73,7 @@ pub use core::str::{from_utf8, from_utf8_mut, Chars, CharIndices, Bytes};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut, ParseBoolError};
#[stable(feature = "rust1", since = "1.0.0")]
pub use std_unicode::str::SplitWhitespace;
pub use core::str::SplitWhitespace;
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::str::pattern;

Expand Down Expand Up @@ -147,7 +145,8 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
#[derive(Clone)]
#[stable(feature = "encode_utf16", since = "1.8.0")]
pub struct EncodeUtf16<'a> {
encoder: Utf16Encoder<Chars<'a>>,
chars: Chars<'a>,
extra: u16,
}

#[stable(feature = "collection_debug", since = "1.17.0")]
Expand All @@ -163,12 +162,29 @@ impl<'a> Iterator for EncodeUtf16<'a> {

#[inline]
fn next(&mut self) -> Option<u16> {
self.encoder.next()
if self.extra != 0 {
let tmp = self.extra;
self.extra = 0;
return Some(tmp);
}

let mut buf = [0; 2];
self.chars.next().map(|ch| {
let n = ch.encode_utf16(&mut buf).len();
if n == 2 {
self.extra = buf[1];
}
buf[0]
})
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.encoder.size_hint()
let (low, high) = self.chars.size_hint();
// every char gets either one u16 or two u16,
// so this iterator is between 1 or 2 times as
// long as the underlying iterator.
(low, high.and_then(|n| n.checked_mul(2)))
}
}

Expand Down Expand Up @@ -801,7 +817,7 @@ impl str {
#[stable(feature = "split_whitespace", since = "1.1.0")]
#[inline]
pub fn split_whitespace(&self) -> SplitWhitespace {
UnicodeStr::split_whitespace(self)
StrExt::split_whitespace(self)
}

/// An iterator over the lines of a string, as string slices.
Expand Down Expand Up @@ -871,7 +887,7 @@ impl str {
/// ```
#[stable(feature = "encode_utf16", since = "1.8.0")]
pub fn encode_utf16(&self) -> EncodeUtf16 {
EncodeUtf16 { encoder: Utf16Encoder::new(self[..].chars()) }
EncodeUtf16 { chars: self[..].chars(), extra: 0 }
}

/// Returns `true` if the given pattern matches a sub-slice of
Expand Down Expand Up @@ -1571,7 +1587,7 @@ impl str {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn trim(&self) -> &str {
UnicodeStr::trim(self)
StrExt::trim(self)
}

/// Returns a string slice with leading whitespace removed.
Expand Down Expand Up @@ -1607,7 +1623,7 @@ impl str {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn trim_left(&self) -> &str {
UnicodeStr::trim_left(self)
StrExt::trim_left(self)
}

/// Returns a string slice with trailing whitespace removed.
Expand Down Expand Up @@ -1643,7 +1659,7 @@ impl str {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn trim_right(&self) -> &str {
UnicodeStr::trim_right(self)
StrExt::trim_right(self)
}

/// Returns a string slice with all prefixes and suffixes that match a
Expand Down Expand Up @@ -1960,7 +1976,7 @@ impl str {
}

fn case_ignoreable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
use std_unicode::derived_property::{Cased, Case_Ignorable};
use core::unicode::derived_property::{Cased, Case_Ignorable};
match iter.skip_while(|&c| Case_Ignorable(c)).next() {
Some(c) => Cased(c),
None => false,
Expand Down Expand Up @@ -2142,7 +2158,7 @@ impl str {
#[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")]
#[inline]
pub fn is_whitespace(&self) -> bool {
UnicodeStr::is_whitespace(self)
StrExt::is_whitespace(self)
}

/// Returns true if this `str` is entirely alphanumeric, and false otherwise.
Expand All @@ -2161,7 +2177,7 @@ impl str {
#[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")]
#[inline]
pub fn is_alphanumeric(&self) -> bool {
UnicodeStr::is_alphanumeric(self)
StrExt::is_alphanumeric(self)
}

/// Checks if all characters in this string are within the ASCII range.
Expand Down
4 changes: 2 additions & 2 deletions src/liballoc/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,15 @@

#![stable(feature = "rust1", since = "1.0.0")]

use core::char::{decode_utf16, REPLACEMENT_CHARACTER};
use core::fmt;
use core::hash;
use core::iter::{FromIterator, FusedIterator};
use core::ops::Bound::{Excluded, Included, Unbounded};
use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds};
use core::ptr;
use core::str::pattern::Pattern;
use std_unicode::lossy;
use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
use core::str::lossy;

use borrow::{Cow, ToOwned};
use str::{self, from_boxed_utf8_unchecked, FromStr, Utf8Error, Chars};
Expand Down
3 changes: 1 addition & 2 deletions src/liballoc/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,11 @@
#![feature(string_retain)]
#![feature(try_reserve)]
#![feature(unboxed_closures)]
#![feature(unicode)]
#![feature(exact_chunks)]
#![feature(inclusive_range_fields)]

extern crate alloc_system;
extern crate std_unicode;
extern crate core;
extern crate rand;

use std::hash::{Hash, Hasher};
Expand Down
3 changes: 1 addition & 2 deletions src/liballoc/tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1204,8 +1204,7 @@ fn test_rev_split_char_iterator_no_trailing() {

#[test]
fn test_utf16_code_units() {
use std_unicode::str::Utf16Encoder;
assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(),
assert_eq!("é\u{1F4A9}".encode_utf16().collect::<Vec<u16>>(),
[0xE9, 0xD83D, 0xDCA9])
}

Expand Down
2 changes: 1 addition & 1 deletion src/liballoc/tests/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ fn test_from_utf16() {
let s_as_utf16 = s.encode_utf16().collect::<Vec<u16>>();
let u_as_string = String::from_utf16(&u).unwrap();

assert!(::std_unicode::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok()));
assert!(::core::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok()));
assert_eq!(s_as_utf16, u);

assert_eq!(u_as_string, s);
Expand Down
Loading

0 comments on commit d26f9e4

Please sign in to comment.