Skip to content

Commit

Permalink
deprecate Unicode functions that will be moved to crates.io
Browse files Browse the repository at this point in the history
This patch:
1. renames libunicode to librustc_unicode,
2. deprecates several pieces of libunicode (see below), and
3. removes references to deprecated functions from
   librustc_driver and libsyntax. This may change pretty-printed
   output from these modules in cases involving wide or combining
   characters used in filenames, identifiers, etc.

The following functions are marked deprecated:

1. char.width() and str.width():
   --> use the unicode-width crate

2. str.graphemes() and str.grapheme_indices():
   --> use the unicode-segmentation crate

3. str.nfd_chars(), str.nfkd_chars(), str.nfc_chars(), str.nfkc_chars(),
   char.compose(), char.decompose_canonical(), char.decompose_compatible(),
   char.canonical_combining_class():
   --> use the unicode-normalization crate
  • Loading branch information
kwantam committed Apr 16, 2015
1 parent 288809c commit 29d1252
Show file tree
Hide file tree
Showing 27 changed files with 114 additions and 57 deletions.
12 changes: 6 additions & 6 deletions mk/crates.mk
Expand Up @@ -52,7 +52,7 @@
TARGET_CRATES := libc std flate arena term \
serialize getopts collections test rand \
log graphviz core rbml alloc \
unicode rustc_bitflags
rustc_unicode rustc_bitflags
RUSTC_CRATES := rustc rustc_typeck rustc_borrowck rustc_resolve rustc_driver \
rustc_trans rustc_back rustc_llvm rustc_privacy rustc_lint
HOST_CRATES := syntax $(RUSTC_CRATES) rustdoc fmt_macros
Expand All @@ -61,9 +61,9 @@ TOOLS := compiletest rustdoc rustc rustbook

DEPS_core :=
DEPS_libc := core
DEPS_unicode := core
DEPS_rustc_unicode := core
DEPS_alloc := core libc native:jemalloc
DEPS_std := core libc rand alloc collections unicode \
DEPS_std := core libc rand alloc collections rustc_unicode \
native:rust_builtin native:backtrace native:rustrt_native \
rustc_bitflags
DEPS_graphviz := std
Expand Down Expand Up @@ -94,7 +94,7 @@ DEPS_serialize := std log
DEPS_rbml := std log serialize
DEPS_term := std log
DEPS_getopts := std
DEPS_collections := core alloc unicode
DEPS_collections := core alloc rustc_unicode
DEPS_num := std
DEPS_test := std getopts serialize rbml term native:rust_test_helpers
DEPS_rand := core
Expand All @@ -115,11 +115,11 @@ ONLY_RLIB_libc := 1
ONLY_RLIB_alloc := 1
ONLY_RLIB_rand := 1
ONLY_RLIB_collections := 1
ONLY_RLIB_unicode := 1
ONLY_RLIB_rustc_unicode := 1
ONLY_RLIB_rustc_bitflags := 1

# Documented-by-default crates
DOC_CRATES := std alloc collections core libc unicode
DOC_CRATES := std alloc collections core libc rustc_unicode

################################################################################
# You should not need to edit below this line
Expand Down
4 changes: 2 additions & 2 deletions mk/tests.mk
Expand Up @@ -15,14 +15,14 @@

# The names of crates that must be tested

# libcore/libunicode tests are in a separate crate
# libcore/librustc_unicode tests are in a separate crate
DEPS_coretest :=
$(eval $(call RUST_CRATE,coretest))

DEPS_collectionstest :=
$(eval $(call RUST_CRATE,collectionstest))

TEST_TARGET_CRATES = $(filter-out core unicode,$(TARGET_CRATES)) \
TEST_TARGET_CRATES = $(filter-out core rustc_unicode,$(TARGET_CRATES)) \
collectionstest coretest
TEST_DOC_CRATES = $(DOC_CRATES)
TEST_HOST_CRATES = $(filter-out rustc_typeck rustc_borrowck rustc_resolve \
Expand Down
11 changes: 7 additions & 4 deletions src/etc/unicode.py
Expand Up @@ -518,11 +518,14 @@ def comp_pfun(char):
emit_table(f, "combining_class_table", combine, "&'static [(char, char, u8)]", is_pub=False,
pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]))

f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
+ " bsearch_range_value_table(c, combining_class_table)\n"
+ " }\n")
f.write(""" #[deprecated(reason = "use the crates.io `unicode-normalization` lib instead",
since = "1.0.0")]
#[unstable(feature = "unicode",
reason = "this functionality will be moved to crates.io")]
pub fn canonical_combining_class(c: char) -> u8 {
bsearch_range_value_table(c, combining_class_table)
}
f.write("""
}
""")
Expand Down
2 changes: 1 addition & 1 deletion src/libcollections/lib.rs
Expand Up @@ -50,7 +50,7 @@
#[macro_use]
extern crate core;

extern crate unicode;
extern crate rustc_unicode;
extern crate alloc;

#[cfg(test)] #[macro_use] extern crate std;
Expand Down
44 changes: 35 additions & 9 deletions src/libcollections/str.rs
Expand Up @@ -59,13 +59,13 @@ use core::result::Result;
use core::str as core_str;
use core::str::pattern::Pattern;
use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use unicode::str::{UnicodeStr, Utf16Encoder};
use rustc_unicode::str::{UnicodeStr, Utf16Encoder};

use core::convert::AsRef;
use vec_deque::VecDeque;
use borrow::{Borrow, ToOwned};
use string::String;
use unicode;
use rustc_unicode;
use vec::Vec;
use slice::SliceConcatExt;

Expand All @@ -78,7 +78,7 @@ pub use core::str::{Matches, RMatches};
pub use core::str::{MatchIndices, RMatchIndices};
pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
pub use core::str::{from_utf8_unchecked, ParseBoolError};
pub use unicode::str::{Words, Graphemes, GraphemeIndices};
pub use rustc_unicode::str::{Words, Graphemes, GraphemeIndices};
pub use core::str::pattern;

/*
Expand Down Expand Up @@ -161,6 +161,9 @@ enum DecompositionType {
/// External iterator for a string decomposition's characters.
///
/// For use with the `std::iter` module.
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[derive(Clone)]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand All @@ -172,6 +175,7 @@ pub struct Decompositions<'a> {
sorted: bool
}

#[allow(deprecated)]
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Decompositions<'a> {
type Item = char;
Expand All @@ -198,7 +202,7 @@ impl<'a> Iterator for Decompositions<'a> {
{
let callback = |d| {
let class =
unicode::char::canonical_combining_class(d);
rustc_unicode::char::canonical_combining_class(d);
if class == 0 && !*sorted {
canonical_sort(buffer);
*sorted = true;
Expand All @@ -207,10 +211,10 @@ impl<'a> Iterator for Decompositions<'a> {
};
match self.kind {
Canonical => {
unicode::char::decompose_canonical(ch, callback)
rustc_unicode::char::decompose_canonical(ch, callback)
}
Compatible => {
unicode::char::decompose_compatible(ch, callback)
rustc_unicode::char::decompose_compatible(ch, callback)
}
}
}
Expand Down Expand Up @@ -254,6 +258,9 @@ enum RecompositionState {
/// External iterator for a string recomposition's characters.
///
/// For use with the `std::iter` module.
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[derive(Clone)]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand All @@ -266,6 +273,7 @@ pub struct Recompositions<'a> {
last_ccc: Option<u8>
}

#[allow(deprecated)]
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Recompositions<'a> {
type Item = char;
Expand All @@ -276,7 +284,7 @@ impl<'a> Iterator for Recompositions<'a> {
match self.state {
Composing => {
for ch in self.iter.by_ref() {
let ch_class = unicode::char::canonical_combining_class(ch);
let ch_class = rustc_unicode::char::canonical_combining_class(ch);
if self.composee.is_none() {
if ch_class != 0 {
return Some(ch);
Expand All @@ -288,7 +296,7 @@ impl<'a> Iterator for Recompositions<'a> {

match self.last_ccc {
None => {
match unicode::char::compose(k, ch) {
match rustc_unicode::char::compose(k, ch) {
Some(r) => {
self.composee = Some(r);
continue;
Expand Down Expand Up @@ -316,7 +324,7 @@ impl<'a> Iterator for Recompositions<'a> {
self.last_ccc = Some(ch_class);
continue;
}
match unicode::char::compose(k, ch) {
match rustc_unicode::char::compose(k, ch) {
Some(r) => {
self.composee = Some(r);
continue;
Expand Down Expand Up @@ -465,6 +473,9 @@ impl str {

/// Returns an iterator over the string in Unicode Normalization Form D
/// (canonical decomposition).
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[inline]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand All @@ -480,6 +491,9 @@ impl str {

/// Returns an iterator over the string in Unicode Normalization Form KD
/// (compatibility decomposition).
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[inline]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand All @@ -495,6 +509,9 @@ impl str {

/// An Iterator over the string in Unicode Normalization Form C
/// (canonical decomposition followed by canonical composition).
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[inline]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand All @@ -511,6 +528,9 @@ impl str {

/// An Iterator over the string in Unicode Normalization Form KC
/// (compatibility decomposition followed by canonical composition).
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[inline]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand Down Expand Up @@ -1690,6 +1710,8 @@ impl str {
///
/// assert_eq!(&gr2[..], b);
/// ```
#[deprecated(reason = "use the crates.io `unicode-segmentation` library instead",
since = "1.0.0")]
#[unstable(feature = "unicode",
reason = "this functionality may only be provided by libunicode")]
pub fn graphemes(&self, is_extended: bool) -> Graphemes {
Expand All @@ -1709,6 +1731,8 @@ impl str {
///
/// assert_eq!(&gr_inds[..], b);
/// ```
#[deprecated(reason = "use the crates.io `unicode-segmentation` library instead",
since = "1.0.0")]
#[unstable(feature = "unicode",
reason = "this functionality may only be provided by libunicode")]
pub fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
Expand Down Expand Up @@ -1749,6 +1773,8 @@ impl str {
/// recommends that these
/// characters be treated as 1 column (i.e., `is_cjk = false`) if the
/// locale is unknown.
#[deprecated(reason = "use the crates.io `unicode-width` library instead",
since = "1.0.0")]
#[unstable(feature = "unicode",
reason = "this functionality may only be provided by libunicode")]
pub fn width(&self, is_cjk: bool) -> usize {
Expand Down
4 changes: 2 additions & 2 deletions src/libcollections/string.rs
Expand Up @@ -25,8 +25,8 @@ use core::ops::{self, Deref, Add, Index};
use core::ptr;
use core::slice;
use core::str::pattern::Pattern;
use unicode::str as unicode_str;
use unicode::str::Utf16Item;
use rustc_unicode::str as unicode_str;
use rustc_unicode::str::Utf16Item;

use borrow::{Cow, IntoCow};
use str::{self, FromStr, Utf8Error};
Expand Down
2 changes: 1 addition & 1 deletion src/libcollectionstest/lib.rs
Expand Up @@ -27,7 +27,7 @@

extern crate collections;
extern crate test;
extern crate unicode;
extern crate rustc_unicode;

#[cfg(test)] #[macro_use] mod bench;

Expand Down
10 changes: 8 additions & 2 deletions src/libcollectionstest/str.rs
Expand Up @@ -19,6 +19,7 @@ fn test_le() {
assert!("foo" != "bar");
}

#[allow(deprecated)]
#[test]
fn test_len() {
assert_eq!("".len(), 0);
Expand Down Expand Up @@ -498,7 +499,7 @@ fn test_is_utf8() {

#[test]
fn test_is_utf16() {
use unicode::str::is_utf16;
use rustc_unicode::str::is_utf16;

macro_rules! pos {
($($e:expr),*) => { { $(assert!(is_utf16($e));)* } }
Expand Down Expand Up @@ -944,6 +945,7 @@ fn test_words() {
assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
}

#[allow(deprecated)]
#[test]
fn test_nfd_chars() {
macro_rules! t {
Expand All @@ -963,6 +965,7 @@ fn test_nfd_chars() {
t!("\u{ac1c}", "\u{1100}\u{1162}");
}

#[allow(deprecated)]
#[test]
fn test_nfkd_chars() {
macro_rules! t {
Expand All @@ -982,6 +985,7 @@ fn test_nfkd_chars() {
t!("\u{ac1c}", "\u{1100}\u{1162}");
}

#[allow(deprecated)]
#[test]
fn test_nfc_chars() {
macro_rules! t {
Expand All @@ -1002,6 +1006,7 @@ fn test_nfc_chars() {
t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
}

#[allow(deprecated)]
#[test]
fn test_nfkc_chars() {
macro_rules! t {
Expand Down Expand Up @@ -1033,6 +1038,7 @@ fn test_lines() {
assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
}

#[allow(deprecated)]
#[test]
fn test_graphemes() {
use std::iter::order;
Expand Down Expand Up @@ -1629,7 +1635,7 @@ fn test_rev_split_char_iterator_no_trailing() {

#[test]
fn test_utf16_code_units() {
use unicode::str::Utf16Encoder;
use rustc_unicode::str::Utf16Encoder;
assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(),
[0xE9, 0xD83D, 0xDCA9])
}
Expand Down
2 changes: 1 addition & 1 deletion src/libcollectionstest/string.rs
Expand Up @@ -135,7 +135,7 @@ fn test_from_utf16() {
let s_as_utf16 = s.utf16_units().collect::<Vec<u16>>();
let u_as_string = String::from_utf16(&u).unwrap();

assert!(::unicode::str::is_utf16(&u));
assert!(::rustc_unicode::str::is_utf16(&u));
assert_eq!(s_as_utf16, u);

assert_eq!(u_as_string, s);
Expand Down
2 changes: 1 addition & 1 deletion src/libcore/char.rs
Expand Up @@ -10,7 +10,7 @@

//! Character manipulation.
//!
//! For more details, see ::unicode::char (a.k.a. std::char)
//! For more details, see ::rustc_unicode::char (a.k.a. std::char)

#![allow(non_snake_case)]
#![doc(primitive = "char")]
Expand Down
1 change: 1 addition & 0 deletions src/libcoretest/char.rs
Expand Up @@ -210,6 +210,7 @@ fn test_len_utf16() {
assert!('\u{1f4a9}'.len_utf16() == 2);
}

#[allow(deprecated)]
#[test]
fn test_width() {
assert_eq!('\x00'.width(false),Some(0));
Expand Down
2 changes: 1 addition & 1 deletion src/libcoretest/lib.rs
Expand Up @@ -32,7 +32,7 @@
extern crate core;
extern crate test;
extern crate libc;
extern crate unicode;
extern crate rustc_unicode;

mod any;
mod atomic;
Expand Down

0 comments on commit 29d1252

Please sign in to comment.