From 9654142aaeb57e1ee010f48db49fc76cbb0796f5 Mon Sep 17 00:00:00 2001 From: Shanavas M Date: Sun, 9 Jul 2017 14:55:28 +0300 Subject: [PATCH 1/5] Port CHECK_TYPE and CHECK_CHARACTER macros to rust --- rust_src/remacs-sys/lib.rs | 1 + rust_src/src/lisp.rs | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/rust_src/remacs-sys/lib.rs b/rust_src/remacs-sys/lib.rs index 9478c8e8d54..d5883d5a473 100644 --- a/rust_src/remacs-sys/lib.rs +++ b/rust_src/remacs-sys/lib.rs @@ -702,6 +702,7 @@ extern "C" { pub static Qwholenump: Lisp_Object; pub static Qvectorp: Lisp_Object; pub static Qsequencep: Lisp_Object; + pub static Qcharacterp: Lisp_Object; pub static Qinteger: Lisp_Object; pub static Qsymbol: Lisp_Object; diff --git a/rust_src/src/lisp.rs b/rust_src/src/lisp.rs index f8c5e16bc96..8bc752e3a12 100644 --- a/rust_src/src/lisp.rs +++ b/rust_src/src/lisp.rs @@ -22,7 +22,7 @@ use remacs_sys::{EmacsInt, EmacsUint, EmacsDouble, EMACS_INT_MAX, EMACS_INT_SIZE EMACS_FLOAT_SIZE, USE_LSB_TAG, GCTYPEBITS, wrong_type_argument, Qstringp, Qsymbolp, Qnumber_or_marker_p, Qt, make_float, Qlistp, Qintegerp, Qconsp, circular_list, internal_equal, Fcons, CHECK_IMPURE, Qnumberp, Qfloatp, - Qwholenump, Qvectorp, SYMBOL_NAME, PseudovecType, lispsym}; + Qwholenump, Qvectorp, Qcharacterp, SYMBOL_NAME, PseudovecType, lispsym}; use remacs_sys::Lisp_Object as CLisp_Object; // TODO: tweak Makefile to rebuild C files if this changes. @@ -148,6 +148,18 @@ impl LispObject { pub fn get_untaggedptr(self) -> *mut c_void { (self.to_raw() & VALMASK) as intptr_t as *mut c_void } + + // Same as CHECK_TYPE macro, + // order of arguments changed + #[inline] + #[allow(dead_code)] + fn check_type_or_error(self, ok: bool, predicate: CLisp_Object) -> () { + if !ok { + unsafe { + wrong_type_argument(predicate, self.to_raw()); + } + } + } } // Symbol support (LispType == Lisp_Symbol == 0) @@ -881,6 +893,15 @@ impl LispObject { ) } + /// Check if Lisp object is a character or not. + /// Similar to CHECK_CHARACTER + #[inline] + pub fn is_character_or_error(self) -> () { + unsafe { + self.check_type_or_error(self.is_character(), Qcharacterp); + } + } + #[inline] pub fn is_overlay(self) -> bool { self.as_misc().map_or( From 8ab69652545a01dabaedb37077cc0fcbcd52f121 Mon Sep 17 00:00:00 2001 From: Shanavas M Date: Sun, 9 Jul 2017 14:57:34 +0300 Subject: [PATCH 2/5] Port UNIBYTE_TO_CHAR and MAKE_CHAR_MULTIBYTE macros --- rust_src/src/multibyte.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/rust_src/src/multibyte.rs b/rust_src/src/multibyte.rs index 5f87789a128..e22b59b6844 100644 --- a/rust_src/src/multibyte.rs +++ b/rust_src/src/multibyte.rs @@ -35,6 +35,7 @@ use std::ptr; use std::slice; use libc::{ptrdiff_t, c_char, c_uchar, c_uint, c_int}; +use std::ascii::AsciiExt; use lisp::ExternalPtr; use remacs_sys::{CHAR_MODIFIER_MASK, CHAR_SHIFT, CHAR_CTL, emacs_abort, CHARACTERBITS, EmacsInt, @@ -191,6 +192,24 @@ fn raw_byte_from_codepoint(cp: Codepoint) -> c_uchar { (cp - 0x3F_FF00) as c_uchar } +/// UNIBYTE_TO_CHAR macro +#[inline] +pub fn unibyte_to_char(cp: Codepoint) -> Codepoint { + if (cp as u8).is_ascii() { + cp + } else { + raw_byte_codepoint(cp as c_uchar) + } +} + +/// MAKE_CHAR_MULTIBYTE macro +#[inline] +#[allow(unused_comparisons)] +pub fn make_char_multibyte(cp: Codepoint) -> Codepoint { + debug_assert!((cp) >= 0 && (cp) < 256); + unibyte_to_char(cp) +} + /// Same as the CHAR_STRING macro. #[inline] fn write_codepoint(to: &mut [c_uchar], cp: Codepoint) -> usize { From fe3f16f78cb213c5bb872bf1a90483e289bc9e75 Mon Sep 17 00:00:00 2001 From: Shanavas M Date: Sun, 9 Jul 2017 14:58:25 +0300 Subject: [PATCH 3/5] Port unibyte-char-to-multibyte function --- rust_src/src/character.rs | 18 ++++++++++++++++-- rust_src/src/lib.rs | 1 + src/character.c | 17 ----------------- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/rust_src/src/character.rs b/rust_src/src/character.rs index cd3402fa4aa..9771d7eec08 100644 --- a/rust_src/src/character.rs +++ b/rust_src/src/character.rs @@ -1,9 +1,9 @@ //! Operations on characters. use lisp::LispObject; -use multibyte::MAX_CHAR; +use multibyte::{MAX_CHAR, make_char_multibyte}; use remacs_macros::lisp_fn; -use remacs_sys::EmacsInt; +use remacs_sys::{EmacsInt, error}; /// Return the character of the maximum code. #[lisp_fn] @@ -26,3 +26,17 @@ fn characterp(object: LispObject, _ignore: LispObject) -> LispObject { fn char_or_string_p(object: LispObject) -> LispObject { LispObject::from_bool(object.is_character() || object.is_string()) } + +/// Convert the byte CH to multibyte character. +#[lisp_fn] +fn unibyte_char_to_multibyte(ch: LispObject) -> LispObject { + ch.is_character_or_error(); + let mut c = ch.as_fixnum().unwrap() as u32; + if c >= 0x100 { + unsafe { + error("Not a unibyte character: %d".as_ptr(), c); + } + } + c = make_char_multibyte(c); + LispObject::from_fixnum(c as EmacsInt) +} diff --git a/rust_src/src/lib.rs b/rust_src/src/lib.rs index abb88487f1e..7b5dc3f3f4c 100755 --- a/rust_src/src/lib.rs +++ b/rust_src/src/lib.rs @@ -193,6 +193,7 @@ pub extern "C" fn rust_init_syms() { defsubr(&*character::Smax_char); defsubr(&*character::Scharacterp); defsubr(&*character::Schar_or_string_p); + defsubr(&*character::Sunibyte_char_to_multibyte); defsubr(&*vectors::Sarrayp); defsubr(&*vectors::Sbool_vector_p); defsubr(&*vectors::Sbufferp); diff --git a/src/character.c b/src/character.c index 0cb0726a0c5..b4627c6ac09 100644 --- a/src/character.c +++ b/src/character.c @@ -67,22 +67,6 @@ translate_char (Lisp_Object table, int c) return c; } - -DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte, - Sunibyte_char_to_multibyte, 1, 1, 0, - doc: /* Convert the byte CH to multibyte character. */) - (Lisp_Object ch) -{ - int c; - - CHECK_CHARACTER (ch); - c = XFASTINT (ch); - if (c >= 0x100) - error ("Not a unibyte character: %d", c); - MAKE_CHAR_MULTIBYTE (c); - return make_number (c); -} - DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte, Smultibyte_char_to_unibyte, 1, 1, 0, doc: /* Convert the multibyte character CH to a byte. @@ -631,7 +615,6 @@ syms_of_character (void) staticpro (&Vchar_unify_table); Vchar_unify_table = Qnil; - defsubr (&Sunibyte_char_to_multibyte); defsubr (&Smultibyte_char_to_unibyte); defsubr (&Schar_width); defsubr (&Sstring_width); From 8d85e9c77000df7088fb9aae8a05cb7b56a9c27f Mon Sep 17 00:00:00 2001 From: Shanavas M Date: Mon, 10 Jul 2017 09:08:17 +0300 Subject: [PATCH 4/5] Null terminate string for c compatibility --- rust_src/src/character.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust_src/src/character.rs b/rust_src/src/character.rs index 9771d7eec08..e0aef354c9d 100644 --- a/rust_src/src/character.rs +++ b/rust_src/src/character.rs @@ -34,7 +34,7 @@ fn unibyte_char_to_multibyte(ch: LispObject) -> LispObject { let mut c = ch.as_fixnum().unwrap() as u32; if c >= 0x100 { unsafe { - error("Not a unibyte character: %d".as_ptr(), c); + error("Not a unibyte character: %d\0".as_ptr(), c); } } c = make_char_multibyte(c); From 2a53970f5d3d41ae63b2aa22b43577f704cc7e5d Mon Sep 17 00:00:00 2001 From: Shanavas M Date: Mon, 10 Jul 2017 11:05:54 +0300 Subject: [PATCH 5/5] Refactor --- rust_src/src/character.rs | 6 ++---- rust_src/src/lisp.rs | 8 ++++---- rust_src/src/multibyte.rs | 6 ++---- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/rust_src/src/character.rs b/rust_src/src/character.rs index e0aef354c9d..fab4a3fa805 100644 --- a/rust_src/src/character.rs +++ b/rust_src/src/character.rs @@ -30,13 +30,11 @@ fn char_or_string_p(object: LispObject) -> LispObject { /// Convert the byte CH to multibyte character. #[lisp_fn] fn unibyte_char_to_multibyte(ch: LispObject) -> LispObject { - ch.is_character_or_error(); - let mut c = ch.as_fixnum().unwrap() as u32; + let c = ch.as_character_or_error(); if c >= 0x100 { unsafe { error("Not a unibyte character: %d\0".as_ptr(), c); } } - c = make_char_multibyte(c); - LispObject::from_fixnum(c as EmacsInt) + LispObject::from_fixnum(make_char_multibyte(c) as EmacsInt) } diff --git a/rust_src/src/lisp.rs b/rust_src/src/lisp.rs index 8bc752e3a12..22a24eed72b 100644 --- a/rust_src/src/lisp.rs +++ b/rust_src/src/lisp.rs @@ -13,7 +13,7 @@ use std::fmt::{Debug, Formatter, Error}; use libc::{c_void, intptr_t}; use marker::{LispMarker, marker_position}; -use multibyte::{LispStringRef, MAX_CHAR}; +use multibyte::{Codepoint, LispStringRef, MAX_CHAR}; use symbols::LispSymbolRef; use vectors::LispVectorlikeRef; use buffers::LispBufferRef; @@ -152,7 +152,6 @@ impl LispObject { // Same as CHECK_TYPE macro, // order of arguments changed #[inline] - #[allow(dead_code)] fn check_type_or_error(self, ok: bool, predicate: CLisp_Object) -> () { if !ok { unsafe { @@ -893,13 +892,14 @@ impl LispObject { ) } - /// Check if Lisp object is a character or not. + /// Check if Lisp object is a character or not and return the codepoint /// Similar to CHECK_CHARACTER #[inline] - pub fn is_character_or_error(self) -> () { + pub fn as_character_or_error(self) -> Codepoint { unsafe { self.check_type_or_error(self.is_character(), Qcharacterp); } + self.as_fixnum().unwrap() as Codepoint } #[inline] diff --git a/rust_src/src/multibyte.rs b/rust_src/src/multibyte.rs index e22b59b6844..bcf9f9f9de2 100644 --- a/rust_src/src/multibyte.rs +++ b/rust_src/src/multibyte.rs @@ -35,7 +35,6 @@ use std::ptr; use std::slice; use libc::{ptrdiff_t, c_char, c_uchar, c_uint, c_int}; -use std::ascii::AsciiExt; use lisp::ExternalPtr; use remacs_sys::{CHAR_MODIFIER_MASK, CHAR_SHIFT, CHAR_CTL, emacs_abort, CHARACTERBITS, EmacsInt, @@ -195,7 +194,7 @@ fn raw_byte_from_codepoint(cp: Codepoint) -> c_uchar { /// UNIBYTE_TO_CHAR macro #[inline] pub fn unibyte_to_char(cp: Codepoint) -> Codepoint { - if (cp as u8).is_ascii() { + if cp < 0x80 { cp } else { raw_byte_codepoint(cp as c_uchar) @@ -204,9 +203,8 @@ pub fn unibyte_to_char(cp: Codepoint) -> Codepoint { /// MAKE_CHAR_MULTIBYTE macro #[inline] -#[allow(unused_comparisons)] pub fn make_char_multibyte(cp: Codepoint) -> Codepoint { - debug_assert!((cp) >= 0 && (cp) < 256); + debug_assert!(cp < 256); unibyte_to_char(cp) }