diff --git a/src/sage/cpython/string.pxd b/src/sage/cpython/string.pxd index 0ac92862092..3e4ce2a9e7e 100644 --- a/src/sage/cpython/string.pxd +++ b/src/sage/cpython/string.pxd @@ -10,8 +10,6 @@ from __future__ import absolute_import -from cpython.version cimport PY_MAJOR_VERSION - cdef extern from "string_impl.h": str _cstr_to_str(const char* c, encoding, errors) @@ -32,9 +30,8 @@ cpdef inline str bytes_to_str(b, encoding=None, errors=None): r""" Convert ``bytes`` to ``str``. - On Python 2 this is a no-op since ``bytes is str``. On Python 3 - this decodes the given ``bytes`` to a Python 3 unicode ``str`` using - the specified encoding. + This decodes the given ``bytes`` to a Python 3 unicode ``str`` using + the specified encoding. It is a no-op on ``str`` input. EXAMPLES:: @@ -52,22 +49,15 @@ cpdef inline str bytes_to_str(b, encoding=None, errors=None): if type(b) is not bytes: raise TypeError(f"expected bytes, {type(b).__name__} found") - if PY_MAJOR_VERSION <= 2: - return b - else: - return _cstr_to_str(b, encoding, errors) + return _cstr_to_str(b, encoding, errors) cpdef inline bytes str_to_bytes(s, encoding=None, errors=None): r""" Convert ``str`` or ``unicode`` to ``bytes``. - On Python 3 this encodes the given ``str`` to a Python 3 ``bytes`` - using the specified encoding. - - On Python 2 this is a no-op on ``str`` input since ``str is bytes``. - However, this function also accepts Python 2 ``unicode`` objects and - treats them the same as Python 3 unicode ``str`` objects. + It encodes the given ``str`` to a Python 3 ``bytes`` + using the specified encoding. It is a no-op on ``bytes`` input. EXAMPLES:: diff --git a/src/sage/cpython/string_impl.h b/src/sage/cpython/string_impl.h index 6e3a48dca37..c0f78966908 100644 --- a/src/sage/cpython/string_impl.h +++ b/src/sage/cpython/string_impl.h @@ -15,9 +15,6 @@ static inline PyObject* _cstr_to_str(const char* c, PyObject* encoding, PyObject* errors) { -#if PY_MAJOR_VERSION <= 2 - return PyBytes_FromString(c); -#else const char* err = NULL; // Default: strict const char* enc = NULL; // Default: utf-8 @@ -32,27 +29,14 @@ static inline PyObject* _cstr_to_str(const char* c, PyObject* encoding, PyObject } return PyUnicode_Decode(c, strlen(c), enc, err); -#endif } static inline PyObject* _str_to_bytes(PyObject* s, PyObject* encoding, PyObject* errors) { -#if PY_MAJOR_VERSION <= 2 - /* On Python 2, we accept bytes == str as input */ - if (PyBytes_CheckExact(s)) { - Py_INCREF(s); - return s; - } -#endif - if (!PyUnicode_Check(s)) { PyErr_Format(PyExc_TypeError, -#if PY_MAJOR_VERSION >= 3 "expected str, %s found", -#else - "expected str or unicode, %s found", -#endif Py_TYPE(s)->tp_name); return NULL; } @@ -60,20 +44,6 @@ static inline PyObject* _str_to_bytes(PyObject* s, PyObject* encoding, PyObject* const char* err = NULL; // Default: strict const char* enc = NULL; // Default: utf-8 -#if PY_MAJOR_VERSION <= 2 - if (errors != Py_None) { - err = PyString_AsString(errors); - if (!err) return NULL; - } - - if (encoding != Py_None) { - enc = PyString_AsString(encoding); - if (!enc) return NULL; - } - else { - enc = "utf-8"; - } -#else if (errors != Py_None) { err = PyUnicode_AsUTF8(errors); if (!err) return NULL; @@ -83,7 +53,6 @@ static inline PyObject* _str_to_bytes(PyObject* s, PyObject* encoding, PyObject* enc = PyUnicode_AsUTF8(encoding); if (!enc) return NULL; } -#endif return PyUnicode_AsEncodedString(s, enc, err); } diff --git a/src/sage/libs/ecl.pxd b/src/sage/libs/ecl.pxd index f0b30de5bc2..f92c561d5c9 100644 --- a/src/sage/libs/ecl.pxd +++ b/src/sage/libs/ecl.pxd @@ -135,6 +135,7 @@ cdef extern from "ecl/ecl.h": cl_object cl_write_to_string(cl_narg narg, cl_object o) cl_object ecl_cstring_to_base_string_or_nil(char *s) cl_object si_coerce_to_base_string(cl_object x) + cl_object si_base_string_p(cl_object x) # S-expr evaluation and function calls diff --git a/src/sage/libs/ecl.pyx b/src/sage/libs/ecl.pyx index 978e19b4604..5727946d379 100644 --- a/src/sage/libs/ecl.pyx +++ b/src/sage/libs/ecl.pyx @@ -42,6 +42,9 @@ cdef bint bint_integerp(cl_object obj): cdef bint bint_rationalp(cl_object obj): return not(cl_rationalp(obj) == Cnil) +cdef bint bint_base_string_p(cl_object obj): + return not(si_base_string_p(obj) == Cnil) + cdef extern from "eclsig.h": int ecl_sig_on() except 0 void ecl_sig_off() @@ -101,6 +104,8 @@ cdef void remove_node(cl_object node): cdef cl_object list_of_objects cdef cl_object read_from_string_clobj #our own error catching reader +cdef cl_object make_unicode_string_clobj +cdef cl_object unicode_string_codepoints_clobj cdef bint ecl_has_booted = 0 @@ -230,6 +235,8 @@ def init_ecl(): """ global list_of_objects global read_from_string_clobj + global make_unicode_string_clobj + global unicode_string_codepoints_clobj global conditions_to_handle_clobj global ecl_has_booted cdef char *argv[1] @@ -284,8 +291,27 @@ def init_ecl(): conditions_to_handle_clobj=ecl_list1(ecl_make_symbol(b"SERIOUS-CONDITION", b"COMMON-LISP")) insert_node_after(list_of_objects,conditions_to_handle_clobj) + cl_eval(string_to_object(b""" + (defun sage-make-unicode-string (codepoints) + (map 'string #'code-char codepoints)) + """)) + make_unicode_string_clobj = cl_eval(string_to_object(b"#'sage-make-unicode-string")) + + cl_eval(string_to_object(b""" + (defun sage-unicode-string-codepoints (s) + (map 'list #'char-code s)) + """)) + unicode_string_codepoints_clobj = cl_eval(string_to_object(b"#'sage-unicode-string-codepoints")) + ecl_has_booted = 1 +cdef ecl_string_to_python(cl_object s): + if bint_base_string_p(s): + return char_to_str(ecl_base_string_pointer_safe(s)) + else: + s = cl_funcall(2, unicode_string_codepoints_clobj, s) + return ''.join(chr(code) for code in ecl_to_python(s)) + cdef cl_object ecl_safe_eval(cl_object form) except NULL: """ TESTS: @@ -310,9 +336,8 @@ cdef cl_object ecl_safe_eval(cl_object form) except NULL: ecl_sig_off() if error != NULL: - error = si_coerce_to_base_string(error) raise RuntimeError("ECL says: {}".format( - char_to_str(ecl_base_string_pointer_safe(error)))) + ecl_string_to_python(error))) else: return ret @@ -324,9 +349,8 @@ cdef cl_object ecl_safe_funcall(cl_object func, cl_object arg) except NULL: ecl_sig_off() if error != NULL: - error = si_coerce_to_base_string(error) raise RuntimeError("ECL says: {}".format( - char_to_str(ecl_base_string_pointer_safe(error)))) + ecl_string_to_python(error))) else: return ret @@ -338,9 +362,8 @@ cdef cl_object ecl_safe_apply(cl_object func, cl_object args) except NULL: ecl_sig_off() if error != NULL: - error = si_coerce_to_base_string(error) raise RuntimeError("ECL says: {}".format( - char_to_str(ecl_base_string_pointer_safe(error)))) + ecl_string_to_python(error))) else: return ret @@ -393,8 +416,10 @@ def print_objects(): cdef cl_object c, s c = list_of_objects while True: - s = si_coerce_to_base_string(cl_write_to_string(1,cl_car(c))) - print(char_to_str(ecl_base_string_pointer_safe(s))) + + s = cl_write_to_string(1, cl_car(c)) + print(ecl_string_to_python(s)) + c = cl_cadr(c) if c == Cnil: break @@ -407,7 +432,7 @@ cdef cl_object python_to_ecl(pyobj) except NULL: # strings ->parsed by lisp reader cdef bytes s - cdef cl_object L, ptr + cdef cl_object L, ptr, o if isinstance(pyobj,bool): if pyobj: @@ -426,8 +451,14 @@ cdef cl_object python_to_ecl(pyobj) except NULL: elif isinstance(pyobj,float): return ecl_make_doublefloat(pyobj) elif isinstance(pyobj,unicode): - s=str_to_bytes(pyobj) - return ecl_safe_read_string(s) + try: + s = str_to_bytes(pyobj, 'ascii') + except UnicodeEncodeError: + o = cl_funcall(2, make_unicode_string_clobj, + python_to_ecl([ord(c) for c in pyobj])) + else: + o = ecl_cstring_to_base_string_or_nil(s) + return ecl_safe_funcall(read_from_string_clobj, o) elif isinstance(pyobj,bytes): s=pyobj return ecl_safe_read_string(s) @@ -504,8 +535,8 @@ cdef ecl_to_python(cl_object o): return tuple(L) return L else: - s = si_coerce_to_base_string(cl_write_to_string(1,o)) - return char_to_str(ecl_base_string_pointer_safe(s)) + s = cl_write_to_string(1, o) + return ecl_string_to_python(s) #Maxima's BFLOAT multiprecision float type can be read with: #def bfloat_to_python(e): @@ -605,6 +636,19 @@ cdef class EclObject: True sage: EclObject(-i).python() == -i True + + We check that symbols with Unicode names are converted correctly:: + + sage: EclObject('λ') + + sage: EclObject('|λ|') + + + We check that Unicode strings are converted correctly:: + + sage: EclObject('"Mαξιμα"') + + """ cdef cl_object obj #the wrapped object cdef cl_object node #linked list pointer: car(node) == obj @@ -721,8 +765,8 @@ cdef class EclObject: """ cdef cl_object s - s = si_coerce_to_base_string(cl_write_to_string(1,self.obj)) - return char_to_str(ecl_base_string_pointer_safe(s)) + s = cl_write_to_string(1, self.obj) + return ecl_string_to_python(s) def __hash__(self): r""" @@ -1288,7 +1332,7 @@ cdef EclObject ecl_wrap(cl_object o): #convenience routine to more easily evaluate strings cpdef EclObject ecl_eval(str s): - """ + r""" Read and evaluate string in Lisp and return the result EXAMPLES:: @@ -1299,10 +1343,18 @@ cpdef EclObject ecl_eval(str s): sage: ecl_eval("(mapcar 'fibo '(1 2 3 4 5 6 7))") + TESTS: + + We check that Unicode is handled correctly:: + + sage: ecl_eval('''(defun double-struck-number (n) (map 'string #'(lambda (c) (code-char (+ (char-code #\𝟘) (- (char-code c) (char-code #\\0))))) (format nil "~A" n)))''') + + sage: _(4711) + + """ cdef cl_object o - o=ecl_safe_read_string(str_to_bytes(s)) - o=ecl_safe_eval(o) + o=ecl_safe_eval(python_to_ecl(s)) return ecl_wrap(o) init_ecl()