diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index c13ea221..34290073 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -13,6 +13,7 @@ extern "C" { #include "numpy/ndarraytypes.h" #include "numpy/dtype_api.h" } +#include #include "sleef.h" #include "sleefquad.h" @@ -20,8 +21,13 @@ extern "C" { #include "scalar.h" #include "casts.h" #include "dtype.h" +#include "utilities.h" +#include "lock.h" +#include "dragon4.h" +#include "ops.hpp" -#define NUM_CASTS 34 // 16 to_casts + 16 from_casts + 1 quad_to_quad + 1 void_to_quad +#define NUM_CASTS 36 // 17 to_casts + 17 from_casts + 1 quad_to_quad + 1 void_to_quad +#define QUAD_STR_WIDTH 50 // 42 is enough for scientific notation float128, just keeping some buffer static NPY_CASTING quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), @@ -151,38 +157,391 @@ quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const da return 0; } - static NPY_CASTING void_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], - PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], - npy_intp *view_offset) + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) { - PyErr_SetString(PyExc_TypeError, - "Void to QuadPrecision cast is not implemented"); + PyErr_SetString(PyExc_TypeError, "Void to QuadPrecision cast is not implemented"); return (NPY_CASTING)-1; } static int void_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) { PyErr_SetString(PyExc_RuntimeError, "void_to_quad_strided_loop should not be called"); return -1; } +// Unicode/String to QuadDType casting +static NPY_CASTING +unicode_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) +{ + if (!PyArray_ISNBO(given_descrs[0]->byteorder)) { + loop_descrs[0] = PyArray_DescrNewByteorder(given_descrs[0], NPY_NATIVE); + if (loop_descrs[0] == nullptr) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + } + + if (given_descrs[1] == NULL) { + loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); + if (loop_descrs[1] == nullptr) { + Py_DECREF(loop_descrs[0]); + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + return NPY_UNSAFE_CASTING; +} + +// Helper function: Convert UCS4 string to quad_value +static inline int +unicode_to_quad_convert(const Py_UCS4 *ucs4_str, npy_intp unicode_size_chars, + QuadBackendType backend, quad_value *out_val) +{ + PyObject *unicode_obj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, ucs4_str, unicode_size_chars); + if (unicode_obj == NULL) { + return -1; + } + + const char *utf8_str = PyUnicode_AsUTF8(unicode_obj); + if (utf8_str == NULL) { + Py_DECREF(unicode_obj); + return -1; + } + + char *endptr; + int err = NumPyOS_ascii_strtoq(utf8_str, backend, out_val, &endptr); + + if (err < 0) { + PyErr_Format(PyExc_ValueError, + "could not convert string to QuadPrecision: np.str_('%s')", utf8_str); + Py_DECREF(unicode_obj); + return -1; + } + + // Check that we parsed the entire string (skip trailing whitespace) + while (ascii_isspace(*endptr)) { + endptr++; + } + + if (*endptr != '\0') { + PyErr_Format(PyExc_ValueError, + "could not convert string to QuadPrecision: np.str_('%s')", utf8_str); + Py_DECREF(unicode_obj); + return -1; + } + + Py_DECREF(unicode_obj); + return 0; +} + +static int +unicode_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + PyArray_Descr *const *descrs = context->descriptors; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1]; + QuadBackendType backend = descr_out->backend; + + // Unicode strings are stored as UCS4 (4 bytes per character) + npy_intp unicode_size_chars = descrs[0]->elsize / 4; + + while (N--) { + Py_UCS4 *ucs4_str = (Py_UCS4 *)in_ptr; + quad_value out_val; + + if (unicode_to_quad_convert(ucs4_str, unicode_size_chars, backend, &out_val) < 0) { + return -1; + } + + if (backend == BACKEND_SLEEF) { + memcpy(out_ptr, &out_val.sleef_value, sizeof(Sleef_quad)); + } + else { + memcpy(out_ptr, &out_val.longdouble_value, sizeof(long double)); + } + + in_ptr += in_stride; + out_ptr += out_stride; + } + + return 0; +} + +static int +unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + PyArray_Descr *const *descrs = context->descriptors; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1]; + QuadBackendType backend = descr_out->backend; + + // Unicode strings are stored as UCS4 (4 bytes per character) + npy_intp unicode_size_chars = descrs[0]->elsize / 4; + + while (N--) { + Py_UCS4 *ucs4_str = (Py_UCS4 *)in_ptr; + quad_value out_val; + + if (unicode_to_quad_convert(ucs4_str, unicode_size_chars, backend, &out_val) < 0) { + return -1; + } + + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)out_ptr = out_val.sleef_value; + } + else { + *(long double *)out_ptr = out_val.longdouble_value; + } + + in_ptr += in_stride; + out_ptr += out_stride; + } + + return 0; +} + +// QuadDType to unicode/string +static NPY_CASTING +quad_to_unicode_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) +{ + npy_intp required_size_chars = QUAD_STR_WIDTH; + npy_intp required_size_bytes = required_size_chars * 4; // UCS4 = 4 bytes per char + + if (given_descrs[1] == NULL) { + // Create descriptor with required size + PyArray_Descr *unicode_descr = PyArray_DescrNewFromType(NPY_UNICODE); + if (unicode_descr == nullptr) { + return (NPY_CASTING)-1; + } + + unicode_descr->elsize = required_size_bytes; + loop_descrs[1] = unicode_descr; + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + // Set the input descriptor + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + *view_offset = 0; + + // If target descriptor is wide enough, it's a safe cast + if (loop_descrs[1]->elsize >= required_size_bytes) { + return NPY_SAFE_CASTING; + } + return NPY_SAME_KIND_CASTING; +} + +// Helper function: Convert quad to string with adaptive notation +static inline PyObject * +quad_to_string_adaptive(Sleef_quad *sleef_val, npy_intp unicode_size_chars) +{ + // Try positional format first to see if it would fit + PyObject *positional_str = Dragon4_Positional_QuadDType( + sleef_val, DigitMode_Unique, CutoffMode_TotalLength, SLEEF_QUAD_DECIMAL_DIG, 0, 1, + TrimMode_LeaveOneZero, 1, 0); + + if (positional_str == NULL) { + return NULL; + } + + const char *pos_str = PyUnicode_AsUTF8(positional_str); + if (pos_str == NULL) { + Py_DECREF(positional_str); + return NULL; + } + + npy_intp pos_len = strlen(pos_str); + + // If positional format fits, use it; otherwise use scientific notation + if (pos_len <= unicode_size_chars) { + return positional_str; // Keep the positional string + } + else { + Py_DECREF(positional_str); + // Use scientific notation with full precision + return Dragon4_Scientific_QuadDType(sleef_val, DigitMode_Unique, + SLEEF_QUAD_DECIMAL_DIG, 0, 1, + TrimMode_LeaveOneZero, 1, 2); + } +} + +// Helper function: Copy string to UCS4 output buffer +static inline void +copy_string_to_ucs4(const char *str, Py_UCS4 *out_ucs4, npy_intp unicode_size_chars) +{ + npy_intp str_len = strlen(str); + + for (npy_intp i = 0; i < unicode_size_chars; i++) { + if (i < str_len) { + out_ucs4[i] = (Py_UCS4)str[i]; + } + else { + out_ucs4[i] = 0; + } + } +} + +static int +quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + PyArray_Descr *const *descrs = context->descriptors; + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)descrs[0]; + QuadBackendType backend = descr_in->backend; + + npy_intp unicode_size_chars = descrs[1]->elsize / 4; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + while (N--) { + quad_value in_val; + if (backend == BACKEND_SLEEF) { + memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad)); + } + else { + memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double)); + } + + // Convert to Sleef_quad for Dragon4 + Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend); + + // Get string representation with adaptive notation + PyObject *py_str = quad_to_string_adaptive(&sleef_val, unicode_size_chars); + if (py_str == NULL) { + return -1; + } + + const char *temp_str = PyUnicode_AsUTF8(py_str); + if (temp_str == NULL) { + Py_DECREF(py_str); + return -1; + } + + // Convert char string to UCS4 and store in output + Py_UCS4 *out_ucs4 = (Py_UCS4 *)out_ptr; + copy_string_to_ucs4(temp_str, out_ucs4, unicode_size_chars); + + Py_DECREF(py_str); + + in_ptr += in_stride; + out_ptr += out_stride; + } + + return 0; +} + +static int +quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + PyArray_Descr *const *descrs = context->descriptors; + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)descrs[0]; + QuadBackendType backend = descr_in->backend; + + npy_intp unicode_size_chars = descrs[1]->elsize / 4; + + while (N--) { + quad_value in_val; + if (backend == BACKEND_SLEEF) { + in_val.sleef_value = *(Sleef_quad *)in_ptr; + } + else { + in_val.longdouble_value = *(long double *)in_ptr; + } + + // Convert to Sleef_quad for Dragon4 + Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend); + + // Get string representation with adaptive notation + PyObject *py_str = quad_to_string_adaptive(&sleef_val, unicode_size_chars); + if (py_str == NULL) { + return -1; + } + + const char *temp_str = PyUnicode_AsUTF8(py_str); + if (temp_str == NULL) { + Py_DECREF(py_str); + return -1; + } + + // Convert char string to UCS4 and store in output + Py_UCS4 *out_ucs4 = (Py_UCS4 *)out_ptr; + copy_string_to_ucs4(temp_str, out_ucs4, unicode_size_chars); + + Py_DECREF(py_str); + + in_ptr += in_stride; + out_ptr += out_stride; + } + + return 0; +} // Tag dispatching to ensure npy_bool/npy_ubyte and npy_half/npy_ushort do not alias in templates // see e.g. https://stackoverflow.com/q/32522279 struct spec_npy_bool {}; struct spec_npy_half {}; -template -struct NpyType { typedef T TYPE; }; -template<> -struct NpyType{ typedef npy_bool TYPE; }; -template<> -struct NpyType{ typedef npy_half TYPE; }; +template +struct NpyType { + typedef T TYPE; +}; +template <> +struct NpyType { + typedef npy_bool TYPE; +}; +template <> +struct NpyType { + typedef npy_half TYPE; +}; // Casting from other types to QuadDType @@ -829,21 +1188,22 @@ init_casts_internal(void) add_spec(quad2quad_spec); - PyArray_DTypeMeta **void_dtypes = new PyArray_DTypeMeta *[2]{&PyArray_VoidDType, &QuadPrecDType}; + PyArray_DTypeMeta **void_dtypes = + new PyArray_DTypeMeta *[2]{&PyArray_VoidDType, &QuadPrecDType}; PyType_Slot *void_slots = new PyType_Slot[4]{ - {NPY_METH_resolve_descriptors, (void *)&void_to_quad_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&void_to_quad_strided_loop}, - {NPY_METH_unaligned_strided_loop, (void *)&void_to_quad_strided_loop}, - {0, nullptr}}; + {NPY_METH_resolve_descriptors, (void *)&void_to_quad_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&void_to_quad_strided_loop}, + {NPY_METH_unaligned_strided_loop, (void *)&void_to_quad_strided_loop}, + {0, nullptr}}; PyArrayMethod_Spec *void_spec = new PyArrayMethod_Spec{ - .name = "cast_Void_to_QuadPrec_ERROR", - .nin = 1, - .nout = 1, - .casting = NPY_UNSAFE_CASTING, - .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = void_dtypes, - .slots = void_slots, + .name = "cast_Void_to_QuadPrec_ERROR", + .nin = 1, + .nout = 1, + .casting = NPY_UNSAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = void_dtypes, + .slots = void_slots, }; add_spec(void_spec); @@ -879,6 +1239,44 @@ init_casts_internal(void) add_cast_from(&PyArray_DoubleDType); add_cast_from(&PyArray_LongDoubleDType); + // Unicode/String to QuadPrecision cast + PyArray_DTypeMeta **unicode_to_quad_dtypes = new PyArray_DTypeMeta *[2]{&PyArray_UnicodeDType, &QuadPrecDType}; + PyType_Slot *unicode_to_quad_slots = new PyType_Slot[4]{ + {NPY_METH_resolve_descriptors, (void *)&unicode_to_quad_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&unicode_to_quad_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&unicode_to_quad_strided_loop_unaligned}, + {0, nullptr}}; + + PyArrayMethod_Spec *unicode_to_quad_spec = new PyArrayMethod_Spec{ + .name = "cast_Unicode_to_QuadPrec", + .nin = 1, + .nout = 1, + .casting = NPY_UNSAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = unicode_to_quad_dtypes, + .slots = unicode_to_quad_slots, + }; + add_spec(unicode_to_quad_spec); + + // QuadPrecision to Unicode + PyArray_DTypeMeta **quad_to_unicode_dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, &PyArray_UnicodeDType}; + PyType_Slot *quad_to_unicode_slots = new PyType_Slot[4]{ + {NPY_METH_resolve_descriptors, (void *)&quad_to_unicode_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&quad_to_unicode_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&quad_to_unicode_loop_unaligned}, + {0, nullptr}}; + + PyArrayMethod_Spec *quad_to_unicode_spec = new PyArrayMethod_Spec{ + .name = "cast_QuadPrec_to_Unicode", + .nin = 1, + .nout = 1, + .casting = NPY_UNSAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = quad_to_unicode_dtypes, + .slots = quad_to_unicode_slots, + }; + add_spec(quad_to_unicode_spec); + specs[spec_count] = nullptr; return specs; } diff --git a/quaddtype/numpy_quaddtype/src/constants.hpp b/quaddtype/numpy_quaddtype/src/constants.hpp index c2c41260..25bcabe9 100644 --- a/quaddtype/numpy_quaddtype/src/constants.hpp +++ b/quaddtype/numpy_quaddtype/src/constants.hpp @@ -1,6 +1,10 @@ #ifndef QUAD_CONSTANTS_HPP #define QUAD_CONSTANTS_HPP +#ifdef __cplusplus +extern "C" { +#endif + #include #include #include @@ -130,4 +134,8 @@ static inline ConstantResult get_sleef_constant_by_name(const char* constant_nam return result; } +#ifdef __cplusplus +} +#endif + #endif // QUAD_CONSTANTS_HPP \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/quad_common.h b/quaddtype/numpy_quaddtype/src/quad_common.h index e17ee0dd..566269a7 100644 --- a/quaddtype/numpy_quaddtype/src/quad_common.h +++ b/quaddtype/numpy_quaddtype/src/quad_common.h @@ -19,6 +19,13 @@ typedef union { long double longdouble_value; } quad_value; + +// For IEEE 754 binary128 (quad precision), we need 36 decimal digits +// to guarantee round-trip conversion (string -> parse -> equals original value) +// Formula: ceil(1 + MANT_DIG * log10(2)) = ceil(1 + 113 * 0.30103) = 36 +// src: https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format +#define SLEEF_QUAD_DECIMAL_DIG 36 + #ifdef __cplusplus } #endif diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index 5b51d3bc..4adfb434 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -18,12 +18,6 @@ #include "lock.h" #include "utilities.h" -// For IEEE 754 binary128 (quad precision), we need 36 decimal digits -// to guarantee round-trip conversion (string -> parse -> equals original value) -// Formula: ceil(1 + MANT_DIG * log10(2)) = ceil(1 + 113 * 0.30103) = 36 -// src: https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format -#define SLEEF_QUAD_DECIMAL_DIG 36 - QuadPrecisionObject * QuadPrecision_raw_new(QuadBackendType backend) diff --git a/quaddtype/numpy_quaddtype/src/utilities.c b/quaddtype/numpy_quaddtype/src/utilities.c index aa00d30b..6690605c 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.c +++ b/quaddtype/numpy_quaddtype/src/utilities.c @@ -1,5 +1,157 @@ -#include "utilities.h" #include +#include +#include +#include +#include "utilities.h" +#include "constants.hpp" + +int +ascii_isspace(int c) +{ + return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v'; +} + +int +ascii_isalpha(char c) +{ + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} + +int +ascii_isdigit(char c) +{ + return (c >= '0' && c <= '9'); +} + +int +ascii_isalnum(char c) +{ + return ascii_isdigit(c) || ascii_isalpha(c); +} + +int +ascii_tolower(int c) +{ + if (c >= 'A' && c <= 'Z') { + return c + ('a' - 'A'); + } + return c; +} + +// inspired from NumPyOS_ascii_strncasecmp +int +ascii_strncasecmp(const char *s1, const char *s2, size_t n) +{ + while (n > 0 && *s1 != '\0' && *s2 != '\0') { + int c1 = ascii_tolower((unsigned char)*s1); + int c2 = ascii_tolower((unsigned char)*s2); + int diff = c1 - c2; + + if (diff != 0) { + return diff; + } + + s1++; + s2++; + n--; + } + + if(n > 0) { + return *s1 - *s2; + } + return 0; +} + +/* + * NumPyOS_ascii_strtoq: + * + * Locale-independent string to quad-precision parser. + * Inspired by NumPyOS_ascii_strtold from NumPy. + * + * This function: + * - Skips leading whitespace + * - Recognizes inf/nan case-insensitively with optional signs and payloads + * - Delegates to cstring_to_quad for numeric parsing + * + * Returns: + * 0 on success + * -1 on parse error + */ +int +NumPyOS_ascii_strtoq(const char *s, QuadBackendType backend, quad_value *out_value, char **endptr) +{ + const char *p; + int sign; + + // skip leading whitespace + while (ascii_isspace(*s)) { + s++; + } + + p = s; + sign = 1; + if (*p == '-') { + sign = -1; + ++p; + } + else if (*p == '+') { + ++p; + } + + // Check for inf/infinity (case-insensitive) + if (ascii_strncasecmp(p, "inf", 3) == 0) { + p += 3; + if (ascii_strncasecmp(p, "inity", 5) == 0) { + p += 5; + } + + // Set infinity values with sign applied + if (backend == BACKEND_SLEEF) { + out_value->sleef_value = sign > 0 ? QUAD_PRECISION_INF : QUAD_PRECISION_NINF; + } + else { + out_value->longdouble_value = sign > 0 ? strtold("inf", NULL) : strtold("-inf", NULL); + } + + if (endptr) { + *endptr = (char *)p; + } + return 0; + } + + // Check for nan (case-insensitive) with optional payload + if (ascii_strncasecmp(p, "nan", 3) == 0) { + p += 3; + + // Skip optional (payload) + if (*p == '(') { + ++p; + while (ascii_isalnum(*p) || *p == '_') { + ++p; + } + if (*p == ')') { + ++p; + } + } + + // Set NaN value (sign is ignored for NaN) + if (backend == BACKEND_SLEEF) { + out_value->sleef_value = QUAD_PRECISION_NAN; + } + else { + out_value->longdouble_value = nanl(""); + } + + if (endptr) { + *endptr = (char *)p; + } + return 0; + } + + // For numeric values, delegate to cstring_to_quad + // Pass the original string position (after whitespace, includes sign if present) + return cstring_to_quad(s, backend, out_value, endptr, false); +} int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_value, char **endptr, bool require_full_parse) @@ -17,4 +169,16 @@ char **endptr, bool require_full_parse) return -1; // parse error - characters remain to be converted return 0; // success +} + +// Helper function: Convert quad_value to Sleef_quad for Dragon4 +Sleef_quad +quad_to_sleef_quad(const quad_value *in_val, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return in_val->sleef_value; + } + else { + return Sleef_cast_from_doubleq1(in_val->longdouble_value); + } } \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/utilities.h b/quaddtype/numpy_quaddtype/src/utilities.h index 1925046f..62cb2a30 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.h +++ b/quaddtype/numpy_quaddtype/src/utilities.h @@ -1,11 +1,33 @@ #ifndef QUAD_UTILITIES_H #define QUAD_UTILITIES_H +#ifdef __cplusplus +extern "C" { +#endif + #include "quad_common.h" #include #include #include int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_value, char **endptr, bool require_full_parse); +int ascii_isspace(int c); +int ascii_isalpha(char c); +int ascii_isdigit(char c); +int ascii_isalnum(char c); +int ascii_tolower(int c); +int ascii_strncasecmp(const char *s1, const char *s2, size_t n); + +// Locale-independent ASCII string to quad parser (inspired by NumPyOS_ascii_strtold) +int NumPyOS_ascii_strtoq(const char *s, QuadBackendType backend, quad_value *out_value, char **endptr); + + +// Helper function: Convert quad_value to Sleef_quad for Dragon4 +Sleef_quad +quad_to_sleef_quad(const quad_value *in_val, QuadBackendType backend); + +#ifdef __cplusplus +} +#endif #endif \ No newline at end of file diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 5a434a2f..460c1970 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -540,7 +540,7 @@ def test_supported_astype(dtype): assert back == orig -@pytest.mark.parametrize("dtype", ["S10", "U10", "T", "V10", "datetime64[ms]", "timedelta64[ms]"]) +@pytest.mark.parametrize("dtype", ["V10", "datetime64[ms]", "timedelta64[ms]"]) def test_unsupported_astype(dtype): if dtype == "V10": with pytest.raises(TypeError, match="cast"): @@ -552,6 +552,365 @@ def test_unsupported_astype(dtype): with pytest.raises(TypeError, match="cast"): np.array(QuadPrecision(1)).astype(dtype, casting="unsafe") +class TestArrayCastStringBytes: + @pytest.mark.parametrize("strtype", [np.str_, str]) + @pytest.mark.parametrize("input_val", [ + "3.141592653589793238462643383279502884197", + "2.71828182845904523536028747135266249775", + "1e100", + "1e-100", + "0.0", + "-0.0", + "inf", + "-inf", + "nan", + "-nan", + ]) + def test_cast_string_to_quad_roundtrip(self, input_val, strtype): + str_array = np.array(input_val, dtype=strtype) + quad_array = str_array.astype(QuadPrecDType()) + expected = np.array(input_val, dtype=QuadPrecDType()) + + if np.isnan(float(expected)): + np.testing.assert_array_equal(np.isnan(quad_array), np.isnan(expected)) + else: + np.testing.assert_array_equal(quad_array, expected) + + quad_to_string_array = quad_array.astype(strtype) + + # Round-trip - String -> Quad -> String -> Quad should preserve value + roundtrip_quad_array = quad_to_string_array.astype(QuadPrecDType()) + + if np.isnan(float(expected)): + np.testing.assert_array_equal(np.isnan(roundtrip_quad_array), np.isnan(quad_array)) + else: + np.testing.assert_array_equal(roundtrip_quad_array, quad_array, + err_msg=f"Round-trip failed for {input_val}") + + # Verify the string representation can be parsed back + # (This ensures the quad->string cast produces valid parseable strings) + scalar_str = str(quad_array[()]) + scalar_from_str = QuadPrecision(scalar_str) + + if np.isnan(float(quad_array[()])): + assert np.isnan(float(scalar_from_str)) + else: + assert scalar_from_str == quad_array[()], \ + f"Scalar round-trip failed: {scalar_str} -> {scalar_from_str} != {quad_array[()]}" + + +class TestStringParsingEdgeCases: + """Test edge cases in NumPyOS_ascii_strtoq string parsing""" + @pytest.mark.parametrize("input_str", ['3.14', '-2.71', '0.0', '1e10', '-1e-10']) + @pytest.mark.parametrize("byte_order", ['<', '>']) + def test_numeric_string_parsing(self, input_str, byte_order): + """Test that numeric strings are parsed correctly regardless of byte order""" + strtype = np.dtype(f'{byte_order}U20') + arr = np.array([input_str], dtype=strtype) + result = arr.astype(QuadPrecDType()) + + expected = np.array(input_str, dtype=np.float64) + + np.testing.assert_allclose(result, expected, + err_msg=f"Failed parsing '{input_str}' with byte order '{byte_order}'") + + + @pytest.mark.parametrize("input_str,expected_sign", [ + ("inf", 1), + ("+inf", 1), + ("-inf", -1), + ("Inf", 1), + ("+Inf", 1), + ("-Inf", -1), + ("INF", 1), + ("+INF", 1), + ("-INF", -1), + ("infinity", 1), + ("+infinity", 1), + ("-infinity", -1), + ("Infinity", 1), + ("+Infinity", 1), + ("-Infinity", -1), + ("INFINITY", 1), + ("+INFINITY", 1), + ("-INFINITY", -1), + ]) + def test_infinity_sign_preservation(self, input_str, expected_sign): + """Test that +/- signs are correctly applied to infinity values""" + arr = np.array([input_str], dtype='U20') + result = arr.astype(QuadPrecDType()) + + assert np.isinf(float(str(result[0]))), f"Expected inf for '{input_str}'" + + actual_sign = 1 if float(str(result[0])) > 0 else -1 + assert actual_sign == expected_sign, \ + f"Sign mismatch for '{input_str}': got {actual_sign}, expected {expected_sign}" + + @pytest.mark.parametrize("input_str", [ + "nan", "+nan", "-nan", # Note: NaN sign is typically ignored + "NaN", "+NaN", "-NaN", + "NAN", "+NAN", "-NAN", + "nan()", "nan(123)", "nan(abc_)", "NAN(XYZ)", + ]) + def test_nan_case_insensitive(self, input_str): + """Test case-insensitive NaN parsing with optional payloads""" + arr = np.array([input_str], dtype='U20') + result = arr.astype(QuadPrecDType()) + + assert np.isnan(float(str(result[0]))), f"Expected NaN for '{input_str}'" + + @pytest.mark.parametrize("input_str,expected_val", [ + ("3.14", 3.14), + ("+3.14", 3.14), + ("-3.14", -3.14), + ("0.0", 0.0), + ("+0.0", 0.0), + ("-0.0", -0.0), + ("1e10", 1e10), + ("+1e10", 1e10), + ("-1e10", -1e10), + ("1.23e-45", 1.23e-45), + ("+1.23e-45", 1.23e-45), + ("-1.23e-45", -1.23e-45), + ]) + def test_numeric_sign_handling(self, input_str, expected_val): + """Test that +/- signs are correctly handled for numeric values""" + arr = np.array([input_str], dtype='U20') + result = arr.astype(QuadPrecDType()) + + result_val = float(str(result[0])) + + # For zero, check sign separately + if expected_val == 0.0: + assert result_val == 0.0 + if input_str.startswith('-'): + assert np.signbit(result_val), f"Expected negative zero for '{input_str}'" + else: + assert not np.signbit(result_val), f"Expected positive zero for '{input_str}'" + else: + np.testing.assert_allclose(result_val, expected_val, rtol=1e-10) + + @pytest.mark.parametrize("input_str", [ + " 3.14 ", + "\t3.14\t", + "\n3.14\n", + "\r3.14\r", + " \t\n\r 3.14 \t\n\r ", + " inf ", + "\t-inf\t", + " nan ", + ]) + def test_whitespace_handling(self, input_str): + """Test that leading/trailing whitespace is handled correctly""" + arr = np.array([input_str], dtype='U20') + result = arr.astype(QuadPrecDType()) + + # Should not raise an error + result_str = str(result[0]) + assert result_str # Should have a value + + @pytest.mark.parametrize("invalid_str", [ + "abc", # Non-numeric + "3.14.15", # Multiple decimals + "1.23e", # Incomplete scientific notation + "e10", # Scientific notation without base + "3.14abc", # Trailing non-numeric + "++3.14", # Double sign + "--3.14", # Double sign + "+-3.14", # Mixed signs + "in", # Incomplete inf + "na", # Incomplete nan + "infinit", # Incomplete infinity + ]) + def test_invalid_strings_raise_error(self, invalid_str): + """Test that invalid strings raise ValueError""" + arr = np.array([invalid_str], dtype='U20') + + with pytest.raises(ValueError): + arr.astype(QuadPrecDType()) + + @pytest.mark.parametrize("input_str", [ + "3.14ñ", # Trailing non-ASCII + "ñ3.14", # Leading non-ASCII + "3.1€4", # Mid non-ASCII + "π", # Greek pi + ]) + def test_non_ascii_raises_error(self, input_str): + """Test that non-ASCII characters raise ValueError""" + arr = np.array([input_str], dtype='U20') + + with pytest.raises(ValueError): + arr.astype(QuadPrecDType()) + + def test_numpy_longdouble_compatibility(self): + """Test that our parsing matches NumPy's longdouble for common cases""" + test_cases = [ + "inf", "INF", "Inf", "-inf", "+infinity", + "nan", "NAN", "NaN", + "3.14", "-2.718", "1e10", "-1.23e-45", + ] + + for test_str in test_cases: + arr = np.array([test_str], dtype='U20') + + # NumPy's built-in + np_result = arr.astype(np.longdouble) + + # Our QuadPrecision + quad_result = arr.astype(QuadPrecDType()) + + np_val = np_result[0] + quad_val = float(str(quad_result[0])) + + if np.isnan(np_val): + assert np.isnan(quad_val), f"NumPy gives NaN but QuadPrec doesn't for '{test_str}'" + elif np.isinf(np_val): + assert np.isinf(quad_val), f"NumPy gives inf but QuadPrec doesn't for '{test_str}'" + assert np.sign(np_val) == np.sign(quad_val), \ + f"Inf sign mismatch for '{test_str}': NumPy={np.sign(np_val)}, Quad={np.sign(quad_val)}" + else: + np.testing.assert_allclose(quad_val, np_val, rtol=1e-10) + + def test_locale_independence(self): + """Test that parsing always uses '.' for decimal, not locale-specific separator""" + # This should parse correctly (period as decimal) + arr = np.array(["3.14"], dtype='U20') + result = arr.astype(QuadPrecDType()) + assert float(str(result[0])) == 3.14 + + # In some locales ',' is decimal separator, but we should always use '.' + # So "3,14" should either error or parse as "3" (stopping at comma) + # Since require_full_parse validation happens in casts.cpp, and we check + # for trailing content, this should raise ValueError + arr_comma = np.array(["3,14"], dtype='U20') + with pytest.raises(ValueError): + arr_comma.astype(QuadPrecDType()) + + @pytest.mark.parametrize("input_str,description", [ + (" 1.23 ", "space - leading and trailing"), + ("\t1.23\t", "tab - leading and trailing"), + ("\n1.23\n", "newline - leading and trailing"), + ("\r1.23\r", "carriage return - leading and trailing"), + ("\v1.23\v", "vertical tab - leading and trailing"), + ("\f1.23\f", "form feed - leading and trailing"), + (" \t\n\r\v\f1.23 \t\n\r\v\f", "all 6 whitespace chars - mixed"), + ("\t\t\t3.14\t\t\t", "multiple tabs"), + (" inf ", "infinity with spaces"), + ("\t\t-inf\t\t", "negative infinity with tabs"), + ("\n\nnan\n\n", "nan with newlines"), + ("\r\r-nan\r\r", "negative nan with carriage returns"), + ("\v\v1e10\v\v", "scientific notation with vertical tabs"), + ("\f\f-1.23e-45\f\f", "negative scientific with form feeds"), + ]) + def test_all_six_whitespace_characters(self, input_str, description): + """Test all 6 ASCII whitespace characters (space, tab, newline, carriage return, vertical tab, form feed) + + This tests the ascii_isspace() helper function in casts.cpp which matches + CPython's Py_ISSPACE and NumPy's NumPyOS_ascii_isspace behavior. + The 6 characters are: 0x09(\t), 0x0A(\n), 0x0B(\v), 0x0C(\f), 0x0D(\r), 0x20(space) + """ + arr = np.array([input_str], dtype='U50') + result = arr.astype(QuadPrecDType()) + + # Should successfully parse without errors + result_val = str(result[0]) + assert result_val, f"Failed to parse with {description}" + + # Verify the value is correct (strip whitespace and compare) + stripped = input_str.strip(' \t\n\r\v\f') + expected_arr = np.array([stripped], dtype='U50') + expected = expected_arr.astype(QuadPrecDType()) + + if np.isnan(float(str(expected[0]))): + assert np.isnan(float(str(result[0]))), f"NaN parsing failed for {description}" + elif np.isinf(float(str(expected[0]))): + assert np.isinf(float(str(result[0]))), f"Inf parsing failed for {description}" + assert np.sign(float(str(expected[0]))) == np.sign(float(str(result[0]))), \ + f"Inf sign mismatch for {description}" + else: + assert result[0] == expected[0], f"Value mismatch for {description}" + + @pytest.mark.parametrize("invalid_str,description", [ + ("1.23 abc", "trailing non-whitespace after number"), + (" 1.23xyz ", "trailing garbage with surrounding whitespace"), + ("abc 123", "leading garbage before number"), + ("1.23\x01", "control char (SOH) after number"), + ("1.23 a", "letter after multiple spaces"), + ("\t1.23\tabc\t", "tabs with garbage in middle"), + ]) + def test_whitespace_with_invalid_trailing_content(self, invalid_str, description): + """Test that strings with invalid trailing content are rejected even with whitespace + + This ensures the trailing whitespace check in casts.cpp properly validates + that only whitespace follows the parsed number, not other characters. + """ + arr = np.array([invalid_str], dtype='U50') + + with pytest.raises(ValueError, match="could not convert string to QuadPrecision"): + arr.astype(QuadPrecDType()) + + def test_empty_string_and_whitespace_only(self): + """Test that empty strings and whitespace-only strings raise errors""" + test_cases = [ + "", # Empty string + " ", # Single space + " ", # Multiple spaces + "\t", # Single tab + "\n", # Single newline + "\r", # Single carriage return + "\v", # Single vertical tab + "\f", # Single form feed + " \t\n\r\v\f", # All whitespace characters + " \t\t\n\n ", # Mixed whitespace + ] + + for test_str in test_cases: + arr = np.array([test_str], dtype='U20') + with pytest.raises(ValueError, match="could not convert string to QuadPrecision"): + arr.astype(QuadPrecDType()) + + @pytest.mark.parametrize("boundary_str,description", [ + ("1e4932", "near max exponent for quad precision"), + ("1e-4932", "near min exponent for quad precision"), + ("1.189731495357231765085759326628007016196477" + "e4932", "very large number"), + ("3.362103143112093506262677817321752602596e-4932", "very small number"), + ("-1.189731495357231765085759326628007016196477" + "e4932", "very large negative"), + ("-3.362103143112093506262677817321752602596e-4932", "very small negative"), + ]) + def test_extreme_exponent_values(self, boundary_str, description): + """Test parsing of numbers with extreme exponents near quad precision limits + + IEEE 754 binary128 has exponent range of approximately ±4932 + """ + arr = np.array([boundary_str], dtype='U100') + result = arr.astype(QuadPrecDType()) + + # Should parse successfully (may result in inf for overflow cases) + result_str = str(result[0]) + assert result_str, f"Failed to parse {description}" + + @pytest.mark.parametrize("precision_str", [ + "3.141592653589793238462643383279502884197", # 36 digits (quad precision) + "2.718281828459045235360287471352662497757", # e to 36 digits + "1.414213562373095048801688724209698078569", # sqrt(2) to 36 digits + "-1.732050807568877293527446341505872366942", # -sqrt(3) to 36 digits + ]) + def test_full_precision_parsing(self, precision_str): + """Test that strings with full quad precision (36 decimal digits) parse correctly + + This ensures the full precision is preserved during string -> quad conversion + """ + arr = np.array([precision_str], dtype='U50') + result = arr.astype(QuadPrecDType()) + + # Convert back to string and verify roundtrip preserves precision + back_to_str = result.astype('U50') + roundtrip = back_to_str.astype(QuadPrecDType()) + + # Roundtrip should preserve the value + assert result[0] == roundtrip[0], \ + f"Precision lost in roundtrip for {precision_str}" + def test_basic_equality(): assert QuadPrecision("12") == QuadPrecision(