From 0f223d3feda044f91f86cab8c537644d12dfc0d1 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Thu, 13 Nov 2025 09:51:31 +0530 Subject: [PATCH 01/12] small refactor --- quaddtype/numpy_quaddtype/src/casts.cpp | 1 + quaddtype/numpy_quaddtype/src/utilities.h | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index c13ea221..6af91d4e 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -22,6 +22,7 @@ extern "C" { #include "dtype.h" #define NUM_CASTS 34 // 16 to_casts + 16 from_casts + 1 quad_to_quad + 1 void_to_quad +#define QUAD_STR_WIDTH 50 // 42 is enough for scientific notation float128, just keeping some buffer static NPY_CASTING quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), diff --git a/quaddtype/numpy_quaddtype/src/utilities.h b/quaddtype/numpy_quaddtype/src/utilities.h index 1925046f..8362bb68 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.h +++ b/quaddtype/numpy_quaddtype/src/utilities.h @@ -1,6 +1,10 @@ #ifndef QUAD_UTILITIES_H #define QUAD_UTILITIES_H +#ifdef __cplusplus +extern "C" { +#endif + #include "quad_common.h" #include #include @@ -8,4 +12,8 @@ int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_value, char **endptr, bool require_full_parse); +#ifdef __cplusplus +} +#endif + #endif \ No newline at end of file From 77def74bbb757bf28a1d27496acbb54e1b5a065d Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Thu, 13 Nov 2025 08:56:35 +0000 Subject: [PATCH 02/12] added string_to_quad and tests --- quaddtype/numpy_quaddtype/src/casts.cpp | 167 +++++++++++++++++++++++- quaddtype/tests/test_quaddtype.py | 45 +++++-- 2 files changed, 200 insertions(+), 12 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 6af91d4e..63076581 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -20,8 +20,9 @@ extern "C" { #include "scalar.h" #include "casts.h" #include "dtype.h" +#include "utilities.h" -#define NUM_CASTS 34 // 16 to_casts + 16 from_casts + 1 quad_to_quad + 1 void_to_quad +#define NUM_CASTS 35 // 16 to_casts + 16 from_casts + 1 quad_to_quad + 1 void_to_quad + 1 unicode_to_quad #define QUAD_STR_WIDTH 50 // 42 is enough for scientific notation float128, just keeping some buffer static NPY_CASTING @@ -172,6 +173,151 @@ void_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], return -1; } +// Unicode/String to QuadDType casting +static NPY_CASTING +unicode_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); + if (loop_descrs[1] == nullptr) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + return NPY_UNSAFE_CASTING; +} + +static int +unicode_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + PyArray_Descr *const *descrs = context->descriptors; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1]; + QuadBackendType backend = descr_out->backend; + + // Unicode strings are stored as UCS4 (4 bytes per character) + npy_intp unicode_size_chars = descrs[0]->elsize / 4; + + while (N--) { + // Temporary buffer to convert UCS4 to null-terminated char string + char temp_str[QUAD_STR_WIDTH + 1]; + npy_intp copy_len = unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH; + // Convert UCS4 characters to ASCII/char + Py_UCS4 *ucs4_str = (Py_UCS4 *)in_ptr; + npy_intp i; + for (i = 0; i < copy_len; i++) { + Py_UCS4 c = ucs4_str[i]; + + // reject non-ASCII characters + if (c > 127) { + PyErr_Format(PyExc_ValueError, "Cannot cast non-ASCII character '%c' to QuadPrecision", c); + return -1; + } + + temp_str[i] = (char)c; + } + temp_str[i] = '\0'; + + quad_value out_val; + char *endptr; + int err = cstring_to_quad(temp_str, backend, &out_val, &endptr, true); + if (err < 0) { + PyErr_Format(PyExc_ValueError, + "could not convert string to QuadPrecision: np.str_('%s')", temp_str); + return -1; + } + + if (backend == BACKEND_SLEEF) { + memcpy(out_ptr, &out_val.sleef_value, sizeof(Sleef_quad)); + } + else { + memcpy(out_ptr, &out_val.longdouble_value, sizeof(long double)); + } + + in_ptr += in_stride; + out_ptr += out_stride; + } + + return 0; +} + +static int +unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + PyArray_Descr *const *descrs = context->descriptors; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1]; + QuadBackendType backend = descr_out->backend; + + // Unicode strings are stored as UCS4 (4 bytes per character) + npy_intp unicode_size_chars = descrs[0]->elsize / 4; + + while (N--) { + // Temporary buffer to convert UCS4 to null-terminated char string + char temp_str[QUAD_STR_WIDTH + 1]; + npy_intp copy_len = unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH; + // Convert UCS4 characters to ASCII/char + Py_UCS4 *ucs4_str = (Py_UCS4 *)in_ptr; + npy_intp i; + for (i = 0; i < copy_len; i++) { + Py_UCS4 c = ucs4_str[i]; + + // reject non-ASCII characters + if (c > 127) { + PyErr_Format(PyExc_ValueError, "Cannot cast non-ASCII character '%c' to QuadPrecision", c); + return -1; + } + + temp_str[i] = (char)c; + } + temp_str[i] = '\0'; + + quad_value out_val; + char *endptr; + int err = cstring_to_quad(temp_str, backend, &out_val, &endptr, true); + if (err < 0) { + PyErr_Format(PyExc_ValueError, + "could not convert string to QuadPrecision: np.str_('%s')", temp_str); + return -1; + } + + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)out_ptr = out_val.sleef_value; + } + else { + *(long double *)out_ptr = out_val.longdouble_value; + } + + in_ptr += in_stride; + out_ptr += out_stride; + } + + return 0; +} + // Tag dispatching to ensure npy_bool/npy_ubyte and npy_half/npy_ushort do not alias in templates // see e.g. https://stackoverflow.com/q/32522279 @@ -880,6 +1026,25 @@ init_casts_internal(void) add_cast_from(&PyArray_DoubleDType); add_cast_from(&PyArray_LongDoubleDType); + // Unicode/String to QuadPrecision cast + PyArray_DTypeMeta **unicode_dtypes = new PyArray_DTypeMeta *[2]{&PyArray_UnicodeDType, &QuadPrecDType}; + PyType_Slot *unicode_slots = new PyType_Slot[4]{ + {NPY_METH_resolve_descriptors, (void *)&unicode_to_quad_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&unicode_to_quad_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&unicode_to_quad_strided_loop_unaligned}, + {0, nullptr}}; + + PyArrayMethod_Spec *unicode_spec = new PyArrayMethod_Spec{ + .name = "cast_Unicode_to_QuadPrec", + .nin = 1, + .nout = 1, + .casting = NPY_UNSAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = unicode_dtypes, + .slots = unicode_slots, + }; + add_spec(unicode_spec); + specs[spec_count] = nullptr; return specs; } diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 5a434a2f..26ef8569 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -540,17 +540,40 @@ def test_supported_astype(dtype): assert back == orig -@pytest.mark.parametrize("dtype", ["S10", "U10", "T", "V10", "datetime64[ms]", "timedelta64[ms]"]) -def test_unsupported_astype(dtype): - if dtype == "V10": - with pytest.raises(TypeError, match="cast"): - np.ones((3, 3), dtype="V10").astype(QuadPrecDType, casting="unsafe") - else: - with pytest.raises(TypeError, match="cast"): - np.array(1, dtype=dtype).astype(QuadPrecDType, casting="unsafe") - - with pytest.raises(TypeError, match="cast"): - np.array(QuadPrecision(1)).astype(dtype, casting="unsafe") +# @pytest.mark.parametrize("dtype", ["S10", "U10", "T", "V10", "datetime64[ms]", "timedelta64[ms]"]) +# def test_unsupported_astype(dtype): +# if dtype == "V10": +# with pytest.raises(TypeError, match="cast"): +# np.ones((3, 3), dtype="V10").astype(QuadPrecDType, casting="unsafe") +# else: +# with pytest.raises(TypeError, match="cast"): +# np.array(1, dtype=dtype).astype(QuadPrecDType, casting="unsafe") + +# with pytest.raises(TypeError, match="cast"): +# np.array(QuadPrecision(1)).astype(dtype, casting="unsafe") + +class TestArrayCastStringBytes: + @pytest.mark.parametrize("strtype", [np.str_, str, np.bytes_]) + @pytest.mark.parametrize("input_val", [ + "3.141592653589793238462643383279502884197", + "2.71828182845904523536028747135266249775", + "1e100", + "1e-100", + "0.0", + "-0.0", + "inf", + "-inf", + "nan", + "-nan", + ]) + def test_cast_string_to_quad(self, input_val, strtype): + str_array = np.array(input_val, dtype=strtype) + quad_array = str_array.astype(QuadPrecDType()) + expected = np.array(input_val, dtype=QuadPrecDType()) + if np.isnan(float(expected)): + np.testing.assert_array_equal(np.isnan(quad_array), np.isnan(expected)) + else: + np.testing.assert_array_equal(quad_array, expected) def test_basic_equality(): From 84c4dfde98513487d9ccd06bbcfa2d380c2d95e8 Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Thu, 13 Nov 2025 11:15:51 +0000 Subject: [PATCH 03/12] adapt to size --- quaddtype/numpy_quaddtype/src/casts.cpp | 277 +++++++++++++++++++- quaddtype/numpy_quaddtype/src/quad_common.h | 7 + quaddtype/numpy_quaddtype/src/scalar.c | 6 - quaddtype/tests/test_quaddtype.py | 32 ++- 4 files changed, 307 insertions(+), 15 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 63076581..ae57d96f 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -13,6 +13,7 @@ extern "C" { #include "numpy/ndarraytypes.h" #include "numpy/dtype_api.h" } +#include #include "sleef.h" #include "sleefquad.h" @@ -21,8 +22,11 @@ extern "C" { #include "casts.h" #include "dtype.h" #include "utilities.h" +#include "lock.h" +#include "dragon4.h" +#include "ops.hpp" -#define NUM_CASTS 35 // 16 to_casts + 16 from_casts + 1 quad_to_quad + 1 void_to_quad + 1 unicode_to_quad +#define NUM_CASTS 36 // 17 to_casts + 17 from_casts + 1 quad_to_quad + 1 void_to_quad #define QUAD_STR_WIDTH 50 // 42 is enough for scientific notation float128, just keeping some buffer static NPY_CASTING @@ -318,6 +322,246 @@ unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const return 0; } +// QuadDType to unicode/string +static NPY_CASTING +quad_to_unicode_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + PyArray_Descr *unicode_descr = PyArray_DescrNewFromType(NPY_UNICODE); + if (unicode_descr == nullptr) { + return (NPY_CASTING)-1; + } + + unicode_descr->elsize = QUAD_STR_WIDTH * 4; // bytes + loop_descrs[1] = unicode_descr; + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + *view_offset = 0; + return NPY_UNSAFE_CASTING; +} + +static int +quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + PyArray_Descr *const *descrs = context->descriptors; + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)descrs[0]; + QuadBackendType backend = descr_in->backend; + + npy_intp unicode_size_chars = descrs[1]->elsize / 4; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + while (N--) + { + quad_value in_val; + if(backend == BACKEND_SLEEF) { + memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad)); + } else { + memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double)); + } + + // Convert to Sleef_quad for Dragon4 + Sleef_quad sleef_val; + if(backend == BACKEND_SLEEF) { + sleef_val = in_val.sleef_value; + } else { + sleef_val = Sleef_cast_from_doubleq1(in_val.longdouble_value); + } + + // If positional format fits, use it; otherwise use scientific notation + PyObject *py_str; + PyObject *positional_str = Dragon4_Positional_QuadDType(&sleef_val, + DigitMode_Unique, + CutoffMode_TotalLength, + SLEEF_QUAD_DECIMAL_DIG, + 0, + 1, + TrimMode_LeaveOneZero, + 1, + 0); + + if (positional_str == NULL) { + return -1; + } + + const char *pos_str = PyUnicode_AsUTF8(positional_str); + if (pos_str == NULL) { + Py_DECREF(positional_str); + return -1; + } + + npy_intp pos_len = strlen(pos_str); + + + if (pos_len <= unicode_size_chars) { + py_str = positional_str; // Keep the positional string + } else { + Py_DECREF(positional_str); + // Use scientific notation with full precision + py_str = Dragon4_Scientific_QuadDType(&sleef_val, + DigitMode_Unique, + SLEEF_QUAD_DECIMAL_DIG, + 0, + 1, + TrimMode_LeaveOneZero, + 1, + 2); + if (py_str == NULL) { + return -1; + } + } + + const char *temp_str = PyUnicode_AsUTF8(py_str); + if (temp_str == NULL) { + Py_DECREF(py_str); + return -1; + } + + // Convert char string to UCS4 and store in output + Py_UCS4* out_ucs4 = (Py_UCS4 *)out_ptr; + npy_intp str_len = strlen(temp_str); + + for(npy_intp i = 0; i < unicode_size_chars; i++) + { + if(i < str_len) + { + out_ucs4[i] = (Py_UCS4)temp_str[i]; + } + else + { + out_ucs4[i] = 0; + } + } + + Py_DECREF(py_str); + + in_ptr += in_stride; + out_ptr += out_stride; + } + + return 0; +} + +static int +quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + PyArray_Descr *const *descrs = context->descriptors; + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)descrs[0]; + QuadBackendType backend = descr_in->backend; + + npy_intp unicode_size_chars = descrs[1]->elsize / 4; + + while (N--) + { + quad_value in_val; + if(backend == BACKEND_SLEEF) { + in_val.sleef_value = *(Sleef_quad *)in_ptr; + } else { + in_val.longdouble_value = *(long double *)in_ptr; + } + + // Convert to Sleef_quad for Dragon4 + Sleef_quad sleef_val; + if(backend == BACKEND_SLEEF) { + sleef_val = in_val.sleef_value; + } else { + sleef_val = Sleef_cast_from_doubleq1(in_val.longdouble_value); + } + + + PyObject *py_str; + PyObject *positional_str = Dragon4_Positional_QuadDType(&sleef_val, + DigitMode_Unique, + CutoffMode_TotalLength, + SLEEF_QUAD_DECIMAL_DIG, + 0, + 1, + TrimMode_LeaveOneZero, + 1, + 0); + + if (positional_str == NULL) { + return -1; + } + + const char *pos_str = PyUnicode_AsUTF8(positional_str); + if (pos_str == NULL) { + Py_DECREF(positional_str); + return -1; + } + + npy_intp pos_len = strlen(pos_str); + + if (pos_len <= unicode_size_chars) { + py_str = positional_str; + } else { + Py_DECREF(positional_str); + // Use scientific notation with full precision + py_str = Dragon4_Scientific_QuadDType(&sleef_val, + DigitMode_Unique, + SLEEF_QUAD_DECIMAL_DIG, + 0, + 1, + TrimMode_LeaveOneZero, + 1, + 2); + if (py_str == NULL) { + return -1; + } + } + + const char *temp_str = PyUnicode_AsUTF8(py_str); + if (temp_str == NULL) { + Py_DECREF(py_str); + return -1; + } + + // Convert char string to UCS4 and store in output + Py_UCS4* out_ucs4 = (Py_UCS4 *)out_ptr; + npy_intp str_len = strlen(temp_str); + + for(npy_intp i = 0; i < unicode_size_chars; i++) + { + if(i < str_len) + { + out_ucs4[i] = (Py_UCS4)temp_str[i]; + } + else + { + out_ucs4[i] = 0; + } + } + + Py_DECREF(py_str); + + in_ptr += in_stride; + out_ptr += out_stride; + } + + return 0; +} // Tag dispatching to ensure npy_bool/npy_ubyte and npy_half/npy_ushort do not alias in templates // see e.g. https://stackoverflow.com/q/32522279 @@ -1027,23 +1271,42 @@ init_casts_internal(void) add_cast_from(&PyArray_LongDoubleDType); // Unicode/String to QuadPrecision cast - PyArray_DTypeMeta **unicode_dtypes = new PyArray_DTypeMeta *[2]{&PyArray_UnicodeDType, &QuadPrecDType}; - PyType_Slot *unicode_slots = new PyType_Slot[4]{ + PyArray_DTypeMeta **unicode_to_quad_dtypes = new PyArray_DTypeMeta *[2]{&PyArray_UnicodeDType, &QuadPrecDType}; + PyType_Slot *unicode_to_quad_slots = new PyType_Slot[4]{ {NPY_METH_resolve_descriptors, (void *)&unicode_to_quad_resolve_descriptors}, {NPY_METH_strided_loop, (void *)&unicode_to_quad_strided_loop_aligned}, {NPY_METH_unaligned_strided_loop, (void *)&unicode_to_quad_strided_loop_unaligned}, {0, nullptr}}; - PyArrayMethod_Spec *unicode_spec = new PyArrayMethod_Spec{ + PyArrayMethod_Spec *unicode_to_quad_spec = new PyArrayMethod_Spec{ .name = "cast_Unicode_to_QuadPrec", .nin = 1, .nout = 1, .casting = NPY_UNSAFE_CASTING, .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = unicode_dtypes, - .slots = unicode_slots, + .dtypes = unicode_to_quad_dtypes, + .slots = unicode_to_quad_slots, + }; + add_spec(unicode_to_quad_spec); + + // QuadPrecision to Unicode + PyArray_DTypeMeta **quad_to_unicode_dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, &PyArray_UnicodeDType}; + PyType_Slot *quad_to_unicode_slots = new PyType_Slot[4]{ + {NPY_METH_resolve_descriptors, (void *)&quad_to_unicode_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&quad_to_unicode_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&quad_to_unicode_loop_unaligned}, + {0, nullptr}}; + + PyArrayMethod_Spec *quad_to_unicode_spec = new PyArrayMethod_Spec{ + .name = "cast_QuadPrec_to_Unicode", + .nin = 1, + .nout = 1, + .casting = NPY_UNSAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = quad_to_unicode_dtypes, + .slots = quad_to_unicode_slots, }; - add_spec(unicode_spec); + add_spec(quad_to_unicode_spec); specs[spec_count] = nullptr; return specs; diff --git a/quaddtype/numpy_quaddtype/src/quad_common.h b/quaddtype/numpy_quaddtype/src/quad_common.h index e17ee0dd..566269a7 100644 --- a/quaddtype/numpy_quaddtype/src/quad_common.h +++ b/quaddtype/numpy_quaddtype/src/quad_common.h @@ -19,6 +19,13 @@ typedef union { long double longdouble_value; } quad_value; + +// For IEEE 754 binary128 (quad precision), we need 36 decimal digits +// to guarantee round-trip conversion (string -> parse -> equals original value) +// Formula: ceil(1 + MANT_DIG * log10(2)) = ceil(1 + 113 * 0.30103) = 36 +// src: https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format +#define SLEEF_QUAD_DECIMAL_DIG 36 + #ifdef __cplusplus } #endif diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index 5b51d3bc..4adfb434 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -18,12 +18,6 @@ #include "lock.h" #include "utilities.h" -// For IEEE 754 binary128 (quad precision), we need 36 decimal digits -// to guarantee round-trip conversion (string -> parse -> equals original value) -// Formula: ceil(1 + MANT_DIG * log10(2)) = ceil(1 + 113 * 0.30103) = 36 -// src: https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format -#define SLEEF_QUAD_DECIMAL_DIG 36 - QuadPrecisionObject * QuadPrecision_raw_new(QuadBackendType backend) diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 26ef8569..81d2f1a2 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -553,7 +553,7 @@ def test_supported_astype(dtype): # np.array(QuadPrecision(1)).astype(dtype, casting="unsafe") class TestArrayCastStringBytes: - @pytest.mark.parametrize("strtype", [np.str_, str, np.bytes_]) + @pytest.mark.parametrize("strtype", [np.str_, str]) @pytest.mark.parametrize("input_val", [ "3.141592653589793238462643383279502884197", "2.71828182845904523536028747135266249775", @@ -566,14 +566,42 @@ class TestArrayCastStringBytes: "nan", "-nan", ]) - def test_cast_string_to_quad(self, input_val, strtype): + def test_cast_string_to_quad_roundtrip(self, input_val, strtype): + # Test 1: String to Quad conversion str_array = np.array(input_val, dtype=strtype) quad_array = str_array.astype(QuadPrecDType()) expected = np.array(input_val, dtype=QuadPrecDType()) + + # Verify string to quad conversion if np.isnan(float(expected)): np.testing.assert_array_equal(np.isnan(quad_array), np.isnan(expected)) else: np.testing.assert_array_equal(quad_array, expected) + + # Test 2: Quad to String conversion + quad_to_string_array = quad_array.astype(strtype) + + # Test 3: Round-trip - String -> Quad -> String -> Quad should preserve value + roundtrip_quad_array = quad_to_string_array.astype(QuadPrecDType()) + + if np.isnan(float(expected)): + # For NaN, just verify both are NaN + np.testing.assert_array_equal(np.isnan(roundtrip_quad_array), np.isnan(quad_array)) + else: + # For non-NaN values, the round-trip should preserve the exact value + np.testing.assert_array_equal(roundtrip_quad_array, quad_array, + err_msg=f"Round-trip failed for {input_val}") + + # Test 4: Verify the string representation can be parsed back + # (This ensures the quad->string cast produces valid parseable strings) + scalar_str = str(quad_array[()]) + scalar_from_str = QuadPrecision(scalar_str) + + if np.isnan(float(quad_array[()])): + assert np.isnan(float(scalar_from_str)) + else: + assert scalar_from_str == quad_array[()], \ + f"Scalar round-trip failed: {scalar_str} -> {scalar_from_str} != {quad_array[()]}" def test_basic_equality(): From a38a651e3f98996cc814009a9923b2dfc581446c Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Thu, 13 Nov 2025 11:18:37 +0000 Subject: [PATCH 04/12] roundtrip --- quaddtype/tests/test_quaddtype.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 81d2f1a2..8ed0faed 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -567,32 +567,27 @@ class TestArrayCastStringBytes: "-nan", ]) def test_cast_string_to_quad_roundtrip(self, input_val, strtype): - # Test 1: String to Quad conversion str_array = np.array(input_val, dtype=strtype) quad_array = str_array.astype(QuadPrecDType()) expected = np.array(input_val, dtype=QuadPrecDType()) - # Verify string to quad conversion if np.isnan(float(expected)): np.testing.assert_array_equal(np.isnan(quad_array), np.isnan(expected)) else: np.testing.assert_array_equal(quad_array, expected) - # Test 2: Quad to String conversion quad_to_string_array = quad_array.astype(strtype) - # Test 3: Round-trip - String -> Quad -> String -> Quad should preserve value + # Round-trip - String -> Quad -> String -> Quad should preserve value roundtrip_quad_array = quad_to_string_array.astype(QuadPrecDType()) if np.isnan(float(expected)): - # For NaN, just verify both are NaN np.testing.assert_array_equal(np.isnan(roundtrip_quad_array), np.isnan(quad_array)) else: - # For non-NaN values, the round-trip should preserve the exact value np.testing.assert_array_equal(roundtrip_quad_array, quad_array, err_msg=f"Round-trip failed for {input_val}") - # Test 4: Verify the string representation can be parsed back + # Verify the string representation can be parsed back # (This ensures the quad->string cast produces valid parseable strings) scalar_str = str(quad_array[()]) scalar_from_str = QuadPrecision(scalar_str) From 4cdef99b09958ed1c904b7d347f0a7eb34d95238 Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Thu, 13 Nov 2025 11:20:42 +0000 Subject: [PATCH 05/12] T and S10 TBA --- quaddtype/tests/test_quaddtype.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 8ed0faed..edf74bb8 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -540,17 +540,17 @@ def test_supported_astype(dtype): assert back == orig -# @pytest.mark.parametrize("dtype", ["S10", "U10", "T", "V10", "datetime64[ms]", "timedelta64[ms]"]) -# def test_unsupported_astype(dtype): -# if dtype == "V10": -# with pytest.raises(TypeError, match="cast"): -# np.ones((3, 3), dtype="V10").astype(QuadPrecDType, casting="unsafe") -# else: -# with pytest.raises(TypeError, match="cast"): -# np.array(1, dtype=dtype).astype(QuadPrecDType, casting="unsafe") - -# with pytest.raises(TypeError, match="cast"): -# np.array(QuadPrecision(1)).astype(dtype, casting="unsafe") +@pytest.mark.parametrize("dtype", ["V10", "datetime64[ms]", "timedelta64[ms]"]) +def test_unsupported_astype(dtype): + if dtype == "V10": + with pytest.raises(TypeError, match="cast"): + np.ones((3, 3), dtype="V10").astype(QuadPrecDType, casting="unsafe") + else: + with pytest.raises(TypeError, match="cast"): + np.array(1, dtype=dtype).astype(QuadPrecDType, casting="unsafe") + + with pytest.raises(TypeError, match="cast"): + np.array(QuadPrecision(1)).astype(dtype, casting="unsafe") class TestArrayCastStringBytes: @pytest.mark.parametrize("strtype", [np.str_, str]) From d1605c76dc9f2c0db5dffe313a7e0db189b5833b Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Thu, 13 Nov 2025 12:15:59 +0000 Subject: [PATCH 06/12] file reformat --- quaddtype/numpy_quaddtype/src/casts.cpp | 327 +++++++++++------------- 1 file changed, 156 insertions(+), 171 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index ae57d96f..c2050765 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -27,7 +27,7 @@ extern "C" { #include "ops.hpp" #define NUM_CASTS 36 // 17 to_casts + 17 from_casts + 1 quad_to_quad + 1 void_to_quad -#define QUAD_STR_WIDTH 50 // 42 is enough for scientific notation float128, just keeping some buffer +#define QUAD_STR_WIDTH 50 // 42 is enough for scientific notation float128, just keeping some buffer static NPY_CASTING quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), @@ -157,21 +157,19 @@ quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const da return 0; } - static NPY_CASTING void_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], - PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], - npy_intp *view_offset) + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) { - PyErr_SetString(PyExc_TypeError, - "Void to QuadPrecision cast is not implemented"); + PyErr_SetString(PyExc_TypeError, "Void to QuadPrecision cast is not implemented"); return (NPY_CASTING)-1; } static int void_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) { PyErr_SetString(PyExc_RuntimeError, "void_to_quad_strided_loop should not be called"); return -1; @@ -180,8 +178,8 @@ void_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], // Unicode/String to QuadDType casting static NPY_CASTING unicode_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], - PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], - npy_intp *view_offset) + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) { Py_INCREF(given_descrs[0]); loop_descrs[0] = given_descrs[0]; @@ -202,8 +200,8 @@ unicode_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMet static int unicode_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) { npy_intp N = dimensions[0]; char *in_ptr = data[0]; @@ -214,26 +212,28 @@ unicode_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *con PyArray_Descr *const *descrs = context->descriptors; QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1]; QuadBackendType backend = descr_out->backend; - + // Unicode strings are stored as UCS4 (4 bytes per character) npy_intp unicode_size_chars = descrs[0]->elsize / 4; while (N--) { - // Temporary buffer to convert UCS4 to null-terminated char string + // Temporary buffer to convert UCS4 to null-terminated char string char temp_str[QUAD_STR_WIDTH + 1]; - npy_intp copy_len = unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH; + npy_intp copy_len = + unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH; // Convert UCS4 characters to ASCII/char Py_UCS4 *ucs4_str = (Py_UCS4 *)in_ptr; npy_intp i; for (i = 0; i < copy_len; i++) { Py_UCS4 c = ucs4_str[i]; - + // reject non-ASCII characters if (c > 127) { - PyErr_Format(PyExc_ValueError, "Cannot cast non-ASCII character '%c' to QuadPrecision", c); + PyErr_Format(PyExc_ValueError, + "Cannot cast non-ASCII character '%c' to QuadPrecision", c); return -1; } - + temp_str[i] = (char)c; } temp_str[i] = '\0'; @@ -242,10 +242,10 @@ unicode_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *con char *endptr; int err = cstring_to_quad(temp_str, backend, &out_val, &endptr, true); if (err < 0) { - PyErr_Format(PyExc_ValueError, - "could not convert string to QuadPrecision: np.str_('%s')", temp_str); - return -1; - } + PyErr_Format(PyExc_ValueError, + "could not convert string to QuadPrecision: np.str_('%s')", temp_str); + return -1; + } if (backend == BACKEND_SLEEF) { memcpy(out_ptr, &out_val.sleef_value, sizeof(Sleef_quad)); @@ -257,14 +257,14 @@ unicode_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *con in_ptr += in_stride; out_ptr += out_stride; } - + return 0; } static int unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) { npy_intp N = dimensions[0]; char *in_ptr = data[0]; @@ -275,26 +275,28 @@ unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const PyArray_Descr *const *descrs = context->descriptors; QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1]; QuadBackendType backend = descr_out->backend; - + // Unicode strings are stored as UCS4 (4 bytes per character) npy_intp unicode_size_chars = descrs[0]->elsize / 4; while (N--) { - // Temporary buffer to convert UCS4 to null-terminated char string + // Temporary buffer to convert UCS4 to null-terminated char string char temp_str[QUAD_STR_WIDTH + 1]; - npy_intp copy_len = unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH; + npy_intp copy_len = + unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH; // Convert UCS4 characters to ASCII/char Py_UCS4 *ucs4_str = (Py_UCS4 *)in_ptr; npy_intp i; for (i = 0; i < copy_len; i++) { Py_UCS4 c = ucs4_str[i]; - + // reject non-ASCII characters if (c > 127) { - PyErr_Format(PyExc_ValueError, "Cannot cast non-ASCII character '%c' to QuadPrecision", c); + PyErr_Format(PyExc_ValueError, + "Cannot cast non-ASCII character '%c' to QuadPrecision", c); return -1; } - + temp_str[i] = (char)c; } temp_str[i] = '\0'; @@ -303,10 +305,10 @@ unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const char *endptr; int err = cstring_to_quad(temp_str, backend, &out_val, &endptr, true); if (err < 0) { - PyErr_Format(PyExc_ValueError, - "could not convert string to QuadPrecision: np.str_('%s')", temp_str); - return -1; - } + PyErr_Format(PyExc_ValueError, + "could not convert string to QuadPrecision: np.str_('%s')", temp_str); + return -1; + } if (backend == BACKEND_SLEEF) { *(Sleef_quad *)out_ptr = out_val.sleef_value; @@ -318,15 +320,15 @@ unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const in_ptr += in_stride; out_ptr += out_stride; } - + return 0; } // QuadDType to unicode/string static NPY_CASTING quad_to_unicode_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], - PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], - npy_intp *view_offset) + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) { Py_INCREF(given_descrs[0]); loop_descrs[0] = given_descrs[0]; @@ -336,8 +338,8 @@ quad_to_unicode_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMet if (unicode_descr == nullptr) { return (NPY_CASTING)-1; } - - unicode_descr->elsize = QUAD_STR_WIDTH * 4; // bytes + + unicode_descr->elsize = QUAD_STR_WIDTH * 4; // bytes loop_descrs[1] = unicode_descr; } else { @@ -350,8 +352,9 @@ quad_to_unicode_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMet } static int -quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], void *NPY_UNUSED(auxdata)) +quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) { npy_intp N = dimensions[0]; char *in_ptr = data[0]; @@ -365,85 +368,72 @@ quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[ npy_intp unicode_size_chars = descrs[1]->elsize / 4; size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); - - while (N--) - { + + while (N--) { quad_value in_val; - if(backend == BACKEND_SLEEF) { - memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad)); - } else { - memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double)); + if (backend == BACKEND_SLEEF) { + memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad)); + } + else { + memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double)); } // Convert to Sleef_quad for Dragon4 Sleef_quad sleef_val; - if(backend == BACKEND_SLEEF) { - sleef_val = in_val.sleef_value; - } else { - sleef_val = Sleef_cast_from_doubleq1(in_val.longdouble_value); + if (backend == BACKEND_SLEEF) { + sleef_val = in_val.sleef_value; + } + else { + sleef_val = Sleef_cast_from_doubleq1(in_val.longdouble_value); } // If positional format fits, use it; otherwise use scientific notation - PyObject *py_str; - PyObject *positional_str = Dragon4_Positional_QuadDType(&sleef_val, - DigitMode_Unique, - CutoffMode_TotalLength, - SLEEF_QUAD_DECIMAL_DIG, - 0, - 1, - TrimMode_LeaveOneZero, - 1, - 0); - + PyObject *py_str; + PyObject *positional_str = Dragon4_Positional_QuadDType( + &sleef_val, DigitMode_Unique, CutoffMode_TotalLength, SLEEF_QUAD_DECIMAL_DIG, 0, 1, + TrimMode_LeaveOneZero, 1, 0); + if (positional_str == NULL) { return -1; } - + const char *pos_str = PyUnicode_AsUTF8(positional_str); if (pos_str == NULL) { Py_DECREF(positional_str); return -1; } - + npy_intp pos_len = strlen(pos_str); - - + if (pos_len <= unicode_size_chars) { py_str = positional_str; // Keep the positional string - } else { + } + else { Py_DECREF(positional_str); // Use scientific notation with full precision - py_str = Dragon4_Scientific_QuadDType(&sleef_val, - DigitMode_Unique, - SLEEF_QUAD_DECIMAL_DIG, - 0, - 1, - TrimMode_LeaveOneZero, - 1, - 2); + py_str = Dragon4_Scientific_QuadDType(&sleef_val, DigitMode_Unique, + SLEEF_QUAD_DECIMAL_DIG, 0, 1, + TrimMode_LeaveOneZero, 1, 2); if (py_str == NULL) { return -1; } } - + const char *temp_str = PyUnicode_AsUTF8(py_str); if (temp_str == NULL) { Py_DECREF(py_str); return -1; } - + // Convert char string to UCS4 and store in output - Py_UCS4* out_ucs4 = (Py_UCS4 *)out_ptr; + Py_UCS4 *out_ucs4 = (Py_UCS4 *)out_ptr; npy_intp str_len = strlen(temp_str); - - for(npy_intp i = 0; i < unicode_size_chars; i++) - { - if(i < str_len) - { + + for (npy_intp i = 0; i < unicode_size_chars; i++) { + if (i < str_len) { out_ucs4[i] = (Py_UCS4)temp_str[i]; } - else - { + else { out_ucs4[i] = 0; } } @@ -458,8 +448,9 @@ quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[ } static int -quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], void *NPY_UNUSED(auxdata)) +quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) { npy_intp N = dimensions[0]; char *in_ptr = data[0]; @@ -472,35 +463,29 @@ quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[], QuadBackendType backend = descr_in->backend; npy_intp unicode_size_chars = descrs[1]->elsize / 4; - - while (N--) - { + + while (N--) { quad_value in_val; - if(backend == BACKEND_SLEEF) { - in_val.sleef_value = *(Sleef_quad *)in_ptr; - } else { - in_val.longdouble_value = *(long double *)in_ptr; + if (backend == BACKEND_SLEEF) { + in_val.sleef_value = *(Sleef_quad *)in_ptr; + } + else { + in_val.longdouble_value = *(long double *)in_ptr; } // Convert to Sleef_quad for Dragon4 Sleef_quad sleef_val; - if(backend == BACKEND_SLEEF) { - sleef_val = in_val.sleef_value; - } else { - sleef_val = Sleef_cast_from_doubleq1(in_val.longdouble_value); + if (backend == BACKEND_SLEEF) { + sleef_val = in_val.sleef_value; + } + else { + sleef_val = Sleef_cast_from_doubleq1(in_val.longdouble_value); } - PyObject *py_str; - PyObject *positional_str = Dragon4_Positional_QuadDType(&sleef_val, - DigitMode_Unique, - CutoffMode_TotalLength, - SLEEF_QUAD_DECIMAL_DIG, - 0, - 1, - TrimMode_LeaveOneZero, - 1, - 0); + PyObject *positional_str = Dragon4_Positional_QuadDType( + &sleef_val, DigitMode_Unique, CutoffMode_TotalLength, SLEEF_QUAD_DECIMAL_DIG, 0, 1, + TrimMode_LeaveOneZero, 1, 0); if (positional_str == NULL) { return -1; @@ -511,22 +496,18 @@ quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[], Py_DECREF(positional_str); return -1; } - + npy_intp pos_len = strlen(pos_str); - + if (pos_len <= unicode_size_chars) { - py_str = positional_str; - } else { + py_str = positional_str; + } + else { Py_DECREF(positional_str); // Use scientific notation with full precision - py_str = Dragon4_Scientific_QuadDType(&sleef_val, - DigitMode_Unique, - SLEEF_QUAD_DECIMAL_DIG, - 0, - 1, - TrimMode_LeaveOneZero, - 1, - 2); + py_str = Dragon4_Scientific_QuadDType(&sleef_val, DigitMode_Unique, + SLEEF_QUAD_DECIMAL_DIG, 0, 1, + TrimMode_LeaveOneZero, 1, 2); if (py_str == NULL) { return -1; } @@ -537,19 +518,16 @@ quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[], Py_DECREF(py_str); return -1; } - + // Convert char string to UCS4 and store in output - Py_UCS4* out_ucs4 = (Py_UCS4 *)out_ptr; + Py_UCS4 *out_ucs4 = (Py_UCS4 *)out_ptr; npy_intp str_len = strlen(temp_str); - - for(npy_intp i = 0; i < unicode_size_chars; i++) - { - if(i < str_len) - { + + for (npy_intp i = 0; i < unicode_size_chars; i++) { + if (i < str_len) { out_ucs4[i] = (Py_UCS4)temp_str[i]; } - else - { + else { out_ucs4[i] = 0; } } @@ -568,12 +546,18 @@ quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[], struct spec_npy_bool {}; struct spec_npy_half {}; -template -struct NpyType { typedef T TYPE; }; -template<> -struct NpyType{ typedef npy_bool TYPE; }; -template<> -struct NpyType{ typedef npy_half TYPE; }; +template +struct NpyType { + typedef T TYPE; +}; +template <> +struct NpyType { + typedef npy_bool TYPE; +}; +template <> +struct NpyType { + typedef npy_half TYPE; +}; // Casting from other types to QuadDType @@ -1220,21 +1204,22 @@ init_casts_internal(void) add_spec(quad2quad_spec); - PyArray_DTypeMeta **void_dtypes = new PyArray_DTypeMeta *[2]{&PyArray_VoidDType, &QuadPrecDType}; + PyArray_DTypeMeta **void_dtypes = + new PyArray_DTypeMeta *[2]{&PyArray_VoidDType, &QuadPrecDType}; PyType_Slot *void_slots = new PyType_Slot[4]{ - {NPY_METH_resolve_descriptors, (void *)&void_to_quad_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&void_to_quad_strided_loop}, - {NPY_METH_unaligned_strided_loop, (void *)&void_to_quad_strided_loop}, - {0, nullptr}}; + {NPY_METH_resolve_descriptors, (void *)&void_to_quad_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&void_to_quad_strided_loop}, + {NPY_METH_unaligned_strided_loop, (void *)&void_to_quad_strided_loop}, + {0, nullptr}}; PyArrayMethod_Spec *void_spec = new PyArrayMethod_Spec{ - .name = "cast_Void_to_QuadPrec_ERROR", - .nin = 1, - .nout = 1, - .casting = NPY_UNSAFE_CASTING, - .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = void_dtypes, - .slots = void_slots, + .name = "cast_Void_to_QuadPrec_ERROR", + .nin = 1, + .nout = 1, + .casting = NPY_UNSAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = void_dtypes, + .slots = void_slots, }; add_spec(void_spec); @@ -1273,38 +1258,38 @@ init_casts_internal(void) // Unicode/String to QuadPrecision cast PyArray_DTypeMeta **unicode_to_quad_dtypes = new PyArray_DTypeMeta *[2]{&PyArray_UnicodeDType, &QuadPrecDType}; PyType_Slot *unicode_to_quad_slots = new PyType_Slot[4]{ - {NPY_METH_resolve_descriptors, (void *)&unicode_to_quad_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&unicode_to_quad_strided_loop_aligned}, - {NPY_METH_unaligned_strided_loop, (void *)&unicode_to_quad_strided_loop_unaligned}, - {0, nullptr}}; + {NPY_METH_resolve_descriptors, (void *)&unicode_to_quad_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&unicode_to_quad_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&unicode_to_quad_strided_loop_unaligned}, + {0, nullptr}}; PyArrayMethod_Spec *unicode_to_quad_spec = new PyArrayMethod_Spec{ - .name = "cast_Unicode_to_QuadPrec", - .nin = 1, - .nout = 1, - .casting = NPY_UNSAFE_CASTING, - .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = unicode_to_quad_dtypes, - .slots = unicode_to_quad_slots, + .name = "cast_Unicode_to_QuadPrec", + .nin = 1, + .nout = 1, + .casting = NPY_UNSAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = unicode_to_quad_dtypes, + .slots = unicode_to_quad_slots, }; add_spec(unicode_to_quad_spec); // QuadPrecision to Unicode PyArray_DTypeMeta **quad_to_unicode_dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, &PyArray_UnicodeDType}; PyType_Slot *quad_to_unicode_slots = new PyType_Slot[4]{ - {NPY_METH_resolve_descriptors, (void *)&quad_to_unicode_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_to_unicode_loop_aligned}, - {NPY_METH_unaligned_strided_loop, (void *)&quad_to_unicode_loop_unaligned}, - {0, nullptr}}; + {NPY_METH_resolve_descriptors, (void *)&quad_to_unicode_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&quad_to_unicode_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&quad_to_unicode_loop_unaligned}, + {0, nullptr}}; PyArrayMethod_Spec *quad_to_unicode_spec = new PyArrayMethod_Spec{ - .name = "cast_QuadPrec_to_Unicode", - .nin = 1, - .nout = 1, - .casting = NPY_UNSAFE_CASTING, - .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = quad_to_unicode_dtypes, - .slots = quad_to_unicode_slots, + .name = "cast_QuadPrec_to_Unicode", + .nin = 1, + .nout = 1, + .casting = NPY_UNSAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = quad_to_unicode_dtypes, + .slots = quad_to_unicode_slots, }; add_spec(quad_to_unicode_spec); From 7e9df19f3b3795cc98738da3f9d4d01c95b4e90b Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Sat, 15 Nov 2025 13:23:51 +0000 Subject: [PATCH 07/12] refactoring duplicated logic --- quaddtype/numpy_quaddtype/src/casts.cpp | 241 +++++++++------------- quaddtype/numpy_quaddtype/src/utilities.c | 12 ++ quaddtype/numpy_quaddtype/src/utilities.h | 4 + 3 files changed, 116 insertions(+), 141 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index c2050765..de962ef5 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -198,6 +198,42 @@ unicode_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMet return NPY_UNSAFE_CASTING; } +// Helper function: Convert UCS4 string to quad_value +static inline int +unicode_to_quad_convert(const Py_UCS4 *ucs4_str, npy_intp unicode_size_chars, + QuadBackendType backend, quad_value *out_val) +{ + // Temporary buffer to convert UCS4 to null-terminated char string + char temp_str[QUAD_STR_WIDTH + 1]; + npy_intp copy_len = unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH; + + // Convert UCS4 characters to ASCII/char + npy_intp i; + for (i = 0; i < copy_len; i++) { + Py_UCS4 c = ucs4_str[i]; + + // reject non-ASCII characters + if (c > 127) { + PyErr_Format(PyExc_ValueError, + "Cannot cast non-ASCII character '%c' to QuadPrecision", c); + return -1; + } + + temp_str[i] = (char)c; + } + temp_str[i] = '\0'; + + char *endptr; + int err = cstring_to_quad(temp_str, backend, out_val, &endptr, true); + if (err < 0) { + PyErr_Format(PyExc_ValueError, + "could not convert string to QuadPrecision: np.str_('%s')", temp_str); + return -1; + } + + return 0; +} + static int unicode_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], npy_intp const dimensions[], npy_intp const strides[], @@ -217,33 +253,10 @@ unicode_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *con npy_intp unicode_size_chars = descrs[0]->elsize / 4; while (N--) { - // Temporary buffer to convert UCS4 to null-terminated char string - char temp_str[QUAD_STR_WIDTH + 1]; - npy_intp copy_len = - unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH; - // Convert UCS4 characters to ASCII/char Py_UCS4 *ucs4_str = (Py_UCS4 *)in_ptr; - npy_intp i; - for (i = 0; i < copy_len; i++) { - Py_UCS4 c = ucs4_str[i]; - - // reject non-ASCII characters - if (c > 127) { - PyErr_Format(PyExc_ValueError, - "Cannot cast non-ASCII character '%c' to QuadPrecision", c); - return -1; - } - - temp_str[i] = (char)c; - } - temp_str[i] = '\0'; - quad_value out_val; - char *endptr; - int err = cstring_to_quad(temp_str, backend, &out_val, &endptr, true); - if (err < 0) { - PyErr_Format(PyExc_ValueError, - "could not convert string to QuadPrecision: np.str_('%s')", temp_str); + + if (unicode_to_quad_convert(ucs4_str, unicode_size_chars, backend, &out_val) < 0) { return -1; } @@ -280,33 +293,10 @@ unicode_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const npy_intp unicode_size_chars = descrs[0]->elsize / 4; while (N--) { - // Temporary buffer to convert UCS4 to null-terminated char string - char temp_str[QUAD_STR_WIDTH + 1]; - npy_intp copy_len = - unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH; - // Convert UCS4 characters to ASCII/char Py_UCS4 *ucs4_str = (Py_UCS4 *)in_ptr; - npy_intp i; - for (i = 0; i < copy_len; i++) { - Py_UCS4 c = ucs4_str[i]; - - // reject non-ASCII characters - if (c > 127) { - PyErr_Format(PyExc_ValueError, - "Cannot cast non-ASCII character '%c' to QuadPrecision", c); - return -1; - } - - temp_str[i] = (char)c; - } - temp_str[i] = '\0'; - quad_value out_val; - char *endptr; - int err = cstring_to_quad(temp_str, backend, &out_val, &endptr, true); - if (err < 0) { - PyErr_Format(PyExc_ValueError, - "could not convert string to QuadPrecision: np.str_('%s')", temp_str); + + if (unicode_to_quad_convert(ucs4_str, unicode_size_chars, backend, &out_val) < 0) { return -1; } @@ -351,6 +341,56 @@ quad_to_unicode_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMet return NPY_UNSAFE_CASTING; } +// Helper function: Convert quad to string with adaptive notation +static inline PyObject * +quad_to_string_adaptive(Sleef_quad *sleef_val, npy_intp unicode_size_chars) +{ + // Try positional format first to see if it would fit + PyObject *positional_str = Dragon4_Positional_QuadDType( + sleef_val, DigitMode_Unique, CutoffMode_TotalLength, SLEEF_QUAD_DECIMAL_DIG, 0, 1, + TrimMode_LeaveOneZero, 1, 0); + + if (positional_str == NULL) { + return NULL; + } + + const char *pos_str = PyUnicode_AsUTF8(positional_str); + if (pos_str == NULL) { + Py_DECREF(positional_str); + return NULL; + } + + npy_intp pos_len = strlen(pos_str); + + // If positional format fits, use it; otherwise use scientific notation + if (pos_len <= unicode_size_chars) { + return positional_str; // Keep the positional string + } + else { + Py_DECREF(positional_str); + // Use scientific notation with full precision + return Dragon4_Scientific_QuadDType(sleef_val, DigitMode_Unique, + SLEEF_QUAD_DECIMAL_DIG, 0, 1, + TrimMode_LeaveOneZero, 1, 2); + } +} + +// Helper function: Copy string to UCS4 output buffer +static inline void +copy_string_to_ucs4(const char *str, Py_UCS4 *out_ucs4, npy_intp unicode_size_chars) +{ + npy_intp str_len = strlen(str); + + for (npy_intp i = 0; i < unicode_size_chars; i++) { + if (i < str_len) { + out_ucs4[i] = (Py_UCS4)str[i]; + } + else { + out_ucs4[i] = 0; + } + } +} + static int quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[], npy_intp const dimensions[], npy_intp const strides[], @@ -379,46 +419,14 @@ quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[ } // Convert to Sleef_quad for Dragon4 - Sleef_quad sleef_val; - if (backend == BACKEND_SLEEF) { - sleef_val = in_val.sleef_value; - } - else { - sleef_val = Sleef_cast_from_doubleq1(in_val.longdouble_value); - } - - // If positional format fits, use it; otherwise use scientific notation - PyObject *py_str; - PyObject *positional_str = Dragon4_Positional_QuadDType( - &sleef_val, DigitMode_Unique, CutoffMode_TotalLength, SLEEF_QUAD_DECIMAL_DIG, 0, 1, - TrimMode_LeaveOneZero, 1, 0); - - if (positional_str == NULL) { - return -1; - } + Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend); - const char *pos_str = PyUnicode_AsUTF8(positional_str); - if (pos_str == NULL) { - Py_DECREF(positional_str); + // Get string representation with adaptive notation + PyObject *py_str = quad_to_string_adaptive(&sleef_val, unicode_size_chars); + if (py_str == NULL) { return -1; } - npy_intp pos_len = strlen(pos_str); - - if (pos_len <= unicode_size_chars) { - py_str = positional_str; // Keep the positional string - } - else { - Py_DECREF(positional_str); - // Use scientific notation with full precision - py_str = Dragon4_Scientific_QuadDType(&sleef_val, DigitMode_Unique, - SLEEF_QUAD_DECIMAL_DIG, 0, 1, - TrimMode_LeaveOneZero, 1, 2); - if (py_str == NULL) { - return -1; - } - } - const char *temp_str = PyUnicode_AsUTF8(py_str); if (temp_str == NULL) { Py_DECREF(py_str); @@ -427,16 +435,7 @@ quad_to_unicode_loop_unaligned(PyArrayMethod_Context *context, char *const data[ // Convert char string to UCS4 and store in output Py_UCS4 *out_ucs4 = (Py_UCS4 *)out_ptr; - npy_intp str_len = strlen(temp_str); - - for (npy_intp i = 0; i < unicode_size_chars; i++) { - if (i < str_len) { - out_ucs4[i] = (Py_UCS4)temp_str[i]; - } - else { - out_ucs4[i] = 0; - } - } + copy_string_to_ucs4(temp_str, out_ucs4, unicode_size_chars); Py_DECREF(py_str); @@ -474,45 +473,14 @@ quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[], } // Convert to Sleef_quad for Dragon4 - Sleef_quad sleef_val; - if (backend == BACKEND_SLEEF) { - sleef_val = in_val.sleef_value; - } - else { - sleef_val = Sleef_cast_from_doubleq1(in_val.longdouble_value); - } + Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend); - PyObject *py_str; - PyObject *positional_str = Dragon4_Positional_QuadDType( - &sleef_val, DigitMode_Unique, CutoffMode_TotalLength, SLEEF_QUAD_DECIMAL_DIG, 0, 1, - TrimMode_LeaveOneZero, 1, 0); - - if (positional_str == NULL) { + // Get string representation with adaptive notation + PyObject *py_str = quad_to_string_adaptive(&sleef_val, unicode_size_chars); + if (py_str == NULL) { return -1; } - const char *pos_str = PyUnicode_AsUTF8(positional_str); - if (pos_str == NULL) { - Py_DECREF(positional_str); - return -1; - } - - npy_intp pos_len = strlen(pos_str); - - if (pos_len <= unicode_size_chars) { - py_str = positional_str; - } - else { - Py_DECREF(positional_str); - // Use scientific notation with full precision - py_str = Dragon4_Scientific_QuadDType(&sleef_val, DigitMode_Unique, - SLEEF_QUAD_DECIMAL_DIG, 0, 1, - TrimMode_LeaveOneZero, 1, 2); - if (py_str == NULL) { - return -1; - } - } - const char *temp_str = PyUnicode_AsUTF8(py_str); if (temp_str == NULL) { Py_DECREF(py_str); @@ -521,16 +489,7 @@ quad_to_unicode_loop_aligned(PyArrayMethod_Context *context, char *const data[], // Convert char string to UCS4 and store in output Py_UCS4 *out_ucs4 = (Py_UCS4 *)out_ptr; - npy_intp str_len = strlen(temp_str); - - for (npy_intp i = 0; i < unicode_size_chars; i++) { - if (i < str_len) { - out_ucs4[i] = (Py_UCS4)temp_str[i]; - } - else { - out_ucs4[i] = 0; - } - } + copy_string_to_ucs4(temp_str, out_ucs4, unicode_size_chars); Py_DECREF(py_str); diff --git a/quaddtype/numpy_quaddtype/src/utilities.c b/quaddtype/numpy_quaddtype/src/utilities.c index aa00d30b..cf749b31 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.c +++ b/quaddtype/numpy_quaddtype/src/utilities.c @@ -17,4 +17,16 @@ char **endptr, bool require_full_parse) return -1; // parse error - characters remain to be converted return 0; // success +} + +// Helper function: Convert quad_value to Sleef_quad for Dragon4 +Sleef_quad +quad_to_sleef_quad(const quad_value *in_val, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return in_val->sleef_value; + } + else { + return Sleef_cast_from_doubleq1(in_val->longdouble_value); + } } \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/utilities.h b/quaddtype/numpy_quaddtype/src/utilities.h index 8362bb68..9433a41d 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.h +++ b/quaddtype/numpy_quaddtype/src/utilities.h @@ -12,6 +12,10 @@ extern "C" { int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_value, char **endptr, bool require_full_parse); +// Helper function: Convert quad_value to Sleef_quad for Dragon4 +Sleef_quad +quad_to_sleef_quad(const quad_value *in_val, QuadBackendType backend); + #ifdef __cplusplus } #endif From 10b7964f1becff130698eb315fdb548a28cc0ab5 Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Sat, 15 Nov 2025 15:37:13 +0000 Subject: [PATCH 08/12] better string handling --- quaddtype/numpy_quaddtype/src/casts.cpp | 50 +++--- quaddtype/numpy_quaddtype/src/constants.hpp | 8 + quaddtype/numpy_quaddtype/src/utilities.c | 154 ++++++++++++++++- quaddtype/numpy_quaddtype/src/utilities.h | 3 + quaddtype/tests/test_quaddtype.py | 175 ++++++++++++++++++++ 5 files changed, 370 insertions(+), 20 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index de962ef5..4ea55e0d 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -203,34 +203,46 @@ static inline int unicode_to_quad_convert(const Py_UCS4 *ucs4_str, npy_intp unicode_size_chars, QuadBackendType backend, quad_value *out_val) { - // Temporary buffer to convert UCS4 to null-terminated char string - char temp_str[QUAD_STR_WIDTH + 1]; - npy_intp copy_len = unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH; + // Convert UCS4 to Python Unicode object then to UTF-8 bytes + // This is more robust than manual UCS4→char conversion + PyObject *unicode_obj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, ucs4_str, unicode_size_chars); + if (unicode_obj == NULL) { + return -1; + } - // Convert UCS4 characters to ASCII/char - npy_intp i; - for (i = 0; i < copy_len; i++) { - Py_UCS4 c = ucs4_str[i]; - - // reject non-ASCII characters - if (c > 127) { - PyErr_Format(PyExc_ValueError, - "Cannot cast non-ASCII character '%c' to QuadPrecision", c); - return -1; - } - - temp_str[i] = (char)c; + // Convert to UTF-8 bytes + const char *utf8_str = PyUnicode_AsUTF8(unicode_obj); + if (utf8_str == NULL) { + Py_DECREF(unicode_obj); + return -1; } - temp_str[i] = '\0'; + // Use locale-independent parser char *endptr; - int err = cstring_to_quad(temp_str, backend, out_val, &endptr, true); + int err = NumPyOS_ascii_strtoq(utf8_str, backend, out_val, &endptr); + + // Check for parse errors if (err < 0) { PyErr_Format(PyExc_ValueError, - "could not convert string to QuadPrecision: np.str_('%s')", temp_str); + "could not convert string to QuadPrecision: np.str_('%s')", utf8_str); + Py_DECREF(unicode_obj); + return -1; + } + + // Check that we parsed the entire string (skip trailing whitespace) + while (*endptr == ' ' || *endptr == '\t' || *endptr == '\n' || + *endptr == '\r' || *endptr == '\f' || *endptr == '\v') { + endptr++; + } + + if (*endptr != '\0') { + PyErr_Format(PyExc_ValueError, + "could not convert string to QuadPrecision: np.str_('%s')", utf8_str); + Py_DECREF(unicode_obj); return -1; } + Py_DECREF(unicode_obj); return 0; } diff --git a/quaddtype/numpy_quaddtype/src/constants.hpp b/quaddtype/numpy_quaddtype/src/constants.hpp index c2c41260..25bcabe9 100644 --- a/quaddtype/numpy_quaddtype/src/constants.hpp +++ b/quaddtype/numpy_quaddtype/src/constants.hpp @@ -1,6 +1,10 @@ #ifndef QUAD_CONSTANTS_HPP #define QUAD_CONSTANTS_HPP +#ifdef __cplusplus +extern "C" { +#endif + #include #include #include @@ -130,4 +134,8 @@ static inline ConstantResult get_sleef_constant_by_name(const char* constant_nam return result; } +#ifdef __cplusplus +} +#endif + #endif // QUAD_CONSTANTS_HPP \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/utilities.c b/quaddtype/numpy_quaddtype/src/utilities.c index cf749b31..c0f9ade4 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.c +++ b/quaddtype/numpy_quaddtype/src/utilities.c @@ -1,5 +1,157 @@ -#include "utilities.h" #include +#include +#include +#include "utilities.h" +#include "constants.hpp" + +// Locale-independent ASCII character classification helpers +static int +ascii_isspace(int c) +{ + return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v'; +} + +static int +ascii_isalpha(char c) +{ + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} + +static int +ascii_isdigit(char c) +{ + return (c >= '0' && c <= '9'); +} + +static int +ascii_isalnum(char c) +{ + return ascii_isdigit(c) || ascii_isalpha(c); +} + +static int +ascii_tolower(int c) +{ + if (c >= 'A' && c <= 'Z') { + return c + ('a' - 'A'); + } + return c; +} + +// inspired from NumPyOS_ascii_strncasecmp +static int +ascii_strncasecmp(const char *s1, const char *s2, size_t n) +{ + while (n > 0 && *s1 != '\0' && *s2 != '\0') { + int c1 = ascii_tolower((unsigned char)*s1); + int c2 = ascii_tolower((unsigned char)*s2); + int diff = c1 - c2; + + if (diff != 0) { + return diff; + } + + s1++; + s2++; + n--; + } + + if(n > 0) { + return *s1 - *s2; + } + return 0; +} + +/* + * NumPyOS_ascii_strtoq: + * + * Locale-independent string to quad-precision parser. + * Inspired by NumPyOS_ascii_strtold from NumPy. + * + * This function: + * - Skips leading whitespace + * - Recognizes inf/nan case-insensitively with optional signs and payloads + * - Delegates to cstring_to_quad for numeric parsing + * + * Returns: + * 0 on success + * -1 on parse error + */ +int +NumPyOS_ascii_strtoq(const char *s, QuadBackendType backend, quad_value *out_value, char **endptr) +{ + const char *p; + int sign; + + // skip leading whitespace + while (ascii_isspace(*s)) { + s++; + } + + p = s; + sign = 1; + if (*p == '-') { + sign = -1; + ++p; + } + else if (*p == '+') { + ++p; + } + + // Check for inf/infinity (case-insensitive) + if (ascii_strncasecmp(p, "inf", 3) == 0) { + p += 3; + if (ascii_strncasecmp(p, "inity", 5) == 0) { + p += 5; + } + + // Set infinity values with sign applied + if (backend == BACKEND_SLEEF) { + out_value->sleef_value = sign > 0 ? QUAD_PRECISION_INF : QUAD_PRECISION_NINF; + } + else { + out_value->longdouble_value = sign > 0 ? strtold("inf", NULL) : strtold("-inf", NULL); + } + + if (endptr) { + *endptr = (char *)p; + } + return 0; + } + + // Check for nan (case-insensitive) with optional payload + if (ascii_strncasecmp(p, "nan", 3) == 0) { + p += 3; + + // Skip optional (payload) + if (*p == '(') { + ++p; + while (ascii_isalnum(*p) || *p == '_') { + ++p; + } + if (*p == ')') { + ++p; + } + } + + // Set NaN value (sign is ignored for NaN) + if (backend == BACKEND_SLEEF) { + out_value->sleef_value = QUAD_PRECISION_NAN; + } + else { + out_value->longdouble_value = nanl(""); + } + + if (endptr) { + *endptr = (char *)p; + } + return 0; + } + + // For numeric values, delegate to cstring_to_quad + // Pass the original string position (after whitespace, includes sign if present) + return cstring_to_quad(s, backend, out_value, endptr, false); +} int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_value, char **endptr, bool require_full_parse) diff --git a/quaddtype/numpy_quaddtype/src/utilities.h b/quaddtype/numpy_quaddtype/src/utilities.h index 9433a41d..1c47e0a9 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.h +++ b/quaddtype/numpy_quaddtype/src/utilities.h @@ -12,6 +12,9 @@ extern "C" { int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_value, char **endptr, bool require_full_parse); +// Locale-independent ASCII string to quad parser (inspired by NumPyOS_ascii_strtold) +int NumPyOS_ascii_strtoq(const char *s, QuadBackendType backend, quad_value *out_value, char **endptr); + // Helper function: Convert quad_value to Sleef_quad for Dragon4 Sleef_quad quad_to_sleef_quad(const quad_value *in_val, QuadBackendType backend); diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index edf74bb8..72cd8078 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -599,6 +599,181 @@ def test_cast_string_to_quad_roundtrip(self, input_val, strtype): f"Scalar round-trip failed: {scalar_str} -> {scalar_from_str} != {quad_array[()]}" +class TestStringParsingEdgeCases: + """Test edge cases in NumPyOS_ascii_strtoq string parsing""" + + @pytest.mark.parametrize("input_str,expected_sign", [ + ("inf", 1), + ("+inf", 1), + ("-inf", -1), + ("Inf", 1), + ("+Inf", 1), + ("-Inf", -1), + ("INF", 1), + ("+INF", 1), + ("-INF", -1), + ("infinity", 1), + ("+infinity", 1), + ("-infinity", -1), + ("Infinity", 1), + ("+Infinity", 1), + ("-Infinity", -1), + ("INFINITY", 1), + ("+INFINITY", 1), + ("-INFINITY", -1), + ]) + def test_infinity_sign_preservation(self, input_str, expected_sign): + """Test that +/- signs are correctly applied to infinity values""" + arr = np.array([input_str], dtype='U20') + result = arr.astype(QuadPrecDType()) + + assert np.isinf(float(str(result[0]))), f"Expected inf for '{input_str}'" + + actual_sign = 1 if float(str(result[0])) > 0 else -1 + assert actual_sign == expected_sign, \ + f"Sign mismatch for '{input_str}': got {actual_sign}, expected {expected_sign}" + + @pytest.mark.parametrize("input_str", [ + "nan", "+nan", "-nan", # Note: NaN sign is typically ignored + "NaN", "+NaN", "-NaN", + "NAN", "+NAN", "-NAN", + "nan()", "nan(123)", "nan(abc_)", "NAN(XYZ)", + ]) + def test_nan_case_insensitive(self, input_str): + """Test case-insensitive NaN parsing with optional payloads""" + arr = np.array([input_str], dtype='U20') + result = arr.astype(QuadPrecDType()) + + assert np.isnan(float(str(result[0]))), f"Expected NaN for '{input_str}'" + + @pytest.mark.parametrize("input_str,expected_val", [ + ("3.14", 3.14), + ("+3.14", 3.14), + ("-3.14", -3.14), + ("0.0", 0.0), + ("+0.0", 0.0), + ("-0.0", -0.0), + ("1e10", 1e10), + ("+1e10", 1e10), + ("-1e10", -1e10), + ("1.23e-45", 1.23e-45), + ("+1.23e-45", 1.23e-45), + ("-1.23e-45", -1.23e-45), + ]) + def test_numeric_sign_handling(self, input_str, expected_val): + """Test that +/- signs are correctly handled for numeric values""" + arr = np.array([input_str], dtype='U20') + result = arr.astype(QuadPrecDType()) + + result_val = float(str(result[0])) + + # For zero, check sign separately + if expected_val == 0.0: + assert result_val == 0.0 + if input_str.startswith('-'): + assert np.signbit(result_val), f"Expected negative zero for '{input_str}'" + else: + assert not np.signbit(result_val), f"Expected positive zero for '{input_str}'" + else: + np.testing.assert_allclose(result_val, expected_val, rtol=1e-10) + + @pytest.mark.parametrize("input_str", [ + " 3.14 ", + "\t3.14\t", + "\n3.14\n", + "\r3.14\r", + " \t\n\r 3.14 \t\n\r ", + " inf ", + "\t-inf\t", + " nan ", + ]) + def test_whitespace_handling(self, input_str): + """Test that leading/trailing whitespace is handled correctly""" + arr = np.array([input_str], dtype='U20') + result = arr.astype(QuadPrecDType()) + + # Should not raise an error + result_str = str(result[0]) + assert result_str # Should have a value + + @pytest.mark.parametrize("invalid_str", [ + "abc", # Non-numeric + "3.14.15", # Multiple decimals + "1.23e", # Incomplete scientific notation + "e10", # Scientific notation without base + "3.14abc", # Trailing non-numeric + "++3.14", # Double sign + "--3.14", # Double sign + "+-3.14", # Mixed signs + "in", # Incomplete inf + "na", # Incomplete nan + "infinit", # Incomplete infinity + ]) + def test_invalid_strings_raise_error(self, invalid_str): + """Test that invalid strings raise ValueError""" + arr = np.array([invalid_str], dtype='U20') + + with pytest.raises(ValueError): + arr.astype(QuadPrecDType()) + + @pytest.mark.parametrize("input_str", [ + "3.14ñ", # Trailing non-ASCII + "ñ3.14", # Leading non-ASCII + "3.1€4", # Mid non-ASCII + "π", # Greek pi + ]) + def test_non_ascii_raises_error(self, input_str): + """Test that non-ASCII characters raise ValueError""" + arr = np.array([input_str], dtype='U20') + + with pytest.raises(ValueError): + arr.astype(QuadPrecDType()) + + def test_numpy_longdouble_compatibility(self): + """Test that our parsing matches NumPy's longdouble for common cases""" + test_cases = [ + "inf", "INF", "Inf", "-inf", "+infinity", + "nan", "NAN", "NaN", + "3.14", "-2.718", "1e10", "-1.23e-45", + ] + + for test_str in test_cases: + arr = np.array([test_str], dtype='U20') + + # NumPy's built-in + np_result = arr.astype(np.longdouble) + + # Our QuadPrecision + quad_result = arr.astype(QuadPrecDType()) + + np_val = np_result[0] + quad_val = float(str(quad_result[0])) + + if np.isnan(np_val): + assert np.isnan(quad_val), f"NumPy gives NaN but QuadPrec doesn't for '{test_str}'" + elif np.isinf(np_val): + assert np.isinf(quad_val), f"NumPy gives inf but QuadPrec doesn't for '{test_str}'" + assert np.sign(np_val) == np.sign(quad_val), \ + f"Inf sign mismatch for '{test_str}': NumPy={np.sign(np_val)}, Quad={np.sign(quad_val)}" + else: + np.testing.assert_allclose(quad_val, np_val, rtol=1e-10) + + def test_locale_independence(self): + """Test that parsing always uses '.' for decimal, not locale-specific separator""" + # This should parse correctly (period as decimal) + arr = np.array(["3.14"], dtype='U20') + result = arr.astype(QuadPrecDType()) + assert float(str(result[0])) == 3.14 + + # In some locales ',' is decimal separator, but we should always use '.' + # So "3,14" should either error or parse as "3" (stopping at comma) + # Since require_full_parse validation happens in casts.cpp, and we check + # for trailing content, this should raise ValueError + arr_comma = np.array(["3,14"], dtype='U20') + with pytest.raises(ValueError): + arr_comma.astype(QuadPrecDType()) + + def test_basic_equality(): assert QuadPrecision("12") == QuadPrecision( "12.0") == QuadPrecision("12.00") From 0e97ae74ecfc7a495f3686e42220056a29053425 Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Sat, 15 Nov 2025 15:44:12 +0000 Subject: [PATCH 09/12] import math --- quaddtype/numpy_quaddtype/src/casts.cpp | 7 +------ quaddtype/numpy_quaddtype/src/utilities.c | 1 + 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 4ea55e0d..786fdd87 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -203,25 +203,20 @@ static inline int unicode_to_quad_convert(const Py_UCS4 *ucs4_str, npy_intp unicode_size_chars, QuadBackendType backend, quad_value *out_val) { - // Convert UCS4 to Python Unicode object then to UTF-8 bytes - // This is more robust than manual UCS4→char conversion PyObject *unicode_obj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, ucs4_str, unicode_size_chars); if (unicode_obj == NULL) { return -1; } - - // Convert to UTF-8 bytes + const char *utf8_str = PyUnicode_AsUTF8(unicode_obj); if (utf8_str == NULL) { Py_DECREF(unicode_obj); return -1; } - // Use locale-independent parser char *endptr; int err = NumPyOS_ascii_strtoq(utf8_str, backend, out_val, &endptr); - // Check for parse errors if (err < 0) { PyErr_Format(PyExc_ValueError, "could not convert string to QuadPrecision: np.str_('%s')", utf8_str); diff --git a/quaddtype/numpy_quaddtype/src/utilities.c b/quaddtype/numpy_quaddtype/src/utilities.c index c0f9ade4..899b2174 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.c +++ b/quaddtype/numpy_quaddtype/src/utilities.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "utilities.h" #include "constants.hpp" From fa70b10308b6f1e2c96bf19b1488eebd44e8faed Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Sat, 15 Nov 2025 16:19:18 +0000 Subject: [PATCH 10/12] casting as per size --- quaddtype/numpy_quaddtype/src/casts.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 786fdd87..9624601d 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -327,16 +327,17 @@ quad_to_unicode_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMet PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], npy_intp *view_offset) { - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; + npy_intp required_size_chars = QUAD_STR_WIDTH; + npy_intp required_size_bytes = required_size_chars * 4; // UCS4 = 4 bytes per char if (given_descrs[1] == NULL) { + // Create descriptor with required size PyArray_Descr *unicode_descr = PyArray_DescrNewFromType(NPY_UNICODE); if (unicode_descr == nullptr) { return (NPY_CASTING)-1; } - unicode_descr->elsize = QUAD_STR_WIDTH * 4; // bytes + unicode_descr->elsize = required_size_bytes; loop_descrs[1] = unicode_descr; } else { @@ -344,8 +345,17 @@ quad_to_unicode_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMet loop_descrs[1] = given_descrs[1]; } + // Set the input descriptor + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + *view_offset = 0; - return NPY_UNSAFE_CASTING; + + // If target descriptor is wide enough, it's a safe cast + if (loop_descrs[1]->elsize >= required_size_bytes) { + return NPY_SAFE_CASTING; + } + return NPY_SAME_KIND_CASTING; } // Helper function: Convert quad to string with adaptive notation From d76ef3f407885a1b2ba1aae303aab6925ba3a940 Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Sat, 15 Nov 2025 18:33:14 +0000 Subject: [PATCH 11/12] simplify trailing whitespace logic --- quaddtype/numpy_quaddtype/src/casts.cpp | 3 +- quaddtype/numpy_quaddtype/src/utilities.c | 13 ++- quaddtype/numpy_quaddtype/src/utilities.h | 7 ++ quaddtype/tests/test_quaddtype.py | 125 ++++++++++++++++++++++ 4 files changed, 139 insertions(+), 9 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 9624601d..011a4ba7 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -225,8 +225,7 @@ unicode_to_quad_convert(const Py_UCS4 *ucs4_str, npy_intp unicode_size_chars, } // Check that we parsed the entire string (skip trailing whitespace) - while (*endptr == ' ' || *endptr == '\t' || *endptr == '\n' || - *endptr == '\r' || *endptr == '\f' || *endptr == '\v') { + while (ascii_isspace(*endptr)) { endptr++; } diff --git a/quaddtype/numpy_quaddtype/src/utilities.c b/quaddtype/numpy_quaddtype/src/utilities.c index 899b2174..6690605c 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.c +++ b/quaddtype/numpy_quaddtype/src/utilities.c @@ -5,32 +5,31 @@ #include "utilities.h" #include "constants.hpp" -// Locale-independent ASCII character classification helpers -static int +int ascii_isspace(int c) { return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v'; } -static int +int ascii_isalpha(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } -static int +int ascii_isdigit(char c) { return (c >= '0' && c <= '9'); } -static int +int ascii_isalnum(char c) { return ascii_isdigit(c) || ascii_isalpha(c); } -static int +int ascii_tolower(int c) { if (c >= 'A' && c <= 'Z') { @@ -40,7 +39,7 @@ ascii_tolower(int c) } // inspired from NumPyOS_ascii_strncasecmp -static int +int ascii_strncasecmp(const char *s1, const char *s2, size_t n) { while (n > 0 && *s1 != '\0' && *s2 != '\0') { diff --git a/quaddtype/numpy_quaddtype/src/utilities.h b/quaddtype/numpy_quaddtype/src/utilities.h index 1c47e0a9..62cb2a30 100644 --- a/quaddtype/numpy_quaddtype/src/utilities.h +++ b/quaddtype/numpy_quaddtype/src/utilities.h @@ -11,10 +11,17 @@ extern "C" { #include int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_value, char **endptr, bool require_full_parse); +int ascii_isspace(int c); +int ascii_isalpha(char c); +int ascii_isdigit(char c); +int ascii_isalnum(char c); +int ascii_tolower(int c); +int ascii_strncasecmp(const char *s1, const char *s2, size_t n); // Locale-independent ASCII string to quad parser (inspired by NumPyOS_ascii_strtold) int NumPyOS_ascii_strtoq(const char *s, QuadBackendType backend, quad_value *out_value, char **endptr); + // Helper function: Convert quad_value to Sleef_quad for Dragon4 Sleef_quad quad_to_sleef_quad(const quad_value *in_val, QuadBackendType backend); diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 72cd8078..10fecdad 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -773,6 +773,131 @@ def test_locale_independence(self): with pytest.raises(ValueError): arr_comma.astype(QuadPrecDType()) + @pytest.mark.parametrize("input_str,description", [ + (" 1.23 ", "space - leading and trailing"), + ("\t1.23\t", "tab - leading and trailing"), + ("\n1.23\n", "newline - leading and trailing"), + ("\r1.23\r", "carriage return - leading and trailing"), + ("\v1.23\v", "vertical tab - leading and trailing"), + ("\f1.23\f", "form feed - leading and trailing"), + (" \t\n\r\v\f1.23 \t\n\r\v\f", "all 6 whitespace chars - mixed"), + ("\t\t\t3.14\t\t\t", "multiple tabs"), + (" inf ", "infinity with spaces"), + ("\t\t-inf\t\t", "negative infinity with tabs"), + ("\n\nnan\n\n", "nan with newlines"), + ("\r\r-nan\r\r", "negative nan with carriage returns"), + ("\v\v1e10\v\v", "scientific notation with vertical tabs"), + ("\f\f-1.23e-45\f\f", "negative scientific with form feeds"), + ]) + def test_all_six_whitespace_characters(self, input_str, description): + """Test all 6 ASCII whitespace characters (space, tab, newline, carriage return, vertical tab, form feed) + + This tests the ascii_isspace() helper function in casts.cpp which matches + CPython's Py_ISSPACE and NumPy's NumPyOS_ascii_isspace behavior. + The 6 characters are: 0x09(\t), 0x0A(\n), 0x0B(\v), 0x0C(\f), 0x0D(\r), 0x20(space) + """ + arr = np.array([input_str], dtype='U50') + result = arr.astype(QuadPrecDType()) + + # Should successfully parse without errors + result_val = str(result[0]) + assert result_val, f"Failed to parse with {description}" + + # Verify the value is correct (strip whitespace and compare) + stripped = input_str.strip(' \t\n\r\v\f') + expected_arr = np.array([stripped], dtype='U50') + expected = expected_arr.astype(QuadPrecDType()) + + if np.isnan(float(str(expected[0]))): + assert np.isnan(float(str(result[0]))), f"NaN parsing failed for {description}" + elif np.isinf(float(str(expected[0]))): + assert np.isinf(float(str(result[0]))), f"Inf parsing failed for {description}" + assert np.sign(float(str(expected[0]))) == np.sign(float(str(result[0]))), \ + f"Inf sign mismatch for {description}" + else: + assert result[0] == expected[0], f"Value mismatch for {description}" + + @pytest.mark.parametrize("invalid_str,description", [ + ("1.23 abc", "trailing non-whitespace after number"), + (" 1.23xyz ", "trailing garbage with surrounding whitespace"), + ("abc 123", "leading garbage before number"), + ("1.23\x01", "control char (SOH) after number"), + ("1.23 a", "letter after multiple spaces"), + ("\t1.23\tabc\t", "tabs with garbage in middle"), + ]) + def test_whitespace_with_invalid_trailing_content(self, invalid_str, description): + """Test that strings with invalid trailing content are rejected even with whitespace + + This ensures the trailing whitespace check in casts.cpp properly validates + that only whitespace follows the parsed number, not other characters. + """ + arr = np.array([invalid_str], dtype='U50') + + with pytest.raises(ValueError, match="could not convert string to QuadPrecision"): + arr.astype(QuadPrecDType()) + + def test_empty_string_and_whitespace_only(self): + """Test that empty strings and whitespace-only strings raise errors""" + test_cases = [ + "", # Empty string + " ", # Single space + " ", # Multiple spaces + "\t", # Single tab + "\n", # Single newline + "\r", # Single carriage return + "\v", # Single vertical tab + "\f", # Single form feed + " \t\n\r\v\f", # All whitespace characters + " \t\t\n\n ", # Mixed whitespace + ] + + for test_str in test_cases: + arr = np.array([test_str], dtype='U20') + with pytest.raises(ValueError, match="could not convert string to QuadPrecision"): + arr.astype(QuadPrecDType()) + + @pytest.mark.parametrize("boundary_str,description", [ + ("1e4932", "near max exponent for quad precision"), + ("1e-4932", "near min exponent for quad precision"), + ("1.189731495357231765085759326628007016196477" + "e4932", "very large number"), + ("3.362103143112093506262677817321752602596e-4932", "very small number"), + ("-1.189731495357231765085759326628007016196477" + "e4932", "very large negative"), + ("-3.362103143112093506262677817321752602596e-4932", "very small negative"), + ]) + def test_extreme_exponent_values(self, boundary_str, description): + """Test parsing of numbers with extreme exponents near quad precision limits + + IEEE 754 binary128 has exponent range of approximately ±4932 + """ + arr = np.array([boundary_str], dtype='U100') + result = arr.astype(QuadPrecDType()) + + # Should parse successfully (may result in inf for overflow cases) + result_str = str(result[0]) + assert result_str, f"Failed to parse {description}" + + @pytest.mark.parametrize("precision_str", [ + "3.141592653589793238462643383279502884197", # 36 digits (quad precision) + "2.718281828459045235360287471352662497757", # e to 36 digits + "1.414213562373095048801688724209698078569", # sqrt(2) to 36 digits + "-1.732050807568877293527446341505872366942", # -sqrt(3) to 36 digits + ]) + def test_full_precision_parsing(self, precision_str): + """Test that strings with full quad precision (36 decimal digits) parse correctly + + This ensures the full precision is preserved during string -> quad conversion + """ + arr = np.array([precision_str], dtype='U50') + result = arr.astype(QuadPrecDType()) + + # Convert back to string and verify roundtrip preserves precision + back_to_str = result.astype('U50') + roundtrip = back_to_str.astype(QuadPrecDType()) + + # Roundtrip should preserve the value + assert result[0] == roundtrip[0], \ + f"Precision lost in roundtrip for {precision_str}" + def test_basic_equality(): assert QuadPrecision("12") == QuadPrecision( From 0e0614c22d6a93ae884daddb823ad4f358c931f3 Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Wed, 19 Nov 2025 15:54:24 +0000 Subject: [PATCH 12/12] change here --- quaddtype/numpy_quaddtype/src/casts.cpp | 13 +++++++++++-- quaddtype/tests/test_quaddtype.py | 15 ++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 011a4ba7..34290073 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -181,12 +181,21 @@ unicode_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMet PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], npy_intp *view_offset) { - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; + if (!PyArray_ISNBO(given_descrs[0]->byteorder)) { + loop_descrs[0] = PyArray_DescrNewByteorder(given_descrs[0], NPY_NATIVE); + if (loop_descrs[0] == nullptr) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + } if (given_descrs[1] == NULL) { loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); if (loop_descrs[1] == nullptr) { + Py_DECREF(loop_descrs[0]); return (NPY_CASTING)-1; } } diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 10fecdad..460c1970 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -601,7 +601,20 @@ def test_cast_string_to_quad_roundtrip(self, input_val, strtype): class TestStringParsingEdgeCases: """Test edge cases in NumPyOS_ascii_strtoq string parsing""" - + @pytest.mark.parametrize("input_str", ['3.14', '-2.71', '0.0', '1e10', '-1e-10']) + @pytest.mark.parametrize("byte_order", ['<', '>']) + def test_numeric_string_parsing(self, input_str, byte_order): + """Test that numeric strings are parsed correctly regardless of byte order""" + strtype = np.dtype(f'{byte_order}U20') + arr = np.array([input_str], dtype=strtype) + result = arr.astype(QuadPrecDType()) + + expected = np.array(input_str, dtype=np.float64) + + np.testing.assert_allclose(result, expected, + err_msg=f"Failed parsing '{input_str}' with byte order '{byte_order}'") + + @pytest.mark.parametrize("input_str,expected_sign", [ ("inf", 1), ("+inf", 1),