From 5bea798ae6e324b17da2585412ccb705e11e8ec5 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 7 Feb 2023 14:57:05 -0800 Subject: [PATCH 1/7] isna_element takes kwargs, include_none --- src/_arraykit.c | 68 ++++++++++++++++++++++++++++++----------------- test/test_util.py | 10 +++++-- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/src/_arraykit.c b/src/_arraykit.c index 91ec20e8..f4c35c96 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -3385,61 +3385,76 @@ dtype_from_element(PyObject *Py_UNUSED(m), PyObject *arg) return (PyObject*)PyArray_DescrFromType(NPY_OBJECT); } +static char *isna_element_kwarg_names[] = { + "element", + "include_none", + NULL +}; + static PyObject * -isna_element(PyObject *Py_UNUSED(m), PyObject *arg) +isna_element(PyObject *m, PyObject *args, PyObject *kwargs) { + PyObject *element; + int include_none = 1; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O|p:isna_element", isna_element_kwarg_names, + &element, + &include_none)) { + return NULL; + } + // None - if (arg == Py_None) { + if (include_none && element == Py_None) { Py_RETURN_TRUE; } // NaN - if (PyFloat_Check(arg)) { - return PyBool_FromLong(isnan(PyFloat_AS_DOUBLE(arg))); + if (PyFloat_Check(element)) { + return PyBool_FromLong(isnan(PyFloat_AS_DOUBLE(element))); } - if (PyArray_IsScalar(arg, Half)) { - return PyBool_FromLong(npy_half_isnan(PyArrayScalar_VAL(arg, Half))); + if (PyArray_IsScalar(element, Half)) { + return PyBool_FromLong(npy_half_isnan(PyArrayScalar_VAL(element, Half))); } - if (PyArray_IsScalar(arg, Float32)) { - return PyBool_FromLong(isnan(PyArrayScalar_VAL(arg, Float32))); + if (PyArray_IsScalar(element, Float32)) { + return PyBool_FromLong(isnan(PyArrayScalar_VAL(element, Float32))); } - if (PyArray_IsScalar(arg, Float64)) { - return PyBool_FromLong(isnan(PyArrayScalar_VAL(arg, Float64))); + if (PyArray_IsScalar(element, Float64)) { + return PyBool_FromLong(isnan(PyArrayScalar_VAL(element, Float64))); } # ifdef PyFloat128ArrType_Type - if (PyArray_IsScalar(arg, Float128)) { - return PyBool_FromLong(isnan(PyArrayScalar_VAL(arg, Float128))); + if (PyArray_IsScalar(element, Float128)) { + return PyBool_FromLong(isnan(PyArrayScalar_VAL(element, Float128))); } # endif // Complex NaN - if (PyComplex_Check(arg)) { - Py_complex val = ((PyComplexObject*)arg)->cval; + if (PyComplex_Check(element)) { + Py_complex val = ((PyComplexObject*)element)->cval; return PyBool_FromLong(isnan(val.real) || isnan(val.imag)); } - if (PyArray_IsScalar(arg, Complex64)) { - npy_cfloat val = PyArrayScalar_VAL(arg, Complex64); + if (PyArray_IsScalar(element, Complex64)) { + npy_cfloat val = PyArrayScalar_VAL(element, Complex64); return PyBool_FromLong(isnan(val.real) || isnan(val.imag)); } - if (PyArray_IsScalar(arg, Complex128)) { - npy_cdouble val = PyArrayScalar_VAL(arg, Complex128); + if (PyArray_IsScalar(element, Complex128)) { + npy_cdouble val = PyArrayScalar_VAL(element, Complex128); return PyBool_FromLong(isnan(val.real) || isnan(val.imag)); } # ifdef PyComplex256ArrType_Type - if (PyArray_IsScalar(arg, Complex256)) { - npy_clongdouble val = PyArrayScalar_VAL(arg, Complex256); + if (PyArray_IsScalar(element, Complex256)) { + npy_clongdouble val = PyArrayScalar_VAL(element, Complex256); return PyBool_FromLong(isnan(val.real) || isnan(val.imag)); } # endif // NaT - Datetime - if (PyArray_IsScalar(arg, Datetime)) { - return PyBool_FromLong(PyArrayScalar_VAL(arg, Datetime) == NPY_DATETIME_NAT); + if (PyArray_IsScalar(element, Datetime)) { + return PyBool_FromLong(PyArrayScalar_VAL(element, Datetime) == NPY_DATETIME_NAT); } // NaT - Timedelta - if (PyArray_IsScalar(arg, Timedelta)) { - return PyBool_FromLong(PyArrayScalar_VAL(arg, Timedelta) == NPY_DATETIME_NAT); + if (PyArray_IsScalar(element, Timedelta)) { + return PyBool_FromLong(PyArrayScalar_VAL(element, Timedelta) == NPY_DATETIME_NAT); } Py_RETURN_FALSE; @@ -4042,7 +4057,10 @@ static PyMethodDef arraykit_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"count_iteration", count_iteration, METH_O, NULL}, - {"isna_element", isna_element, METH_O, NULL}, + {"isna_element", + (PyCFunction)isna_element, + METH_VARARGS | METH_KEYWORDS, + NULL}, {"dtype_from_element", dtype_from_element, METH_O, NULL}, {"get_new_indexers_and_screen", (PyCFunction)get_new_indexers_and_screen, diff --git a/test/test_util.py b/test/test_util.py index 5b355cdb..12e3476e 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -273,7 +273,7 @@ def test_array_deepcopy_f(self) -> None: a2 = array_deepcopy(a1) self.assertNotEqual(id(a1), id(a2)) - def test_isna_element_true(self) -> None: + def test_isna_element_a(self) -> None: class FloatSubclass(float): pass class ComplexSubclass(complex): pass @@ -308,7 +308,7 @@ class ComplexSubclass(complex): pass self.assertTrue(isna_element(-float('NaN'))) self.assertTrue(isna_element(None)) - def test_isna_element_false(self) -> None: + def test_isna_element_b(self) -> None: # Test a wide range of float values, with different precision, across types for val in ( 1e-1000, 1e-309, 1e-39, 1e-16, 1e-5, 0.1, 0., 1.0, 1e5, 1e16, 1e39, 1e309, 1e1000, @@ -326,6 +326,12 @@ def test_isna_element_false(self) -> None: self.assertFalse(isna_element(datetime.date(2020, 12, 31))) self.assertFalse(isna_element(False)) + + def test_isna_element_c(self) -> None: + self.assertFalse(isna_element(None, include_none=False)) + self.assertTrue(isna_element(None, include_none=True)) + + #--------------------------------------------------------------------------- def test_dtype_from_element_core_dtypes(self) -> None: From d4559661e98108df24dcbef2c61f7d3d17876777 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 7 Feb 2023 14:57:16 -0800 Subject: [PATCH 2/7] isna_element takes kwargs, include_none --- test/test_util.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_util.py b/test/test_util.py index 12e3476e..aaa54751 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -330,6 +330,8 @@ def test_isna_element_b(self) -> None: def test_isna_element_c(self) -> None: self.assertFalse(isna_element(None, include_none=False)) self.assertTrue(isna_element(None, include_none=True)) + self.assertFalse(isna_element(None, False)) + self.assertTrue(isna_element(None, True)) #--------------------------------------------------------------------------- From c034534ca33de24aaae200191c591b729ec1f7aa Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 7 Feb 2023 17:25:25 -0800 Subject: [PATCH 3/7] implemented identification of Pandas NATs via to_numpy checks --- src/_arraykit.c | 11 +++++++++-- tasks.py | 2 +- test/test_util.py | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/_arraykit.c b/src/_arraykit.c index f4c35c96..a77bd533 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -3451,12 +3451,19 @@ isna_element(PyObject *m, PyObject *args, PyObject *kwargs) if (PyArray_IsScalar(element, Datetime)) { return PyBool_FromLong(PyArrayScalar_VAL(element, Datetime) == NPY_DATETIME_NAT); } - // NaT - Timedelta if (PyArray_IsScalar(element, Timedelta)) { return PyBool_FromLong(PyArrayScalar_VAL(element, Timedelta) == NPY_DATETIME_NAT); } - + if (PyObject_HasAttrString(element, "to_numpy")) { + PyObject *to_numpy = PyObject_GetAttrString(element, "to_numpy"); + if (!PyCallable_Check(to_numpy)) { + Py_RETURN_FALSE; + } + PyObject* post = PyObject_CallFunction(to_numpy, NULL); + if (post == NULL) return NULL; + return PyBool_FromLong(PyArrayScalar_VAL(post, Datetime) == NPY_DATETIME_NAT); + } Py_RETURN_FALSE; } diff --git a/tasks.py b/tasks.py index 5a091599..ad3478a3 100644 --- a/tasks.py +++ b/tasks.py @@ -26,7 +26,7 @@ def clean(context): @invoke.task(clean) def build(context): - context.run('pip install -r requirements-test.txt', echo=True, pty=True) + # context.run('pip install -r requirements-test.txt', echo=True, pty=True) # keep verbose to see warnings context.run(f'{sys.executable} -m pip -v install .', echo=True, pty=True) diff --git a/test/test_util.py b/test/test_util.py index aaa54751..c534cbd7 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -5,6 +5,7 @@ import warnings from io import StringIO import numpy as np # type: ignore +import pandas as pd from arraykit import resolve_dtype from arraykit import resolve_dtype_iter @@ -333,6 +334,19 @@ def test_isna_element_c(self) -> None: self.assertFalse(isna_element(None, False)) self.assertTrue(isna_element(None, True)) + def test_isna_element_d(self) -> None: + ts = pd.Timestamp('nat') + self.assertTrue(isna_element(ts)) + + + def test_isna_element_d(self) -> None: + from types import SimpleNamespace + sn = SimpleNamespace() + sn.to_numpy = None + self.assertFalse(isna_element(sn)) + + + #--------------------------------------------------------------------------- From e353916bad3244f84728e6bdf51f868f70d15313 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 7 Feb 2023 18:19:25 -0800 Subject: [PATCH 4/7] reduce most-recent numpy version to 1.21 --- requirements-build.txt | 2 +- requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-build.txt b/requirements-build.txt index 1f2c987a..ee6dfb44 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -1 +1 @@ -numpy==1.23.5 +numpy==1.21.6 diff --git a/requirements-test.txt b/requirements-test.txt index 51b335e6..caf54e3a 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,4 +1,4 @@ -numpy==1.23.5 +numpy==1.21.6 pytest==7.1.2 pylint==2.7.4 invoke==1.4.0 From e276b7bdba868e493ea5ed32b4e626b3b87c3962 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 7 Feb 2023 18:30:30 -0800 Subject: [PATCH 5/7] updated numpy to 1.22.4 --- requirements-build.txt | 2 +- requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-build.txt b/requirements-build.txt index ee6dfb44..82ed9d4f 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -1 +1 @@ -numpy==1.21.6 +numpy==1.22.4 diff --git a/requirements-test.txt b/requirements-test.txt index caf54e3a..86859a22 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,4 +1,4 @@ -numpy==1.21.6 +numpy==1.22.4 pytest==7.1.2 pylint==2.7.4 invoke==1.4.0 From 8fc8b808670fa318bf41f8dcb7b8a9a3a1b2735e Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 7 Feb 2023 19:02:10 -0800 Subject: [PATCH 6/7] updated numpy to 1.23.5 --- requirements-build.txt | 2 +- requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-build.txt b/requirements-build.txt index 82ed9d4f..1f2c987a 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -1 +1 @@ -numpy==1.22.4 +numpy==1.23.5 diff --git a/requirements-test.txt b/requirements-test.txt index 86859a22..51b335e6 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,4 +1,4 @@ -numpy==1.22.4 +numpy==1.23.5 pytest==7.1.2 pylint==2.7.4 invoke==1.4.0 From f96aae4a1c58cf97b793e8506a325a652193dbdb Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 7 Feb 2023 19:03:28 -0800 Subject: [PATCH 7/7] added comment --- src/_arraykit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/_arraykit.c b/src/_arraykit.c index a77bd533..f3e34a01 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -3455,6 +3455,7 @@ isna_element(PyObject *m, PyObject *args, PyObject *kwargs) if (PyArray_IsScalar(element, Timedelta)) { return PyBool_FromLong(PyArrayScalar_VAL(element, Timedelta) == NPY_DATETIME_NAT); } + // Try to identify Pandas Timestamp NATs if (PyObject_HasAttrString(element, "to_numpy")) { PyObject *to_numpy = PyObject_GetAttrString(element, "to_numpy"); if (!PyCallable_Check(to_numpy)) {