Skip to content

Extend isna_element to support include_none arg #90

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 53 additions & 27 deletions src/_arraykit.c
Original file line number Diff line number Diff line change
Expand Up @@ -3385,63 +3385,86 @@ dtype_from_element(PyObject *Py_UNUSED(m), PyObject *arg)
return (PyObject*)PyArray_DescrFromType(NPY_OBJECT);
}

/* Keyword names accepted by isna_element, listed in positional order.
   The trailing NULL is the sentinel that terminates the list when it is
   handed to PyArg_ParseTupleAndKeywords. */
static char *isna_element_kwarg_names[] = {"element", "include_none", NULL};

static PyObject *
isna_element(PyObject *Py_UNUSED(m), PyObject *arg)
isna_element(PyObject *m, PyObject *args, PyObject *kwargs)
{
PyObject *element;
int include_none = 1;
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
"O|p:isna_element", isna_element_kwarg_names,
&element,
&include_none)) {
return NULL;
}

// None
if (arg == Py_None) {
if (include_none && element == Py_None) {
Py_RETURN_TRUE;
}

// NaN
if (PyFloat_Check(arg)) {
return PyBool_FromLong(isnan(PyFloat_AS_DOUBLE(arg)));
if (PyFloat_Check(element)) {
return PyBool_FromLong(isnan(PyFloat_AS_DOUBLE(element)));
}
if (PyArray_IsScalar(arg, Half)) {
return PyBool_FromLong(npy_half_isnan(PyArrayScalar_VAL(arg, Half)));
if (PyArray_IsScalar(element, Half)) {
return PyBool_FromLong(npy_half_isnan(PyArrayScalar_VAL(element, Half)));
}
if (PyArray_IsScalar(arg, Float32)) {
return PyBool_FromLong(isnan(PyArrayScalar_VAL(arg, Float32)));
if (PyArray_IsScalar(element, Float32)) {
return PyBool_FromLong(isnan(PyArrayScalar_VAL(element, Float32)));
}
if (PyArray_IsScalar(arg, Float64)) {
return PyBool_FromLong(isnan(PyArrayScalar_VAL(arg, Float64)));
if (PyArray_IsScalar(element, Float64)) {
return PyBool_FromLong(isnan(PyArrayScalar_VAL(element, Float64)));
}
# ifdef PyFloat128ArrType_Type
if (PyArray_IsScalar(arg, Float128)) {
return PyBool_FromLong(isnan(PyArrayScalar_VAL(arg, Float128)));
if (PyArray_IsScalar(element, Float128)) {
return PyBool_FromLong(isnan(PyArrayScalar_VAL(element, Float128)));
}
# endif

// Complex NaN
if (PyComplex_Check(arg)) {
Py_complex val = ((PyComplexObject*)arg)->cval;
if (PyComplex_Check(element)) {
Py_complex val = ((PyComplexObject*)element)->cval;
return PyBool_FromLong(isnan(val.real) || isnan(val.imag));
}
if (PyArray_IsScalar(arg, Complex64)) {
npy_cfloat val = PyArrayScalar_VAL(arg, Complex64);
if (PyArray_IsScalar(element, Complex64)) {
npy_cfloat val = PyArrayScalar_VAL(element, Complex64);
return PyBool_FromLong(isnan(val.real) || isnan(val.imag));
}
if (PyArray_IsScalar(arg, Complex128)) {
npy_cdouble val = PyArrayScalar_VAL(arg, Complex128);
if (PyArray_IsScalar(element, Complex128)) {
npy_cdouble val = PyArrayScalar_VAL(element, Complex128);
return PyBool_FromLong(isnan(val.real) || isnan(val.imag));
}
# ifdef PyComplex256ArrType_Type
if (PyArray_IsScalar(arg, Complex256)) {
npy_clongdouble val = PyArrayScalar_VAL(arg, Complex256);
if (PyArray_IsScalar(element, Complex256)) {
npy_clongdouble val = PyArrayScalar_VAL(element, Complex256);
return PyBool_FromLong(isnan(val.real) || isnan(val.imag));
}
# endif

// NaT - Datetime
if (PyArray_IsScalar(arg, Datetime)) {
return PyBool_FromLong(PyArrayScalar_VAL(arg, Datetime) == NPY_DATETIME_NAT);
if (PyArray_IsScalar(element, Datetime)) {
return PyBool_FromLong(PyArrayScalar_VAL(element, Datetime) == NPY_DATETIME_NAT);
}

// NaT - Timedelta
if (PyArray_IsScalar(arg, Timedelta)) {
return PyBool_FromLong(PyArrayScalar_VAL(arg, Timedelta) == NPY_DATETIME_NAT);
if (PyArray_IsScalar(element, Timedelta)) {
return PyBool_FromLong(PyArrayScalar_VAL(element, Timedelta) == NPY_DATETIME_NAT);
}
// Try to identify Pandas Timestamp NaTs: objects exposing a callable
// `to_numpy` whose result is a NumPy datetime64 / timedelta64 scalar.
if (PyObject_HasAttrString(element, "to_numpy")) {
    // PyObject_GetAttrString returns a new reference (or NULL on error);
    // it must be released on every path out of this branch.
    PyObject *to_numpy = PyObject_GetAttrString(element, "to_numpy");
    if (to_numpy == NULL) {
        return NULL;
    }
    if (!PyCallable_Check(to_numpy)) {
        Py_DECREF(to_numpy);
        Py_RETURN_FALSE;
    }
    PyObject *post = PyObject_CallFunction(to_numpy, NULL);
    Py_DECREF(to_numpy);
    if (post == NULL) {
        return NULL;
    }
    // Only read the scalar payload after confirming the result's type;
    // `to_numpy` on arbitrary objects may return arrays or anything else,
    // and PyArrayScalar_VAL performs an unchecked cast.
    int is_nat = 0;
    if (PyArray_IsScalar(post, Datetime)) {
        is_nat = PyArrayScalar_VAL(post, Datetime) == NPY_DATETIME_NAT;
    }
    else if (PyArray_IsScalar(post, Timedelta)) {
        is_nat = PyArrayScalar_VAL(post, Timedelta) == NPY_DATETIME_NAT;
    }
    Py_DECREF(post);
    return PyBool_FromLong(is_nat);
}

Py_RETURN_FALSE;
}

Expand Down Expand Up @@ -4042,7 +4065,10 @@ static PyMethodDef arraykit_methods[] = {
METH_VARARGS | METH_KEYWORDS,
NULL},
{"count_iteration", count_iteration, METH_O, NULL},
{"isna_element", isna_element, METH_O, NULL},
{"isna_element",
(PyCFunction)isna_element,
METH_VARARGS | METH_KEYWORDS,
NULL},
{"dtype_from_element", dtype_from_element, METH_O, NULL},
{"get_new_indexers_and_screen",
(PyCFunction)get_new_indexers_and_screen,
Expand Down
2 changes: 1 addition & 1 deletion tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def clean(context):

@invoke.task(clean)
def build(context):
context.run('pip install -r requirements-test.txt', echo=True, pty=True)
# context.run('pip install -r requirements-test.txt', echo=True, pty=True)
# keep verbose to see warnings
context.run(f'{sys.executable} -m pip -v install .', echo=True, pty=True)

Expand Down
26 changes: 24 additions & 2 deletions test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import warnings
from io import StringIO
import numpy as np # type: ignore
import pandas as pd

from arraykit import resolve_dtype
from arraykit import resolve_dtype_iter
Expand Down Expand Up @@ -273,7 +274,7 @@ def test_array_deepcopy_f(self) -> None:
a2 = array_deepcopy(a1)
self.assertNotEqual(id(a1), id(a2))

def test_isna_element_true(self) -> None:
def test_isna_element_a(self) -> None:
class FloatSubclass(float): pass
class ComplexSubclass(complex): pass

Expand Down Expand Up @@ -308,7 +309,7 @@ class ComplexSubclass(complex): pass
self.assertTrue(isna_element(-float('NaN')))
self.assertTrue(isna_element(None))

def test_isna_element_false(self) -> None:
def test_isna_element_b(self) -> None:
# Test a wide range of float values, with different precision, across types
for val in (
1e-1000, 1e-309, 1e-39, 1e-16, 1e-5, 0.1, 0., 1.0, 1e5, 1e16, 1e39, 1e309, 1e1000,
Expand All @@ -326,6 +327,27 @@ def test_isna_element_false(self) -> None:
self.assertFalse(isna_element(datetime.date(2020, 12, 31)))
self.assertFalse(isna_element(False))


# Exercises the include_none flag of isna_element with None as the element.
def test_isna_element_c(self) -> None:
# include_none passed by keyword: False suppresses the None match, True keeps it.
self.assertFalse(isna_element(None, include_none=False))
self.assertTrue(isna_element(None, include_none=True))
# Same expectations with include_none passed positionally as the second argument.
self.assertFalse(isna_element(None, False))
self.assertTrue(isna_element(None, True))

# A pandas timestamp built from the string 'nat' is expected to be reported as NA.
def test_isna_element_d(self) -> None:
ts = pd.Timestamp('nat')
self.assertTrue(isna_element(ts))


def test_isna_element_e(self) -> None:
    """An object whose `to_numpy` attribute is not callable is not NA.

    Named `_e` rather than `_d` so it does not rebind (and thereby hide
    from the test runner) the pandas NaT test of that name.
    """
    from types import SimpleNamespace
    sn = SimpleNamespace()
    # Non-callable attribute: isna_element must return False, not raise.
    sn.to_numpy = None
    self.assertFalse(isna_element(sn))




#---------------------------------------------------------------------------

def test_dtype_from_element_core_dtypes(self) -> None:
Expand Down