Skip to content
This repository has been archived by the owner on Mar 6, 2023. It is now read-only.

Commit

Permalink
ENH: Add generalized ufuncs nansum(), nancumsum(), nanmean(), nanstd()
Browse files Browse the repository at this point in the history
  • Loading branch information
rigtorp committed Dec 31, 2010
1 parent 9451260 commit ea2e9f4
Show file tree
Hide file tree
Showing 2 changed files with 347 additions and 0 deletions.
287 changes: 287 additions & 0 deletions numpy/core/src/umath/umath_tests.c.src
Expand Up @@ -38,6 +38,9 @@ typedef npy_intp intp;
INIT_OUTER_LOOP_3 \ INIT_OUTER_LOOP_3 \
intp s3 = *steps++; intp s3 = *steps++;


#define BEGIN_OUTER_LOOP_2 \
for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1) {

#define BEGIN_OUTER_LOOP_3 \ #define BEGIN_OUTER_LOOP_3 \
for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2) { for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2) {


Expand Down Expand Up @@ -124,6 +127,7 @@ static void


char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)"; char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";



/**begin repeat /**begin repeat
#TYPE=FLOAT,DOUBLE,LONG# #TYPE=FLOAT,DOUBLE,LONG#
Expand Down Expand Up @@ -177,6 +181,229 @@ static void


/**end repeat**/ /**end repeat**/


char *mean_signature = "(i)->()";

/**begin repeat
#TYPE=DOUBLE#
#typ=npy_double#
*/

static void
@TYPE@_mean(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
{
INIT_OUTER_LOOP_2
intp di = dimensions[0];
intp i;
intp is1=steps[0];
BEGIN_OUTER_LOOP_2
char *ip1=args[0], *op=args[1];
@typ@ sum = 0;
for (i = 0; i < di; i++) {
sum += (*(@typ@ *)ip1);
ip1 += is1;
}
*(@typ@ *)op = sum / (@typ@) di;
END_OUTER_LOOP
}

/**end repeat**/

char *std_signature = "(i)->()";

/**begin repeat
#TYPE=DOUBLE#
#typ=npy_double#
*/

static void
@TYPE@_std(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
{
INIT_OUTER_LOOP_2
intp di = dimensions[0];
intp i;
intp is1=steps[0];
BEGIN_OUTER_LOOP_2
char *ip1=args[0], *op=args[1];
@typ@ sum = 0;
@typ@ sum2 = 0;
@typ@ mean;
for (i = 0; i < di; i++) {
sum += (*(@typ@ *)ip1);
ip1 += is1;
}
mean = sum / (@typ@) di;
ip1 = args[0];
for (i = 0; i < di; i++) {
const @typ@ value = (*(@typ@ *)ip1);
const @typ@ tmp = value - mean;
sum2 += tmp * tmp;
ip1 += is1;
}
*(@typ@ *)op = sqrt(sum2 / (@typ@) di);
END_OUTER_LOOP
}

/**end repeat**/

char *nansum_signature = "(i)->()";

/**begin repeat
#TYPE=DOUBLE#
#typ=npy_double#
*/

static void
@TYPE@_nansum(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
{
INIT_OUTER_LOOP_2
intp di = dimensions[0];
intp i;
intp is1=steps[0];
BEGIN_OUTER_LOOP_2
char *ip1=args[0], *op=args[1];
@typ@ sum = 0;
intp n = 0;
for (i = 0; i < di; i++) {
const @typ@ value = (*(@typ@ *)ip1);
ip1 += is1;
if (npy_isnan(value)) continue;
sum += value;
n++;
}
if (n == 0) {
*(@typ@ *)op = NPY_NAN;
} else {
*(@typ@ *)op = sum;
}
END_OUTER_LOOP
}

/**end repeat**/

char *nancumsum_signature = "(i)->(i)";

/**begin repeat
#TYPE=DOUBLE#
#typ=npy_double#
*/

static void
@TYPE@_nancumsum(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
{
INIT_OUTER_LOOP_2
intp di = dimensions[0];
intp i;
intp is1=steps[0]; intp os=steps[1];
BEGIN_OUTER_LOOP_2
char *ip1=args[0], *op=args[1];
@typ@ sum = 0;
intp n = 0;
for (i = 0; i < di; i++) {
const @typ@ value = (*(@typ@ *)ip1);
if (!npy_isnan(value)) {
sum += value;
n++;
}
if (n == 0) {
*(@typ@ *)op = NPY_NAN;
} else {
*(@typ@ *)op = sum;
}
ip1 += is1;
op += os;
}
END_OUTER_LOOP
}

/**end repeat**/

char *nanmean_signature = "(i)->()";

/**begin repeat
#TYPE=DOUBLE#
#typ=npy_double#
*/

static void
@TYPE@_nanmean(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
{
INIT_OUTER_LOOP_2
intp di = dimensions[0];
intp i;
intp is1=steps[0];
BEGIN_OUTER_LOOP_2
char *ip1=args[0], *op=args[1];
@typ@ sum = 0;
intp n = 0;
for (i = 0; i < di; i++) {
const @typ@ value = (*(@typ@ *)ip1);
ip1 += is1;
if (npy_isnan(value)) continue;
sum += value;
n++;
}
if (n == 0) {
*(@typ@ *)op = NPY_NAN;
} else {
*(@typ@ *)op = sum / n;
}
END_OUTER_LOOP
}

/**end repeat**/

char *nanstd_signature = "(i)->()";

/**begin repeat
#TYPE=DOUBLE#
#typ=npy_double#
*/

static void
@TYPE@_nanstd(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
{
INIT_OUTER_LOOP_2
intp di = dimensions[0];
intp i;
intp is1=steps[0];
BEGIN_OUTER_LOOP_2
char *ip1=args[0], *op=args[1];
@typ@ sum = 0;
@typ@ sum2 = 0;
@typ@ mean;
intp n = 0;
for (i = 0; i < di; i++) {
const @typ@ value = (*(@typ@ *)ip1);
ip1 += is1;
if (npy_isnan(value)) continue;
sum += value;
n++;
}
mean = sum / n;
ip1 = args[0];
for (i = 0; i < di; i++) {
const @typ@ value = (*(@typ@ *)ip1);
ip1 += is1;
if (npy_isnan(value)) continue;
const @typ@ tmp = value - mean;
sum2 += tmp * tmp;
}
if (n == 0) {
*(@typ@ *)op = NPY_NAN;
} else {
*(@typ@ *)op = sqrt(sum2 / n);
}
END_OUTER_LOOP
}

/**end repeat**/

/* The following lines were generated using a slightly modified /* The following lines were generated using a slightly modified
version of code_generators/generate_umath.py and adding these version of code_generators/generate_umath.py and adding these
lines to defdict: lines to defdict:
Expand Down Expand Up @@ -207,6 +434,24 @@ static char innerwt_signatures[] = { PyArray_LONG, PyArray_LONG, PyArray_LONG, P
static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multiply, FLOAT_matrix_multiply, DOUBLE_matrix_multiply }; static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multiply, FLOAT_matrix_multiply, DOUBLE_matrix_multiply };
static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL }; static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL };
static char matrix_multiply_signatures[] = { PyArray_LONG, PyArray_LONG, PyArray_LONG, PyArray_FLOAT, PyArray_FLOAT, PyArray_FLOAT, PyArray_DOUBLE, PyArray_DOUBLE, PyArray_DOUBLE }; static char matrix_multiply_signatures[] = { PyArray_LONG, PyArray_LONG, PyArray_LONG, PyArray_FLOAT, PyArray_FLOAT, PyArray_FLOAT, PyArray_DOUBLE, PyArray_DOUBLE, PyArray_DOUBLE };
static PyUFuncGenericFunction mean_functions[] = { DOUBLE_mean };
static void * mean_data[] = { (void *)NULL };
static char mean_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
static PyUFuncGenericFunction std_functions[] = { DOUBLE_std };
static void * std_data[] = { (void *)NULL };
static char std_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
static PyUFuncGenericFunction nansum_functions[] = { DOUBLE_nansum };
static void * nansum_data[] = { (void *)NULL };
static char nansum_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
static PyUFuncGenericFunction nancumsum_functions[] = { DOUBLE_nancumsum };
static void * nancumsum_data[] = { (void *)NULL };
static char nancumsum_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
static PyUFuncGenericFunction nanmean_functions[] = { DOUBLE_nanmean };
static void * nanmean_data[] = { (void *)NULL };
static char nanmean_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
static PyUFuncGenericFunction nanstd_functions[] = { DOUBLE_nanstd };
static void * nanstd_data[] = { (void *)NULL };
static char nanstd_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };


static void static void
addUfuncs(PyObject *dictionary) { addUfuncs(PyObject *dictionary) {
Expand Down Expand Up @@ -234,6 +479,48 @@ addUfuncs(PyObject *dictionary) {
0, matrix_multiply_signature); 0, matrix_multiply_signature);
PyDict_SetItemString(dictionary, "matrix_multiply", f); PyDict_SetItemString(dictionary, "matrix_multiply", f);
Py_DECREF(f); Py_DECREF(f);
f = PyUFunc_FromFuncAndDataAndSignature(mean_functions, mean_data, mean_signatures, 1,
1, 1, PyUFunc_None, "mean",
"mean on the last dimension and broadcast on the rest\n"\
" \"(i)->()\" \n",
0, mean_signature);
PyDict_SetItemString(dictionary, "mean", f);
Py_DECREF(f);
f = PyUFunc_FromFuncAndDataAndSignature(std_functions, std_data, std_signatures, 1,
1, 1, PyUFunc_None, "std",
"std on the last dimension and broadcast on the rest\n"\
" \"(i)->()\" \n",
0, std_signature);
PyDict_SetItemString(dictionary, "std", f);
Py_DECREF(f);
f = PyUFunc_FromFuncAndDataAndSignature(nansum_functions, nansum_data, nansum_signatures, 1,
1, 1, PyUFunc_None, "nansum",
"nansum on the last dimension and broadcast on the rest\n"\
" \"(i)->()\" \n",
0, nansum_signature);
PyDict_SetItemString(dictionary, "nansum", f);
Py_DECREF(f);
f = PyUFunc_FromFuncAndDataAndSignature(nancumsum_functions, nancumsum_data, nancumsum_signatures, 1,
1, 1, PyUFunc_None, "nancumsum",
"nancumsum on the last dimension and broadcast on the rest\n"\
" \"(i)->(i)\" \n",
0, nancumsum_signature);
PyDict_SetItemString(dictionary, "nancumsum", f);
Py_DECREF(f);
f = PyUFunc_FromFuncAndDataAndSignature(nanmean_functions, nanmean_data, nanmean_signatures, 1,
1, 1, PyUFunc_None, "nanmean",
"nanmean on the last dimension and broadcast on the rest\n"\
" \"(i)->()\" \n",
0, nanmean_signature);
PyDict_SetItemString(dictionary, "nanmean", f);
Py_DECREF(f);
f = PyUFunc_FromFuncAndDataAndSignature(nanstd_functions, nanstd_data, nanstd_signatures, 1,
1, 1, PyUFunc_None, "nanstd",
"nanstd on the last dimension and broadcast on the rest\n"\
" \"(i)->()\" \n",
0, nanstd_signature);
PyDict_SetItemString(dictionary, "nanstd", f);
Py_DECREF(f);
} }


/* /*
Expand Down
60 changes: 60 additions & 0 deletions numpy/core/tests/test_ufunc.py
Expand Up @@ -421,5 +421,65 @@ def broadcastable(s1,s2):


assert_equal(ref, True, err_msg="reference check") assert_equal(ref, True, err_msg="reference check")


class TestGeneralizeUfuncs(TestCase):
def setUp(self):
self.A = np.array([[0.57311211, 0.5566819 , 0.6124054 , 0.66216366, 0.00850151],
[0.06834556, 0.02745207, 0.98692964, 0.67906414, 0.37794816],
[0.21752169, 0.37686867, 0.93973352, 0.49818713, 0.21303216],
[0.08066141, 0.37775952, 0.56552893, 0.18719725, 0.64559197],
[0.99428521, 0.24205622, 0.63299425, 0.92321895, 0.94189936]])

def test_mean(self):
assert_almost_equal(umt.mean(self.A.flatten()), 0.49556561593504361)
assert_almost_equal(umt.mean(self.A),
np.array([0.48257292, 0.42794791, 0.44906863, 0.37134782, 0.7468908]))

def test_std(self):
assert_almost_equal(umt.std(self.A.flatten()),0.30789518960654322)
assert_almost_equal(umt.std(self.A),
np.array([0.2398093, 0.36543965, 0.26747138, 0.21511266, 0.28218941]))

class TestGeneralizeNaNUfuncs(TestCase):
def setUp(self):
self.A = np.array([[[np.nan, 0.01319214, 0.01620964],
[0.11704017, np.nan, 0.75157887],
[0.28333658, 0.1630199 , np.nan]],
[[0.59541557, np.nan, 0.37910852],
[np.nan, 0.87964135, np.nan],
[0.70543747, np.nan, 0.34306596]],
[[0.72687499, 0.91084584, np.nan],
[0.84386844, 0.38944762, 0.23913896],
[np.nan, 0.37068164, 0.33850425]]])

def test_nansum(self):
assert_almost_equal(umt.nansum(self.A.flatten()), 8.0664079100000006)
assert_almost_equal(np.swapaxes(umt.nansum(np.swapaxes(self.A,0,2)), 1, 0),
np.array([[1.32229056, 0.92403798, 0.39531816],
[0.96090861, 1.26908897, 0.99071783],
[0.98877405, 0.53370154, 0.68157021]]))
assert_almost_equal(umt.nansum(np.swapaxes(self.A,1,2)),
np.array([[0.40037675, 0.17621204, 0.76778851],
[1.30085304, 0.87964135, 0.72217448],
[1.57074343, 1.6709751 , 0.57764321]]))
assert_almost_equal(umt.nansum(self.A),
np.array([[0.02940178, 0.86861904, 0.44635648],
[0.97452409, 0.87964135, 1.04850343],
[1.63772083, 1.47245502, 0.70918589]]))

def test_nancumsum(self):
assert_almost_equal(umt.nancumsum(self.A.flatten()),
np.array([np.nan, 0.01319214, 0.02940178, 0.14644195, 0.14644195,
0.89802082, 1.1813574, 1.3443773, 1.3443773, 1.93979287,
1.93979287, 2.31890139, 2.31890139, 3.19854274, 3.19854274,
3.90398021, 3.90398021, 4.24704617, 4.97392116, 5.884767,
5.884767, 6.72863544, 7.11808306, 7.35722202, 7.35722202,
7.72790366, 8.06640791]))

def test_nanmean(self):
assert_almost_equal(umt.nanmean(self.A.flatten()), 0.44813377277777783)

def test_nanstd(self):
assert_almost_equal(umt.nanstd(self.A.flatten()), 0.28764635583291526)

if __name__ == "__main__": if __name__ == "__main__":
run_module_suite() run_module_suite()

0 comments on commit ea2e9f4

Please sign in to comment.