Permalink
Browse files

ENH: Add generalized ufuncs nansum(), nancumsum(), nanmean(), nanstd()

  • Loading branch information...
1 parent 9451260 commit ea2e9f4499dac040fedef5cdc568c69e511e3155 @rigtorp committed Dec 31, 2010
Showing with 347 additions and 0 deletions.
  1. +287 −0 numpy/core/src/umath/umath_tests.c.src
  2. +60 −0 numpy/core/tests/test_ufunc.py
@@ -38,6 +38,9 @@ typedef npy_intp intp;
INIT_OUTER_LOOP_3 \
intp s3 = *steps++;
+#define BEGIN_OUTER_LOOP_2 \
+ for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1) {
+
#define BEGIN_OUTER_LOOP_3 \
for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2) {
@@ -124,6 +127,7 @@ static void
char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";
+
/**begin repeat
#TYPE=FLOAT,DOUBLE,LONG#
@@ -177,6 +181,229 @@ static void
/**end repeat**/
+char *mean_signature = "(i)->()";
+
+/**begin repeat
+
+ #TYPE=DOUBLE#
+ #typ=npy_double#
+*/
+
+static void
+@TYPE@_mean(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+ INIT_OUTER_LOOP_2
+ intp di = dimensions[0];
+ intp i;
+ intp is1=steps[0];
+ BEGIN_OUTER_LOOP_2
+ char *ip1=args[0], *op=args[1];
+ @typ@ sum = 0;
+ for (i = 0; i < di; i++) {
+ sum += (*(@typ@ *)ip1);
+ ip1 += is1;
+ }
+ *(@typ@ *)op = sum / (@typ@) di;
+ END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+char *std_signature = "(i)->()";
+
+/**begin repeat
+
+ #TYPE=DOUBLE#
+ #typ=npy_double#
+*/
+
+static void
+@TYPE@_std(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+ INIT_OUTER_LOOP_2
+ intp di = dimensions[0];
+ intp i;
+ intp is1=steps[0];
+ BEGIN_OUTER_LOOP_2
+ char *ip1=args[0], *op=args[1];
+ @typ@ sum = 0;
+ @typ@ sum2 = 0;
+ @typ@ mean;
+ for (i = 0; i < di; i++) {
+ sum += (*(@typ@ *)ip1);
+ ip1 += is1;
+ }
+ mean = sum / (@typ@) di;
+ ip1 = args[0];
+ for (i = 0; i < di; i++) {
+ const @typ@ value = (*(@typ@ *)ip1);
+ const @typ@ tmp = value - mean;
+ sum2 += tmp * tmp;
+ ip1 += is1;
+ }
+ *(@typ@ *)op = sqrt(sum2 / (@typ@) di);
+ END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+char *nansum_signature = "(i)->()";
+
+/**begin repeat
+
+ #TYPE=DOUBLE#
+ #typ=npy_double#
+*/
+
+static void
+@TYPE@_nansum(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+ INIT_OUTER_LOOP_2
+ intp di = dimensions[0];
+ intp i;
+ intp is1=steps[0];
+ BEGIN_OUTER_LOOP_2
+ char *ip1=args[0], *op=args[1];
+ @typ@ sum = 0;
+ intp n = 0;
+ for (i = 0; i < di; i++) {
+ const @typ@ value = (*(@typ@ *)ip1);
+ ip1 += is1;
+ if (npy_isnan(value)) continue;
+ sum += value;
+ n++;
+ }
+ if (n == 0) {
+ *(@typ@ *)op = NPY_NAN;
+ } else {
+ *(@typ@ *)op = sum;
+ }
+ END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+char *nancumsum_signature = "(i)->(i)";
+
+/**begin repeat
+
+ #TYPE=DOUBLE#
+ #typ=npy_double#
+*/
+
+static void
+@TYPE@_nancumsum(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+ INIT_OUTER_LOOP_2
+ intp di = dimensions[0];
+ intp i;
+ intp is1=steps[0]; intp os=steps[1];
+ BEGIN_OUTER_LOOP_2
+ char *ip1=args[0], *op=args[1];
+ @typ@ sum = 0;
+ intp n = 0;
+ for (i = 0; i < di; i++) {
+ const @typ@ value = (*(@typ@ *)ip1);
+ if (!npy_isnan(value)) {
+ sum += value;
+ n++;
+ }
+ if (n == 0) {
+ *(@typ@ *)op = NPY_NAN;
+ } else {
+ *(@typ@ *)op = sum;
+ }
+ ip1 += is1;
+ op += os;
+ }
+ END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+char *nanmean_signature = "(i)->()";
+
+/**begin repeat
+
+ #TYPE=DOUBLE#
+ #typ=npy_double#
+*/
+
+static void
+@TYPE@_nanmean(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+ INIT_OUTER_LOOP_2
+ intp di = dimensions[0];
+ intp i;
+ intp is1=steps[0];
+ BEGIN_OUTER_LOOP_2
+ char *ip1=args[0], *op=args[1];
+ @typ@ sum = 0;
+ intp n = 0;
+ for (i = 0; i < di; i++) {
+ const @typ@ value = (*(@typ@ *)ip1);
+ ip1 += is1;
+ if (npy_isnan(value)) continue;
+ sum += value;
+ n++;
+ }
+ if (n == 0) {
+ *(@typ@ *)op = NPY_NAN;
+ } else {
+ *(@typ@ *)op = sum / n;
+ }
+ END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+char *nanstd_signature = "(i)->()";
+
+/**begin repeat
+
+ #TYPE=DOUBLE#
+ #typ=npy_double#
+*/
+
+static void
+@TYPE@_nanstd(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+ INIT_OUTER_LOOP_2
+ intp di = dimensions[0];
+ intp i;
+ intp is1=steps[0];
+ BEGIN_OUTER_LOOP_2
+ char *ip1=args[0], *op=args[1];
+ @typ@ sum = 0;
+ @typ@ sum2 = 0;
+ @typ@ mean;
+ intp n = 0;
+ for (i = 0; i < di; i++) {
+ const @typ@ value = (*(@typ@ *)ip1);
+ ip1 += is1;
+ if (npy_isnan(value)) continue;
+ sum += value;
+ n++;
+ }
+ mean = sum / n;
+ ip1 = args[0];
+ for (i = 0; i < di; i++) {
+ const @typ@ value = (*(@typ@ *)ip1);
+ ip1 += is1;
+ if (npy_isnan(value)) continue;
+ const @typ@ tmp = value - mean;
+ sum2 += tmp * tmp;
+ }
+ if (n == 0) {
+ *(@typ@ *)op = NPY_NAN;
+ } else {
+ *(@typ@ *)op = sqrt(sum2 / n);
+ }
+ END_OUTER_LOOP
+}
+
+/**end repeat**/
+
/* The following lines were generated using a slightly modified
version of code_generators/generate_umath.py and adding these
lines to defdict:
@@ -207,6 +434,24 @@ static char innerwt_signatures[] = { PyArray_LONG, PyArray_LONG, PyArray_LONG, P
static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multiply, FLOAT_matrix_multiply, DOUBLE_matrix_multiply };
static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL };
static char matrix_multiply_signatures[] = { PyArray_LONG, PyArray_LONG, PyArray_LONG, PyArray_FLOAT, PyArray_FLOAT, PyArray_FLOAT, PyArray_DOUBLE, PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction mean_functions[] = { DOUBLE_mean };
+static void * mean_data[] = { (void *)NULL };
+static char mean_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction std_functions[] = { DOUBLE_std };
+static void * std_data[] = { (void *)NULL };
+static char std_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction nansum_functions[] = { DOUBLE_nansum };
+static void * nansum_data[] = { (void *)NULL };
+static char nansum_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction nancumsum_functions[] = { DOUBLE_nancumsum };
+static void * nancumsum_data[] = { (void *)NULL };
+static char nancumsum_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction nanmean_functions[] = { DOUBLE_nanmean };
+static void * nanmean_data[] = { (void *)NULL };
+static char nanmean_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction nanstd_functions[] = { DOUBLE_nanstd };
+static void * nanstd_data[] = { (void *)NULL };
+static char nanstd_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
static void
addUfuncs(PyObject *dictionary) {
@@ -234,6 +479,48 @@ addUfuncs(PyObject *dictionary) {
0, matrix_multiply_signature);
PyDict_SetItemString(dictionary, "matrix_multiply", f);
Py_DECREF(f);
+ f = PyUFunc_FromFuncAndDataAndSignature(mean_functions, mean_data, mean_signatures, 1,
+ 1, 1, PyUFunc_None, "mean",
+ "mean on the last dimension and broadcast on the rest\n"\
+ " \"(i)->()\" \n",
+ 0, mean_signature);
+ PyDict_SetItemString(dictionary, "mean", f);
+ Py_DECREF(f);
+ f = PyUFunc_FromFuncAndDataAndSignature(std_functions, std_data, std_signatures, 1,
+ 1, 1, PyUFunc_None, "std",
+ "std on the last dimension and broadcast on the rest\n"\
+ " \"(i)->()\" \n",
+ 0, std_signature);
+ PyDict_SetItemString(dictionary, "std", f);
+ Py_DECREF(f);
+ f = PyUFunc_FromFuncAndDataAndSignature(nansum_functions, nansum_data, nansum_signatures, 1,
+ 1, 1, PyUFunc_None, "nansum",
+ "nansum on the last dimension and broadcast on the rest\n"\
+ " \"(i)->()\" \n",
+ 0, nansum_signature);
+ PyDict_SetItemString(dictionary, "nansum", f);
+ Py_DECREF(f);
+ f = PyUFunc_FromFuncAndDataAndSignature(nancumsum_functions, nancumsum_data, nancumsum_signatures, 1,
+ 1, 1, PyUFunc_None, "nancumsum",
+ "nancumsum on the last dimension and broadcast on the rest\n"\
+ " \"(i)->(i)\" \n",
+ 0, nancumsum_signature);
+ PyDict_SetItemString(dictionary, "nancumsum", f);
+ Py_DECREF(f);
+ f = PyUFunc_FromFuncAndDataAndSignature(nanmean_functions, nanmean_data, nanmean_signatures, 1,
+ 1, 1, PyUFunc_None, "nanmean",
+ "nanmean on the last dimension and broadcast on the rest\n"\
+ " \"(i)->()\" \n",
+ 0, nanmean_signature);
+ PyDict_SetItemString(dictionary, "nanmean", f);
+ Py_DECREF(f);
+ f = PyUFunc_FromFuncAndDataAndSignature(nanstd_functions, nanstd_data, nanstd_signatures, 1,
+ 1, 1, PyUFunc_None, "nanstd",
+ "nanstd on the last dimension and broadcast on the rest\n"\
+ " \"(i)->()\" \n",
+ 0, nanstd_signature);
+ PyDict_SetItemString(dictionary, "nanstd", f);
+ Py_DECREF(f);
}
/*
@@ -421,5 +421,65 @@ def broadcastable(s1,s2):
assert_equal(ref, True, err_msg="reference check")
+class TestGeneralizeUfuncs(TestCase):
+ def setUp(self):
+ self.A = np.array([[0.57311211, 0.5566819 , 0.6124054 , 0.66216366, 0.00850151],
+ [0.06834556, 0.02745207, 0.98692964, 0.67906414, 0.37794816],
+ [0.21752169, 0.37686867, 0.93973352, 0.49818713, 0.21303216],
+ [0.08066141, 0.37775952, 0.56552893, 0.18719725, 0.64559197],
+ [0.99428521, 0.24205622, 0.63299425, 0.92321895, 0.94189936]])
+
+ def test_mean(self):
+ assert_almost_equal(umt.mean(self.A.flatten()), 0.49556561593504361)
+ assert_almost_equal(umt.mean(self.A),
+ np.array([0.48257292, 0.42794791, 0.44906863, 0.37134782, 0.7468908]))
+
+ def test_std(self):
+ assert_almost_equal(umt.std(self.A.flatten()),0.30789518960654322)
+ assert_almost_equal(umt.std(self.A),
+ np.array([0.2398093, 0.36543965, 0.26747138, 0.21511266, 0.28218941]))
+
+class TestGeneralizeNaNUfuncs(TestCase):
+ def setUp(self):
+ self.A = np.array([[[np.nan, 0.01319214, 0.01620964],
+ [0.11704017, np.nan, 0.75157887],
+ [0.28333658, 0.1630199 , np.nan]],
+ [[0.59541557, np.nan, 0.37910852],
+ [np.nan, 0.87964135, np.nan],
+ [0.70543747, np.nan, 0.34306596]],
+ [[0.72687499, 0.91084584, np.nan],
+ [0.84386844, 0.38944762, 0.23913896],
+ [np.nan, 0.37068164, 0.33850425]]])
+
+ def test_nansum(self):
+ assert_almost_equal(umt.nansum(self.A.flatten()), 8.0664079100000006)
+ assert_almost_equal(np.swapaxes(umt.nansum(np.swapaxes(self.A,0,2)), 1, 0),
+ np.array([[1.32229056, 0.92403798, 0.39531816],
+ [0.96090861, 1.26908897, 0.99071783],
+ [0.98877405, 0.53370154, 0.68157021]]))
+ assert_almost_equal(umt.nansum(np.swapaxes(self.A,1,2)),
+ np.array([[0.40037675, 0.17621204, 0.76778851],
+ [1.30085304, 0.87964135, 0.72217448],
+ [1.57074343, 1.6709751 , 0.57764321]]))
+ assert_almost_equal(umt.nansum(self.A),
+ np.array([[0.02940178, 0.86861904, 0.44635648],
+ [0.97452409, 0.87964135, 1.04850343],
+ [1.63772083, 1.47245502, 0.70918589]]))
+
+ def test_nancumsum(self):
+ assert_almost_equal(umt.nancumsum(self.A.flatten()),
+ np.array([np.nan, 0.01319214, 0.02940178, 0.14644195, 0.14644195,
+ 0.89802082, 1.1813574, 1.3443773, 1.3443773, 1.93979287,
+ 1.93979287, 2.31890139, 2.31890139, 3.19854274, 3.19854274,
+ 3.90398021, 3.90398021, 4.24704617, 4.97392116, 5.884767,
+ 5.884767, 6.72863544, 7.11808306, 7.35722202, 7.35722202,
+ 7.72790366, 8.06640791]))
+
+ def test_nanmean(self):
+ assert_almost_equal(umt.nanmean(self.A.flatten()), 0.44813377277777783)
+
+ def test_nanstd(self):
+ assert_almost_equal(umt.nanstd(self.A.flatten()), 0.28764635583291526)
+
if __name__ == "__main__":
run_module_suite()

0 comments on commit ea2e9f4

Please sign in to comment.