ENH: Add generalized ufuncs nansum(), nancumsum(), nanmean(), nanstd()

rigtorp · Dec 31, 2010 · ea2e9f4 · ea2e9f4
1 parent 9451260
commit ea2e9f4
Show file tree

Hide file tree

Showing 2 changed files with 347 additions and 0 deletions.
diff --git a/numpy/core/src/umath/umath_tests.c.src b/numpy/core/src/umath/umath_tests.c.src
@@ -38,6 +38,9 @@ typedef npy_intp intp;
     INIT_OUTER_LOOP_3           \
     intp s3 = *steps++;
 
+#define BEGIN_OUTER_LOOP_2      \
+    for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1) {
+
 #define BEGIN_OUTER_LOOP_3      \
     for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2) {
 
@@ -124,6 +127,7 @@ static void
 
 char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";
 
+
 /**begin repeat
 
    #TYPE=FLOAT,DOUBLE,LONG#
@@ -177,6 +181,229 @@ static void
 
 /**end repeat**/
 
+char *mean_signature = "(i)->()";
+
+/**begin repeat
+
+   #TYPE=DOUBLE#
+   #typ=npy_double#
+*/
+
+static void
+@TYPE@_mean(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_2
+    intp di = dimensions[0];
+    intp i;
+    intp is1=steps[0];
+    BEGIN_OUTER_LOOP_2
+        char *ip1=args[0], *op=args[1];
+        @typ@ sum = 0;
+        for (i = 0; i < di; i++) {
+            sum += (*(@typ@ *)ip1);
+            ip1 += is1;
+        }
+        *(@typ@ *)op = sum / (@typ@) di;
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+char *std_signature = "(i)->()";
+
+/**begin repeat
+
+   #TYPE=DOUBLE#
+   #typ=npy_double#
+*/
+
+static void
+@TYPE@_std(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_2
+    intp di = dimensions[0];
+    intp i;
+    intp is1=steps[0];
+    BEGIN_OUTER_LOOP_2
+        char *ip1=args[0], *op=args[1];
+        @typ@ sum = 0;
+        @typ@ sum2 = 0;
+        @typ@ mean;
+        for (i = 0; i < di; i++) {
+            sum += (*(@typ@ *)ip1);
+            ip1 += is1;
+        }
+        mean = sum / (@typ@) di;
+        ip1 = args[0];
+        for (i = 0; i < di; i++) {
+            const @typ@ value = (*(@typ@ *)ip1);
+            const @typ@ tmp = value - mean;
+            sum2 += tmp * tmp;
+            ip1 += is1;
+        }
+        *(@typ@ *)op = sqrt(sum2 / (@typ@) di);
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+char *nansum_signature = "(i)->()";
+
+/**begin repeat
+
+   #TYPE=DOUBLE#
+   #typ=npy_double#
+*/
+
+static void
+@TYPE@_nansum(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_2
+    intp di = dimensions[0];
+    intp i;
+    intp is1=steps[0];
+    BEGIN_OUTER_LOOP_2
+        char *ip1=args[0], *op=args[1];
+        @typ@ sum = 0;
+        intp n = 0;
+        for (i = 0; i < di; i++) {
+            const @typ@ value = (*(@typ@ *)ip1);
+            ip1 += is1;
+            if (npy_isnan(value)) continue;
+            sum += value;
+            n++;
+        }
+        if (n == 0) {
+            *(@typ@ *)op = NPY_NAN;
+        } else {
+            *(@typ@ *)op = sum;
+        }
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+char *nancumsum_signature = "(i)->(i)";
+
+/**begin repeat
+
+   #TYPE=DOUBLE#
+   #typ=npy_double#
+*/
+
+static void
+@TYPE@_nancumsum(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_2
+    intp di = dimensions[0];
+    intp i;
+    intp is1=steps[0]; intp os=steps[1];
+    BEGIN_OUTER_LOOP_2
+        char *ip1=args[0], *op=args[1];
+        @typ@ sum = 0;
+        intp n = 0;
+        for (i = 0; i < di; i++) {
+            const @typ@ value = (*(@typ@ *)ip1);
+            if (!npy_isnan(value)) {
+                sum += value;
+                n++;
+            }
+            if (n == 0) {
+                *(@typ@ *)op = NPY_NAN;
+            } else {
+                *(@typ@ *)op = sum;
+            }
+            ip1 += is1;
+            op += os;
+        }
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+char *nanmean_signature = "(i)->()";
+
+/**begin repeat
+
+   #TYPE=DOUBLE#
+   #typ=npy_double#
+*/
+
+static void
+@TYPE@_nanmean(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_2
+    intp di = dimensions[0];
+    intp i;
+    intp is1=steps[0];
+    BEGIN_OUTER_LOOP_2
+        char *ip1=args[0], *op=args[1];
+        @typ@ sum = 0;
+        intp n = 0;
+        for (i = 0; i < di; i++) {
+            const @typ@ value = (*(@typ@ *)ip1);
+            ip1 += is1;
+            if (npy_isnan(value)) continue;
+            sum += value;
+            n++;
+        }
+        if (n == 0) {
+            *(@typ@ *)op = NPY_NAN;
+        } else {
+            *(@typ@ *)op = sum / n;
+        }
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+char *nanstd_signature = "(i)->()";
+
+/**begin repeat
+
+   #TYPE=DOUBLE#
+   #typ=npy_double#
+*/
+
+static void
+@TYPE@_nanstd(char **args, intp *dimensions, intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_2
+    intp di = dimensions[0];
+    intp i;
+    intp is1=steps[0];
+    BEGIN_OUTER_LOOP_2
+        char *ip1=args[0], *op=args[1];
+        @typ@ sum = 0;
+        @typ@ sum2 = 0;
+        @typ@ mean;
+        intp n = 0;
+        for (i = 0; i < di; i++) {
+            const @typ@ value = (*(@typ@ *)ip1);
+            ip1 += is1;
+            if (npy_isnan(value)) continue;
+            sum += value;
+            n++;
+        }
+        mean = sum / n;
+        ip1 = args[0];
+        for (i = 0; i < di; i++) {
+            const @typ@ value = (*(@typ@ *)ip1);
+            ip1 += is1;
+            if (npy_isnan(value)) continue;
+            const @typ@ tmp = value - mean;
+            sum2 += tmp * tmp;
+        }
+        if (n == 0) {
+            *(@typ@ *)op = NPY_NAN;
+        } else {
+            *(@typ@ *)op = sqrt(sum2 / n);
+        }
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
 /*  The following lines were generated using a slightly modified
     version of code_generators/generate_umath.py and adding these
     lines to defdict:
@@ -207,6 +434,24 @@ static char innerwt_signatures[] = { PyArray_LONG, PyArray_LONG, PyArray_LONG, P
 static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multiply, FLOAT_matrix_multiply, DOUBLE_matrix_multiply };
 static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL };
 static char matrix_multiply_signatures[] = { PyArray_LONG, PyArray_LONG, PyArray_LONG,  PyArray_FLOAT, PyArray_FLOAT, PyArray_FLOAT,  PyArray_DOUBLE, PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction mean_functions[] = { DOUBLE_mean };
+static void * mean_data[] = { (void *)NULL };
+static char mean_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction std_functions[] = { DOUBLE_std };
+static void * std_data[] = { (void *)NULL };
+static char std_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction nansum_functions[] = { DOUBLE_nansum };
+static void * nansum_data[] = { (void *)NULL };
+static char nansum_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction nancumsum_functions[] = { DOUBLE_nancumsum };
+static void * nancumsum_data[] = { (void *)NULL };
+static char nancumsum_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction nanmean_functions[] = { DOUBLE_nanmean };
+static void * nanmean_data[] = { (void *)NULL };
+static char nanmean_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
+static PyUFuncGenericFunction nanstd_functions[] = { DOUBLE_nanstd };
+static void * nanstd_data[] = { (void *)NULL };
+static char nanstd_signatures[] = { PyArray_DOUBLE, PyArray_DOUBLE };
 
 static void
 addUfuncs(PyObject *dictionary) {
@@ -234,6 +479,48 @@ addUfuncs(PyObject *dictionary) {
                                     0, matrix_multiply_signature);
     PyDict_SetItemString(dictionary, "matrix_multiply", f);
     Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(mean_functions, mean_data, mean_signatures, 1,
+                                    1, 1, PyUFunc_None, "mean",
+                                    "mean on the last dimension and broadcast on the rest\n"\
+                                    "     \"(i)->()\" \n",
+                                    0, mean_signature);
+    PyDict_SetItemString(dictionary, "mean", f);
+    Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(std_functions, std_data, std_signatures, 1,
+                                    1, 1, PyUFunc_None, "std",
+                                    "std on the last dimension and broadcast on the rest\n"\
+                                    "     \"(i)->()\" \n",
+                                    0, std_signature);
+    PyDict_SetItemString(dictionary, "std", f);
+    Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(nansum_functions, nansum_data, nansum_signatures, 1,
+                                    1, 1, PyUFunc_None, "nansum",
+                                    "nansum on the last dimension and broadcast on the rest\n"\
+                                    "     \"(i)->()\" \n",
+                                    0, nansum_signature);
+    PyDict_SetItemString(dictionary, "nansum", f);
+    Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(nancumsum_functions, nancumsum_data, nancumsum_signatures, 1,
+                                    1, 1, PyUFunc_None, "nancumsum",
+                                    "nancumsum on the last dimension and broadcast on the rest\n"\
+                                    "     \"(i)->(i)\" \n",
+                                    0, nancumsum_signature);
+    PyDict_SetItemString(dictionary, "nancumsum", f);
+    Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(nanmean_functions, nanmean_data, nanmean_signatures, 1,
+                                    1, 1, PyUFunc_None, "nanmean",
+                                    "nanmean on the last dimension and broadcast on the rest\n"\
+                                    "     \"(i)->()\" \n",
+                                    0, nanmean_signature);
+    PyDict_SetItemString(dictionary, "nanmean", f);
+    Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(nanstd_functions, nanstd_data, nanstd_signatures, 1,
+                                    1, 1, PyUFunc_None, "nanstd",
+                                    "nanstd on the last dimension and broadcast on the rest\n"\
+                                    "     \"(i)->()\" \n",
+                                    0, nanstd_signature);
+    PyDict_SetItemString(dictionary, "nanstd", f);
+    Py_DECREF(f);
 }
 
 /*

diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
@@ -421,5 +421,65 @@ def broadcastable(s1,s2):
 
         assert_equal(ref, True, err_msg="reference check")
 
+class TestGeneralizeUfuncs(TestCase):
+    def setUp(self):
+        self.A = np.array([[0.57311211, 0.5566819 , 0.6124054 , 0.66216366, 0.00850151],
+                           [0.06834556, 0.02745207, 0.98692964, 0.67906414, 0.37794816],
+                           [0.21752169, 0.37686867, 0.93973352, 0.49818713, 0.21303216],
+                           [0.08066141, 0.37775952, 0.56552893, 0.18719725, 0.64559197],
+                           [0.99428521, 0.24205622, 0.63299425, 0.92321895, 0.94189936]])
+
+    def test_mean(self):
+        assert_almost_equal(umt.mean(self.A.flatten()), 0.49556561593504361)
+        assert_almost_equal(umt.mean(self.A),
+                            np.array([0.48257292, 0.42794791, 0.44906863, 0.37134782, 0.7468908]))
+
+    def test_std(self):
+        assert_almost_equal(umt.std(self.A.flatten()),0.30789518960654322)
+        assert_almost_equal(umt.std(self.A),
+                            np.array([0.2398093, 0.36543965, 0.26747138, 0.21511266, 0.28218941]))
+
+class TestGeneralizeNaNUfuncs(TestCase):
+    def setUp(self):
+        self.A = np.array([[[np.nan, 0.01319214, 0.01620964],
+                            [0.11704017, np.nan, 0.75157887],
+                            [0.28333658, 0.1630199 , np.nan]],
+                           [[0.59541557, np.nan, 0.37910852],
+                            [np.nan, 0.87964135, np.nan],
+                            [0.70543747, np.nan, 0.34306596]],
+                           [[0.72687499, 0.91084584, np.nan],
+                            [0.84386844, 0.38944762, 0.23913896],
+                            [np.nan, 0.37068164, 0.33850425]]])
+
+    def test_nansum(self):
+        assert_almost_equal(umt.nansum(self.A.flatten()), 8.0664079100000006)
+        assert_almost_equal(np.swapaxes(umt.nansum(np.swapaxes(self.A,0,2)), 1, 0),
+                            np.array([[1.32229056, 0.92403798, 0.39531816],
+                                      [0.96090861, 1.26908897, 0.99071783],
+                                      [0.98877405, 0.53370154, 0.68157021]]))
+        assert_almost_equal(umt.nansum(np.swapaxes(self.A,1,2)),
+                            np.array([[0.40037675, 0.17621204, 0.76778851],
+                                      [1.30085304, 0.87964135, 0.72217448],
+                                      [1.57074343, 1.6709751 , 0.57764321]]))
+        assert_almost_equal(umt.nansum(self.A),
+                            np.array([[0.02940178, 0.86861904, 0.44635648],
+                                      [0.97452409, 0.87964135, 1.04850343],
+                                      [1.63772083, 1.47245502, 0.70918589]]))
+
+    def test_nancumsum(self):
+        assert_almost_equal(umt.nancumsum(self.A.flatten()), 
+                            np.array([np.nan, 0.01319214, 0.02940178, 0.14644195, 0.14644195,
+                                      0.89802082, 1.1813574, 1.3443773, 1.3443773,  1.93979287,
+                                      1.93979287, 2.31890139, 2.31890139, 3.19854274, 3.19854274,
+                                      3.90398021, 3.90398021, 4.24704617, 4.97392116, 5.884767,
+                                      5.884767, 6.72863544, 7.11808306, 7.35722202, 7.35722202,
+                                      7.72790366, 8.06640791]))
+
+    def test_nanmean(self):
+        assert_almost_equal(umt.nanmean(self.A.flatten()), 0.44813377277777783)
+
+    def test_nanstd(self):
+        assert_almost_equal(umt.nanstd(self.A.flatten()), 0.28764635583291526)
+
 if __name__ == "__main__":
     run_module_suite()