Deprecate PyArrayObject* direct field access #116

Merged
merged 25 commits on Jul 26, 2011
Changes from 20 commits
Commits
25 commits
84ee545
ENH: core: Improve casting failure message produced by PyArray_FromArray
mwiebe Jul 11, 2011
c625ee0
NEP: missingdata: Rename 'namask' to 'maskna', which looks better
mwiebe Jul 12, 2011
b7cc20a
ENH: core: Deprecating direct access to the PyArrayObject fields
mwiebe Jul 12, 2011
c135371
ENH: core: Work in progress making things work without direct field a…
mwiebe Jul 13, 2011
4dce9c0
ENH: core: More progress refactoring code to not use PyArrayObject fi…
mwiebe Jul 13, 2011
2bb06a5
ENH: core: Progress getting NumPy building without direct field access
mwiebe Jul 15, 2011
bff2e51
ENH: core: Some fixes, change some tests to not use yield
mwiebe Jul 15, 2011
d88bfeb
ENH: core: More cleanup towards deprecating direct arrayobject field …
mwiebe Jul 16, 2011
67c1098
ENH: core: more progress removing direct ArrayObject field access
mwiebe Jul 16, 2011
09debad
ENH: core: Get it fully building without direct field access
mwiebe Jul 16, 2011
db90bf5
WRN: nditer: remove some warnings
Jul 18, 2011
57d6b5b
ENH: core: More cleanups removing direct PyArrayObject field access
Jul 18, 2011
0a7156f
ENH: core: Got the tests running after the ArrayObject field access d…
Jul 18, 2011
c9f0f29
ENH: core: Fix more test failures post-field access deprecation
Jul 18, 2011
f8ff0e3
BUG: dtype: comma-list dtype formats didn't accept M8[] parameterized…
Jul 18, 2011
5771857
BUG: core: PyArray_GetArrayParamsFromObject was treating __array_inte…
Jul 19, 2011
405fc9e
TST: rec: DType in join_by test was inconsistent
Jul 19, 2011
57aa914
BUG: core: Needed to initialize 'obj' to NULL for Py_XDECREF_ERR in f…
Jul 19, 2011
dcc355a
BUG: mmap: Make the memmap subclass rely on the Python mmap's destruc…
Jul 19, 2011
5590809
ENH: core: Add access macros back in conditionally, for backwards com…
Jul 19, 2011
a059979
STY: Updates for pull request feedback from Chuck and Ben
Jul 20, 2011
dc81482
ENH: dtype: Make handling of struct dtype align= flag more rigorous (…
Jul 22, 2011
a2ed62a
ENH: core: Rename PyArray_SetBase to PyArray_SetBaseObject to be more…
Jul 22, 2011
694a383
DOC: core: Document the PyArray_SetBaseObject function
Jul 22, 2011
affea42
STY: Remove trailing whitespace
Jul 26, 2011
86 changes: 48 additions & 38 deletions doc/neps/missing-data.rst
@@ -225,27 +225,29 @@ provides a starting point.

For example,::

>>> np.array([1.0, 2.0, np.NA, 7.0], namasked=True)
array([1., 2., NA, 7.], namasked=True)
>>> np.array([1.0, 2.0, np.NA, 7.0], dtype='NA[f8]')
>>> np.array([1.0, 2.0, np.NA, 7.0], maskna=True)
array([1., 2., NA, 7.], maskna=True)
>>> np.array([1.0, 2.0, np.NA, 7.0], dtype='NA')
array([1., 2., NA, 7.], dtype='NA[<f8]')
>>> np.array([1.0, 2.0, np.NA, 7.0], dtype='NA[f4]')
array([1., 2., NA, 7.], dtype='NA[<f4]')

produce arrays with values [1.0, 2.0, <inaccessible>, 7.0] /
mask [Unmasked, Unmasked, Masked, Unmasked], and
mask [Exposed, Exposed, Exposed, Hidden], and
Contributor:

I think you mean "[Exposed, Exposed, Hidden, Exposed]"

Member Author:

Indeed

values [1.0, 2.0, <NA bitpattern>, 7.0] respectively.
Contributor:

When you say "respectively", I would expect the same number of outputs as there are examples, but you only have two here. Confusion could occur if someone thought that the mask line was a separate output example, somehow.

Plus, I am a little concerned about having different print outputs. It might be ok, but I am not sure it is the best idea. On the other hand, it does allow for future mixing of NA-bitpatterns and masks...

Member Author:

Yes, the respectively got out of kilter with multiple edits. The explanation here was to show what's under the hood. I think the reprs should be different, otherwise you wouldn't be able to tell the difference, and eval-ing the repr would produce a different object.


It may be worth overloading the np.NA __call__ method to accept a dtype,
returning a zero-dimensional array with a missing value of that dtype.
Without doing this, NA printouts would look like::

>>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], namasked=True))
array(NA, dtype='float64', namasked=True)
>>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], maskna=True))
array(NA, dtype='float64', maskna=True)
>>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], dtype='NA[f8]'))
array(NA, dtype='NA[<f8]')

but with this, they could be printed as::

>>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], namasked=True))
>>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], maskna=True))
NA('float64')
>>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], dtype='NA[f8]'))
NA('NA[<f8]')
@@ -274,12 +276,12 @@ from another view which doesn't have them masked. For example::

>>> a = np.array([1,2])
>>> b = a.view()
>>> b.flags.hasnamask = True
>>> b.flags.hasmaskna = True
>>> b
array([1,2], namasked=True)
array([1,2], maskna=True)
>>> b[0] = np.NA
>>> b
array([NA,2], namasked=True)
array([NA,2], maskna=True)
>>> a
array([1,2])
>>> # The underlying number 1 value in 'a[0]' was untouched
@@ -351,10 +353,10 @@ Creating Masked Arrays
There are two flags which indicate and control the nature of the mask
used in masked arrays.

First is 'arr.flags.hasnamask', which is True for all masked arrays and
First is 'arr.flags.hasmaskna', which is True for all masked arrays and
may be set to True to add a mask to an array which does not have one.

Second is 'arr.flags.ownnamask', which is True if the array owns the
Second is 'arr.flags.ownmaskna', which is True if the array owns the
memory to the mask, and False if the array has no mask, or has a view
into the mask of another array. If this is set to False in a masked
Member:

It might be simpler if copying the mask could be specified when making the view, something like a 'copymask' keyword.

Member:

Nevermind, I see this is done below.

array, the array will create a copy of the mask so that further modifications
@@ -402,8 +404,16 @@ New functions added to the ndarray are::
array is unmasked and has the 'NA' part stripped from the
parameterized type ('NA[f8]' becomes just 'f8').

arr.view(namasked=True)
This is a shortcut for 'a = arr.view(); a.flags.hasnamask=True'.
arr.view(maskna=True)
This is a shortcut for
>>> a = arr.view()
>>> a.flags.hasmaskna = True

arr.view(ownmaskna=True)
This is a shortcut for
>>> a = arr.view()
>>> a.flags.hasmaskna = True
>>> a.flags.ownmaskna = True

Element-wise UFuncs With Missing Values
=======================================
@@ -461,7 +471,7 @@ will also use the unmasked value counts for their calculations if

Some examples::

>>> a = np.array([1., 3., np.NA, 7.], namasked=True)
>>> a = np.array([1., 3., np.NA, 7.], maskna=True)
>>> np.sum(a)
array(NA, dtype='<f8', masked=True)
Contributor:

"masked=True" -> "maskna=True"

Member Author:

changed
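
The skipna reduction semantics illustrated in the surrounding examples can be emulated in plain Python. In the sketch below, None stands in for np.NA, and the na_sum/na_mean helpers are purely illustrative, not part of NumPy or of this pull request::

    def na_sum(values, skipna=False):
        # An NA anywhere makes the whole reduction NA unless skipna is given.
        if not skipna and any(v is None for v in values):
            return None
        return sum((v for v in values if v is not None), 0.0)

    def na_mean(values, skipna=False):
        if not skipna and any(v is None for v in values):
            return None
        vals = [v for v in values if v is not None]
        # The mean of zero exposed values has no defined value, so stay NA.
        return sum(vals) / len(vals) if vals else None

    print(na_sum([1., 3., None, 7.]))                # None, i.e. NA
    print(na_sum([1., 3., None, 7.], skipna=True))   # 11.0
    print(na_mean([1., 3., None, 7.], skipna=True))  # 3.6666666666666665
    print(na_sum([None, None], skipna=True))         # 0.0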

>>> np.sum(a, skipna=True)
@@ -471,11 +481,11 @@ Some examples::
>>> np.mean(a, skipna=True)
3.6666666666666665

>>> a = np.array([np.NA, np.NA], dtype='f8', namasked=True)
>>> a = np.array([np.NA, np.NA], dtype='f8', maskna=True)
>>> np.sum(a, skipna=True)
0.0
>>> np.max(a, skipna=True)
array(NA, dtype='<f8', namasked=True)
array(NA, dtype='<f8', maskna=True)
>>> np.mean(a)
NA('<f8')
>>> np.mean(a, skipna=True)
@@ -487,18 +497,18 @@ The functions 'np.any' and 'np.all' require some special consideration,
just as logical_and and logical_or do. Maybe the best way to describe
their behavior is through a series of examples::

>>> np.any(np.array([False, False, False], namasked=True))
>>> np.any(np.array([False, False, False], maskna=True))
False
>>> np.any(np.array([False, NA, False], namasked=True))
>>> np.any(np.array([False, NA, False], maskna=True))
Contributor:

Nit-picking here, but shouldn't it be "np.NA", not "NA"?

Member Author:

changed

NA
>>> np.any(np.array([False, NA, True], namasked=True))
>>> np.any(np.array([False, NA, True], maskna=True))
True

>>> np.all(np.array([True, True, True], namasked=True))
>>> np.all(np.array([True, True, True], maskna=True))
True
>>> np.all(np.array([True, NA, True], namasked=True))
>>> np.all(np.array([True, NA, True], maskna=True))
Contributor:

Just thinking out loud here... would it make sense to have skipna=True available for np.all?

Member Author:

Makes sense

NA
>>> np.all(np.array([False, NA, True], namasked=True))
>>> np.all(np.array([False, NA, True], maskna=True))
False
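
Picking up the review question above about allowing skipna=True for np.all: the three-valued behaviour in these examples, plus that hypothetical extension, can be sketched in plain Python with None standing in for np.NA (illustrative only, not a NumPy API)::

    def na_any(values, skipna=False):
        saw_na = False
        for v in values:
            if v is None:            # NA element
                saw_na = True
            elif v:                  # a definite True settles the result
                return True
        return None if (saw_na and not skipna) else False

    def na_all(values, skipna=False):
        saw_na = False
        for v in values:
            if v is None:
                saw_na = True
            elif not v:              # a definite False settles the result
                return False
        return None if (saw_na and not skipna) else True

    print(na_any([False, None, False]))             # None, i.e. NA
    print(na_any([False, None, True]))              # True
    print(na_all([True, None, True]))               # None, i.e. NA
    print(na_all([False, None, True]))              # False
    print(na_all([True, None, True], skipna=True))  # True, the proposed extension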

Parameterized NA Data Types
@@ -609,14 +619,14 @@ The important part of future-proofing the design is making sure
the C ABI-level choices and the Python API-level choices have a natural
transition to multi-NA support. Here is one way multi-NA support could look::

>>> a = np.array([np.NA(1), 3, np.NA(2)], namasked='multi')
>>> a = np.array([np.NA(1), 3, np.NA(2)], maskna='multi')
>>> np.sum(a)
NA(1)
>>> np.sum(a[1:])
NA(2)
>>> b = np.array([np.NA, 2, 5], namasked=True)
>>> b = np.array([np.NA, 2, 5], maskna=True)
>>> a + b
array([NA(0), 5, NA(2)], namasked='multi')
array([NA(0), 5, NA(2)], maskna='multi')
Contributor:

Just to clarify, the implication here is not to mix masked and bit-patterns? Consider the following:

...

a = np.array([np.NA, 2, 5], maskna=True)
b = np.array([1, np.NA, 7], dtype='NA')
a + b
...
What should be the result? Now, let's say that -- somehow -- the resulting array retains the distinction between the masked-NA and the bit-pattern NA. And the user wants to skip masked-NA values, but not skip bitpattern-NA values. I don't see how that is going to be possible in the current framework. Note, I am not saying that it should be possible, I am just raising a possible future issue with maintaining the distinction between bitpattern-NA and masked NA.

Member Author:

That's correct, it isn't about mixing masks and bitpatterns, it's about having multiple distinct NAs in both masks and bitpatterns. The NEP makes no distinction between bitpattern-NA and masked NA with regard to computations; however, the multi-NA idea would allow a user to distinguish between them by using different multi-NA payloads.
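
A toy version of that payload idea, with a small Python class standing in for np.NA (nothing here is NumPy API, and the rule for combining NAs is an arbitrary choice made only for the illustration)::

    class ToyNA(object):
        """Stand-in for a payload-carrying np.NA; illustrative only."""
        def __init__(self, payload=0):
            self.payload = payload
        def __repr__(self):
            return "NA(%d)" % self.payload
        def __add__(self, other):
            # NA absorbs whatever it is combined with and keeps its payload,
            # so NAs that came from a mask could carry a different payload
            # than NAs from a bitpattern dtype and stay distinguishable.
            return ToyNA(self.payload)
        __radd__ = __add__

    a = [ToyNA(1), 3, ToyNA(2)]
    b = [0, 2, 5]
    print([x + y for x, y in zip(a, b)])   # [NA(1), 5, NA(2)]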


PEP 3118
========
@@ -696,28 +706,28 @@ This gives us the following additions to the PyArrayObject::
/*
* Descriptor for the mask dtype.
* If no mask: NULL
* If mask : bool/structured dtype of bools
* If mask : bool/uint8/structured dtype of mask dtypes
*/
PyArray_Descr *maskdescr;
PyArray_Descr *maskna_descr;
/*
* Raw data buffer for mask. If the array has the flag
* NPY_ARRAY_OWNNAMASK enabled, it owns this memory and
* NPY_ARRAY_OWNMASKNA enabled, it owns this memory and
* must call PyArray_free on it when destroyed.
*/
npy_uint8 *maskdata;
npy_mask *maskna_data;
/*
* Just like dimensions and strides point into the same memory
* buffer, we now just make the buffer 3x the nd instead of 2x
* and use the same buffer.
*/
npy_intp *maskstrides;
npy_intp *maskna_strides;

There are 2 (or 3) flags which must be added to the array flags::

NPY_ARRAY_HASNAMASK
NPY_ARRAY_OWNNAMASK
NPY_ARRAY_HASMASKNA
NPY_ARRAY_OWNMASKNA
/* To possibly add in a later revision */
NPY_ARRAY_HARDNAMASK
NPY_ARRAY_HARDMASKNA

To allow the easy detection of NA support, and whether an array
has any missing values, we add the following functions:
@@ -807,7 +817,7 @@ NPY_ITER_ARRAYMASK
can be only one such mask, and there cannot also be a virtual
mask.

As a special case, if the flag NPY_ITER_USE_NAMASK is specified
As a special case, if the flag NPY_ITER_USE_MASKNA is specified
at the same time, the mask for the operand is used instead
of the operand itself. If the operand has no mask but is
based on an NA dtype, that mask exposed by the iterator converts
@@ -827,14 +837,14 @@ Iterator NA-array Features

We add several new per-operand flags:

NPY_ITER_USE_NAMASK
NPY_ITER_USE_MASKNA
If the operand has an NA dtype, an NA mask, or both, this adds a new
virtual operand to the end of the operand list which iterates
over the mask of the particular operand.

NPY_ITER_IGNORE_NAMASK
NPY_ITER_IGNORE_MASKNA
If an operand has an NA mask, by default the iterator will raise
an exception unless NPY_ITER_USE_NAMASK is specified. This flag
an exception unless NPY_ITER_USE_MASKNA is specified. This flag
disables that check, and is intended for cases where one has first
checked that all the elements in the array are not NA using the
PyArray_ContainsNA function.
70 changes: 24 additions & 46 deletions numpy/core/_internal.py
@@ -129,60 +129,38 @@ def _reconstruct(subtype, shape, dtype):
return ndarray.__new__(subtype, shape, dtype)


# format_re and _split were taken from numarray by J. Todd Miller
# format_re was originally from numarray by J. Todd Miller

def _split(input):
"""Split the input formats string into field formats without splitting
the tuple used to specify multi-dimensional arrays."""

newlist = []
hold = asbytes('')

listinput = input.split(asbytes(','))
for element in listinput:
if hold != asbytes(''):
item = hold + asbytes(',') + element
else:
item = element
left = item.count(asbytes('('))
right = item.count(asbytes(')'))

# if the parenthesis is not balanced, hold the string
if left > right :
hold = item

# when balanced, append to the output list and reset the hold
elif left == right:
newlist.append(item.strip())
hold = asbytes('')

# too many close parenthesis is unacceptable
else:
raise SyntaxError(item)

# if there is string left over in hold
if hold != asbytes(''):
raise SyntaxError(hold)

return newlist

format_re = re.compile(asbytes(r'(?P<order1>[<>|=]?)(?P<repeats> *[(]?[ ,0-9]*[)]? *)(?P<order2>[<>|=]?)(?P<dtype>[A-Za-z0-9.]*)'))
format_re = re.compile(asbytes(r'(?P<order1>[<>|=]?)(?P<repeats> *[(]?[ ,0-9]*[)]? *)(?P<order2>[<>|=]?)(?P<dtype>[A-Za-z0-9.]*(?:\[[a-zA-Z0-9,.]+\])?)'))
Member:

Might be nice to split up the string, 154 char is a bit long.

Member Author:

Sure
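
One way the 154-character pattern could be split (a sketch only; the final commit may have formatted it differently) is to rely on adjacent string literals concatenating before the bytes conversion, keeping one named group per line::

    # re and asbytes are already imported at the top of _internal.py
    format_re = re.compile(asbytes(
        r'(?P<order1>[<>|=]?)'
        r'(?P<repeats> *[(]?[ ,0-9]*[)]? *)'
        r'(?P<order2>[<>|=]?)'
        r'(?P<dtype>[A-Za-z0-9.]*(?:\[[a-zA-Z0-9,.]+\])?)'))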

sep_re = re.compile(asbytes(r'\s*,\s*'))
space_re = re.compile(asbytes(r'\s+$'))

# astr is a string (perhaps comma separated)

_convorder = {asbytes('='): _nbo}

def _commastring(astr):
res = _split(astr)
if (len(res)) < 1:
raise ValueError("unrecognized formant")
startindex = 0
result = []
for k,item in enumerate(res):
# convert item
while startindex < len(astr):
mo = format_re.match(astr, pos=startindex)
try:
(order1, repeats, order2, dtype) = format_re.match(item).groups()
(order1, repeats, order2, dtype) = mo.groups()
except (TypeError, AttributeError):
raise ValueError('format %s is not recognized' % item)
raise ValueError('format number %d of "%s" is not recognized' %
(len(result)+1, astr))
startindex = mo.end()
# Separator or ending padding
if startindex < len(astr):
if space_re.match(astr, pos=startindex):
startindex = len(astr)
else:
mo = sep_re.match(astr, pos=startindex)
if not mo:
raise ValueError(
'format number %d of "%s" is not recognized' %
(len(result)+1, astr))
startindex = mo.end()

if order2 == asbytes(''):
order = order1
@@ -192,7 +170,7 @@ def _commastring(astr):
order1 = _convorder.get(order1, order1)
order2 = _convorder.get(order2, order2)
if (order1 != order2):
raise ValueError('in-consistent byte-order specification %s and %s' % (order1, order2))
raise ValueError('inconsistent byte-order specification %s and %s' % (order1, order2))
order = order1

if order in [asbytes('|'), asbytes('='), _nbo]:
@@ -203,7 +181,7 @@ def _commastring(astr):
else:
newitem = (dtype, eval(repeats))
result.append(newitem)

return result

def _getintp_ctype():
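For context, _commastring backs NumPy's comma-separated dtype strings, so the parameterized M8[...] handling fixed in this pull request can be exercised through the public dtype constructor. A small example (output shown as expected on a little-endian build with datetime64 support)::

    import numpy as np

    dt = np.dtype('f8, (2,3)f4, M8[ns]')
    print(dt)
    # dtype([('f0', '<f8'), ('f1', '<f4', (2, 3)), ('f2', '<M8[ns]')])
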
4 changes: 3 additions & 1 deletion numpy/core/code_generators/generate_numpy_api.py
@@ -220,7 +220,9 @@ def do_generate_api(targets, sources):
for name, index in types_api.items():
multiarray_api_dict[name] = TypeApi(name, index, 'PyTypeObject', api_name)

assert len(multiarray_api_dict) == len(multiarray_api_index)
if len(multiarray_api_dict) != len(multiarray_api_index):
raise AssertionError, "Multiarray API size mismatch %d %d" % \
(len(multiarray_api_dict), len(multiarray_api_index))

extension_list = []
for name, index in genapi.order_dict(multiarray_api_index):
1 change: 1 addition & 0 deletions numpy/core/code_generators/numpy_api.py
@@ -318,6 +318,7 @@
# End 1.6 API
'PyArray_MaskedCopyInto': 281,
'PyArray_MaskedMoveInto': 282,
'PyArray_SetBase': 283,
Member:

Does this need to be in the API?

Member Author:

To support full separation of the ABI from the memory layout of PyArrayObject *, all of its accessors will have to move into the API. This one seemed like a no-brainer to start, as it's doing more than just setting the field.

}

ufunc_types_api = {
21 changes: 14 additions & 7 deletions numpy/core/include/numpy/ndarrayobject.h
@@ -42,7 +42,8 @@ extern "C" CONFUSE_EMACS
#define PyArray_HasArrayInterface(op, out) \
PyArray_HasArrayInterfaceType(op, NULL, NULL, out)

#define PyArray_IsZeroDim(op) (PyArray_Check(op) && (PyArray_NDIM(op) == 0))
#define PyArray_IsZeroDim(op) (PyArray_Check(op) && \
(PyArray_NDIM((PyArrayObject *)op) == 0))

#define PyArray_IsScalar(obj, cls) \
(PyObject_TypeCheck(obj, &Py##cls##ArrType_Type))
@@ -159,12 +160,18 @@ extern "C" CONFUSE_EMACS
(k)*PyArray_STRIDES(obj)[2] + \
(l)*PyArray_STRIDES(obj)[3]))

#define PyArray_XDECREF_ERR(obj) \
if (obj && (PyArray_FLAGS(obj) & NPY_ARRAY_UPDATEIFCOPY)) { \
PyArray_FLAGS(PyArray_BASE(obj)) |= NPY_ARRAY_WRITEABLE; \
PyArray_FLAGS(obj) &= ~NPY_ARRAY_UPDATEIFCOPY; \
} \
Py_XDECREF(obj)
static NPY_INLINE void
Member:

The use of NPY_INLINE in a public include probably means that extensions need to use the same compiler that numpy was compiled with. Hmm, wonder if that affects cross compiling?

Member Author:

The neighborhood iterator was already doing this, so there is at least some precedent.

PyArray_XDECREF_ERR(PyArrayObject *arr)
{
if (arr != NULL) {
if (PyArray_FLAGS(arr) & NPY_ARRAY_UPDATEIFCOPY) {
PyArrayObject *base = (PyArrayObject *)PyArray_BASE(arr);
PyArray_ENABLEFLAGS(base, NPY_ARRAY_WRITEABLE);
PyArray_CLEARFLAGS(arr, NPY_ARRAY_UPDATEIFCOPY);
}
Py_DECREF(arr);
}
}

#define PyArray_DESCR_REPLACE(descr) do { \
PyArray_Descr *_new_; \