numba · seibert · Oct 31, 2018 · Sep 24, 2018 · Sep 24, 2018 · Sep 24, 2018
diff --git a/docs/source/reference/numpysupported.rst b/docs/source/reference/numpysupported.rst
@@ -263,6 +263,7 @@ The following top-level functions are supported:
 * :func:`numpy.convolve` (only the 2 first arguments)
 * :func:`numpy.copy` (only the first argument)
 * :func:`numpy.correlate` (only the 2 first arguments)
+* :func:`numpy.cov` (only the 5 first arguments, requires NumPy >= 1.10)
 * :func:`numpy.diag`
 * :func:`numpy.digitize`
 * :func:`numpy.dstack`

diff --git a/numba/targets/arraymath.py b/numba/targets/arraymath.py
@@ -1294,6 +1294,183 @@ def np_vander_seq_impl(x, N=None, increasing=False):
     elif isinstance(x, (types.Tuple, types.Sequence)):
         return np_vander_seq_impl
 
+#----------------------------------------------------------------------------
+# Statistics
+
+@register_jitable
+def row_wise_average(a):
+    """Return the mean of every row of the 2D array `a`.
+
+    The result has shape (m, 1) and the same dtype as `a`; entry i is the
+    sum of row i divided by the number of columns.
+    """
+    assert a.ndim == 2
+
+    m, n = a.shape
+    # (m, 1) rather than (m,) so the result lines up with the rows of `a`
+    out = np.empty((m, 1), dtype=a.dtype)
+
+    for i in range(m):
+        out[i, 0] = np.sum(a[i, :]) / n
+
+    return out
+
+@register_jitable
+def np_cov_impl_inner(X, bias, ddof):
+    """Compute the covariance matrix of the rows of the 2D array `X`.
+
+    `X` is modified in place (each row has its mean subtracted).  The
+    result is dot(X, conj(X.T)) scaled by 1 / (X.shape[1] - ddof), where a
+    `ddof` of None resolves to 0 if `bias` is true and 1 otherwise.
+    """
+
+    # determine degrees of freedom
+    if ddof is None:
+        if bias:
+            ddof = 0
+        else:
+            ddof = 1
+
+    # determine the normalization factor
+    fact = X.shape[1] - ddof
+
+    # numpy warns if less than 0 and floors at 0; only the flooring is
+    # replicated here (no warning is issued)
+    fact = max(fact, 0.0)
+
+    # de-mean (in place: the caller's X is overwritten)
+    X -= row_wise_average(X)
+
+    # calculate result - requires blas
+    c = np.dot(X, np.conj(X.T))
+    c *= np.true_divide(1, fact)
+    return c
+
+def _prepare_cov_input():
+    # Placeholder with no behaviour of its own: it exists only as the target
+    # of the @overload registration that supplies the real implementation.
+    pass
+
+@overload(_prepare_cov_input)
+def _prepare_cov_input_impl(m, y, rowvar, dtype):
+    """Select the input-preparation routine used by the np.cov overload.
+
+    When `y` is None (or typed as none) the returned function converts `m`
+    to a 2D array, transposing it if `rowvar` is false and it is not a
+    single row.  Otherwise the returned function does the same for both
+    `m` and `y` and stacks them row-wise into a new array of `dtype`.
+
+    The stacking variant raises ValueError when `m` and `y` differ in their
+    number of columns after any transposition.
+    """
+    if y in (None, types.none):
+        def _prepare_cov_input_inner(m, y, rowvar, dtype):
+            m_arr = np.atleast_2d(_asarray(m))
+
+            # transpose if asked to and not a (1, n) vector
+            if not rowvar and m_arr.shape[0] != 1:
+                m_arr = m_arr.T
+
+            return m_arr
+    else:
+        def _prepare_cov_input_inner(m, y, rowvar, dtype):
+            m_arr = np.atleast_2d(_asarray(m))
+            y_arr = np.atleast_2d(_asarray(y))
+
+            # transpose if asked to and not a (1, n) vector - this looks
+            # wrong as you might end up transposing one and not the other,
+            # but it's what numpy does
+            if not rowvar:
+                if m_arr.shape[0] != 1:
+                    m_arr = m_arr.T
+                if y_arr.shape[0] != 1:
+                    y_arr = y_arr.T
+
+            m_rows, m_cols = m_arr.shape
+            y_rows, y_cols = y_arr.shape
+
+            # 'variables' as the constraint on rows or columns depends on
+            # whether rowvar is True or False...
+            if m_cols != y_cols:
+                raise ValueError('m and y must have the same number of variables')
+
+            # allocate and fill output array
+            out = np.empty((m_rows + y_rows, m_cols), dtype=dtype)
+            out[:m_rows, :] = m_arr
+            # index from m_rows rather than -y_rows: with zero rows in y,
+            # -0 would select the whole array instead of an empty tail
+            out[m_rows:, :] = y_arr
+
+            return out
+
+    return _prepare_cov_input_inner
+
+@register_jitable
+def _handle_m_dim_change(m):
+    # Runtime guard: raises RuntimeError when `m` is a 2D array holding
+    # exactly one row; any other shape passes through and returns None.
+    if m.ndim == 2 and m.shape[0] == 1:
+        msg = ("2D array containing a single row is unsupported due to "
+               "ambiguity in type inference. To use numpy.cov in this case "
+               "simply pass the row as a 1D array, i.e. m[0].")
+        raise RuntimeError(msg)
+
+# Jittable identity function: returns its argument unchanged and never raises.
+_handle_m_dim_nop = register_jitable(lambda x:x)
+
+def determine_dtype(array_like):
+    """Infer the NumPy dtype implied by the type object `array_like`.
+
+    * ``types.Array``: the dtype corresponding to the array's element type.
+    * ``types.UniTuple`` / ``types.Tuple``: the element types are gathered
+      (members exposing a ``count`` attribute are expanded one level) and
+      promoted to a single common dtype.
+    * anything else, and tuples yielding no element types: ``np.float64``.
+    """
+    array_like_dt = np.float64
+    if isinstance(array_like, types.Array):
+        array_like_dt = as_dtype(array_like.dtype)
+    elif isinstance(array_like, (types.UniTuple, types.Tuple)):
+        coltypes = set()
+        for val in array_like:
+            if hasattr(val, 'count'):
+                coltypes.update(val)
+            else:
+                coltypes.add(val)
+        if coltypes:
+            # np.result_type accepts any number of dtypes, whereas
+            # np.promote_types takes exactly two and fails on three or more
+            array_like_dt = np.result_type(*[as_dtype(ty) for ty in coltypes])
+
+    return array_like_dt
+
+def check_dimensions(array_like, name):
+    """Raise TypeError if `array_like` is typed with more than 2 dimensions.
+
+    Arrays are judged by their ``ndim``; sequences by whether three levels
+    of sequence nesting are found.  Other types are accepted as they are.
+    `name` is only used to build the error message.
+    """
+    too_deep = False
+
+    if isinstance(array_like, types.Array):
+        too_deep = array_like.ndim > 2
+    elif isinstance(array_like, types.Sequence):
+        first_level = array_like.key[0]
+        if isinstance(first_level, types.Sequence):
+            too_deep = isinstance(first_level.key[0], types.Sequence)
+
+    if too_deep:
+        raise TypeError("{0} has more than 2 dimensions".format(name))
+
+if numpy_version >= (1, 10):  # replicate behaviour post numpy 1.10 bugfix release
+    @overload(np.cov)
+    def np_cov(m, y=None, rowvar=True, bias=False, ddof=None):
+        """Overload of np.cov: pick an implementation from the types of m and y.
+
+        Raises TypeError (via check_dimensions) if `m` or `y` is typed with
+        more than 2 dimensions.  Returns `np_cov_impl_single_variable`
+        (0D array result) when `m` is typed as a single variable and `y` is
+        None, and `np_cov_impl` (2D array result) otherwise.
+        """
+
+        # reject problem if m and / or y are more than 2D
+        check_dimensions(m, 'm')
+        check_dimensions(y, 'y')
+
+        # special case for 2D array input with 1 row of data - select
+        # handler function which we'll call later when we have access
+        # to the shape of the input array
+        _M_DIM_HANDLER = _handle_m_dim_nop
+        if isinstance(m, types.Array):
+            _M_DIM_HANDLER = _handle_m_dim_change
+
+        # infer result dtype; np.float64 is included so the result is
+        # always at least double precision floating point
+        m_dt = determine_dtype(m)
+        y_dt = determine_dtype(y)
+        dtype = np.result_type(m_dt, y_dt, np.float64)
+
+        # general case: returns a 2D covariance matrix
+        def np_cov_impl(m, y=None, rowvar=True, bias=False, ddof=None):
+            _M_DIM_HANDLER(m)
+            X = _prepare_cov_input(m, y, rowvar, dtype).astype(dtype)
+
+            # any zero-length axis: return an all-NaN (rows, rows) matrix
+            if np.any(np.array(X.shape) == 0):
+                return np.full((X.shape[0], X.shape[0]), fill_value=np.nan, dtype=dtype)
+            else:
+                return np_cov_impl_inner(X, bias, ddof)
+
+        # single-variable case: returns the variance wrapped in a 0D array
+        def np_cov_impl_single_variable(m, y=None, rowvar=True, bias=False, ddof=None):
+            _M_DIM_HANDLER(m)
+            X = _prepare_cov_input(m, y, rowvar, dtype).astype(dtype)
+
+            if np.any(np.array(X.shape) == 0):
+                variance = np.nan
+            else:
+                # first (only expected) element of the covariance matrix
+                variance = np_cov_impl_inner(X, bias, ddof).flat[0]
+
+            return np.array(variance)
+
+        # identify up front if output is 0D
+        # (`and` binds tighter than `or`: "1D array" or "types.Tuple")
+        if isinstance(m, types.Array) and m.ndim == 1 or isinstance(m, types.Tuple):
+            if y in (None, types.none):
+                return np_cov_impl_single_variable
+
+        # scalar m
+        if isinstance(m, (types.Integer, types.Float, types.Complex, types.Boolean)):
+            if y in (None, types.none):
+                return np_cov_impl_single_variable
+
+        # sequence whose elements are not themselves sequences
+        if isinstance(m, types.Sequence):
+            if not isinstance(m.key[0], types.Sequence) and y in (None, types.none):
+                return np_cov_impl_single_variable
+
+        # otherwise assume it's 2D and we're good to go
+        return np_cov_impl
+
 #----------------------------------------------------------------------------
 # Element-wise computations
 

diff --git a/numba/tests/test_np_functions.py b/numba/tests/test_np_functions.py
@@ -14,6 +14,7 @@
 from numba.numpy_support import version as np_version
 from numba.errors import TypingError
 from .support import TestCase, CompilationCache, MemoryLeakMixin
+from .matmul_usecase import needs_blas
 
 no_pyobj_flags = Flags()
 no_pyobj_flags.set("nrt")
@@ -88,6 +89,9 @@ def vander(x, N=None, increasing=False):
 def partition(a, kth):
     return np.partition(a, kth)
 
+# Use-case wrapper around np.cov forwarding all five arguments positionally;
+# the tests run it both as plain Python and compiled with jit(nopython=True).
+def cov(m, y=None, rowvar=True, bias=False, ddof=None):
+    return np.cov(m, y, rowvar, bias, ddof)
+
 def ediff1d(ary, to_end=None, to_begin=None):
     return np.ediff1d(ary, to_end, to_begin)
 
@@ -562,10 +566,10 @@ def test_convolve_exceptions(self):
             else:
                 self.assertIn("'v' cannot be empty", str(raises.exception))
 
-    def _check_output(self, pyfunc, cfunc, params):
+    def _check_output(self, pyfunc, cfunc, params, abs_tol=None):
+        # Call both functions with the same keyword arguments and compare the
+        # results; abs_tol is forwarded unchanged to assertPreciseEqual.
         expected = pyfunc(**params)
         got = cfunc(**params)
-        self.assertPreciseEqual(expected, got)
+        self.assertPreciseEqual(expected, got, abs_tol=abs_tol)
 
     def test_vander_basic(self):
         pyfunc = vander
@@ -1088,6 +1092,170 @@ def test_partition_boolean_inputs(self):
             for kth in True, False, -1, 0, 1:
                 self.partition_sanity_check(pyfunc, cfunc, d, kth)
 
+    @unittest.skipUnless(np_version >= (1, 10), "cov needs Numpy 1.10+")
+    @needs_blas
+    def test_cov_basic(self):
+        # Compare the jitted cov against the pure Python one when only `m`
+        # is supplied, across array, non-array and empty inputs.
+        pyfunc = cov
+        cfunc = jit(nopython=True)(pyfunc)
+        # results are compared with an absolute tolerance of 1e-14
+        _check = partial(self._check_output, pyfunc, cfunc, abs_tol=1e-14)
+
+        def m_variations():
+            # array inputs
+            yield np.array([[0, 2], [1, 1], [2, 0]]).T
+            yield self.rnd.randn(100).reshape(5, 20)
+            yield np.array([0.3942, 0.5969, 0.7730, 0.9918, 0.7964])
+            yield np.full((4, 5), fill_value=True)
+            yield np.array([np.nan, 0.5969, -np.inf, 0.9918, 0.7964])
+            yield np.linspace(-3, 3, 33).reshape(33, 1, order='F')
+
+            # non-array inputs
+            yield ((0.1, 0.2), (0.11, 0.19), (0.09, 0.21))
+            yield (-2.1, -1, 4.3)
+            yield (1, 2, 3)
+            yield [4, 5, 6]
+            yield ((0.1, 0.2, 0.3), (0.1, 0.2, 0.3))
+            yield [(1, 2, 3), (1, 3, 2)]
+            yield 3.142
+
+            # empty data structures
+            yield np.array([])
+            yield np.array([]).reshape(0, 2)
+            yield np.array([]).reshape(2, 0)
+            yield ()
+
+        # all inputs other than the first are defaulted
+        for m in m_variations():
+            _check({'m': m})
+
+    @unittest.skipUnless(np_version >= (1, 10), "cov needs Numpy 1.10+")
+    @needs_blas
+    def test_cov_explicit_arguments(self):
+        # Exercise every combination of explicitly supplied y, rowvar, bias
+        # and ddof against a fixed random (150, 7) input.
+        py_cov = cov
+        jit_cov = jit(nopython=True)(py_cov)
+        compare = partial(self._check_output, py_cov, jit_cov, abs_tol=1e-14)
+
+        m = self.rnd.randn(1050).reshape(150, 7)
+        combinations = itertools.product(
+            (None, m[::-1]),       # y
+            (False, True),         # rowvar
+            (False, True),         # bias
+            (None, -1, 0, 1, 3),   # ddof
+        )
+
+        for y, rowvar, bias, ddof in combinations:
+            compare({'m': m, 'y': y, 'ddof': ddof, 'bias': bias, 'rowvar': rowvar})
+
+    @unittest.skipUnless(np_version >= (1, 10), "cov needs Numpy 1.10+")
+    @needs_blas
+    def test_cov_egde_cases(self):
+        # NOTE: "egde" in the method name is a typo for "edge"; the name is
+        # left as is so the test identifier does not change.
+        # Compares jitted and pure Python cov on mixed-type, mixed-shape,
+        # empty and scalar combinations of m and y.
+        pyfunc = cov
+        cfunc = jit(nopython=True)(pyfunc)
+        _check = partial(self._check_output, pyfunc, cfunc, abs_tol=1e-14)
+
+        # examples borrowed from numpy doc string / unit tests
+        m = np.array([-2.1, -1, 4.3])
+        y = np.array([3, 1.1, 0.12])
+        params = {'m': m, 'y': y}
+        _check(params)
+
+        # ddof larger than the number of observations
+        m = np.array([[0, 2], [1, 1], [2, 0]]).T
+        params = {'m': m, 'ddof': 5}
+        _check(params)
+
+        m = np.array([1, 2, 3])  # test case modified such that m is 1D
+        y = np.array([[1j, 2j, 3j]])
+        params = {'m': m, 'y': y}
+        _check(params)
+
+        # real array paired with a complex tuple
+        m = np.array([1, 2, 3])
+        y = (1j, 2j, 3j)
+        params = {'m': m, 'y': y}
+        _check(params)
+        params = {'m': y, 'y': m}  # flip real and complex inputs
+        _check(params)
+
+        m = np.array([1, 2, 3])
+        y = (1j, 2j, 3)  # note last item is not complex
+        params = {'m': m, 'y': y}
+        _check(params)
+        params = {'m': y, 'y': m}  # flip real and complex inputs
+        _check(params)
+
+        # both inputs empty
+        m = np.array([])
+        y = np.array([])
+        params = {'m': m, 'y': y}
+        _check(params)
+
+        # both inputs scalar
+        m = 1.1
+        y = 2.2
+        params = {'m': m, 'y': y}
+        _check(params)
+
+        # 2D m with a single-row 2D y
+        m = self.rnd.randn(10, 3)
+        y = np.array([-2.1, -1, 4.3]).reshape(1, 3) / 10
+        params = {'m': m, 'y': y}
+        _check(params)
+
+        # The following tests pass with numpy version >= 1.10, but fail with 1.9
+        m = np.array([-2.1, -1, 4.3])
+        y = np.array([[3, 1.1, 0.12], [3, 1.1, 0.12]])
+        params = {'m': m, 'y': y}
+        _check(params)
+
+        for rowvar in False, True:
+            m = np.array([-2.1, -1, 4.3])
+            y = np.array([[3, 1.1, 0.12], [3, 1.1, 0.12], [4, 1.1, 0.12]])
+            params = {'m': m, 'y': y, 'rowvar': rowvar}
+            _check(params)
+
+    @unittest.skipUnless(np_version >= (1, 10), "cov needs Numpy 1.10+")
+    @needs_blas
+    def test_cov_exceptions(self):
+        # Check the errors raised by the jitted cov: typing errors for inputs
+        # with more than 2 dimensions, ValueError for mismatched m and y, and
+        # RuntimeError for a 2D array with a single row.
+        pyfunc = cov
+        cfunc = jit(nopython=True)(pyfunc)
+
+        # Exceptions leak references
+        self.disable_leak_check()
+
+        # m with more than 2 dimensions must fail at typing time
+        def _check_m(m):
+            with self.assertTypingError() as raises:
+                cfunc(m)
+            self.assertIn('m has more than 2 dimensions', str(raises.exception))
+
+        m = np.ones((5, 6, 7))
+        _check_m(m)
+
+        m = ((((1, 2, 3), (2, 2, 2)),),)
+        _check_m(m)
+
+        m = [[[5, 6, 7]]]
+        _check_m(m)
+
+        # y with more than 2 dimensions must fail at typing time
+        def _check_y(m, y):
+            with self.assertTypingError() as raises:
+                cfunc(m, y=y)
+            self.assertIn('y has more than 2 dimensions', str(raises.exception))
+
+        m = np.ones((5, 6))
+        y = np.ones((5, 6, 7))
+        _check_y(m, y)
+
+        m = np.array((1.1, 2.2, 1.1))
+        y = (((1.2, 2.2, 2.3),),)
+        _check_y(m, y)
+
+        # m and y of different lengths fail when the function is run
+        m = np.arange(3)
+        y = np.arange(4)
+        with self.assertRaises(ValueError) as raises:
+            cfunc(m, y=y)
+        self.assertIn('m and y must have the same number of variables', str(raises.exception))
+        # Numpy raises ValueError: all the input array dimensions except for the
+        # concatenation axis must match exactly.
+
+        # a 2D array with a single row is rejected when the function is run
+        m = np.array([-2.1, -1, 4.3]).reshape(1, 3)
+        with self.assertRaises(RuntimeError) as raises:
+            cfunc(m)
+        self.assertIn('2D array containing a single row is unsupported', str(raises.exception))
+
     @unittest.skipUnless(np_version >= (1, 12), "ediff1d needs Numpy 1.12+")
     def test_ediff1d_basic(self):
         pyfunc = ediff1d