In [1]:
%load_ext cython
%load_ext fortranmagic

In [2]:
import numpy as np

In [3]:
def _get_windowed_view(x, win_len, stepsize):
    """
    Return a moving window view over the data

    Parameters
    ----------
    x : numpy.ndarray
        1- or 2-D array of signals to window. Windows occur along the 0 axis. MUST BE C-CONTIGUOUS.
    win_len : int
        Window length.
    stepsize : int
        Stride length/step size. How many places to step for the center of the windows being created.

    Returns
    -------
    x_win : numpy.ndarray
        2D array of windows of the original data, with shape (-1, L)
    """
    if not (x.ndim in [1, 2]):
        raise ValueError('Array cannot have more than 2 dimensions to window properly.')
    if not x.flags['C_CONTIGUOUS']:
        raise ValueError('Array must be C-contiguous to window properly.')
    if x.ndim == 1:
        nrows = ((x.size - win_len) // stepsize) + 1
        n = x.strides[0]
        return np.lib.stride_tricks.as_strided(x, shape=(nrows, win_len), strides=(stepsize * n, n), writeable=False)
    else:
        k = x.shape[1]
        nrows = ((x.shape[0] - win_len) // stepsize) + 1
        n = x.strides[1]

        new_shape = (nrows, win_len, k)
        new_strides = (stepsize * k * n, k * n, n)
        return np.lib.stride_tricks.as_strided(x, shape=new_shape, strides=new_strides, writeable=False)

In [4]:
%%fortran -vvv

subroutine mean_f(n, p, x, L, S, result)
    ! Moving-window mean of every row of x, taken along the second (sample) axis.
    !
    ! Window k (k = 1, 2, ...) covers samples (k-1)*S + 1 .. (k-1)*S + L, and
    ! result(:, k) holds the mean of x over that window for each of the p rows.
    ! The means are obtained from a running cumulative sum, so the cost is
    ! proportional to n*p regardless of how much the windows overlap.
    !
    ! Arguments
    !   n      : number of samples (second extent of x); hidden from the f2py signature
    !   p      : number of rows/signals (first extent of x); hidden from the f2py signature
    !   x      : input data, shape (p, n)
    !   L      : window length in samples; expected 1 <= L <= n
    !   S      : step between the starts of consecutive windows; expected S >= 1
    !   result : window means, shape (p, (n-L)/S + 1)
    !
    ! If L < 1, L > n or S < 1, no valid window exists: result (whatever extent
    ! it was given) is filled with quiet NaN instead of indexing out of bounds.
    ! S == 0 cannot be handled here, because the extent of result is computed
    ! from (n-L)/S before the first executable statement runs.
    !
    ! The literal kind 8 is kept on purpose: f2py maps real(8)/integer(8) to C
    ! types directly, without needing a kind-map file.
    use ieee_arithmetic, only: ieee_value, ieee_quiet_nan
    implicit none
    integer(8), intent(in) :: n, p, L, S
    real(8), intent(in) :: x(p, n)  ! (row, sample): reversed relative to the C/Python index order
    real(8), intent(out) :: result(p, (n-L)/S + 1)
!f2py intent(hide) :: n, p
    ! local
    real(8), allocatable :: csum(:, :)  ! running sum of x along the sample axis
    integer(8) :: j, iwin

    ! Reject arguments for which csum(:, L) or result(:, 1) would be out of bounds.
    if (L < 1 .or. L > n .or. S < 1) then
        result = ieee_value(1.0_8, ieee_quiet_nan)
        return
    end if

    ! Allocated explicitly rather than declared as an automatic array, so that a
    ! large n does not depend on the available stack size.
    allocate(csum(p, n))

    csum(:, 1) = x(:, 1)
    do j = 2, n
        csum(:, j) = csum(:, j-1) + x(:, j)
    end do

    ! The first window is the cumulative sum itself; each later window is the
    ! difference of two cumulative sums that lie L samples apart.
    result(:, 1) = csum(:, L)
    iwin = 2
    do j = L + S, n, S
        result(:, iwin) = csum(:, j) - csum(:, j-L)
        iwin = iwin + 1
    end do

    ! Turn window sums into window means.
    result = result / L
end subroutine mean_f

Running...
   /Users/adamol/opt/miniconda3/envs/pfymu/bin/python -m numpy.f2py -m _fortran_magic_9af1f6492bfd0eb6d06aa6c137696e6e -c /Users/adamol/.ipython/fortran/_fortran_magic_9af1f6492bfd0eb6d06aa6c137696e6e.f90
running build
running config_cc
unifing config_cc, config, build_clib, build_ext, build commands --compiler options
running config_fc
unifing config_fc, config, build_clib, build_ext, build commands --fcompiler options
running build_src
build_src
building extension "_fortran_magic_9af1f6492bfd0eb6d06aa6c137696e6e" sources
f2py options: []
f2py:> /var/folders/vp/zrt93yf92md23msl9phtgz6sfvz394/T/tmp15ydtv98/src.macosx-10.9-x86_64-3.8/_fortran_magic_9af1f6492bfd0eb6d06aa6c137696e6emodule.c
creating /var/folders/vp/zrt93yf92md23msl9phtgz6sfvz394/T/tmp15ydtv98/src.macosx-10.9-x86_64-3.8
Reading fortran codes...
	Reading file '/Users/adamol/.ipython/fortran/_fortran_magic_9af1f6492bfd0eb6d06aa6c137696e6e.f90' (format:free)
Post-processing...
	Block: _fortran_magic_9af1f6492bfd0eb

In file included from In file included from /var/folders/vp/zrt93yf92md23msl9phtgz6sfvz394/T/tmp15ydtv98/src.macosx-10.9-x86_64-3.8/fortranobject.c:2:
In file included from /var/folders/vp/zrt93yf92md23msl9phtgz6sfvz394/T/tmp15ydtv98/src.macosx-10.9-x86_64-3.8/fortranobject.h:13:
In file included from /Users/adamol/opt/miniconda3/envs/pfymu/lib/python3.8/site-packages/numpy/core/include/numpy/arrayobject.h:4:
In file included from /Users/adamol/opt/miniconda3/envs/pfymu/lib/python3.8/site-packages/numpy/core/include/numpy/ndarrayobject.h:12:
In file included from /Users/adamol/opt/miniconda3/envs/pfymu/lib/python3.8/site-packages/numpy/core/include/numpy/ndarraytypes.h/var/folders/vp/zrt93yf92md23msl9phtgz6sfvz394/T/tmp15ydtv98/src.macosx-10.9-x86_64-3.8/_fortran_magic_9af1f6492bfd0eb6d06aa6c137696e6emodule.c:16::
In file included from /var/folders/vp/zrt93yf92md23msl9phtgz6sfvz394/T/tmp15ydtv98/src.macosx-10.9-x86_64-3.8/fortranobject.h:183213:
:

In file included from /Users/adamol/o


Ok. The following fortran objects are ready to use: mean_f


In [5]:
def mean_p(x, L, S):
    """
    Moving-window mean computed with NumPy on a strided window view.

    Parameters
    ----------
    x : numpy.ndarray
        1- or 2-D C-contiguous array; windows are taken along axis 0.
    L : int
        Window length.
    S : int
        Step between the starts of consecutive windows.

    Returns
    -------
    numpy.ndarray
        Mean of each window. Shape is ``(n_windows,)`` for 1-D input and
        ``(n_windows, x.shape[1])`` for 2-D input.
    """
    xw = _get_windowed_view(x, L, S)
    # The window axis is axis 1 for both layouts: (n_windows, L) and (n_windows, L, k).
    # Using axis=-2 only hits the window axis for 2-D input; for 1-D input it averages across
    # windows instead of within them.
    return np.mean(xw, axis=1)

In [6]:
# Benchmark input: N rows by 3 columns of uniform random values in [0, 1).
N = 1500000
x = np.random.rand(N, 3)

In [7]:
# Reference value: per-column mean over all N rows, computed by NumPy.
np.mean(x, axis=0)

array([0.49991618, 0.49977127, 0.50017767])

In [8]:
# Same quantity through the Fortran routine: x.T has shape (3, N), matching the (p, n) layout
# mean_f declares for x. n and p are hidden by the f2py directive, so only x, L and S are passed;
# with L = S = N there is a single window spanning every row.
mean_f(x.T, x.shape[0], x.shape[0])

array([[0.49991618],
       [0.49977127],
       [0.50017767]])

In [9]:
# Timing, single-window case (L = S = N): NumPy column mean vs. the Fortran routine.
%timeit np.mean(x, axis=0)
%timeit mean_f(x.T, N, N)

25.9 ms ± 774 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
13.5 ms ± 716 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
# Timing, windowed case: 500-row windows advanced by 500 rows (no overlap),
# NumPy strided-view implementation vs. the Fortran routine.
%timeit mean_p(x, 500, 500)
%timeit mean_f(x.T, 500, 500)

26.6 ms ± 796 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
14 ms ± 503 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
