In [1]:
%load_ext cython



In [2]:
import pythran
%load_ext pythran.magic

In [3]:
import numpy as np

# max_len_seq inner loop: Cython vs. Pythran

## cython version

In [4]:
%%cython
# Author: Eric Larson
# 2014

from __future__ import absolute_import

import numpy as np
cimport numpy as np
cimport cython


# Fast inner loop of max_len_seq.
@cython.cdivision(True)  # C modulo: no Python sign adjustment, no zero check
@cython.boundscheck(False)  # indexing is unchecked; the caller must pass valid sizes
@cython.wraparound(False)  # negative indices are never used
def max_len_seq_inner_cython(Py_ssize_t[::1] taps,
                       np.int8_t[::1] state,
                       Py_ssize_t nbits, Py_ssize_t length,
                       np.int8_t[::1] seq):
    """Run the shift-register inner loop of max_len_seq.

    The register is kept in ``state`` and addressed as a ring buffer: rather
    than shifting the data, a moving index ``idx`` marks the current head.

    Parameters
    ----------
    taps : contiguous 1-D Py_ssize_t buffer
        Tap offsets. Each one is added to the head index and reduced modulo
        ``nbits`` to pick the register cell XOR-ed into the feedback.
        Because C modulo is in effect, offsets are expected to be
        non-negative.
    state : contiguous 1-D int8 buffer
        Register contents; updated in place.
    nbits : Py_ssize_t
        Ring-buffer modulus (presumably ``len(state)`` -- nothing here
        checks it). Must be non-zero whenever ``length > 0``.
    length : Py_ssize_t
        Number of output samples to generate.
    seq : contiguous 1-D int8 buffer
        Output buffer; ``seq[0:length]`` is overwritten. It must hold at
        least ``length`` elements: bounds checking is disabled, so a shorter
        buffer results in out-of-bounds writes.

    Returns
    -------
    numpy.ndarray
        ``state`` rolled by ``-idx`` so that the current head sits at index 0,
        ready to be passed back in for a subsequent run.
    """
    cdef Py_ssize_t n_taps = taps.shape[0]
    cdef Py_ssize_t idx = 0
    cdef np.int8_t feedback
    # Both loop counters are typed so the nested loops compile to plain C loops.
    cdef Py_ssize_t i, ti
    for i in range(length):
        # The cell at the head is emitted as the next output sample ...
        feedback = state[idx]
        seq[i] = feedback
        # ... then XOR-ed with every tapped cell to form the new value.
        for ti in range(n_taps):
            feedback ^= state[(taps[ti] + idx) % nbits]
        state[idx] = feedback
        idx = (idx + 1) % nbits
    # state must be rolled s.t. next run, when idx==0, it's in the right place
    return np.roll(state, -idx, axis=0)


## pythran version

In [5]:
%%pythran
import numpy as np

#pythran export max_len_seq_inner_pythran(int64 [], int8[], int, int, int8[])
def max_len_seq_inner_pythran(taps, state, nbits, length, seq):
    """Run the shift-register inner loop of max_len_seq.

    The register lives in ``state`` and is addressed as a ring buffer: the
    data is never shifted; a moving head index ``idx`` is advanced instead
    (faster than shifting with something like ``np.roll`` on every step).

    Parameters
    ----------
    taps : 1-D int64 array
        Tap offsets; each is added to the head index and reduced modulo
        ``nbits`` to select the register cell XOR-ed into the feedback.
    state : 1-D int8 array
        Register contents; updated in place.
    nbits : int
        Ring-buffer modulus (presumably ``len(state)`` -- not checked here).
    length : int
        Number of output samples to generate.
    seq : 1-D int8 array
        Output buffer; ``seq[0:length]`` is overwritten, so it must hold at
        least ``length`` elements.

    Returns
    -------
    ndarray
        ``state`` rolled by ``-idx`` so the current head sits at index 0,
        ready to be passed back in for a subsequent run.
    """
    idx = 0
    for i in range(length):
        # The cell at the head is emitted as the next output sample ...
        feedback = state[idx]
        seq[i] = feedback
        # ... then XOR-ed with every tapped cell to form the new value.
        for tap in taps:
            feedback ^= state[(tap + idx) % nbits]
        state[idx] = feedback
        idx = (idx + 1) % nbits
    # state must be rolled s.t. next run, when idx==0, it's in the right place
    return np.roll(state, -idx, axis=0)


## benchmark

In [6]:
n = 300
# Positional arguments shared by both kernels: (taps, state, nbits, length, seq).
# The output buffer `seq` must hold `length` (= n) samples: the kernels write
# seq[0:length], and the Cython one has bounds checking disabled, so a shorter
# buffer would be overrun instead of raising.
args = (
    np.arange(n, dtype=np.int64),  # taps
    np.arange(n, dtype=np.int8),   # state (updated in place by each call)
    n,                             # nbits
    n,                             # length
    np.zeros(n, dtype=np.int8),    # seq (output, overwritten by each call)
)

In [7]:
# Time the Cython kernel. Each call updates `state` and `seq` in place, so
# successive timing loops do not start from identical register contents.
%timeit max_len_seq_inner_cython(*args)

804 µs ± 32 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [8]:
# Time the Pythran kernel on the same argument tuple. Each call updates
# `state` and `seq` in place, so the register contents differ between runs.
%timeit max_len_seq_inner_pythran(*args)

794 µs ± 22.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
