In [1]:
from IPython.display import Image, IFrame
import numpy as np

# "Understanding Numpy's 'as_strided': Advanced array manipulation"

### Nicholas A. Del Grosso
### 11.07.2018

## Acknowledgment: Jaime Fernandez's 2017 PyData Barcelona Workshop
"Taking Numpy in Stride"
https://github.com/jaimefrio/pydatabcn2017

![](images/jaime_github.png)

## What is an Array?

In [3]:
# The picture is referenced by remote URL rather than stored with the notebook.
Image(url='https://1.bp.blogspot.com/-DtHyrn-FvoI/Woz9IpBsPaI/AAAAAAAADDA/d_xjO3bAtKIBKcHDKkqNmVmDIoDB14DKgCLcBGAs/s640/1.JPG', width=800)

## Quick Review: Arrays in Numpy

In [4]:
# arange(start, stop) excludes the stop value, so this yields 10 through 15.
np.arange(10, 16)

array([10, 11, 12, 13, 14, 15])

## Shape, DType and Strides

In [74]:
# With a single argument, arange counts up from 0: six elements, 0 through 5.
np.arange(6)

array([0, 1, 2, 3, 4, 5])

## "Reshaping" an Array: 1D --> ND Arrays and the "axis" keyword

In [76]:
# Lay the six values out as 3 rows by 2 columns, filled row by row.
np.arange(6).reshape(3, 2)

array([[0, 1],
       [2, 3],
       [4, 5]])

## "Views" vs "Copies" of an Array

In [99]:
# One-dimensional array 0..5; the bare name on the last line displays it.
aa = np.arange(0, 6)
aa

array([0, 1, 2, 3, 4, 5])

## Multidimensional Arrays are Just "Views" of One-Dimensional Arrays

In [59]:
# Externally hosted figure, referenced by URL and displayed with width=700.
Image(url='https://i.stack.imgur.com/QFc9M.png', width=700)

## Manipulating Array Views in Numpy: Order, Base, Transpose

In [52]:
# Six consecutive integers starting at 10, viewed as a 3x2 grid.
aa = np.arange(10, 16).reshape((3, 2))
aa

array([[10, 11],
       [12, 13],
       [14, 15]])

In [23]:
# .base is the array whose memory this view borrows; here it is the flat
# 1-D arange that the 3x2 array was reshaped from.
aa.base

array([10, 11, 12, 13, 14, 15])

In [43]:
# isfortran() is True only for Fortran-contiguous arrays that are not also
# C-contiguous; the result shown for this array is False.
np.isfortran(aa)

False

## Transposes and Order

In [195]:
# Embed the NumPy reference section on transpose-like operations; the page is
# loaded from the remote URL.
IFrame(src='https://docs.scipy.org/doc/numpy/reference/routines.array-manipulation.html#transpose-like-operations', width=600, height=250)

## Order and Performance

In [93]:
# 1000x1000 array laid out in C order: elements along the last axis sit next
# to each other in memory. (A float argument to arange gives a float array.)
aa = np.arange(1e6).reshape(1000, 1000, order='C')
# Time the mean taken along the last axis (one value per row).
%timeit aa.mean(axis=-1)

371 µs ± 1.57 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [94]:
# Same array, now averaged along the first axis; compare with the timing of
# the axis=-1 reduction in the previous cell.
%timeit aa.mean(axis=0)

525 µs ± 6.24 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## Shape, Strides, and np.info()

In [98]:
# Small 3x2 example array used to inspect shape and strides.
aa = np.arange(6).reshape((3, 2))
aa

array([[0, 1],
       [2, 3],
       [4, 5]])

## Shape, Stride, and Dtype can all be directly manipulated

In [102]:
# Fresh 3x2 array whose shape, strides and dtype can be experimented with.
aa = np.reshape(np.arange(6), (3, 2))


## np.array() vs np.ndarray(): factory function vs instance constructor

In [105]:
# Three raw bytes with the values 10, 12 and 14; their repr is shown as
# escape sequences rather than as numbers.
bytes([10, 12, 14])

b'\n\x0c\x0e'

In [111]:
# np.ndarray() is the low-level constructor: it interprets the given buffer
# directly, here as three uint8 values.
np.ndarray(buffer=bytes([10, 12, 14]), shape=3, dtype=np.uint8)

array([10, 12, 14], dtype=uint8)

## np.ndarray() creates flexible views of an ndarray

In [121]:
aa = np.arange(0, 6)
# Rebuild the same array from its parts: the memory buffer plus the shape,
# dtype and strides that describe how to read it.
np.ndarray(
    shape=aa.shape,
    dtype=aa.dtype,
    buffer=aa,
    strides=aa.strides,
)

array([0, 1, 2, 3, 4, 5])

## as_strided() is a simpler-but-dangerous np.ndarray() alternative

In [13]:
from numpy.lib.stride_tricks import as_strided

In [14]:
# Define the input here so the cell gives the same 5x3 result on a fresh
# kernel instead of depending on whichever `aa` happened to be left in memory.
aa = np.arange(15).reshape(5, 3)

# When strides is omitted, as_strided() reuses the input's own strides, so
# both calls below describe exactly the same view of aa's memory.
default_strides_view = as_strided(aa, shape=aa.shape)
explicit_strides_view = as_strided(aa, shape=aa.shape, strides=aa.strides)

# Only the last expression of a cell is displayed.
explicit_strides_view

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

# Putting Strides into Practice: Exercises

## How Much Memory Does this Array Take Up (in Bytes)?

In [8]:
# A single uint8 element is the only data that is actually stored.
aa = np.array([100], dtype=np.uint8)
# With a stride of 0 bytes, each of the million indices reads that same element.
as_strided(aa, strides=(0,), shape=(10**6,))

array([100, 100, 100, ..., 100, 100, 100], dtype=uint8)

## Make a Reversed View of an Array

In [169]:
aa = np.arange(8)

# Build a reversed view: start at the last element and step backwards one
# element at a time. The byte counts come from aa.itemsize rather than a
# literal 8, because the default integer size is platform dependent (it is
# 4 bytes on some platforms, where a hard-coded 8 would read the wrong memory).
reversed_view = np.ndarray(
    buffer=aa,
    shape=aa.shape,
    dtype=aa.dtype,
    offset=aa.itemsize * (len(aa) - 1),  # byte position of the last element
    strides=(-aa.itemsize,),             # negative stride walks towards the start
)
reversed_view

array([7, 6, 5, 4, 3, 2, 1, 0])

## Transpose an Array

In [8]:
# Fifteen consecutive integers arranged as 5 rows by 3 columns.
aa = np.arange(15).reshape((5, 3))
aa

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [15]:
# A transpose reads the same memory with the shape and the strides both reversed.
as_strided(
    aa,
    shape=tuple(reversed(aa.shape)),
    strides=tuple(reversed(aa.strides)),
)

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

## Make a Repeated 2D View of an Array

In [16]:
aa = np.arange(8)

# Repeat the row five times without copying: a stride of 0 along the first
# axis makes every row start at the same memory location. The column stride
# and row length are taken from aa itself instead of being hard-coded to
# 8 bytes / 8 items, so the view stays correct for any integer size.
repeated_view = as_strided(aa, shape=(5, aa.size), strides=(0, aa.strides[0]))
repeated_view

array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]])

## Perform a Windowed Average over an Array

In [26]:
aa = np.arange(9)

# Overlapping windows of `window` consecutive elements: moving down one row
# and moving right one column both advance by a single element in memory.
# The stride comes from aa.strides and the row count from the array length,
# rather than the literals 8 and 7, so the cell works for any integer size
# and any input length.
window = 3
step = aa.strides[0]
windows = as_strided(aa, shape=(aa.size - window + 1, window), strides=(step, step))

# windows.mean(axis=1) gives the windowed average; the windows themselves are
# displayed here.
windows

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4],
       [3, 4, 5],
       [4, 5, 6],
       [5, 6, 7],
       [6, 7, 8]])

## Summary
  - The **axis** keyword allows fast operations across array dimensions
  - Reshaping, transposing, slicing, etc. are all modified views of a 1D array
  - **Strides** are the steps, in bytes, that NumPy takes through memory to reach the next element along each axis.
  - **np.ndarray()** and **as_strided()** both produce views of an array with settable parameters.

## Thank You for Your Attention!  Any Questions?

![](images/nick_github.png)