# Understand numpy indexing

In [259]:
#%matplotlib widget
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [260]:
import numpy as np
import matplotlib.pyplot as plt
import sympy

### A few ways to get test numpy arrays

In [261]:
np.arange(3), np.arange(4,8), np.arange(5,1,-2)

(array([0, 1, 2]), array([4, 5, 6, 7]), array([5, 3]))

For experiments with multiplication, arrays of primes may be helpful:

In [262]:
def arangep(n, starting_index=0):
    """Return a 1-d NumPy array of ``n`` consecutive primes.

    Parameters
    ----------
    n : int
        Number of primes to return.
    starting_index : int, optional
        Zero-based position of the first prime wanted (position 0 is the
        prime 2). Defaults to 0.

    Returns
    -------
    numpy.ndarray
        Signed-integer array holding the primes at positions
        ``starting_index`` through ``starting_index + n - 1``.
    """
    stop = starting_index + n
    # Grow the shared sieve until it holds at least `stop` primes.
    sympy.sieve.extend_to_no(stop)
    # Use the public slice interface rather than the private `_list` buffer.
    # Sieve indices are 1-based, hence the shift of both bounds by one.
    primes = sympy.sieve[starting_index + 1:stop + 1]
    # Force a signed integer dtype so that arithmetic such as `arangep(k) - 12`
    # cannot wrap around if the sieve's backing storage is unsigned.
    return np.array(primes, dtype=int)

In [263]:
arangep(5), arangep(4,2)

(array([ 2,  3,  5,  7, 11]), array([ 5,  7, 11, 13]))

# Shapes and Indexing

Indexing [basics](https://numpy.org/devdocs/user/basics.indexing.html#basics-indexing) and [details](https://numpy.org/devdocs/reference/arrays.indexing.html#arrays-indexing)

In [264]:
a = np.arange(2*3*4).reshape(2,3,4); print(a)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]


Indexing is in row-major (C-style) order: the last index varies fastest, i.e. it has the smallest address stride:

In [265]:
a[0,0,1], a[0,1,0], a[1,0,0]

(1, 4, 12)

In [266]:
a[0], a[0,0], a[0,0,0]

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]]),
 array([0, 1, 2, 3]),
 0)

In [267]:
a[0], a[0][0], a[0][0][0]

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]]),
 array([0, 1, 2, 3]),
 0)

In [268]:
a.flat[7:12]

array([ 7,  8,  9, 10, 11])

### Multiplicative-type operations

In [269]:
a = arangep(2)
b = arangep(2,2)
a,b

(array([2, 3]), array([5, 7]))

Binary scalar operations on vectors are applied elementwise:

In [270]:
a+1, a*2, a+b, a*b, b/a, b%a

(array([3, 4]),
 array([4, 6]),
 array([ 7, 10]),
 array([10, 21]),
 array([2.5       , 2.33333333]),
 array([1, 1]))

[`dot`](https://numpy.org/devdocs/reference/generated/numpy.dot.html) is "alternative matrix product with different broadcasting rules"

In [271]:
a.dot(b), b.dot(a)

(31, 31)

In [272]:
m = arangep(4,4).reshape(2,2); m

array([[11, 13],
       [17, 19]])

Matrix dot vector produces vector of dot products of rows of the matrix with the vector:

In [273]:
m.dot(a), a.dot(m[0]), a.dot(m[1]), m[0], m[1]

(array([61, 91]), 61, 91, array([11, 13]), array([17, 19]))

Vector dot matrix produces the vector of dot products of the columns of the matrix with the vector:

In [274]:
a.dot(m), a.dot(m[:,0]), a.dot(m[:,1]), m[:,0], m[:,1]

(array([73, 83]), 73, 83, array([11, 17]), array([13, 19]))

`@` is infix [matrix multiplication](https://numpy.org/devdocs/reference/generated/numpy.matmul.html#numpy.matmul)

In [275]:
a, m, m @ a, a @ m, m.T @ a

(array([2, 3]),
 array([[11, 13],
        [17, 19]]),
 array([61, 91]),
 array([73, 83]),
 array([73, 83]))

Right-multiplication by a matrix is equivalent to left-multiplication by its transpose:

In [276]:
a @ m, m.T @ a, a @ m.T, m @ a

(array([73, 83]), array([73, 83]), array([61, 91]), array([61, 91]))

## Einstein summation notation

Numpy provides [Einstein summation](https://mathworld.wolfram.com/EinsteinSummation.html) operations with [einsum](https://numpy.org/devdocs/reference/generated/numpy.einsum.html)
1. Repeated indices are implicitly summed over.
1. Each index can appear at most twice in any term.
1. Each term must contain identical non-repeated indices.

In [277]:
es = np.einsum

 $$a_{ik}a_{ij} \equiv \sum_{i} a_{ik}a_{ij}$$

$$M_{ij}v_j=\sum_{j}M_{ij}v_j$$

In [278]:
es('ij,j', m, a), es('ij,i', m, a)

(array([61, 91]), array([73, 83]))

In [279]:
es('j,ij', a, m), es('i,ij', a, m)

(array([61, 91]), array([73, 83]))

Scalar multiplication being commutative, the order of the operands does not matter:

In [280]:
all(es('ij,j', m, a) == es('j,ij', a, m))

True

### Slicing, assignment, and broadcasting

In [281]:
# `np.int` was only an alias of the builtin `int` and was removed in NumPy 1.24;
# passing the builtin keeps the same default integer dtype on every version.
m2 = np.zeros((2, 3), dtype=int); m2

array([[0, 0, 0],
       [0, 0, 0]])

In [282]:
m2[1] = np.arange(3); m2

array([[0, 0, 0],
       [0, 1, 2]])

In [283]:
m3 = arangep(8).reshape(4,2).T; m3

array([[ 2,  5, 11, 17],
       [ 3,  7, 13, 19]])

In [284]:
m3[:,0]

array([2, 3])

In [285]:
m @ m3[:,0]

array([61, 91])

In [286]:
h = m @ m3; h

array([[ 61, 146, 290, 434],
       [ 91, 218, 434, 650]])

In [287]:
b, b[...,np.newaxis]

(array([5, 7]),
 array([[5],
        [7]]))

In [288]:
h + b[...,np.newaxis]

array([[ 66, 151, 295, 439],
       [ 98, 225, 441, 657]])

## Convenient representations

Suppose you have many __x__ to run through a net. What is the convenient representation?

Consider a two-input net, e.g. the XOR net. We want to vectorize the evaluation of the net, and its backprop. In the case of XOR the entire input domain is four vectors: { (0,0), (0,1), (1,0), (1,1) }:

In [289]:
X = np.array([0,0, 0,1, 1,0, 1,1]).reshape(4,2); X

array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]])

This is a convenient ordering for input, with each input vector contiguous in memory. But the inputs are stored as rows, not as the column vectors that the classical left-multiplication by a transformation matrix expects (and produces).

In [290]:
m = np.arange(4).reshape(2,2) + 1; m

array([[1, 2],
       [3, 4]])

In [291]:
m @ np.array([1, 2]).reshape(2,1)

array([[ 5],
       [11]])

We can transpose the input before left-multiplying ...

In [292]:
m @ X.T

array([[0, 2, 1, 3],
       [0, 4, 3, 7]])

... and transpose it back:

In [293]:
Y = (m @ X.T).T; Y

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

Or we can be less pedantic about expressing the matrix multiply:

In [294]:
X @ m.T

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

In Einstein summation notation:

In [295]:
es('ij,kj', X, m)

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

If we really require the matrix on the left, we can index thus:

In [296]:
es('ij,kj->ki', m, X)

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

---
### Which way is faster?

In [297]:
timeit(X @ m.T)

1.41 µs ± 14.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [298]:
timeit(es('ij,kj->ki', m, X))

3.03 µs ± 55.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [299]:
tm = m.T

In [300]:
timeit(X @ tm)

1.19 µs ± 41.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


No surprise, fastest is to have the transposed matrix ready. No surprise that the Einstein summation is slower, as it requires formulating a loop from the string of indexes. But what if the input data is much larger? E.g.

In [301]:
Xlarge = np.arange(2*10000).reshape(10000,2); Xlarge

array([[    0,     1],
       [    2,     3],
       [    4,     5],
       ...,
       [19994, 19995],
       [19996, 19997],
       [19998, 19999]])

In [302]:
timeit(Xlarge @ tm)

96.1 µs ± 592 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [303]:
timeit(es('ij,kj->ki', m, Xlarge))

160 µs ± 1.08 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


The parsing of the index string and formulating a plan is maybe 1.6 µs (3.03 µs − 1.41 µs in the small case above), but the loop is

In [304]:
(156 + 1.4 - 3.01)/94.2

1.63895966029724

64% slower.

---

Adding another vector to each result vector of the multiply:

In [305]:
a, a + Y, Y + a

(array([2, 3]),
 array([[ 2,  3],
        [ 4,  7],
        [ 3,  6],
        [ 5, 10]]),
 array([[ 2,  3],
        [ 4,  7],
        [ 3,  6],
        [ 5, 10]]))

Applying a function to each result:

In [306]:
def relu(x):
    """Rectified linear unit: elementwise ``max(0, x)``.

    Implemented with ``np.maximum`` instead of ``np.vectorize(lambda x: max(0, x))``.
    ``np.vectorize`` infers the output dtype from the result for the first
    element, so a float array whose first entry is negative (result: the
    Python int ``0``) would have all later values truncated to integers.

    Parameters
    ----------
    x : array_like
        Input values of any shape.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` with negative entries replaced by 0.
    """
    return np.maximum(0, x)

Try it out:

In [307]:
t = arangep(10).reshape(5,2) - 12; t

array([[-10,  -9],
       [ -7,  -5],
       [ -1,   1],
       [  5,   7],
       [ 11,  17]])

In [308]:
relu(t)

array([[ 0,  0],
       [ 0,  0],
       [ 0,  1],
       [ 5,  7],
       [11, 17]])

---

In [309]:
X @ m.T

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

In [310]:
es('ij,kj', X, m)

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

In [311]:
X, m

(array([[0, 0],
        [0, 1],
        [1, 0],
        [1, 1]]),
 array([[1, 2],
        [3, 4]]))

___

# Partials

## Preliminaries

A matrix __M__ left-multiplies a (column) vector __x__ to produce a (column) vector __y__:

$$ \mathbf{M} \mathbf{x} = \mathbf{y}$$

Using Einstein summation notation, the matrix multiply is

$$m_{ij}x_j\equiv\sum_{j}m_{ij}x_j$$

In that notation, our equation is:

$$ m_{ij}x_j = y_i $$

A `python` example:

In [312]:
M = arangep(4).reshape(2,2)
x = arangep(2,4)
# x = np.arange(2)+1
M,x

(array([[2, 3],
        [5, 7]]),
 array([11, 13]))

In [313]:
es = np.einsum

In [314]:
y = es('ij,j', M, x)
y

array([ 61, 146])

`numpy` treats a 1-d array as a row or a column vector for the matrix multiplication operator `@`, depending on which side of the matrix it appears on, so we can also say

In [315]:
M @ x, all(M@x == y)

(array([ 61, 146]), True)

### Partial derivative of a matrix multiply

How does $y$ vary with $x$, with $M$ held constant? I.e. what is $\partial\mathbf{y}/\partial\mathbf{x}$?

"In general, the partial derivative of an [n-ary](http://en.wikipedia.org/wiki/Arity) function $f(x_1, \dots, x_n)$ in the direction $x_i$ at the point $(a_1, \dots, a_n)$ [is defined](https://en.wikipedia.org/w/index.php?title=Partial_derivative) to be:"

$$\frac{\partial f}{\partial x_i}(a_1, \ldots, a_n) = \lim_{h \to 0}\frac{f(a_1, \ldots, a_i+h,\ldots,a_n) - f(a_1,\ldots, a_i, \dots,a_n)}{h} \tag{2.1} \label{partial}$$

The matrix equation $\mathbf{M} \mathbf{x} = \mathbf{y}$ can be written as

$$\mathbf{M}\mathbf{x} = \mathbf{F}(\mathbf{x})=\sum_i f_i(x_1, x_2, \dots x_n) \mathbf{\hat{y}}_i=\mathbf{y} \tag{2.2} \label{mmul}$$

where

$$f_i(x_1, x_2, \dots x_n) = y_i \tag{2.3}$$

which, by definition of matrix multiplication, can be written

$$ f_i(x_1, x_2, \dots x_n) = \sum_{j=1}^{n} m_{ij}x_j \tag{2.4}$$

Substituting (2.4) into (2.1):

$$\frac{\partial f_i}{\partial x_j}(a_1, \ldots, a_n) = 
\lim_{h \to 0}\frac{
  \sum_{k=1}^{n} m_{ik}(a_k + \delta_{kj}h)
- \sum_{k=1}^{n} m_{ik}a_k}{h} = \lim_{h \to 0}\frac{m_{ij}h}{h} = m_{ij}
\tag{2.5}$$

Where $\delta_{ij}$ is the [Kronecker delta function](https://mathworld.wolfram.com/KroneckerDelta.html):

$$ \delta_{ij} =
    \begin{cases}
            1 &         \text{for } i=j,\\
            0 &         \text{for } i\neq j.
    \end{cases}
$$

Giving:

$$\frac{\partial f_i}{\partial x_j}(a_1, \ldots, a_n) = \lim_{h \to 0}\frac{\sum_{k=1}^{n} m_{ik}a_k + m_{ij}h - \sum_{k=1}^{n} m_{ik}a_k}{h} = \lim_{h \to 0}\frac{m_{ij}h}{h} = m_{ij} \tag{2.6}$$

In Einstein notation, $ f_i(x_1, x_2, \dots x_n) =  m_{ij}x_j = y_i $

Approximating numerically with our example:

In [316]:
(M@x - M@(x-np.array([0.001, 0]))) / 0.001, (M@x - M@(x-np.array([0, 0.001]))) / 0.001

(array([2., 5.]), array([3., 7.]))

# END
---

In [317]:
t =  np.array([0,0, 0,1, 1,0, 1,1]).reshape(4,2); t

array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]])

In [318]:
def f(a, b):
    """Exclusive-or of two inputs thresholded at 0.5.

    Returns 1 when exactly one of ``a`` and ``b`` is greater than 0.5,
    otherwise 0.
    """
    a_is_high = a > 0.5
    b_is_high = b > 0.5
    if a_is_high ^ b_is_high:
        return 1
    return 0

In [319]:
f(1,0), f(1,1)

(1, 0)

In [320]:
f(t[1,0], t[1,1])

1

In [321]:
[f(x[0], x[1]) for x in t]

[0, 1, 1, 0]

In [322]:
def exor(a, b):
    """Exclusive-or of two inputs thresholded at 0.5.

    Parameters
    ----------
    a, b : number
        Inputs; each counts as "true" when strictly greater than 0.5.

    Returns
    -------
    int
        1 when exactly one input is "true", otherwise 0.
    """
    # The false branch must yield 0; returning 1 on both branches made the
    # function constant instead of an XOR.
    return 1 if (a > 0.5) ^ (b > 0.5) else 0

In [323]:
#np.vectorize(exor, signature='(i)->()')(t)

In [324]:
def f2(v):
    """Exclusive-or of the first two entries of ``v``, thresholded at 0.5.

    Returns 1 when exactly one of ``v[0]`` and ``v[1]`` is greater than 0.5,
    otherwise 0.
    """
    first_is_high = v[0] > 0.5
    second_is_high = v[1] > 0.5
    if first_is_high ^ second_is_high:
        return 1
    return 0

In [325]:
#np.vectorize(f2)(t)

In [326]:
np.vectorize(f2, signature='(i)->()')(t)

array([0, 1, 1, 0])

In [327]:
[a for a in t[0]]

[0, 0]

---

In [328]:
a = np.arange(25).reshape(5,5)
b = np.arange(5)
c = np.arange(6).reshape(2,3)

In [329]:
a,b,c

(array([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]]),
 array([0, 1, 2, 3, 4]),
 array([[0, 1, 2],
        [3, 4, 5]]))

In [330]:
np.einsum('ii', a)

60

In [331]:
np.einsum('ii->i', a)

array([ 0,  6, 12, 18, 24])

In [332]:
np.trace(a)

60

In [333]:
np.einsum('ji', a)

array([[ 0,  5, 10, 15, 20],
       [ 1,  6, 11, 16, 21],
       [ 2,  7, 12, 17, 22],
       [ 3,  8, 13, 18, 23],
       [ 4,  9, 14, 19, 24]])

In [334]:
np.einsum('ji,i', a, b)

array([ 30,  80, 130, 180, 230])

In [335]:
a.dot(b)

array([ 30,  80, 130, 180, 230])

In [336]:
a[:,0]

array([ 0,  5, 10, 15, 20])

In [337]:
a[:,0].dot(b)

150

In [338]:
a[:,1]

array([ 1,  6, 11, 16, 21])

In [339]:
d = np.arange(125).reshape(5,5,5)

In [340]:
np.einsum('iii', d)

310

In [341]:
sum([d[i][i][i] for i in range(5)])

310

In [342]:
np.einsum('iij',d)

array([300, 305, 310, 315, 320])

In [343]:
np.einsum('iiz', d)

array([300, 305, 310, 315, 320])

In [344]:
[sum([d[i][i][j] for i in range(5)]) for j in range(5)]

[300, 305, 310, 315, 320]

In [345]:
sum(a[:])

array([50, 55, 60, 65, 70])

In [346]:
a[0]

array([0, 1, 2, 3, 4])

In [347]:
timeit(np.einsum('iii', d))

2.71 µs ± 18.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [348]:
es = np.einsum

In [349]:
es('ijk,kji',d,d)

499250

In [350]:
timeit(es('iii', d))

2.68 µs ± 20.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [351]:
es('i,ij', b, a)

array([150, 160, 170, 180, 190])

In [352]:
es('ij', a)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [353]:
es('i', b)

array([0, 1, 2, 3, 4])

In [354]:
g = np.arange(4).reshape(2,2)

In [355]:
g

array([[0, 1],
       [2, 3]])

In [356]:
es('ij,jk',g,g)

array([[ 2,  3],
       [ 6, 11]])

In [357]:
g@g

array([[ 2,  3],
       [ 6, 11]])

In [358]:
g[:]

array([[0, 1],
       [2, 3]])

In [359]:
h = np.arange(2); h

array([0, 1])

In [360]:
h.dot(g)

array([2, 3])

In [361]:
es('i,ij', h, g)

array([2, 3])

In [362]:
g.dot(h)

array([1, 3])

In [363]:
es('ji,i', g, h)

array([1, 3])

In [364]:
es('ij,j', g, h)

array([1, 3])

In [365]:
g[0,1]

1

In [366]:
np.array(1)

array(1)

In [367]:
np.array([1])

array([1])

In [368]:
np.array(2)

array(2)

In [369]:
np.array([1,2])

array([1, 2])

In [370]:
np.array([2])

array([2])

In [371]:
np.array(0)

array(0)

In [372]:
np.array(0).shape

()

In [373]:
np.array([0]).shape

(1,)

In [374]:
np.array(0)+1

1

In [375]:
np.array([0])+1

array([1])

In [376]:
np.array(3).dot(np.array(5))

15

In [377]:
np.array([3]).dot(np.array(5))

array([15])

In [378]:
np.array([3]).dot(np.array([5]))

15

In [379]:
es('i,i', np.array([3]), np.array([5]))

15

In [380]:
es('i,i', np.array(3), np.array(5))

ValueError: einstein sum subscripts string contains too many subscripts for operand 0

In [None]:
es('', np.array(3), np.array(5))

In [None]:
int(np.array(3))

In [None]:
int(np.array([3]))

In [None]:
a = np.arange(4).reshape(2,2) + 1; print(a)

In [None]:
b = np.arange(2) + 1; print(b)

In [None]:
b.dot(a)

In [None]:
es('ij, jk', a, np.array([[1,0],[1,0]]))

In [None]:
a[:,0]

In [None]:
a[:,1]

In [None]:
sum(a[:,0])

In [None]:
a.T

In [None]:
es('...j->...', a)

In [None]:
a,b

In [None]:
a.dot(b), b.dot(a)

In [None]:
b.shape

In [None]:
c = b.reshape(2,1); c

In [None]:
b.dot(c), b@c

In [None]:
es('...i,i...', b, c)

In [None]:
timeit(b.dot(c))

In [None]:
timeit(b@c)

In [None]:
timeit(es('...i,i...', b, c))

In [None]:
timeit(es('i,i...', b, c))

In [None]:
a,b,c

In [None]:
a@b

In [None]:
a@c

In [None]:
b.shape, c.shape

In [None]:
es('ij,j...', a,c)

In [None]:
es('ij,j', a, b)

In [None]:
es('ij,j...', a, b)

In [None]:
es('ij,j->i', a, b)

In [None]:
es('ij,j->j', a, b)

In [None]:
es('ij,i...', a,c)

In [None]:
es('ij,j...', a, a)

In [None]:
es('...j,ij', a, a)

In [None]:
Xd = np.array([0,0,1,0,0,1,1,1]).reshape(2,4); Xd

In [None]:
a

In [None]:
a@Xd

In [None]:
Xd.reshape(4,2)

In [None]:
t = np.arange(8).reshape(4,2); t

In [None]:
a @ t.T

In [None]:
t.T

In [None]:
Ellipsis

In [None]:
b

In [None]:
b[:, np.newaxis]

In [None]:
t

In [None]:
t[np.newaxis]

In [None]:
x = np.arange(3); x

In [None]:
x[:,np.newaxis] + x[np.newaxis,:]

In [None]:
x[:,np.newaxis] * x[np.newaxis,:]

In [None]:
x[np.newaxis,:] * x[:,np.newaxis]

In [None]:
x[:,np.newaxis], x[np.newaxis,:]

In [None]:
t.dot(np.arange(2) + 1)

In [None]:
(np.arange(2) + 1).dot(t)

In [None]:
t,a

In [None]:
t.dot(a)

In [None]:
np.prime  # NOTE(review): NumPy does not appear to provide a `prime` attribute, so this lookup presumably raises AttributeError; the following cells use sympy for primes instead — confirm and consider removing.

In [None]:
import sympy

In [None]:
np.array(list(sympy.sieve.primerange(2000,2050)))

In [None]:
def pa(n):
    """Return an array of the first ``n`` primes.

    Each prime is looked up individually with ``sympy.prime``, whose argument
    is 1-based (``sympy.prime(1)`` is the first prime).
    """
    first_primes = []
    for position in range(1, n + 1):
        first_primes.append(sympy.prime(position))
    return np.array(first_primes)

In [None]:
pa(50)

In [None]:
tp = pa(1000)

In [None]:
tp[-1]

In [None]:
tp2 = pa(10000)

In [None]:
np.sign(np.arange(5)-2)

In [None]:
np.array([1]*2).shape

In [None]:
np.ones(np.array([3,5]).shape)

In [386]:
M=np.arange(4).reshape(2,2)
b=np.arange(2)+1
x=np.arange(2)+5

In [389]:
M@x + b, (M@x) + b

(array([ 7, 30]), array([ 7, 30]))