# Understand numpy indexing

In [1]:
#%matplotlib widget
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import sympy

## A few ways to get test numpy arrays

In [3]:
np.arange(3), np.arange(4,8), np.arange(5,1,-2)

(array([0, 1, 2]), array([4, 5, 6, 7]), array([5, 3]))

For experiments with multiplication, arrays of primes may be helpful:

In [4]:
def arangep(n, starting_index=0):
    sympy.sieve.extend_to_no(starting_index + n)
    return np.array(sympy.sieve._list[starting_index:starting_index + n])

In [5]:
arangep(5), arangep(4,2)

(array([ 2,  3,  5,  7, 11]), array([ 5,  7, 11, 13]))

# Shapes and Indexing

Indexing [basics](https://numpy.org/devdocs/user/basics.indexing.html#basics-indexing) and [details](https://numpy.org/devdocs/reference/arrays.indexing.html#arrays-indexing)

In [6]:
a = np.arange(2*3*4).reshape(2,3,4); print(a)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]


Indexing is row-major order (smallest-address-delta last) (C-style):

In [7]:
a[0,0,1], a[0,1,0], a[1,0,0]

(1, 4, 12)

In [8]:
a[0], a[0,0], a[0,0,0]

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]]),
 array([0, 1, 2, 3]),
 0)

In [9]:
a[0], a[0][0], a[0][0][0]

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]]),
 array([0, 1, 2, 3]),
 0)

In [10]:
a.flat[7:12]

array([ 7,  8,  9, 10, 11])

# Multiplicative-type operations

In [11]:
a = arangep(2)
b = arangep(2,2)
a,b

(array([2, 3]), array([5, 7]))

Binary scalar operations on vectors just map

In [12]:
a+1, a*2, a+b, a*b, b/a, b%a

(array([3, 4]),
 array([4, 6]),
 array([ 7, 10]),
 array([10, 21]),
 array([2.5       , 2.33333333]),
 array([1, 1]))

[`dot`](https://numpy.org/devdocs/reference/generated/numpy.dot.html) is "alternative matrix product with different broadcasting rules"

In [13]:
a.dot(b), b.dot(a)

(31, 31)

In [14]:
m = arangep(4,4).reshape(2,2); m

array([[11, 13],
       [17, 19]])

## Dot product

Matrix dot vector produces vector of dot products of rows of the matrix with the vector:

In [15]:
m.dot(a), a.dot(m[0]), a.dot(m[1]), m[0], m[1]

(array([61, 91]), 61, 91, array([11, 13]), array([17, 19]))

vector dot matrix produces vector of dot products of columns of matrix with the vector:

In [16]:
a.dot(m), a.dot(m[:,0]), a.dot(m[:,1]), m[:,0], m[:,1]

(array([73, 83]), 73, 83, array([11, 17]), array([13, 19]))

`@` is infix [matrix multiplication](https://numpy.org/devdocs/reference/generated/numpy.matmul.html#numpy.matmul)

In [17]:
a, m, m @ a, a @ m, m.T @ a

(array([2, 3]),
 array([[11, 13],
        [17, 19]]),
 array([61, 91]),
 array([73, 83]),
 array([73, 83]))

Right-multiplication by a matrix is equivalent to left-multiplication by its transpose:

In [18]:
a @ m, m.T @ a, a @ m.T, m @ a

(array([73, 83]), array([73, 83]), array([61, 91]), array([61, 91]))

### "Vectorizing" the dot product
e.g. when we batch inputs to the network. \
Imagine `a` and `b` are both to be run through a network which does multiplication by `m`

In [19]:
c = 2*a + b
a, b, c, a @ m, b @ m, c @ m

(array([2, 3]),
 array([5, 7]),
 array([ 9, 13]),
 array([73, 83]),
 array([174, 198]),
 array([320, 364]))

The convenient representation *(see below)*, is for the input vectors to be contiguous and adjacent in memory, as would happen if you read them into a memoryview of an array, and reshaped it appropriately, e.g.:

In [20]:
X = np.array([2,3, 5,7, 9,13]).reshape(-1, 2); X

array([[ 2,  3],
       [ 5,  7],
       [ 9, 13]])

In [21]:
X @ m

array([[ 73,  83],
       [174, 198],
       [320, 364]])

In [22]:
X @ m + np.array([1000, 2000])

array([[1073, 2083],
       [1174, 2198],
       [1320, 2364]])

In [23]:
X.shape

(3, 2)

## Einstein summation notation

Numpy provides [Einstein summation](https://mathworld.wolfram.com/EinsteinSummation.html) operations with [einsum](https://numpy.org/devdocs/reference/generated/numpy.einsum.html)
1. Repeated indices are implicitly summed over.
1. Each index can appear at most twice in any term.
1. Each term must contain identical non-repeated indices.

In [24]:
es = np.einsum

 $$a_{ik}a_{ij} \equiv \sum_{i} a_{ik}a_{ij}$$

$$M_{ij}v_j=\sum_{j}M_{ij}v_j$$

In [25]:
es('ij,j', m, a), es('ij,i', m, a)

(array([61, 91]), array([73, 83]))

In [26]:
es('j,ij', a, m), es('i,ij', a, m)

(array([61, 91]), array([73, 83]))

Scalar multiplication bei

In [27]:
all(es('ij,j', m, a) == es('j,ij', a, m))

True

### Lorem Ipsum

In [28]:
m2 = np.zeros((2,3), np.int); m2

array([[0, 0, 0],
       [0, 0, 0]])

In [29]:
m2[1] = np.arange(3); m2

array([[0, 0, 0],
       [0, 1, 2]])

In [30]:
m3 = arangep(8).reshape(4,2).T; m3

array([[ 2,  5, 11, 17],
       [ 3,  7, 13, 19]])

In [31]:
m3[:,0]

array([2, 3])

In [32]:
m @ m3[:,0]

array([61, 91])

In [33]:
h = m @ m3; h

array([[ 61, 146, 290, 434],
       [ 91, 218, 434, 650]])

In [34]:
b, b[...,np.newaxis]

(array([5, 7]),
 array([[5],
        [7]]))

In [35]:
h + b[...,np.newaxis]

array([[ 66, 151, 295, 439],
       [ 98, 225, 441, 657]])

## Convenient representations

Suppose you have many __x__ to run through a net. What is the convenient representation?

Consider a two-input net, e.g. the XOR net. We want to vectorize the evaluation of the net, and its backprop. In the case of XOR the entire input domain is four vectors: { (0,0), (0,1), (1,0), (1,1) }:

In [36]:
X = np.array([0,0, 0,1, 1,0, 1,1]).reshape(-1,2); X

array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]])

This is a convenient ordering for input, with each input vector contiguous in memory. But it's not in the form of column vectors for the classical left-multiplication by a transformation matrix to yield a column matrix product.

In [37]:
m = np.arange(4).reshape(2,2) + 1; m

array([[1, 2],
       [3, 4]])

In [38]:
m @ np.array([1, 2]).reshape(2,1)

array([[ 5],
       [11]])

We can transpose the input before left-multiplying ...

In [39]:
m @ X.T

array([[0, 2, 1, 3],
       [0, 4, 3, 7]])

... and transpose it back:

In [40]:
Y = (m @ X.T).T; Y

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

Or we can be less pedantic about expressing the matrix multiply:

In [41]:
X @ m.T

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

In Einstein summation notation:

In [42]:
es('ij,kj', X, m)

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

If we really require the matrix on the left, we can index thus:

In [43]:
es('ij,kj->ki', m, X)

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

---
### What way is faster?

In [44]:
timeit(X @ m.T)

1.05 µs ± 5.03 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [45]:
timeit(es('ij,kj->ki', m, X))

2.16 µs ± 9.84 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [46]:
tm = m.T

In [47]:
timeit(X @ tm)

897 ns ± 9.11 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


No surprise, fastest is to have the transposed matrix ready. No surprise that the Einstein summation is slower, as it requires formulating a loop from the string of indexes. But what if the input data is much larger? E.g.

In [48]:
Xlarge = np.arange(2*10000).reshape(10000,2); Xlarge

array([[    0,     1],
       [    2,     3],
       [    4,     5],
       ...,
       [19994, 19995],
       [19996, 19997],
       [19998, 19999]])

In [49]:
timeit(Xlarge @ tm)

86.9 µs ± 142 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [50]:
timeit(es('ij,kj->ki', m, Xlarge))

126 µs ± 126 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


The parsing of the index string and formulating a plan is maybe 1.6 µs, but the loop is 

In [51]:
(156 + 1.4 - 3.01)/94.2

1.63895966029724

64% slower.

---

Adding another vector to each result vector of the multiply:

In [52]:
a, a + Y, Y + a

(array([2, 3]),
 array([[ 2,  3],
        [ 4,  7],
        [ 3,  6],
        [ 5, 10]]),
 array([[ 2,  3],
        [ 4,  7],
        [ 3,  6],
        [ 5, 10]]))

Applying a function to each result:

In [53]:
relu = np.vectorize(lambda x: max(0,x))

Try it out:

In [54]:
t = arangep(10).reshape(5,2) - 12; t

array([[-10,  -9],
       [ -7,  -5],
       [ -1,   1],
       [  5,   7],
       [ 11,  17]])

In [55]:
relu(t)

array([[ 0,  0],
       [ 0,  0],
       [ 0,  1],
       [ 5,  7],
       [11, 17]])

---

In [56]:
X @ m.T

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

In [57]:
es('ij,kj', X, m)

array([[0, 0],
       [2, 4],
       [1, 3],
       [3, 7]])

In [58]:
X, m

(array([[0, 0],
        [0, 1],
        [1, 0],
        [1, 1]]),
 array([[1, 2],
        [3, 4]]))

___

### Outer product

In [59]:
a, b = arangep(2), arangep(3,2)
a, b

(array([2, 3]), array([ 5,  7, 11]))

In [60]:
es('i,j', a, b), es('j,i', a, b), np.outer(a, b), np.outer(b, a)

(array([[10, 14, 22],
        [15, 21, 33]]),
 array([[10, 15],
        [14, 21],
        [22, 33]]),
 array([[10, 14, 22],
        [15, 21, 33]]),
 array([[10, 15],
        [14, 21],
        [22, 33]]))

In [61]:
a, b = arangep(4).reshape(2,2), arangep(4,4).reshape(2,2)
a,b

(array([[2, 3],
        [5, 7]]),
 array([[11, 13],
        [17, 19]]))

$$ \sum_j{outer(a[:,j],b[:,j])}$$

In [62]:
es('...i,j', a, b)

ValueError: operand has more dimensions than subscripts given in einstein sum, but no '...' ellipsis provided to broadcast the extra dimensions.

# Vectorized dot product

In [76]:
t = np.arange(4*3).reshape(-1,3)
t

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [77]:
[t[i].dot(t[i]) for i in range(4)]

[5, 50, 149, 302]

In [78]:
es('...i,...i', t, t)

array([  5,  50, 149, 302])

In [81]:
es('...i,...i', t[1:], t[:-1])

array([ 14,  86, 212])

`np.arccos` range is $[0,\pi)$. Want to convert to $[-\pi/2, \pi/2)$.

In [105]:
a = np.arange(-5,6)*np.pi/5
ca = np.cos(a)
aca = np.arccos(ca)
a, ca, aca

(array([-3.14159265, -2.51327412, -1.88495559, -1.25663706, -0.62831853,
         0.        ,  0.62831853,  1.25663706,  1.88495559,  2.51327412,
         3.14159265]),
 array([-1.        , -0.80901699, -0.30901699,  0.30901699,  0.80901699,
         1.        ,  0.80901699,  0.30901699, -0.30901699, -0.80901699,
        -1.        ]),
 array([3.14159265, 2.51327412, 1.88495559, 1.25663706, 0.62831853,
        0.        , 0.62831853, 1.25663706, 1.88495559, 2.51327412,
        3.14159265]))

In [97]:
b = a >= (np.pi/2)
b

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [98]:
b * np.pi/2

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       1.57079633, 1.57079633, 1.57079633, 1.57079633, 1.57079633])

In [99]:
a[a>=np.pi/2] -= np.pi
a

array([ 0.        ,  0.31415927,  0.62831853,  0.9424778 ,  1.25663706,
       -1.57079633, -1.25663706, -0.9424778 , -0.62831853, -0.31415927])

In [111]:
a = np.arange(8)
a, a%3, a%3>0

(array([0, 1, 2, 3, 4, 5, 6, 7]),
 array([0, 1, 2, 0, 1, 2, 0, 1]),
 array([False,  True,  True, False,  True,  True, False,  True]))

In [112]:
a%3>0

SyntaxError: invalid syntax (<ipython-input-112-bbf543b248a7>, line 1)

# END
---

In [None]:
t =  np.array([0,0, 0,1, 1,0, 1,1]).reshape(4,2); t

In [None]:
f = lambda a, b: 1 if (a > 0.5) ^ (b > 0.5) else 0

In [None]:
f(1,0), f(1,1)

In [None]:
f(t[1,0], t[1,1])

In [None]:
[f(x[0], x[1]) for x in t]

In [None]:
def exor(a, b):
    return 1 if (a > 0.5) ^ (b > 0.5) else 1

In [None]:
#np.vectorize(exor, signature='(i)->()')(t)

In [None]:
f2 = lambda v: 1 if (v[0] > 0.5) ^ (v[1] > 0.5) else 0

In [None]:
#np.vectorize(f2)(t)

In [None]:
np.vectorize(f2, signature='(i)->()')(t)

In [None]:
[a for a in t[0]]

---

In [None]:
a = np.arange(25).reshape(5,5)
b = np.arange(5)
c = np.arange(6).reshape(2,3)

In [None]:
a,b,c

In [None]:
np.einsum('ii', a)

In [None]:
np.einsum('ii->i', a)

In [None]:
np.trace(a)

In [None]:
np.einsum('ji', a)

In [None]:
np.einsum('ji,i', a, b)

In [None]:
a.dot(b)

In [None]:
a[:,0]

In [None]:
a[:,0].dot(b)

In [None]:
a[:,1]

In [None]:
d = np.arange(125).reshape(5,5,5)

In [None]:
np.einsum('iii', d)

In [None]:
sum([d[i][i][i] for i in range(5)])

In [None]:
np.einsum('iij',d)

In [None]:
np.einsum('iiz', d)

In [None]:
[sum([d[i][i][j] for i in range(5)]) for j in range(5)]

In [None]:
sum(a[:])

In [None]:
a[0]

In [None]:
timeit(np.einsum('iii', d))

In [None]:
es = np.einsum

In [None]:
es('ijk,kji',d,d)

In [None]:
timeit(es('iii', d))

In [None]:
es('i,ij', b, a)

In [None]:
es('ij', a)

In [None]:
es('i', b)

In [None]:
g = np.arange(4).reshape(2,2)

In [None]:
g

In [None]:
es('ij,jk',g,g)

In [None]:
g@g

In [None]:
g[:]

In [None]:
h = np.arange(2); h

In [None]:
h.dot(g)

In [None]:
es('i,ij', h, g)

In [None]:
g.dot(h)

In [None]:
es('ji,i', g, h)

In [None]:
es('ij,j', g, h)

In [None]:
g[0,1]

In [None]:
np.array(1)

In [None]:
np.array([1])

In [None]:
np.array(2)

In [None]:
np.array([1,2])

In [None]:
np.array([2])

In [None]:
np.array(0)

In [None]:
np.array(0).shape

In [None]:
np.array([0]).shape

In [None]:
np.array(0)+1

In [None]:
np.array([0])+1

In [None]:
np.array(3).dot(np.array(5))

In [None]:
np.array([3]).dot(np.array(5))

In [None]:
np.array([3]).dot(np.array([5]))

In [None]:
es('i,i', np.array([3]), np.array([5]))

In [None]:
#es('i,i', np.array(3), np.array(5))

In [None]:
es('', np.array(3), np.array(5))

In [None]:
int(np.array(3))

In [None]:
int(np.array([3]))

In [None]:
a = np.arange(4).reshape(2,2) + 1; print(a)

In [None]:
b = np.arange(2) + 1; print(b)

In [None]:
b.dot(a)

In [None]:
es('ij, jk', a, np.array([[1,0],[1,0]]))

In [None]:
a[:,0]

In [None]:
a[:,1]

In [None]:
sum(a[:,0])

In [None]:
a.T

In [None]:
es('...j->...', a)

In [None]:
a,b

In [None]:
a.dot(b), b.dot(a)

In [None]:
b.shape

In [None]:
c = b.reshape(2,1); c

In [None]:
b.dot(c), b@c

In [None]:
es('...i,i...', b, c)

In [None]:
timeit(b.dot(c))

In [None]:
timeit(b@c)

In [None]:
timeit(es('...i,i...', b, c))

In [None]:
timeit(es('i,i...', b, c))

In [None]:
a,b,c

In [None]:
a@b

In [None]:
a@c

In [None]:
b.shape, c.shape

In [None]:
es('ij,j...', a,c)

In [None]:
es('ij,j', a, b)

In [None]:
es('ij,j...', a, b)

In [None]:
es('ij,j->i', a, b)

In [None]:
es('ij,j->j', a, b)

In [None]:
es('ij,i...', a,c)

In [None]:
es('ij,j...', a, a)

In [None]:
es('...j,ij', a, a)

In [None]:
Xd = np.array([0,0,1,0,0,1,1,1]).reshape(2,4); Xd

In [None]:
a

In [None]:
a@Xd

In [None]:
Xd.reshape(4,2)

In [None]:
t = np.arange(8).reshape(4,2); t

In [None]:
a @ t.T

In [None]:
t.T

In [None]:
Ellipsis

In [None]:
b

In [None]:
b[:, np.newaxis]

In [None]:
t

In [None]:
t[np.newaxis]

In [None]:
x = np.arange(3); x

In [None]:
x[:,np.newaxis] + x[np.newaxis,:]

In [None]:
x[:,np.newaxis] * x[np.newaxis,:]

In [None]:
x[np.newaxis,:] * x[:,np.newaxis]

In [None]:
x[:,np.newaxis], x[np.newaxis,:]

In [None]:
t.dot(np.arange(2) + 1)

In [None]:
(np.arange(2) + 1).dot(t)

In [None]:
t,a

In [None]:
t.dot(a)

In [None]:
np.prime

In [None]:
import sympy

In [None]:
np.array(list(sympy.sieve.primerange(2000,2050)))

In [None]:
pa = lambda n: np.array([sympy.prime(i+1) for i in range(n)])

In [None]:
pa(50)

In [None]:
tp = pa(1000)

In [None]:
tp[-1]

In [None]:
tp2 = pa(10000)

In [None]:
np.sign(np.arange(5)-2)

In [None]:
np.array([1]*2).shape

In [None]:
np.ones(np.array([3,5]).shape)

In [None]:
M=np.arange(4).reshape(2,2)
b=np.arange(2)+1
x=np.arange(2)+5

In [None]:
M@x + b, (M@x) + b

In [104]:
a, b = arangep(2), arangep(3,2)
a, b

(array([2, 3]), array([ 5,  7, 11]))

In [106]:
t = es('i,j', a, b)
t

array([[10, 14, 22],
       [15, 21, 33]])

In [114]:
t[0].dot(t[0]), t[1].dot(t[1]), sum([t[0].dot(t[0]), t[1].dot(t[1])]), es('ij,ij', t, t)

(780, 1755, 2535, 2535)

In [119]:
#es('ij,ij', t, t)
np.atleast_2d(t)

array([[10, 14, 22],
       [15, 21, 33]])