NumPy - n-dimensional array library for Python
===
* one of the most fundamental libraries for scientific computing
* off-the-charts efficiency
* used by sklearn, pandas, etc.

In [None]:
import numpy as np

In [None]:
# build a 1-D array holding the integers 0 through 99
np.array(range(100))

In [None]:
# a list of equal-length lists becomes a 2-D array: 2 rows x 3 columns
np.array([[1, 2, 3], [4, 5, 6]])

In [None]:
# no -- the elements must be passed together as ONE argument (e.g. a tuple or list).
# This call is intentionally wrong: 2 and 3 are not taken as array elements,
# so running this cell is expected to raise an error.
np.array(1, 2, 3)

## negative indexing

In [None]:
a = np.array([1, 2, 3])

# negative indices count back from the end: -1 is the last element (3),
# -2 the one before it (2), -3 the first (1)
a[-1], a[-2], a[-3]

## placeholder arrays

In [None]:
# 3-D array filled with zeros, shape (5, 10, 2)
a = np.zeros((5,10,2))

# dtype: element type (none was requested, so NumPy's default is used)
# ndim:  number of dimensions -> 3, one per entry in the shape tuple
# size:  total number of elements -> 5 * 10 * 2 = 100
a.dtype, a.ndim, a.size

In [None]:
# identity matrix (looks like a one-hot encoding eh?)
# 4x4: ones on the main diagonal, zeros everywhere else
np.eye(4)

In [None]:
# three 4x10 placeholder arrays, shown together as one tuple:
# all zeros, all ones, and every cell filled with the same given value
(
    np.zeros((4, 10)),
    np.ones((4, 10)),
    np.full((4, 10), '\u2605'),  # my god it's full of stars
)

## the maths

In [None]:
# two equal-length vectors to combine
a = np.array([1, 2, 3])
b = np.array([3, 2, 1])

# arithmetic operators work element by element; a scalar operand
# (the 1 in `a + 1`, the 2 in `2**a`) is applied to every element
(
    a * b,
    a - b,
    a + b,
    a / b,
    a + 1,
    2**a,
)

In [None]:
# two random 3x3 integer matrices
a = np.random.randint(5, size=(3, 3))
b = np.random.randint(5, size=(3, 3))
# matrix product (rows of a times columns of b) -- not the elementwise `*`
a @ b

In [None]:
# 5x5 array of random floats
a = np.random.random((5, 5))

# reductions over the whole array: smallest, largest, and average element
(
    a.min(),
    a.max(),
    a.mean(),
)

working with numpy datasets
=====

In [None]:
# round-trip an array through NumPy's binary .npy format
a = np.array(range(100))
np.save('a.npy', a)
# hand np.load the path so it opens and closes the file itself; the handle
# from a bare open('a.npy', 'rb') passed inline was never closed
np.load('a.npy')

In [None]:
# load the wine dataset that ships with scikit-learn
from sklearn.datasets import load_wine

wine = load_wine()
# the loader's result is indexed by key; 'data' holds the measurements
# that the following cells slice
data = wine['data']

# check what kind of container the measurements come in
# (presumably a numpy.ndarray, given the slicing that follows -- confirm by running)
type(data)

In [None]:
# dimensions of the table: (number of rows, number of columns)
data.shape

In [None]:
# a single integer index picks one row: every column of the first wine
data[0]

## multi-dimensional slicing (aka my new band name)

In [None]:
# ':' keeps every row, 0 picks the first column
data[:, 0]

In [None]:
# first column again, limited to the first 10 rows
data[:10, 0]

## fancy indexing

In [None]:
# indexing with an array of integers selects those rows, in the order given:
# here row 0 (the first) and row -1 (the last)
indexes = np.array([0,-1])
data[indexes]

In [None]:
# column 0 has "alcohol content" (ABV)
# build a boolean mask: True for every wine whose ABV is below 12%
where = data[:, 0] < 12

# show the mask itself, the ABV values it selects, and the lowest ABV overall
(
    where,
    data[where][:, 0],
    data[:, 0].min(),
)

## changing shapes

In [None]:
a = np.array([[1, 2, 3], [10, 20, 30]])
# NOTE(review): a is already 2x3, so this reshape leaves the shape unchanged and
# its result is not kept; a target such as (3, 2) would show an actual change
a.reshape((2,3))

# can also leave a dimension unspecified (-1) and it will be inferred
# (here: 6 elements / 2 rows -> 3 columns)
a.reshape((2,-1))

In [None]:
# transpose
a = np.array([[1, 2, 3], [10, 20, 30]])
# .T swaps rows and columns: the 2x3 array is shown as 3x2
a.T

In [None]:
a = np.array([[1, 2, 3], [10, 20, 30]])

# ravel flattens to 1-D and returns a view of a's data when it can
# (it falls back to a copy only when a view is not possible)
a.ravel()

# flatten also produces a 1-D array, but always returns a copy
a.flatten()

In [None]:
a = np.array([[1, 2, 3], [10, 20, 30]])
b = np.array([[100, 200, 300], [1000, 2000, 3000]])
# join the two 2x3 arrays into one; no axis is given, so NumPy's default applies
np.concatenate([a, b])

## axes
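axes are the dimensions of an array, numbered in the order you index them: for a 2-D array `a[i, j]`, axis 0 runs down the rows (`i`) and axis 1 runs across the columns (`j`)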

In [None]:
a = np.array([[1, 2, 3], [10, 20, 30]])
b = np.array([[100, 200, 300], [1000, 2000, 3000]])

# axis=1 joins along the second axis (columns): each row of b is appended
# to the matching row of a, giving 2 rows x 6 columns
np.concatenate([a, b], axis=1)

In [None]:
# axis=0 joins along the first axis (rows): b's rows are stacked under a's
# (a and b are whatever an earlier cell left in those names)
np.concatenate([a, b], axis=0)

In [None]:
# random integers arranged as a 5x5x2 array
a = np.random.randint(10, size=(5, 5, 2))
print(a)
# axis=-1 means the last axis (length 2): each innermost pair is added up,
# so the result is 5x5
a.sum(axis=-1)