# NumPy

Probably one of the most used Python libraries among data scientists.

NumPy lets you replace explicit Python loops with whole-array operations, a technique called `vectorization`, which is a big part of why NumPy is fast as f***.

In [1]:
import numpy as np

In [2]:
data = np.random.randn(2, 3)
data * data, data + data, data.shape, data.dtype, data.ndim

In [3]:
1 / data

In [4]:
# Uninitialized array: np.empty only allocates memory, so the values are arbitrary
np.empty(shape=(2, 3, 2))

In [5]:
# Zeros
np.zeros(10), np.zeros((3, 6))

In [6]:
# Array of the integers 0 through 14
np.arange(15)

## Slicing

In [7]:
arr = np.arange(10)

In [8]:
arr[5:8]

In [9]:
arr[:] = 10 # replace all values using `:`
arr

In [10]:
arr = np.array([[1, 7, 8], [4, 5, 6], [7, 9, 10]])

In [11]:
# naïve (chained) indexing: arr[0] selects the first row, then [2] its third element
arr[0][2]

In [12]:
# same element via a single comma-separated index (NumPy feature)
arr[0, 2]

In [13]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [14]:
arr3d[0]

In [15]:
arr3d[1, 0] # same as arr3d[1][0]

In [16]:
arr3d[:2, :1]  # first 2 entries along axis 0; within each, only the row at index 0 (the slice keeps that axis)

In [17]:
arr3d[:, 1:]  # every entry along axis 0; within each, the rows from index 1 onward

In [18]:
arr3d[:2][1:]  # chained slices both act on axis 0: [:2] then [1:], i.e. the same as arr3d[1:2]

### Boolean Indexing

In [19]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)

In [20]:
names[names == 'Bob']  # select the elements equal to 'Bob'

In [21]:
names == 'Bob'

In [22]:
data[names == 'Bob'] 

In [23]:
data[[True, False, False, True, False, False, False]]  # the same mask as names == 'Bob', written out by hand

### Fancy Indexing

Using integer arrays to describe indexing.

In [24]:
arr = np.empty((8, 4))
for i in range(8):
    arr[i] = i
arr

In [25]:
arr[[4, 3, 0, 6]] # select a subset of rows

In [26]:
arr[[-3, -5, -7]] # select rows from the end

In [27]:
arr = np.arange(32).reshape((8, 4))
arr

In [28]:
arr[[1, 5, 6, 7]]  # select the rows at index 1, 5, 6 and 7 (a single index list picks rows, not columns)

In [29]:
# Two index lists are paired element-wise:
# rows [1, 5, 6, 7] with columns [0, 3, 1, 2].
arr[[1, 5, 6, 7], [0, 3, 1, 2]]

# The result is a 1-D array holding the elements at
# (row 1, col 0), (row 5, col 3),
# (row 6, col 1) and (row 7, col 2) —
# not a rectangular block of rows and columns.

## Transposing and Swapping Axes

In [30]:
arr = np.arange(15).reshape((3, 5))
arr

In [31]:
arr.T  # transpose

In [32]:
np.dot(arr.T, arr)

In [33]:
arr = np.arange(16).reshape((2, 2, 4))
arr

In [34]:
# NOTE(review): (0, 1, 2) is the identity permutation, so the axes stay in
# their original order — a reordering such as (1, 0, 2) may have been
# intended; confirm.
arr.transpose((0, 1, 2))

In [35]:
arr.swapaxes(1, 2)

## Universal Functions

### ufuncs

In [36]:
arr = np.arange(10)
arr

In [37]:
np.sqrt(arr)

In [38]:
np.exp(arr)

#### Binary ufuncs

Binary ufuncs take 2 arrays and output a single array (unary ufuncs such as `np.sqrt` and `np.exp` above take just one).

In [39]:
x = np.random.randn(8)
y = np.random.randn(8)
x, y

In [40]:
np.maximum(x, y)

## Array-Oriented Programming with Arrays

In [41]:
points = np.arange(-5, 5, 0.01)

In [42]:
xs, ys = np.meshgrid(points, points)
xs, ys

In [43]:
z = np.sqrt(xs ** 2 + ys ** 2)
z

In [44]:
import matplotlib.pyplot as plt

# Render `z` as a grayscale image with a colour scale next to it.
plt.imshow(z, cmap="gray")
# Raw string: "\s" is not a valid Python escape sequence, so the LaTeX
# backslash must be kept literal (avoids the invalid-escape warning).
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
plt.colorbar();  # trailing ";" keeps the notebook from echoing the return value

### Expressing Conditional Logic as Array Operations

In [45]:
x = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
y = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
condition = np.array([True, False, True, True, False])

In [46]:
# plain-Python way: take the value from x where the condition holds, else from y
# (loop names differ from the arrays so they do not shadow x and y)
[(x_val if flag else y_val) for x_val, y_val, flag in zip(x, y, condition)]

In [47]:
# better approach
np.where(condition, x, y) # condition, if true: x, else: y

In [48]:
arr = np.random.randn(4, 4)
arr

In [49]:
arr > 0

In [50]:
np.where(arr > 0, 2, -2) # if element greater than 0, 2 else -2

In [51]:
np.where(arr > 0, 2, arr) # replace element > 0 with 2

### Mathematical and Statistical Methods

In [52]:
arr = np.random.randn(5, 4)
arr

In [53]:
arr.mean(), np.mean(arr), arr.sum()

In [54]:
arr.mean(axis=1), arr.sum(axis=0)

In [55]:
arr = np.arange(8)
arr

In [56]:
arr.cumsum()

In [57]:
arr = np.arange(9).reshape((3, 3))
arr

In [58]:
arr.cumsum(axis=0)

In [59]:
arr.cumprod(axis=1)

### Methods for Boolean Arrays

In [60]:
arr = np.random.randn(100)

In [61]:
(arr > 0).sum()

In [62]:
bools = np.array([False, False, True, False])
bools

In [63]:
bools.any() # check if bools has True

In [64]:
bools.all() # check if bools are all True

### Sorting

In [65]:
arr = np.random.randn(6)
arr

In [66]:
arr.sort()
arr

In [67]:
arr = np.random.randn(5, 3)
arr

In [68]:
arr.sort(1)  # sort in place along axis 1, i.e. the values within each row
arr

### Unique and Set Logic

In [69]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

In [70]:
np.unique(names)  # sorted unique values, like sorted(set(names)) but returned as an array

## File Input and Output with Arrays

In [71]:
arr = np.arange(10)

In [72]:
np.save('array', arr)  # the '.npy' extension is appended, so this writes 'array.npy'

In [73]:
np.load('array.npy')

In [74]:
np.savez('arr.npz', a=arr, b=arr) # saving multiple arrays

In [76]:
# np.load on an .npz file returns an archive object that keeps the file open;
# the context manager closes it as soon as the arrays have been read.
with np.load('arr.npz') as arch:
    loaded = arch['a'], arch['b']
loaded

In [77]:
# compressed archive; '.npz' is appended to the name, so this writes 'arr.npz'
# NOTE: that is the same path used by the earlier np.savez('arr.npz', ...) call, so that file is overwritten
np.savez_compressed('arr', a=arr, b=arr)

## Linear Algebra

In [87]:
x = np.arange(1, 7).astype('float32').reshape(2, 3)
y = np.array([[6., 23.], [-1, 7], [8, 9]])
x, y

In [88]:
x.dot(y) # same as np.dot(x, y) or x @ y

In [89]:
x @ y

In [90]:
from numpy.linalg import inv, qr

In [91]:
X = np.random.randn(5, 5)

In [94]:
mat = X.T.dot(X)
mat

In [95]:
inv(mat)

In [96]:
q, r = qr(mat)
q, r

## Pseudorandom Number Generation

In [100]:
np.random.seed(1234)  # seed the global generator so results are reproducible

In [101]:
rng = np.random.RandomState(1234)  # separate generator with its own seed (applies to rng only)
rng.randn(10)