## 1 Data Structures
### 1.1 creating multi-dimensional arrays

In [2]:
import numpy as np
np.array([range(i, i+3) for i in [2,4,6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

### 1.2 creating arrays from scratch

In [3]:
np.ones((3,5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [5]:
np.full((3,5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [6]:
# 3x3 array of random integers drawn from [0, 10); the upper bound is exclusive.
# NOTE(review): no seed is set in the visible cells, so the values shown below will differ on re-run.
np.random.randint(0,10,(3,3))

array([[3, 0, 0],
       [1, 1, 5],
       [9, 5, 2]])

### 1.3 Array slicing
One important thing to know about array slices is that they return views rather than copies of the array data. This is one area where NumPy array slicing differs from Python list slicing: in lists, slices will be copies.
#### 1.3.1 One-dimensional subarrays

In [8]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
x[::2] # every other element

array([0, 2, 4, 6, 8])

In [10]:
x[1::2] # every other element, starting at index 1

array([1, 3, 5, 7, 9])

In [11]:
x[::-1] # all elements, reversed

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [12]:
x[5::-2] # reversed every other from index 5

array([5, 3, 1])

#### 1.3.2 Multi-dimensional subarrays

In [14]:
# 3x4 array of random integers in [0, 10), used by the slicing examples that follow.
# NOTE(review): no seed is set in the visible cells, so re-running changes these values.
x2 = np.random.randint(10, size=(3, 4))
x2

array([[2, 6, 2, 9],
       [8, 5, 5, 5],
       [9, 1, 1, 3]])

In [15]:
x2[:2, :3]

array([[2, 6, 2],
       [8, 5, 5]])

In [16]:
x2[::-1, ::-1] # reverse the whole array

array([[3, 1, 1, 9],
       [5, 5, 5, 8],
       [9, 2, 6, 2]])

In [17]:
# Slices are views of the original data, so call .copy() to get an independent array
x2_copy = x2.copy()
x2_copy

array([[2, 6, 2, 9],
       [8, 5, 5, 5],
       [9, 1, 1, 3]])

### 1.4 Reshape arrays

In [19]:
x = np.array([1,2,3])
x[np.newaxis, :] # row vector

array([[1, 2, 3]])

In [20]:
x[:, np.newaxis] # column vector

array([[1],
       [2],
       [3]])

### 1.5 Array Concatenation and Splitting
#### 1.5.1 Concatenation of arrays

In [21]:
# np.concatenate joins a sequence of arrays; two are passed here, but the
# sequence may hold more than two.
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate((x, y), axis=0)  # 1-D inputs are joined end to end

array([1, 2, 3, 3, 2, 1])

In [22]:
grid = np.array([[1, 2, 3],
                 [4, 5, 6]])

# concatenate along the first axis
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [23]:
# concatenate along the second axis (zero-indexed)
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [26]:
# np.vstack is a convenient alternative when the inputs have mixed dimensions
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7], [6, 5, 4]])

# place x on top of grid as an extra first row
np.vstack((x, grid))

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [27]:
# horizontally stack the arrays: y (shape (2, 1)) adds one extra column to each row of grid
y = np.array([[99],
              [99]])
np.hstack([grid, y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

#### 1.5.2 Splitting of arrays
Splitting is the opposite of concatenation. It is implemented by the functions `np.split`, `np.hsplit`, and `np.vsplit`.

In [29]:
# the split points 3 and 5 cut x into x[:3], x[3:5] and x[5:]
x = np.arange(10)
x1, x2, x3 = np.split(x, indices_or_sections=[3, 5])
x1, x2, x3

(array([0, 1, 2]), array([3, 4]), array([5, 6, 7, 8, 9]))

In [30]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [31]:
upper, lower = np.vsplit(grid, [2])
upper, lower

(array([[0, 1, 2, 3],
        [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
        [12, 13, 14, 15]]))

In [32]:
left, right = np.hsplit(grid, [2])
left, right

(array([[ 0,  1],
        [ 4,  5],
        [ 8,  9],
        [12, 13]]), array([[ 2,  3],
        [ 6,  7],
        [10, 11],
        [14, 15]]))

## 2 Computation
### 2.1 Summing the values in an array

In [34]:
import numpy as np
# 100 uniform random samples from [0, 1).
# NOTE(review): unseeded in the visible cells, so the totals shown below change on re-run.
L = np.random.random(100)
# The built-in sum adds the elements one by one in Python, while np.sum runs in
# compiled code. The two totals can differ in the last digit because the
# floating-point additions are performed in a different order.
sum(L), np.sum(L) # np way is quicker

(43.55927281154038, 43.55927281154039)

### 2.2 Minimum and Maximum
#### 2.2.1 One dimensional aggregates

In [35]:
min(L), max(L)

(0.001091317090771704, 0.9751418432091619)

In [36]:
np.min(L), np.max(L) # quicker

(0.001091317090771704, 0.9751418432091619)

#### 2.2.2 Multi-dimensional aggregates
Most aggregates also have a NaN-safe version (e.g. `np.nansum`, `np.nanmin`, `np.nanmax`) that ignores missing values.

In [37]:
M = np.random.random((3, 4))
M

array([[0.21518106, 0.31026366, 0.00234404, 0.63042886],
       [0.66666885, 0.43109356, 0.48426845, 0.81870422],
       [0.03957802, 0.08556169, 0.20431253, 0.54613003]])

In [38]:
M.sum()

4.434534960093447

Aggregation functions take an additional argument specifying the axis along which the aggregate is computed.

In [39]:
M.min(axis=0) # column

array([0.03957802, 0.08556169, 0.00234404, 0.54613003])

### 2.3 Broadcasting

In [41]:
# arrays of the same shape are added element by element
a = np.arange(3)
b = np.full(3, 5)
np.add(a, b)

array([5, 6, 7])

In [42]:
a + 5

array([5, 6, 7])

In [43]:
# operands with different shapes: a is a (3,) row, b is a (3, 1) column
a = np.arange(3)
b = np.arange(3).reshape(3, 1)

print(a, b, sep="\n")

[0 1 2]
[[0]
 [1]
 [2]]


Broadcasting in NumPy follows a strict set of rules to determine the interaction between the two arrays:

1. Rule 1: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.
2. Rule 2: If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.
3. Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

In [45]:
# a, shape (3,), is treated as (1, 3) and its row is repeated to fill 3x3
# b, shape (3, 1), has its column repeated to fill 3x3
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [46]:
# example 1
M = np.ones((2, 3))
a = np.arange(3)
M + a

array([[1., 2., 3.],
       [1., 2., 3.]])

In [47]:
# example 2
a = np.arange(3).reshape((3, 1))
b = np.arange(3)
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [48]:
# example 3: incompatible shapes.
# M is (3, 2); a is (3,), which rule 1 pads to (1, 3). The trailing
# dimensions are 2 and 3 -- neither is 1 -- so rule 3 raises ValueError.
M = np.ones((3, 2))
a = np.arange(3)
try:
    M + a
except ValueError as err:
    # The failure is the point of this example: report it instead of letting
    # the exception abort a "Restart & Run All" of the notebook.
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (3,2) (3,) 

In [49]:
# Adding a trailing axis turns a from shape (3,) into (3, 1), which
# broadcasts against M's shape (3, 2) by repeating the column.
a_col = a[:, np.newaxis]
M + a_col

array([[1., 1.],
       [2., 2.],
       [3., 3.]])

### 2.4 Boolean arrays

In [50]:
# Seeded (legacy) RandomState, so this 3x4 array of integers in [0, 10) is reproducible
rng = np.random.RandomState(0)
x = rng.randint(10, size=(3, 4))
x

array([[5, 0, 3, 3],
       [7, 9, 3, 5],
       [2, 4, 7, 6]])

In [51]:
# how many values less than 6?
np.count_nonzero(x < 6)

8

In [53]:
# same as above, since False is interpreted as 0, True 1
np.sum(x < 6) 

8

In [54]:
# how many values less than 6 in each row?
np.sum(x < 6, axis=1)

array([4, 2, 2])

In [55]:
# are there any values greater than 8?
np.any(x > 8)

True

In [56]:
# are all values less than 10?
np.all(x < 10)

True

## 3 Other fancy things
### 3.1 Indexing

In [2]:
import numpy as np
rand = np.random.RandomState(42)

x = rand.randint(100, size=10)
x

array([51, 92, 14, 71, 60, 20, 82, 86, 74, 74])

In [3]:
# suppose we want to access three different elements
[x[3], x[7], x[2]]

[71, 86, 14]

In [5]:
# alternatively, we can pass a single list or array of indices
ind = [3, 7, 2]
x[ind]

array([71, 86, 14])

In [6]:
# the shape of results reflects the shape of the index arrays rather than the shape of the array being indexed.
ind = np.array([[3,7], 
               [4,5]])
x[ind]

array([[71, 86],
       [60, 20]])

In [7]:
# It also works in multi-dimensional
X = np.arange(12).reshape((3, 4))
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [8]:
row = np.array([0, 1, 2])
col = np.array([2, 1, 3])
X[row, col]

array([ 2,  5, 11])

In [9]:
X[2, [2,0,1]]

array([10,  8,  9])

In [10]:
X[1:, [2, 0, 1]]

array([[ 6,  4,  5],
       [10,  8,  9]])