In [0]:
import numpy as np

### Array manipulation

In [0]:
# Build a 1-D array holding the integers 0 through 8 (the stop value is exclusive).
arr = np.arange(0, 9)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [0]:
# Lay the nine values of `arr` out as a 3x3 grid, filled row by row.
arr2D_1 = np.reshape(arr, (3, 3))
arr2D_1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [0]:
# A second 3x3 grid, this time holding the integers 10 through 18.
arr2D_2 = np.reshape(np.arange(10, 19), (3, 3))
arr2D_2

array([[10, 11, 12],
       [13, 14, 15],
       [16, 17, 18]])

In [0]:
# Join along axis 0 (the default): the rows of the second grid go below the first.
np.concatenate([arr2D_1, arr2D_2], axis=0)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [10, 11, 12],
       [13, 14, 15],
       [16, 17, 18]])

In [0]:
# Join along axis 1: the columns of the second grid go to the right of the first.
np.concatenate([arr2D_1, arr2D_2], axis=1)

array([[ 0,  1,  2, 10, 11, 12],
       [ 3,  4,  5, 13, 14, 15],
       [ 6,  7,  8, 16, 17, 18]])

In [0]:
# Any number of arrays can be joined in one call; here three grids are stacked along axis 0.
np.concatenate([arr2D_1, arr2D_2, arr2D_1], axis=0)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [10, 11, 12],
       [13, 14, 15],
       [16, 17, 18],
       [ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8]])

In [0]:
# Alternative spelling: vstack stacks the grids vertically (row-wise).
np.vstack([arr2D_1, arr2D_2])

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [10, 11, 12],
       [13, 14, 15],
       [16, 17, 18]])

In [0]:
# Alternative spelling: hstack stacks the grids horizontally (column-wise).
np.hstack([arr2D_1, arr2D_2])

array([[ 0,  1,  2, 10, 11, 12],
       [ 3,  4,  5, 13, 14, 15],
       [ 6,  7,  8, 16, 17, 18]])

## np.argsort

In [2]:
# Parallel arrays: score[i] belongs to name[i].
score = np.array([70, 60, 50, 10, 90, 40, 80])
name = np.array(['Ada', 'Ben', 'Charlie', 'Danny', 'Eden', 'Fanny', 'George'])

# argsort returns the indices that would sort `score` in ascending order;
# indexing `name` with them reorders the names by their scores.
order = np.argsort(score)
sorted_name = name[order]
print(sorted_name)   # ['Danny' 'Fanny' 'Charlie' 'Ben' 'Ada' 'George' 'Eden']

# Applying argsort to the ordering gives each element's rank, which is the
# inverse permutation: indexing the sorted names with it restores the input order.
rank = np.argsort(order)
original_name = sorted_name[rank]
print(original_name) # ['Ada' 'Ben' 'Charlie' 'Danny' 'Eden' 'Fanny' 'George']

# Time the NumPy approach: argsort computes the ordering and fancy indexing applies it.
# The figure below was recorded from one run; timings vary between runs and machines.
%timeit name[np.argsort(score)] 
# 1.83 µs ± 182 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

# Time a built-in counterpart. Note that it yields a list of (score, name)
# tuples ordered by score, not an array of names.
%timeit sorted(zip(score, name))
# 3.2 µs ± 76.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

['Danny' 'Fanny' 'Charlie' 'Ben' 'Ada' 'George' 'Eden']
['Ada' 'Ben' 'Charlie' 'Danny' 'Eden' 'Fanny' 'George']
The slowest run took 40.26 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 2.19 µs per loop
The slowest run took 12.29 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 4.11 µs per loop


## Broadcasting — shapes

In [3]:
# Broadcasting aligns shapes starting from the trailing dimension: two
# dimensions are compatible when they are equal or when one of them is 1,
# and a missing leading dimension is treated as 1.
#
# Argument 1  (4D array): 7 × 5 × 3 × 1
# Argument 2  (3D array):     1 × 3 × 9
# Output      (4D array): 7 × 5 × 3 × 9
arg_1 = np.zeros((7, 5, 3, 1))
arg_2 = np.zeros((1, 3, 9))
output = arg_1 + arg_2   # any element-wise operation broadcasts the same way
print(output.shape) # (7, 5, 3, 9)

(7, 5, 3, 9)

## Ellipsis and NewAxis — dimensions

In [4]:
# A 5-D array of the integers 0..999; the trailing -1 lets reshape infer the
# size of the last dimension (1000 / (2*5*2*10) = 5).
arr = np.arange(1000).reshape((2, 5, 2, 10, -1))

# `...` expands to as many full slices (`:`) as are needed, so both
# selections below pick the same (2, 5, 2) sub-array.
explicit_slices = arr[:, :, :, 3, 2]
with_ellipsis = arr[..., 3, 2]
print(explicit_slices == with_ellipsis)

# [[[ True  True]
#   [ True  True]
#   [ True  True]
#   [ True  True]
#   [ True  True]]
#
#  [[ True  True]
#   [ True  True]
#   [ True  True]
#   [ True  True]
#   [ True  True]]]

print(arr.shape)                       # (2, 5, 2, 10, 5)

# np.newaxis inserts a length-1 dimension at its position; the ellipsis
# covers the two leading dimensions, so the new axis lands at index 2.
expanded = arr[..., np.newaxis, :, :, :]
print(expanded.shape) # (2, 5, 1, 2, 10, 5)

[[[ True  True]
  [ True  True]
  [ True  True]
  [ True  True]
  [ True  True]]

 [[ True  True]
  [ True  True]
  [ True  True]
  [ True  True]
  [ True  True]]]
(2, 5, 2, 10, 5)
(2, 5, 1, 2, 10, 5)


## Masked Array — selection

In [5]:
# A MaskedArray pairs the data with a boolean mask of the same shape.
# Entries whose mask is True are treated as invalid: they print as `--`
# and are skipped by reductions such as sum().
arr = np.arange(5)
invalid_mask = arr % 2 == 1   # hide the odd entries
masked = np.ma.MaskedArray(data=arr, mask=invalid_mask)
print(masked) # [0 -- 2 -- 4]

[0 -- 2 -- 4]

In [7]:
import math

def is_prime(n):
    """Return True if ``n`` is a prime number, otherwise False.

    Uses trial division: even numbers above 2 are rejected immediately,
    then only odd divisors up to the square root of ``n`` are tried.

    Args:
        n: The number to test; must be greater than 1.

    Returns:
        bool: True when no divisor is found, False otherwise.

    Raises:
        AssertionError: If ``n`` is not greater than 1.
    """
    assert n > 1, 'Input must be larger than 1'

    # 2 is the only even prime.
    if n > 2 and n % 2 == 0:
        return False

    # A composite number has a divisor no larger than its square root.
    upper_bound = int(math.sqrt(n)) + 1
    for divisor in range(3, upper_bound, 2):
        if not n % divisor:
            return False
    return True

# Candidates 2..99. The mask is True wherever the value is NOT prime, so
# only the primes remain visible in the masked array.
arr = np.arange(2, 100)
non_prime_mask = [not is_prime(candidate) for candidate in arr]
prime_arr = np.ma.MaskedArray(arr, mask=non_prime_mask)
print(prime_arr)

# [2 3 -- 5 -- 7 -- -- -- 11 -- 13 -- -- -- 17 -- 19 -- -- -- 23 -- -- -- --
#  -- 29 -- 31 -- -- -- -- -- 37 -- -- -- 41 -- 43 -- -- -- 47 -- -- -- --
#  -- 53 -- -- -- -- -- 59 -- 61 -- -- -- -- -- 67 -- -- -- 71 -- 73 -- --
#  -- -- -- 79 -- -- -- 83 -- -- -- -- -- 89 -- -- -- -- -- -- -- 97 -- --]

# The integers 0..10; their plain sum is 55.
arr = np.arange(11)
print(np.sum(arr))        # 55

# Overwrite the last entry with a sentinel that indicates a missing value.
missing_marker = -999
arr[-1] = missing_marker

# masked_values hides every entry equal to the sentinel, so the reduction
# only adds up 0..9.
masked_arr = np.ma.masked_values(arr, missing_marker)
print(masked_arr.sum()) # 45  

[2 3 -- 5 -- 7 -- -- -- 11 -- 13 -- -- -- 17 -- 19 -- -- -- 23 -- -- -- --
 -- 29 -- 31 -- -- -- -- -- 37 -- -- -- 41 -- 43 -- -- -- 47 -- -- -- --
 -- 53 -- -- -- -- -- 59 -- 61 -- -- -- -- -- 67 -- -- -- 71 -- 73 -- --
 -- -- -- 79 -- -- -- 83 -- -- -- -- -- 89 -- -- -- -- -- -- -- 97 -- --]
55
45
