<a href="https://colab.research.google.com/github/spencer18001/Clustering-And-Dimensionality-Reduction---Deep-Dive/blob/main/03/0304.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Creating, slicing and modifying NumPy arrays

### Lecture agenda

- Creating NumPy arrays
- Slicing NumPy arrays
- Modifying NumPy arrays
- Searching for elements

In [1]:
import numpy as np

### Creating numpy arrays

In [2]:
# Build a one-dimensional ndarray from a plain Python list.
arr1 = np.array([1, 2, 3, 4, 5])
# Show the array, then its Python type, one per line.
print(arr1, type(arr1), sep="\n")

[1 2 3 4 5]
<class 'numpy.ndarray'>


In [3]:
# Build a two-dimensional ndarray from a list of equal-length row lists.
arr2 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
# Show the array, then its Python type, one per line.
print(arr2, type(arr2), sep="\n")

[[1 2 3]
 [4 5 6]]
<class 'numpy.ndarray'>


In [4]:
# .shape is a tuple holding the length of each dimension.
# Print the 1D shape and the 2D shape on separate lines.
print(arr1.shape, arr2.shape, sep="\n")

(5,)
(2, 3)


In [5]:
# Zero-filled 2D array: 3 rows by 5 columns (floats by default, see output).
zero_arr = np.zeros((3, 5))
zero_arr

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [6]:
# 1D array of zeros
# A 1-D shape is the one-element tuple (3,). A bare (3) is just the integer 3
# in parentheses; np.zeros accepts it too, but it hides the tuple form that
# .shape reports for the result.
zero_arr = np.zeros(shape=(3,))
zero_arr

array([0., 0., 0.])

In [7]:
# Constant array: every element of the 3x5 array holds the same value, 7.
const_arr = np.full((3, 5), 7)
const_arr

array([[7, 7, 7, 7, 7],
       [7, 7, 7, 7, 7],
       [7, 7, 7, 7, 7]])

In [8]:
# np.arange with start, stop and step spelled out: the integers 0 through 9
# (the stop value 10 is excluded).
arrange1 = np.arange(0, 10, 1)
arrange1

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
# Create array based on start, stop and step.
# Values begin at 0 and advance by 0.5; the stop value 10 itself is not produced.
# This rebinds arrange1 from the previous cell.
# NOTE(review): the numpy.arange documentation advises np.linspace for
# non-integer steps in general, because floating-point rounding can make the
# resulting length hard to predict.
arrange1 = np.arange(0, 10, 0.5)
arrange1

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. ,
       6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5])

In [10]:
# Random array
# np.random.randint: Return random integers from low (inclusive) to high (exclusive).
# Here: integers in [0, 100) arranged as 5 rows x 8 columns. No seed is set
# before this call, so the values differ from run to run.
rand_arr = np.random.randint(0, 100, (5, 8))
rand_arr

array([[ 3, 73, 40, 84, 74, 16, 60, 67],
       [ 5, 43, 44, 66, 71, 80, 50, 57],
       [45, 63, 85, 65, 70, 45, 93, 45],
       [50, 79, 62, 85,  0, 94, 38, 39],
       [60, 25, 52, 52, 48, 71, 46, 79]])

### Slicing numpy arrays

Slicing in NumPy is a way of accessing a subset of elements from a NumPy array.

##### 1D arrays

In [11]:
# Re-create the 1D example array (the integers 1 through 5) for the slicing demos.
arr1 = np.arange(1, 6)
arr1

array([1, 2, 3, 4, 5])

In [12]:
# Integer indexing: first element, third element, and last element
# (a negative index counts from the end).
arr1[0], arr1[2], arr1[-1]

(1, 3, 5)

In [13]:
# Basic slices (the stop index is exclusive): positions 2-3, the first three
# elements, everything from position 2 on, and position 1 up to (but not
# including) the second-to-last element.
arr1[2:4], arr1[:3], arr1[2:], arr1[1:-2]

(array([3, 4]), array([1, 2, 3]), array([3, 4, 5]), array([2, 3]))

In [14]:
# Slice with step: start at position 0, stop before position 4, and take
# every 2nd element (positions 0 and 2).
arr1[0:4:2]

array([1, 3])

In [15]:
# Slice with another numpy array: an array of integer positions selects the
# elements at those positions (here positions 0 and 3).
idx_array = np.array([0, 3])
# A plain Python list of positions gives the same result as the ndarray.
arr1[idx_array], arr1[[0, 3]]

(array([1, 4]), array([1, 4]))

In [16]:
# Slice using boolean array: elements whose mask entry is True are kept,
# so position 2 is dropped here.
bool_arr = np.array([True, True, False, True, True])
# A plain Python list of booleans behaves the same as the boolean ndarray.
arr1[bool_arr], arr1[[True, True, False, True, True]]

(array([1, 2, 4, 5]), array([1, 2, 4, 5]))

##### 2D arrays

In [17]:
# Random array: a fresh 5x8 array of integers in [0, 100) (unseeded, so the
# values differ between runs). Rebinds rand_arr for the 2D examples below.
rand_arr = np.random.randint(0, 100, (5, 8))
rand_arr

array([[22, 32,  0, 91, 35, 43, 78, 39],
       [88, 26, 44, 18, 31, 63, 61, 69],
       [49, 72, 46, 72,  9, 39, 45, 21],
       [25, 94, 24, 36, 40, 11, 65, 55],
       [62, 14, 64,  3, 82, 73, 57, 61]])

In [18]:
# Get single element: row 0, column 3.
rand_arr[0, 3]

91

In [19]:
# Get single row: row 2. The trailing ", :" is optional; both spellings
# return the same 1D row.
rand_arr[2, :], rand_arr[2]

(array([49, 72, 46, 72,  9, 39, 45, 21]),
 array([49, 72, 46, 72,  9, 39, 45, 21]))

In [20]:
# Get multiple rows: rows 2 and 3 (the stop index 4 is exclusive).
# The trailing ", :" is again optional.
rand_arr[2:4, :], rand_arr[2:4]

(array([[49, 72, 46, 72,  9, 39, 45, 21],
        [25, 94, 24, 36, 40, 11, 65, 55]]),
 array([[49, 72, 46, 72,  9, 39, 45, 21],
        [25, 94, 24, 36, 40, 11, 65, 55]]))

In [21]:
# Get single column: column 2 of every row, returned as a 1D array.
rand_arr[:, 2]

array([ 0, 44, 46, 24, 64])

In [22]:
# Get multiple cols: columns 2 and 3 of every row (the stop index 4 is exclusive).
rand_arr[:, 2:4]

array([[ 0, 91],
       [44, 18],
       [46, 72],
       [24, 36],
       [64,  3]])

In [23]:
# Slice based on rows and cols: the block made of rows 1-3 and columns 3-4.
rand_arr[1:4, 3:5]

array([[18, 31],
       [72,  9],
       [36, 40]])

In [24]:
# Slice based on rows and cols and step: every second row of rows 0-4 and
# every second column of columns 0-3, i.e. rows 0, 2, 4 and columns 0, 2.
rand_arr[0:5:2, 0:4:2]

array([[22,  0],
       [49, 46],
       [62, 64]])

In [25]:
# Slice based on other numpy arrays.
# Integer index arrays used by the cells below: rows 0 and 3, columns 0 and 1.
row_idx, col_idx = np.array([0, 3]), np.array([0, 1])

In [26]:
# Get rows 0 and 3. Three equivalent spellings: the index array alone, the
# index array with an explicit ", :", and a plain list with ", :".
rand_arr[row_idx], rand_arr[row_idx, :], rand_arr[[0,3], :]

(array([[22, 32,  0, 91, 35, 43, 78, 39],
        [25, 94, 24, 36, 40, 11, 65, 55]]),
 array([[22, 32,  0, 91, 35, 43, 78, 39],
        [25, 94, 24, 36, 40, 11, 65, 55]]),
 array([[22, 32,  0, 91, 35, 43, 78, 39],
        [25, 94, 24, 36, 40, 11, 65, 55]]))

In [27]:
# Get cols 0 and 1 of every row, via the index array or an equivalent plain list.
rand_arr[:, col_idx], rand_arr[:, [0,1]]

(array([[22, 32],
        [88, 26],
        [49, 72],
        [25, 94],
        [62, 14]]),
 array([[22, 32],
        [88, 26],
        [49, 72],
        [25, 94],
        [62, 14]]))

In [28]:
# Fancy indexing with two index arrays returns pairwise-indexed values:
# the elements at (row_idx[0], col_idx[0]) and (row_idx[1], col_idx[1]),
# not the row/column intersection.
rand_arr[row_idx, col_idx]

array([22, 94])

In [29]:
# Get intersection of rows and cols: select the rows first, then pick the
# columns out of that intermediate result.
rand_arr[row_idx, :][:, col_idx]

array([[22, 32],
       [25, 94]])

In [30]:
# np.ix_ returns the row indices as a column vector and the column indices as
# a row vector (see the output); used together as an index they address every
# row/column combination.
np.ix_(row_idx, col_idx)

(array([[0],
        [3]]),
 array([[0, 1]]))

In [31]:
# Another way to get the row/column intersection: index with the np.ix_ result.
# The second expression writes out the same index tuple literally.
rand_arr[np.ix_(row_idx, col_idx)], rand_arr[([[0],
                                               [3]],
                                              [[0, 1]])]

(array([[22, 32],
        [25, 94]]),
 array([[22, 32],
        [25, 94]]))

##### Slicing based on boolean expressions

In [32]:
# Boolean masks of length 5 and length 8; only the first two positions of
# each are True.
# NOTE: `cool_bool` is the column mask; the name is kept as-is because the
# following cells refer to it.
row_bool = np.array([True] * 2 + [False] * 3)
cool_bool = np.array([True] * 2 + [False] * 6)

In [33]:
# Fancy indexing with two boolean masks: each mask is turned into the
# positions of its True entries, and those positions are then paired
# element-wise. The result is elements (0, 0) and (1, 1), not a 2x2 block.
rand_arr[row_bool, cool_bool]

array([22, 26])

In [34]:
# Row/column intersection with boolean masks: filter the rows first, then
# filter the columns of that intermediate result.
rand_arr[row_bool, :][:, cool_bool]

array([[22, 32],
       [88, 26]])

In [35]:
# np.ix_ also accepts boolean sequences: each one is converted to the integer
# positions of its True entries (here 0, 1 and 4) before being reshaped.
np.ix_([True, True, False, False, True], [True, True, False, False, True, False, False, False])

(array([[0],
        [1],
        [4]]),
 array([[0, 1, 4]]))

In [36]:
# Row/column intersection via np.ix_ applied to the boolean masks.
# The extra parentheses around the np.ix_ call do not change the result.
rand_arr[(np.ix_(row_bool, cool_bool))]

array([[22, 32],
       [88, 26]])

In [37]:
# Mixed indexing: the rows use a slice (first two rows) and the columns use a
# boolean array; this yields the same 2x2 block as the intersection above.
rand_arr[:2, cool_bool]

array([[22, 32],
       [88, 26]])

In [38]:
# We can use this for masking: comparing the array with a scalar yields a
# boolean array of the same shape, True where the element is greater than 5.
mask = rand_arr > 5
mask

array([[ True,  True, False,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True, False,  True,  True,  True,  True]])

In [39]:
# Indexing with a same-shaped boolean mask returns the selected elements as a
# flat 1D array. The mask can be a variable or the comparison written inline.
rand_arr[mask], rand_arr[rand_arr > 5]

(array([22, 32, 91, 35, 43, 78, 39, 88, 26, 44, 18, 31, 63, 61, 69, 49, 72,
        46, 72,  9, 39, 45, 21, 25, 94, 24, 36, 40, 11, 65, 55, 62, 14, 64,
        82, 73, 57, 61]),
 array([22, 32, 91, 35, 43, 78, 39, 88, 26, 44, 18, 31, 63, 61, 69, 49, 72,
        46, 72,  9, 39, 45, 21, 25, 94, 24, 36, 40, 11, 65, 55, 62, 14, 64,
        82, 73, 57, 61]))

### Modifying numpy arrays

In [61]:
# Example array for the modification demos: 3x4 random integers in [0, 100)
# (unseeded, so values differ between runs). The cells below modify eg_arr
# in place, so they must be run in order.
eg_arr = np.random.randint(0, 100, (3, 4))
eg_arr

array([[41, 78,  6, 61],
       [ 3, 53, 40, 19],
       [11, 25,  1, 75]])

In [62]:
# Modifying single element: overwrite row 0, column 0 in place.
eg_arr[0,0] = 10
eg_arr

array([[10, 78,  6, 61],
       [ 3, 53, 40, 19],
       [11, 25,  1, 75]])

In [63]:
# Modifying row by 1D array: the 4 values replace the 4 entries of row 0.
eg_arr[0, :] = np.array([100, 1000, 500, 900])
eg_arr

array([[ 100, 1000,  500,  900],
       [   3,   53,   40,   19],
       [  11,   25,    1,   75]])

In [64]:
# Modifying row by 2D array: a value of shape (1, 4) is accepted for the
# (4,)-shaped row.
eg_arr[0, :] = np.array([[100, 1000, 500, 1900]])
eg_arr

array([[ 100, 1000,  500, 1900],
       [   3,   53,   40,   19],
       [  11,   25,    1,   75]])

In [66]:
# A value of shape (1, 1, 4) is also accepted for the (4,)-shaped row: it
# appears that any value satisfying the broadcasting rules can be assigned,
# even when it has more (length-1) dimensions than the target.
eg_arr[0, :] = np.array([[[100, 1000, 500, 1900]]])
# Show the result together with the target's shape and the assigned value's shape.
eg_arr, eg_arr[0, :].shape, np.array([[[100, 1000, 500, 1900]]]).shape

(array([[ 100, 1000,  500, 1900],
        [   3,   53,   40,   19],
        [  11,   25,    1,   75]]),
 (4,),
 (1, 1, 4))

In [74]:
# Modifying column: the 3 values replace the last column (index -1), one per row.
eg_arr[:, -1] = np.array([-10, -1000, 9])
eg_arr

array([[  100,  1000,   500,   -10],
       [    3,    53,    40, -1000],
       [   11,    25,     1,     9]])

In [75]:
# As with rows, a value of shape (1, 1, 3) is accepted for the (3,)-shaped column.
eg_arr[:, -1] = np.array([[[-10, -1000, 9]]])
# Show the result together with the target's shape and the assigned value's shape.
eg_arr, eg_arr[:, -1].shape, np.array([[[-10, -1000, 9]]]).shape

(array([[  100,  1000,   500,   -10],
        [    3,    53,    40, -1000],
        [   11,    25,     1,     9]]),
 (3,),
 (1, 1, 3))

In [76]:
# Modify slice by single value: the scalar is written to every element of the
# 2x2 block made of rows 1-2 and columns 1-2.
eg_arr[1:3, 1:3] = 222
eg_arr

array([[  100,  1000,   500,   -10],
       [    3,   222,   222, -1000],
       [   11,   222,   222,     9]])

In [77]:
# Modify slice by 2D array: the 2x2 value replaces the 2x2 block (rows 1-2,
# columns 1-2) element by element.
eg_arr[1:3, 1:3] = np.array([[1000, 2000], [3000, 4000]])
eg_arr

array([[  100,  1000,   500,   -10],
       [    3,  1000,  2000, -1000],
       [   11,  3000,  4000,     9]])

In [78]:
# Modify based on index arrays
# row_idx / col_idx are rebound here to rows 0, 2 and columns 0, 3.
row_idx = np.array([0,2])
col_idx = np.array([0,3])
# np.ix_ addresses every row/column combination, so the four elements at
# (0, 0), (0, 3), (2, 0) and (2, 3) are set to -17.
eg_arr[np.ix_(row_idx, col_idx)] = -17
eg_arr

array([[  -17,  1000,   500,   -17],
       [    3,  1000,  2000, -1000],
       [  -17,  3000,  4000,   -17]])

In [79]:
# Modify based on boolean expression: every element that is less than 0 is
# set to 0; all other elements are left unchanged.
eg_arr[eg_arr < 0] = 0
eg_arr

array([[   0, 1000,  500,    0],
       [   3, 1000, 2000,    0],
       [   0, 3000, 4000,    0]])

### Searching for elements

In [80]:
# Fresh 5x8 random array (integers in [0, 100), unseeded) for the search
# examples. Rebinds rand_arr.
rand_arr = np.random.randint(0, 100, (5, 8))
rand_arr

array([[78,  2, 83, 55, 28, 14, 67, 96],
       [32, 36, 11, 63, 90, 57, 81, 50],
       [80,  7, 77, 73, 46, 34, 18, 43],
       [91, 83, 31,  4, 48, 56,  3, 83],
       [15, 70, 14, 51, 91, 69, 26, 97]])

In [81]:
# Find index of elements according to condition
# When only condition is provided, this function is a shorthand for np.asarray(condition).nonzero().
# Returns the indices that satisfy the condition as a tuple of arrays
# (row indices, column indices); entries at the same position form one index pair.
np.where(rand_arr > 5)

(array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
        2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
 array([0, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6,
        7, 0, 1, 2, 4, 5, 7, 0, 1, 2, 3, 4, 5, 6, 7]))

In [82]:
# Unique elements: the distinct values in sorted order. With
# return_counts=True the result is a (values, counts) tuple that also gives
# how many times each value occurs.
np.unique(rand_arr), np.unique(rand_arr, return_counts=True)

(array([ 2,  3,  4,  7, 11, 14, 15, 18, 26, 28, 31, 32, 34, 36, 43, 46, 48,
        50, 51, 55, 56, 57, 63, 67, 69, 70, 73, 77, 78, 80, 81, 83, 90, 91,
        96, 97]),
 (array([ 2,  3,  4,  7, 11, 14, 15, 18, 26, 28, 31, 32, 34, 36, 43, 46, 48,
         50, 51, 55, 56, 57, 63, 67, 69, 70, 73, 77, 78, 80, 81, 83, 90, 91,
         96, 97]),
  array([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1])))