<a href="https://colab.research.google.com/github/spencer18001/Clustering-And-Dimensionality-Reduction---Deep-Dive/blob/main/0304.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Creating, slicing and modifying

### Lecture agenda

- Creating NumPy arrays
- Slicing NumPy arrays
- Modifying NumPy arrays
- Searching for elements

In [1]:
import numpy as np

### Creating numpy arrays

In [2]:
# Build a one-dimensional ndarray from a plain Python list,
# then print its contents followed by its type.
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1, type(arr1), sep="\n")

[1 2 3 4 5]
<class 'numpy.ndarray'>


In [3]:
# A list of lists becomes a two-dimensional array: each inner list is one row.
arr2 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(arr2, type(arr2), sep="\n")

[[1 2 3]
 [4 5 6]]
<class 'numpy.ndarray'>


In [4]:
# .shape is a tuple with the length of every axis:
# one entry for the 1D array, two (rows, columns) for the 2D array.
print(arr1.shape, arr2.shape, sep="\n")

(5,)
(2, 3)


In [5]:
# np.zeros creates an array of the requested shape (3 rows, 5 columns)
# filled with floating-point zeros.
zero_arr = np.zeros((3, 5))
zero_arr

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [6]:
# 1D array of zeros.
# The shape is written as the one-element tuple (3,): "(3)" without the comma
# is just the integer 3 in parentheses, not a tuple. np.zeros accepts both
# and returns the same array, but the tuple form matches the 2D example.
zero_arr = np.zeros(shape=(3,))
zero_arr

array([0., 0., 0.])

In [7]:
# np.full builds an array of the given shape in which every entry holds
# the same value (here a 3x5 array of sevens).
const_arr = np.full((3, 5), 7)
const_arr

array([[7, 7, 7, 7, 7],
       [7, 7, 7, 7, 7],
       [7, 7, 7, 7, 7]])

In [8]:
# np.arange(0, 10) counts from 0 up to (but excluding) 10 in steps of 1;
# it is the same as np.arange(10).
arrange1 = np.arange(0, 10)
arrange1

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
# np.arange with start, stop and step: values run from 0 up to (but excluding) 10
# in increments of 0.5. A float step produces a float array.
arrange1 = np.arange(0, 10, step=0.5)
arrange1

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. ,
       6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5])

In [10]:
# Random 5x8 integer array.
# np.random.randint draws integers from low (inclusive) to high (exclusive).
# No seed is set, so the values differ on every run.
rand_arr = np.random.randint(low=0, high=100, size=(5, 8))
rand_arr

array([[ 8, 45, 66, 81, 15, 75, 50, 93],
       [84, 90, 10, 77, 81, 86,  2, 56],
       [47,  5, 31, 56, 40, 15, 35, 50],
       [61, 65, 80, 61, 97, 86, 90, 42],
       [77, 11, 95, 99, 26, 12, 90, 68]])

### Slicing numpy arrays

Slicing in NumPy is a way of accessing a subset of elements from a NumPy array.

##### 1D arrays

In [11]:
# Re-create the small 1D array used throughout the 1D slicing examples.
arr1 = np.array([1, 2, 3, 4, 5])
arr1

array([1, 2, 3, 4, 5])

In [15]:
# Single-element access: indices start at 0, negative indices count from the end.
(
    arr1[0],   # first element
    arr1[2],   # third element
    arr1[-1],  # last element
)

(1, 3, 5)

In [17]:
# start:stop slices; the stop index is always excluded.
(
    arr1[2:4],   # indices 2 and 3
    arr1[:3],    # from the beginning up to index 2
    arr1[2:],    # from index 2 to the end
    arr1[1:-2],  # from index 1 up to, but excluding, the second-to-last element
)

(array([3, 4]), array([1, 2, 3]), array([3, 4, 5]), array([2, 3]))

In [18]:
# start:stop:step slice: every second element among indices 0..3.
# The omitted start defaults to 0, so this is the same as arr1[0:4:2].
arr1[:4:2]

array([1, 3])

In [24]:
# Integer-array indexing: pass an array (or a list) of positions to pick
# exactly those elements, here positions 0 and 3.
idx_array = np.array([0, 3])
(
    arr1[idx_array],  # positions given as a NumPy array
    arr1[[0, 3]],     # positions given as a plain list
)

(array([1, 4]), array([1, 4]))

In [25]:
# Boolean indexing: keep only the positions where the mask is True.
# The mask has one entry per element of arr1.
bool_arr = np.array([True, True, False, True, True])
(
    arr1[bool_arr],                         # mask given as a NumPy array
    arr1[[True, True, False, True, True]],  # mask given as a plain list
)

(array([1, 2, 4, 5]), array([1, 2, 4, 5]))

##### 2D arrays

In [26]:
# Fresh random 5x8 integer array (values in 0..99) for the 2D slicing examples.
# No seed is set, so the values differ on every run.
rand_arr = np.random.randint(low=0, high=100, size=(5, 8))
rand_arr

array([[27, 64, 91, 92, 10, 73, 20, 60],
       [53, 63, 71, 10, 30, 28, 22, 65],
       [85, 26, 49, 58, 12, 92, 58, 81],
       [98, 34, 42, 30, 24, 61, 47, 49],
       [ 0, 81, 34,  5, 32, 81, 58, 59]])

In [31]:
# Get single element: rand_arr[row, col], here row 0 and column 3
rand_arr[0, 3]

92

In [35]:
# A whole row can be requested with an explicit ":" for the columns or by
# giving only the row index; both return the same 1D array.
(
    rand_arr[2, :],
    rand_arr[2],
)

(array([85, 26, 49, 58, 12, 92, 58, 81]),
 array([85, 26, 49, 58, 12, 92, 58, 81]))

In [36]:
# Rows 2 and 3 (stop index 4 is excluded), with and without the explicit ":"
# for the columns; both forms return the same 2D array.
(
    rand_arr[2:4, :],
    rand_arr[2:4],
)

(array([[85, 26, 49, 58, 12, 92, 58, 81],
        [98, 34, 42, 30, 24, 61, 47, 49]]),
 array([[85, 26, 49, 58, 12, 92, 58, 81],
        [98, 34, 42, 30, 24, 61, 47, 49]]))

In [38]:
# Get single column: ":" keeps every row, 2 picks the third column.
# The result is a 1D array with one entry per row.
rand_arr[:, 2]

array([91, 71, 49, 42, 34])

In [39]:
# Get multiple cols: all rows, columns 2 and 3 (the stop index 4 is excluded)
rand_arr[:, 2:4]

array([[91, 92],
       [71, 10],
       [49, 58],
       [42, 30],
       [34,  5]])

In [40]:
# Slice based on rows and cols: rows 1-3 and columns 3-4 (stop indices are excluded)
rand_arr[1:4, 3:5]

array([[10, 30],
       [58, 12],
       [30, 24]])

In [41]:
# Row and column slices can each carry a step: every second row among rows 0..4
# and every second column among columns 0..3.
# The omitted starts default to 0, so this is the same as rand_arr[0:5:2, 0:4:2].
rand_arr[:5:2, :4:2]

array([[27, 91],
       [85, 49],
       [ 0, 34]])

In [42]:
# Integer index arrays for the following examples: rows 0 and 3, columns 0 and 1.
row_idx, col_idx = np.array([0, 3]), np.array([0, 1])

In [51]:
# Select rows 0 and 3 in three equivalent ways.
(
    rand_arr[row_idx],     # index array only
    rand_arr[row_idx, :],  # index array plus an explicit ":" for the columns
    rand_arr[[0, 3], :],   # plain list instead of an array
)

(array([[27, 64, 91, 92, 10, 73, 20, 60],
        [98, 34, 42, 30, 24, 61, 47, 49]]),
 array([[27, 64, 91, 92, 10, 73, 20, 60],
        [98, 34, 42, 30, 24, 61, 47, 49]]),
 array([[27, 64, 91, 92, 10, 73, 20, 60],
        [98, 34, 42, 30, 24, 61, 47, 49]]))

In [52]:
# Select columns 0 and 1 for every row, in two equivalent ways.
(
    rand_arr[:, col_idx],  # positions given as a NumPy array
    rand_arr[:, [0, 1]],   # positions given as a plain list
)

(array([[27, 64],
        [53, 63],
        [85, 26],
        [98, 34],
        [ 0, 81]]),
 array([[27, 64],
        [53, 63],
        [85, 26],
        [98, 34],
        [ 0, 81]]))

In [47]:
# Fancy indexing with two index arrays pairs them up element-wise:
# this returns rand_arr[0, 0] and rand_arr[3, 1], not the 2x2 intersection.
rand_arr[row_idx, col_idx]

array([27, 34])

In [53]:
# Intersection of rows and columns in two steps:
# first pick the rows, then pick the columns from that intermediate result.
rand_arr[row_idx][:, col_idx]

array([[27, 64],
       [98, 34]])

In [55]:
# np.ix_ reshapes the index arrays into a column vector and a row vector
# that broadcast against each other, so indexing with the result selects
# every (row, col) combination.
np.ix_(row_idx, col_idx)

(array([[0],
        [3]]),
 array([[0, 1]]))

In [59]:
# Intersection of rows and columns in a single indexing step:
# either let np.ix_ build the broadcastable index arrays, or spell out the
# same column-vector / row-vector pair by hand.
(
    rand_arr[np.ix_(row_idx, col_idx)],
    rand_arr[[[0], [3]], [[0, 1]]],
)

(array([[27, 64],
        [98, 34]]),
 array([[27, 64],
        [98, 34]]))

##### Slicing based on boolean expressions

In [60]:
# One boolean per row (5) and one per column (8); only the first two of each are True.
# NOTE(review): "cool_bool" is presumably a typo for "col_bool"; the name is kept
# because other cells refer to it.
row_bool = np.array([True, True] + [False] * 3)
cool_bool = np.array([True, True] + [False] * 6)

In [66]:
# Fancy indexing with two boolean arrays: each mask is first converted to the
# positions of its True entries ([0, 1] and [0, 1]), which are then paired
# element-wise -> rand_arr[0, 0] and rand_arr[1, 1].
rand_arr[row_bool, cool_bool]

array([27, 63])

In [61]:
# Intersection with boolean masks in two steps:
# filter the rows first, then filter the columns of that result.
rand_arr[row_bool][:, cool_bool]

array([[27, 64],
       [53, 63]])

In [63]:
# np.ix_ also accepts boolean sequences: each one is replaced by the positions
# of its True entries (0, 1 and 4 here) before the usual reshaping.
np.ix_(
    [True, True, False, False, True],
    [True, True, False, False, True, False, False, False],
)

(array([[0],
        [1],
        [4]]),
 array([[0, 1, 4]]))

In [64]:
# Intersection of the True rows and True columns in a single indexing step.
rand_arr[np.ix_(row_bool, cool_bool)]

array([[27, 64],
       [53, 63]])

In [67]:
# Mixed indexing: a slice picks the first two rows and a boolean array picks the columns.
rand_arr[:2, cool_bool]

array([[27, 64],
       [53, 63]])

In [68]:
# We can use this for masking: comparing an array with a scalar is applied
# element-wise and yields a boolean array of the same shape.
mask = rand_arr > 5
mask

array([[ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True],
       [False,  True,  True, False,  True,  True,  True,  True]])

In [70]:
# Indexing with a boolean array of the same shape returns the selected
# elements as a flat 1D array; the mask can be precomputed or written inline.
(
    rand_arr[mask],
    rand_arr[rand_arr > 5],
)

(array([27, 64, 91, 92, 10, 73, 20, 60, 53, 63, 71, 10, 30, 28, 22, 65, 85,
        26, 49, 58, 12, 92, 58, 81, 98, 34, 42, 30, 24, 61, 47, 49, 81, 34,
        32, 81, 58, 59]),
 array([27, 64, 91, 92, 10, 73, 20, 60, 53, 63, 71, 10, 30, 28, 22, 65, 85,
        26, 49, 58, 12, 92, 58, 81, 98, 34, 42, 30, 24, 61, 47, 49, 81, 34,
        32, 81, 58, 59]))

### Modifying numpy arrays

In [71]:
# Random 3x4 integer array (values in 0..99) that the following cells modify in place.
# No seed is set, so the values differ on every run.
eg_arr = np.random.randint(low=0, high=100, size=(3, 4))
eg_arr

array([[87, 96, 26, 16],
       [52, 46, 35, 80],
       [99, 89, 33, 60]])

In [72]:
# Modifying single element: overwrite row 0, column 0 in place
eg_arr[0,0] = 10
eg_arr

array([[10, 96, 26, 16],
       [52, 46, 35, 80],
       [99, 89, 33, 60]])

In [73]:
# Modifying row by 1D array: the 4 values replace the 4 entries of row 0 in place
eg_arr[0, :] = np.array([100, 1000, 500, 900])
eg_arr

array([[ 100, 1000,  500,  900],
       [  52,   46,   35,   80],
       [  99,   89,   33,   60]])

In [74]:
# Modifying row by 2D array: the right-hand side has shape (1, 4) and is
# broadcast to the shape of the selected row
eg_arr[0, :] = np.array([[100, 1000, 500, 1900]])
eg_arr

array([[ 100, 1000,  500, 1900],
       [  52,   46,   35,   80],
       [  99,   89,   33,   60]])

In [75]:
# Modifying column: -1 selects the last column; the right-hand side supplies one value per row
eg_arr[:, -1] = np.array([-10, -1000, 9])
eg_arr

array([[  100,  1000,   500,   -10],
       [   52,    46,    35, -1000],
       [   99,    89,    33,     9]])

In [76]:
# Modify slice by single value: the scalar is written to every element of the
# selected 2x2 block (rows 1-2, columns 1-2)
eg_arr[1:3, 1:3] = 222
eg_arr

array([[  100,  1000,   500,   -10],
       [   52,   222,   222, -1000],
       [   99,   222,   222,     9]])

In [77]:
# Modify slice by 2D array: the 2x2 right-hand side is copied element by element
# into the selected 2x2 block (rows 1-2, columns 1-2)
eg_arr[1:3, 1:3] = np.array([[1000, 2000], [3000, 4000]])
eg_arr

array([[  100,  1000,   500,   -10],
       [   52,  1000,  2000, -1000],
       [   99,  3000,  4000,     9]])

In [78]:
# Modify based on index arrays.
# row_idx and col_idx are rebound here to new values (rows 0 and 2, columns 0 and 3).
# np.ix_ combines them so that all four (row, col) intersections are set to -17.
row_idx = np.array([0,2])
col_idx = np.array([0,3])
eg_arr[np.ix_(row_idx, col_idx)] = -17
eg_arr

array([[  -17,  1000,   500,   -17],
       [   52,  1000,  2000, -1000],
       [  -17,  3000,  4000,   -17]])

In [79]:
# Modify based on boolean expression: every element for which eg_arr < 0 is True
# is replaced by 0, in place
eg_arr[eg_arr < 0] = 0
eg_arr

array([[   0, 1000,  500,    0],
       [  52, 1000, 2000,    0],
       [   0, 3000, 4000,    0]])

### Searching for elements

In [80]:
# Fresh random 5x8 integer array (values in 0..99) for the search examples.
# No seed is set, so the values differ on every run.
rand_arr = np.random.randint(low=0, high=100, size=(5, 8))
rand_arr

array([[80, 96, 61, 37, 46, 13, 96, 95],
       [20, 88, 72, 25, 34,  9, 34,  8],
       [29, 69, 41, 94, 23, 14, 11,  0],
       [87, 91, 93, 66, 72, 75, 27, 42],
       [69, 73, 63, 39, 87, 84, 26, 15]])

In [81]:
# Find index of elements according to condition.
# With only a condition, np.where returns one index array per axis
# (here: row indices, then column indices) of the elements where it is True.
np.where(rand_arr > 5)

(array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
        2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
 array([0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
        6, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]))

In [83]:
# np.unique returns the sorted distinct values; with return_counts=True it
# returns a (values, counts) pair instead.
(
    np.unique(rand_arr),
    np.unique(rand_arr, return_counts=True),
)

(array([ 0,  8,  9, 11, 13, 14, 15, 20, 23, 25, 26, 27, 29, 34, 37, 39, 41,
        42, 46, 61, 63, 66, 69, 72, 73, 75, 80, 84, 87, 88, 91, 93, 94, 95,
        96]),
 (array([ 0,  8,  9, 11, 13, 14, 15, 20, 23, 25, 26, 27, 29, 34, 37, 39, 41,
         42, 46, 61, 63, 66, 69, 72, 73, 75, 80, 84, 87, 88, 91, 93, 94, 95,
         96]),
  array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
         2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2])))