Selecting data from an array by boolean indexing always creates a copy of the data,
even if the returned array is unchanged.

In [1]:
import numpy as np

In [3]:
# One label per row of the `data` array; duplicates are intentional so that
# a single name can select several rows.
name_list = ['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe']
names = np.array(name_list)
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [4]:
# 7 rows x 4 columns of random draws, one row per entry in `names`.
# NOTE(review): no seed is set in the cells shown, so re-running this cell
# will presumably produce values different from the recorded output below.
data = np.random.randn(7, 4)
data

array([[ 0.24048927,  0.23244935, -1.2656482 ,  1.04004593],
       [-1.74549481,  1.25023638, -0.89263227, -0.24421434],
       [-1.16819631,  0.49512583, -1.55169775,  0.09198449],
       [-0.57244475, -0.40471106, -1.32027632,  0.37528418],
       [-0.27188671, -1.36581265,  0.08487722, -0.99052893],
       [-1.41593214,  0.10217561, -0.1105508 ,  0.64117763],
       [ 0.32884806,  0.35951523,  0.70158476, -0.6118352 ]])

### boolean row indexing


Suppose each name corresponds to a row in the data array and we want to select
all the rows with the corresponding name 'Bob'. Like arithmetic operations, comparisons
(such as ==) with arrays are also vectorized. Thus, comparing names with the
string 'Bob' yields a boolean array:

In [5]:
# Element-wise comparison: True at each position where the name equals 'Bob'.
names == 'Bob'

array([ True, False, False,  True, False, False, False])

This boolean array can be passed when indexing the array:

In [6]:
data[names == 'Bob'] # rows 0 and 3 of data, i.e. the positions where the mask is True

array([[ 0.24048927,  0.23244935, -1.2656482 ,  1.04004593],
       [-0.57244475, -0.40471106, -1.32027632,  0.37528418]])

The boolean array must be of the same length as the array axis it’s indexing.

### boolean row and column indexing

In [7]:
data[names == 'Bob', 2:] # from column 2 through the last column

array([[-1.2656482 ,  1.04004593],
       [-1.32027632,  0.37528418]])

In [8]:
data[names == 'Bob', 3] # the column at index 3

array([1.04004593, 0.37528418])

### To select everything but 'Bob'

In [9]:
# Element-wise inequality: True for every name other than 'Bob'.
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [10]:
# ~ negates the boolean mask, so this keeps the rows whose name is not 'Bob'.
data[~(names == 'Bob')]

array([[-1.74549481,  1.25023638, -0.89263227, -0.24421434],
       [-1.16819631,  0.49512583, -1.55169775,  0.09198449],
       [-0.27188671, -1.36581265,  0.08487722, -0.99052893],
       [-1.41593214,  0.10217561, -0.1105508 ,  0.64117763],
       [ 0.32884806,  0.35951523,  0.70158476, -0.6118352 ]])

In [11]:
cond = names == 'Bob' # mind the difference: a single = assigns, a double == compares

In [12]:
# Negate the stored mask `cond` to select every row whose name is not 'Bob'.
data[~cond]

array([[-1.74549481,  1.25023638, -0.89263227, -0.24421434],
       [-1.16819631,  0.49512583, -1.55169775,  0.09198449],
       [-0.27188671, -1.36581265,  0.08487722, -0.99052893],
       [-1.41593214,  0.10217561, -0.1105508 ,  0.64117763],
       [ 0.32884806,  0.35951523,  0.70158476, -0.6118352 ]])

### multiple boolean conditions

In [14]:
# Combine two masks element-wise with | (or). Each comparison is parenthesised
# because | binds more tightly than == in Python.
mask = (names == 'Bob') | (names == 'Will')
mask

array([ True, False,  True,  True,  True, False, False])

In [15]:
# Rows whose name is 'Bob' or 'Will'.
data[mask]

array([[ 0.24048927,  0.23244935, -1.2656482 ,  1.04004593],
       [-1.16819631,  0.49512583, -1.55169775,  0.09198449],
       [-0.57244475, -0.40471106, -1.32027632,  0.37528418],
       [-0.27188671, -1.36581265,  0.08487722, -0.99052893]])

### Setting values with boolean arrays

In [16]:
# Assign 0 to every negative element. This modifies `data` in place, so the
# following cells operate on the zeroed array.
data[data < 0] = 0
data

array([[0.24048927, 0.23244935, 0.        , 1.04004593],
       [0.        , 1.25023638, 0.        , 0.        ],
       [0.        , 0.49512583, 0.        , 0.09198449],
       [0.        , 0.        , 0.        , 0.37528418],
       [0.        , 0.        , 0.08487722, 0.        ],
       [0.        , 0.10217561, 0.        , 0.64117763],
       [0.32884806, 0.35951523, 0.70158476, 0.        ]])

### Setting whole rows or columns using a one-dimensional boolean array

In [18]:
# A one-dimensional mask indexes the first axis, so the assignment fills whole
# rows: every row whose name is not 'Joe' becomes 7 (in place).
data[names != 'Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.        , 1.25023638, 0.        , 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.10217561, 0.        , 0.64117763],
       [0.32884806, 0.35951523, 0.70158476, 0.        ]])