## Boolean Indexing 

Consider an example where we have some data in an array and an array of names with duplicates. 

Use the randn function in numpy.random to generate some random normally distributed data:

In [1]:
import numpy as np
names = np.array(['Arun', 'Bill', 'Jay', 'Arun', 'Jay', 'Butler'])
data = np.random.randn(6, 4)

In [2]:
print('names',names)

names ['Arun' 'Bill' 'Jay' 'Arun' 'Jay' 'Butler']


In [3]:
print('data', data)

data [[-0.37872613  1.17223464 -0.99871173 -0.38798951]
 [-0.46201744  0.41672788  0.08557869  1.32186669]
 [-0.51540156 -0.64393741 -0.07013691  0.35116435]
 [ 0.31843458 -0.19230838 -0.82800615 -0.78421484]
 [ 0.72898486  0.43962914  0.21475248  0.01778945]
 [-0.53407184 -1.82479575  0.99885712 -1.52308785]]


In [4]:
names == 'Jay' # Boolean mask that is True wherever the name is 'Jay'

array([False, False,  True, False,  True, False])

In [5]:
data[names == 'Jay'] # Select from the rows where names == 'Jay' 

array([[-0.51540156, -0.64393741, -0.07013691,  0.35116435],
       [ 0.72898486,  0.43962914,  0.21475248,  0.01778945]])

In [6]:
data[names == 'Jay', 2:]# Select the rows where names == 'Jay' and also slice the columns from index 2 onward


array([[-0.07013691,  0.35116435],
       [ 0.21475248,  0.01778945]])

To select everything but 'Jay', you can either use != or negate the condition using ~:

In [7]:
data[names != 'Jay']

array([[-0.37872613,  1.17223464, -0.99871173, -0.38798951],
       [-0.46201744,  0.41672788,  0.08557869,  1.32186669],
       [ 0.31843458, -0.19230838, -0.82800615, -0.78421484],
       [-0.53407184, -1.82479575,  0.99885712, -1.52308785]])

In [8]:
condition = names == 'Jay'
data[~condition] 

array([[-0.37872613,  1.17223464, -0.99871173, -0.38798951],
       [-0.46201744,  0.41672788,  0.08557869,  1.32186669],
       [ 0.31843458, -0.19230838, -0.82800615, -0.78421484],
       [-0.53407184, -1.82479575,  0.99885712, -1.52308785]])

To set all of the negative values in data to 0, index with a boolean condition and assign:

In [9]:
data[data < 0] = 0

In [10]:
print(data)

[[0.         1.17223464 0.         0.        ]
 [0.         0.41672788 0.08557869 1.32186669]
 [0.         0.         0.         0.35116435]
 [0.31843458 0.         0.         0.        ]
 [0.72898486 0.43962914 0.21475248 0.01778945]
 [0.         0.         0.99885712 0.        ]]


### Set whole rows or columns using a one-dimensional boolean array

In [11]:
data[names == 'Jay'] = 10
print(data)

[[ 0.          1.17223464  0.          0.        ]
 [ 0.          0.41672788  0.08557869  1.32186669]
 [10.         10.         10.         10.        ]
 [ 0.31843458  0.          0.          0.        ]
 [10.         10.         10.         10.        ]
 [ 0.          0.          0.99885712  0.        ]]


### Indexing using integer arrays

In [12]:
array = np.empty((8, 4))
for i in range(8):
    array[i] = i

print(array)

[[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [2. 2. 2. 2.]
 [3. 3. 3. 3.]
 [4. 4. 4. 4.]
 [5. 5. 5. 5.]
 [6. 6. 6. 6.]
 [7. 7. 7. 7.]]


To select a subset of the rows in a particular order, pass a list or ndarray of integers specifying the desired order. Negative indices select rows counting from the end.

In [13]:
array[[1, 2, 0, 6]]

array([[1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [14]:
array[[-1, 2, 0, -3]]

array([[7., 7., 7., 7.],
       [2., 2., 2., 2.],
       [0., 0., 0., 0.],
       [5., 5., 5., 5.]])

In [15]:
array1 = np.arange(32).reshape((4, 8)) # shape as 4 rows and 8 columns

In [16]:
print(array1)

[[ 0  1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14 15]
 [16 17 18 19 20 21 22 23]
 [24 25 26 27 28 29 30 31]]


In [17]:
array2 = np.arange(32).reshape((8, 4)) # shape as 8 rows and 4 columns

In [18]:
print(array2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]


In [19]:
array1[[1, 3]][:, [0,2]] 

array([[ 8, 10],
       [24, 26]])

### Transposing Arrays and Swapping Axes

*Arrays have the transpose method and also the special T attribute:*

In [20]:
array3 = np.arange(20).reshape(4, 5)

In [21]:
print(array3)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


In [22]:
print(array3.T) # See rows and columns are interchanged

[[ 0  5 10 15]
 [ 1  6 11 16]
 [ 2  7 12 17]
 [ 3  8 13 18]
 [ 4  9 14 19]]


### Matrix computations

Compute the inner matrix product (the transpose of array3 times array3) using np.dot:

In [23]:
array3_square = np.dot(array3 .T, array3 )

In [24]:
print(array3_square); print(array3_square.shape)

[[350 380 410 440 470]
 [380 414 448 482 516]
 [410 448 486 524 562]
 [440 482 524 566 608]
 [470 516 562 608 654]]
(5, 5)


### If you take the matrix product of any 2-D array with its transpose, you get a square matrix.

For higher-dimensional arrays (e.g. 3-D), transpose accepts a tuple of axis numbers to permute the axes; the T attribute, used below, simply reverses the order of the axes.

In [25]:
array4 = np.arange(16).reshape((2, 2, 4))

In [26]:
print(array4)

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]


In [27]:
array4 = np.arange(16).reshape((2, 2, 4)) 
array4.T

array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

ndarray also has the method swapaxes, which takes a pair of axis numbers and switches the indicated axes to rearrange the data; below, axes 1 and 2 are swapped:

In [28]:
array4 = np.arange(16).reshape((2, 2, 4))
array4.swapaxes(1,2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [29]:
np.sqrt(array4) # To get square root

array([[[0.        , 1.        , 1.41421356, 1.73205081],
        [2.        , 2.23606798, 2.44948974, 2.64575131]],

       [[2.82842712, 3.        , 3.16227766, 3.31662479],
        [3.46410162, 3.60555128, 3.74165739, 3.87298335]]])

In [30]:
np.exp(array4) # To get exp 

array([[[1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01],
        [5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03]],

       [[2.98095799e+03, 8.10308393e+03, 2.20264658e+04, 5.98741417e+04],
        [1.62754791e+05, 4.42413392e+05, 1.20260428e+06, 3.26901737e+06]]])

In [31]:
np.min(array4) # To get minimum

0

In [32]:
np.max(array4) # To get maximum

15

In [33]:
np.abs(array4) # To get absolute value

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [34]:
np.floor(array4) # To get floor

array([[[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.]],

       [[ 8.,  9., 10., 11.],
        [12., 13., 14., 15.]]])

### Mathematical functions

In [35]:
array4.mean() 

7.5

In [36]:
np.mean(array4)

7.5

In [37]:
array4.sum()

120

*cumsum returns an array of the same size, but with the partial aggregates computed along the indicated axis according to each lower-dimensional slice*

In [38]:
array5 = np.array([[0, 1, 3], [2, 4, 5], [6, 7, 8]])
print('\narray5\n', array5)
print('\narray5.cumsum(axis=0)\n', array5.cumsum(axis=0)) 


array5
 [[0 1 3]
 [2 4 5]
 [6 7 8]]

array5.cumsum(axis=0)
 [[ 0  1  3]
 [ 2  5  8]
 [ 8 12 16]]


In [39]:
array5 = np.array([[0, 1, 3], [2, 4, 5], [6, 7, 8]])
print('\narray5\n', array5)
print('\narray5.cumprod(axis=0)\n', array5.cumprod(axis=0)) 


array5
 [[0 1 3]
 [2 4 5]
 [6 7 8]]

array5.cumprod(axis=0)
 [[  0   1   3]
 [  0   4  15]
 [  0  28 120]]


In [40]:
array5 = np.array([[0, 1, 3], [2, 4, 5], [6, 7, 8]])
print('\narray5\n', array5)
print('\narray5.std\n', array5.std())



array5
 [[0 1 3]
 [2 4 5]
 [6 7 8]]

array5.std
 2.581988897471611


In [41]:
array5 = np.array([[0, 1, 3], [2, 4, 5], [6, 7, 8]])
print('\narray5\n', array5)
print('\narray5.var\n', array5.var())


array5
 [[0 1 3]
 [2 4 5]
 [6 7 8]]

array5.var
 6.666666666666667


# END