# Comparisons, Masks, and Boolean Logic

1. Comparisons and Boolean masks let us examine and manipulate values within NumPy arrays.
2. Masking comes up when we want to extract, modify, count, or manipulate values in an array based on some criterion

3. For example, if we want to count all values greater than a certain value, or remove outliers that are above some threshold.

## Comparison Operators as ufuncs

In [2]:
import numpy as np

In [4]:
x = np.array([1,2,3,4,5])
print(x)

[1 2 3 4 5]


In [5]:
#less than
x < 3 #np.less(x,3)

array([ True,  True, False, False, False])

In [6]:
x > 3 #np.greater

array([False, False, False,  True,  True])

In [7]:
x <= 3 #np.less_equal

array([ True,  True,  True, False, False])

In [8]:
x >= 5 #np.greater_equal

array([False, False, False, False,  True])

In [9]:
x != 3

array([ True,  True, False,  True,  True])

In [10]:
x == 3

array([False, False,  True, False, False])

In [11]:
(2 ** x) == (x ** 2)

array([False,  True, False,  True, False])

## Working with Boolean Arrays

In [16]:
np.random.seed(1)
x = np.random.randint(10, size = (3,4))
print(x)

[[5 8 9 5]
 [0 0 1 7]
 [6 9 2 4]]


In [17]:
x < 6

array([[ True, False, False,  True],
       [ True,  True,  True, False],
       [False, False,  True,  True]])

In [19]:
bool(1)

True

In [20]:
#count the number of values that are less than 6

np.count_nonzero(x < 6)  #count the number of True values in a Boolean array

7

In [21]:
np.sum(x < 6) #True is counted as 1, False as 0

7

In [22]:
np.sum(x < 6, axis = 1) # values less than 6 in each row

array([2, 3, 2])

In [24]:
#any()  #Python's built-in any()/all() are not suited to multidimensional arrays; use np.any()/np.all()
#all()

In [25]:
np.any(x > 8)

True

In [27]:
np.all(x < 10)

True

## Boolean arrays as Masks:

- Masking: To select values from the array by simply passing the Boolean array as an index.

In [29]:
print(x)

[[5 8 9 5]
 [0 0 1 7]
 [6 9 2 4]]


In [30]:
x < 5

array([[False, False, False, False],
       [ True,  True,  True, False],
       [False, False,  True,  True]])

In [32]:
x[x < 5]

array([0, 0, 1, 2, 4])

In [33]:
x = np.arange(10)

print(x)

[0 1 2 3 4 5 6 7 8 9]


In [36]:
(x > 4) & (x < 8) #bitwise and (&)

array([False, False, False, False, False,  True,  True,  True, False,
       False])

In [37]:
x[(x > 4) & (x < 8)]

array([5, 6, 7])

In [38]:
(x > 4) and (x < 8)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [40]:
bin(44)

'0b101100'

In [45]:
bin(20)

'0b10100'

In [46]:
bin(4)

'0b100'

In [44]:
0b101100 & 0b10100

4

## Note:

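- For Boolean NumPy arrays, the element-wise bitwise operators (`&` and `|`) are nearly always what is wanted; the keywords `and` / `or` evaluate the truth of the whole array and raise a ValueError.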

In [None]:
# & (and) - np.bitwise_and
# | (or)  - np.bitwise_or
# ^ (xor) - np.bitwise_xor
# ~ (not) - np.bitwise_not

## Fancy Indexing:

- It is similar to normal indexing, but we pass arrays of indices in place of single indices (scalars).
- The pairing of indices in fancy indexing follows all the broadcasting rules.

In [47]:
np.random.seed(21)
x = np.random.randint(100, size = 10)

print(x)

[73 79 56  4 48 35 60 98 74 72]


In [48]:
[x[3], x[7], x[2]]

[4, 98, 56]

In [49]:
index = np.array([3, 7, 4])

x[index]

array([ 4, 98, 48])

In [50]:
lis = [1,2,3,4,5]

In [51]:
index = [3,4]

lis[index]

TypeError: list indices must be integers or slices, not list

In [53]:
print(x)

[73 79 56  4 48 35 60 98 74 72]


In [52]:
#The result of fancy indexing reflects the shape of the 
#index arrays rather than the shape of the array being indexed

index = np.array([[3,7],
                  [4,5]])

x[index]

array([[ 4, 98],
       [48, 35]])

In [54]:
X = np.arange(12).reshape((3,4))
print(X)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [55]:
row = np.array([0,1,2])
column = np.array([2,1,3])

X[row, column]

array([ 2,  5, 11])

In [56]:
#Combined Indexing

X[2, [2,0,1]]

array([10,  8,  9])

In [57]:
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [58]:
X[1:,[2,0,1]] #fancy indexing with slicing

array([[ 6,  4,  5],
       [10,  8,  9]])

In [59]:
## Modifying values using Fancy Indexing

x = np.arange(10)
index = np.array([2,1,8,4])

print(x)

[0 1 2 3 4 5 6 7 8 9]


In [60]:
x[index] = 99

print(x)

[ 0 99 99  3 99  5  6  7 99  9]


In [62]:
x[index] += 10 

print(x)

[  0 109 109   3 109   5   6   7 109   9]


## Sorting Arrays

In [63]:
#sorted()  list.sort()


# insertion sort, selection sort, quick sort(O[N logN]), merge sort, bubble sort, etc

In [64]:
#np.sort() and np.argsort()

In [65]:
x = np.array([2,1,5,6,7,8,3])

np.sort(x) #without modifying input

array([1, 2, 3, 5, 6, 7, 8])

In [67]:
x.sort() #in-place
print(x)

[1 2 3 5 6 7 8]


In [68]:
x = np.array([2,1,4,5,9,2,3])
i = np.argsort(x)

print(i)

[1 0 5 6 2 3 4]


In [69]:
x[i] #fancy indexing

array([1, 2, 2, 3, 4, 5, 9])

In [84]:
#Sorting along rows and columns

np.random.seed(0)

X = np.random.randint(0,10, (4,6))

print(X)

[[5 0 3 3 7 9]
 [3 5 2 4 7 6]
 [8 8 1 6 7 7]
 [8 1 5 9 8 9]]


In [85]:
#sort each column of X
np.sort(X, axis = 0)

array([[3, 0, 1, 3, 7, 6],
       [5, 1, 2, 4, 7, 7],
       [8, 5, 3, 6, 7, 9],
       [8, 8, 5, 9, 8, 9]])

In [86]:
#sort each row of X
np.sort(X, axis = 1)

array([[0, 3, 3, 5, 7, 9],
       [2, 3, 4, 5, 6, 7],
       [1, 6, 7, 7, 8, 8],
       [1, 5, 8, 8, 9, 9]])

## Structured Data: NumPy's Structured Arrays

In [89]:
name = np.array(['Alice', 'Bob', 'Max'])
age = np.array([24,45,33])
weight = np.array([55.5,73.7,78])

In [90]:
# Use a compound data type for structured arrays
data = np.zeros(3, dtype = {'names': ('name', 'age', 'weight'),
                             'formats': ('U10', 'i4', 'f8')})

In [93]:
# Here 'U10' translates to "Unicode string of maximum length 10"
# 'i4' translates to '4-byte(32-bit) integer'
# 'f8' translates to '8-byte(64-bit) float'

In [91]:
print(data.dtype)

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]


In [92]:
print(data)

[('', 0, 0.) ('', 0, 0.) ('', 0, 0.)]


In [94]:
data['name'] = name
data['age'] = age
data['weight'] = weight

print(data)

[('Alice', 24, 55.5) ('Bob', 45, 73.7) ('Max', 33, 78. )]


In [95]:
# Get all names
data['name']

array(['Alice', 'Bob', 'Max'], dtype='<U10')

In [96]:
#Get first row of data
data[0]

('Alice', 24, 55.5)

In [98]:
#Get name from the last row

data[-1]['name']

'Max'

In [99]:
#Boolean mask of rows where age is under 30
data['age'] < 30

array([ True, False, False])

In [101]:
data[data['age'] < 30]['name']

array(['Alice'], dtype='<U10')