# Boolean Indexing

**Module 05 | Notebook 02**

---

## Objective
By the end of this notebook, you will master:
- Boolean mask creation
- Filtering with boolean arrays
- Combining multiple conditions
- np.where for conditional selection
- Practical filtering patterns

In [2]:
import numpy as np
# Display-only setting: printed floats show at most 2 decimal places; stored values are unchanged.
np.set_printoptions(precision=2)

---
## 1. Creating Boolean Masks

In [3]:
# Sample 1-D integer array used by the comparison examples in this section
arr = np.array([1, 5, 3, 8, 2, 9, 4, 7])
print(f"Array: {arr}")

Array: [1 5 3 8 2 9 4 7]


In [4]:
# Comparison operators create boolean arrays:
# the comparison is applied element by element, giving one True/False per element of arr.
mask_gt5 = arr > 5
print(f"arr > 5: {mask_gt5}")
# The mask is an ordinary NumPy array whose dtype is bool
print(f"Type: {mask_gt5.dtype}")

arr > 5: [False False False  True False  True False  True]
Type: bool


In [5]:
# All comparison operators (`>` is shown in the previous cell).
# Each comparison is evaluated per element and yields a boolean array
# with one entry per element of arr.
print(f"arr == 5: {arr == 5}")
print(f"arr != 5: {arr != 5}")
print(f"arr < 5: {arr < 5}")
print(f"arr <= 5: {arr <= 5}")
print(f"arr >= 5: {arr >= 5}")

arr == 5: [False  True False False False False False False]
arr != 5: [ True False  True  True  True  True  True  True]
arr < 5: [ True False  True False  True False  True False]
arr <= 5: [ True  True  True False  True False  True False]
arr >= 5: [False  True False  True False  True False  True]


In [6]:
# Use mask to filter: indexing with a boolean array keeps only the elements
# at positions where the mask is True, in their original order.
mask = arr > 5
filtered = arr[mask]
print(f"Elements > 5: {filtered}")

# More concise: build the mask inline instead of naming it first
print(f"Direct: {arr[arr > 5]}")

Elements > 5: [8 9 7]
Direct: [8 9 7]


---
## 2. Boolean Indexing Returns COPY

In [7]:
arr = np.arange(10)
print(f"Original: {arr}")

# Boolean indexing returns copy
filtered = arr[arr > 5]
# np.shares_memory reports False here: `filtered` has its own data buffer,
# so modifying `filtered` would not change `arr`.
print(f"Shares memory: {np.shares_memory(arr, filtered)}")

Original: [0 1 2 3 4 5 6 7 8 9]
Shares memory: False


In [8]:
# BUT assignment modifies original!
# With the boolean index on the left-hand side of `=`, NumPy writes the value
# into arr itself at every position where the mask is True.
arr = np.arange(10)
arr[arr > 5] = 0
print(f"After zeroing >5: {arr}")

After zeroing >5: [0 1 2 3 4 5 0 0 0 0]


In [9]:
# Replace specific values: select them with a mask, then assign through the mask
arr = np.array([1, -2, 3, -4, 5, -6])
arr[arr < 0] = 0  # Replace negatives with 0 (in place; non-negative entries are untouched)
print(f"Negatives replaced: {arr}")

Negatives replaced: [1 0 3 0 5 0]


---
## 3. Combining Multiple Conditions

In [10]:
# Reset arr to the sample values; the preceding section overwrote it
arr = np.array([1, 5, 3, 8, 2, 9, 4, 7])
print(f"Array: {arr}")

Array: [1 5 3 8 2 9 4 7]


In [11]:
# AND: & (not 'and'!)
# Element-wise AND of two boolean masks. Each comparison is parenthesised
# because & binds more tightly than > and <.
mask_and = (arr > 3) & (arr < 8)
print(f"3 < arr < 8: {arr[mask_and]}")

3 < arr < 8: [5 4 7]


In [12]:
# OR: | (not 'or'!)
# Element-wise OR: an element is kept when at least one of the two conditions holds.
mask_or = (arr < 3) | (arr > 7)
print(f"arr < 3 or arr > 7: {arr[mask_or]}")

arr < 3 or arr > 7: [1 8 2 9]


In [13]:
# NOT: ~ (not 'not'!)
# ~ flips every entry of the boolean mask, so this selects the elements with arr <= 5.
mask_not = ~(arr > 5)
print(f"not (arr > 5): {arr[mask_not]}")

not (arr > 5): [1 5 3 2 4]


In [14]:
# XOR: ^ (element-wise exclusive OR of two boolean masks)
mask_xor = (arr > 3) ^ (arr > 6)
print(f"XOR condition: {arr[mask_xor]}")
# True when exactly one condition is True (3 < x <= 6).
# Since x > 6 implies x > 3, the only way for exactly one to hold is x > 3 but not x > 6.

XOR condition: [5 4]


In [15]:
# IMPORTANT: Always use parentheses!
# arr > 3 & arr < 8  # Error! & has higher precedence than >
#   -> parsed as the chained comparison  arr > (3 & arr) < 8,
#      which needs the truth value of a whole array and raises ValueError.
# (arr > 3) & (arr < 8)  # Correct!

---
## 4. Logical Functions

In [16]:
arr = np.array([1, 5, 3, 8, 2, 9, 4, 7])

# np.logical_and, np.logical_or, np.logical_not
# Function forms of the element-wise logic operators; on boolean masks,
# np.logical_and(a, b) selects the same elements as a & b.
mask = np.logical_and(arr > 3, arr < 8)
print(f"logical_and: {arr[mask]}")

logical_and: [5 4 7]


In [17]:
# any() and all(): reduce a boolean mask to a single True/False
mask = arr > 5
# np.any -> True if at least one entry of the mask is True
print(f"Any > 5? {np.any(mask)}")
# np.all -> True only if every entry of the mask is True
print(f"All > 5? {np.all(mask)}")

Any > 5? True
All > 5? False


In [18]:
# 2D with axis: `axis` chooses the dimension that is collapsed by the reduction
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(f"Array:\n{arr2d}")

# Any element > 5 per row
# axis=1 collapses the columns, leaving one result per row
print(f"Any > 5 per row: {np.any(arr2d > 5, axis=1)}")

# All elements > 2 per column
# axis=0 collapses the rows, leaving one result per column
print(f"All > 2 per column: {np.all(arr2d > 2, axis=0)}")

Array:
[[1 2 3]
 [4 5 6]
 [7 8 9]]
Any > 5 per row: [False  True  True]
All > 2 per column: [False False  True]


---
## 5. np.where()

In [19]:
arr = np.array([1, 5, 3, 8, 2, 9, 4, 7])

# With one argument: returns indices where True
# The result is a tuple holding one index array per dimension (a 1-tuple for this 1-D array).
# NOTE: the NumPy docs describe this form as shorthand for np.asarray(cond).nonzero()
# and recommend calling np.nonzero directly (see section 6).
indices = np.where(arr > 5)
print(f"Indices where > 5: {indices}")
# The tuple can be used directly as an index to fetch the matching values
print(f"Values: {arr[indices]}")

Indices where > 5: (array([3, 5, 7]),)
Values: [8 9 7]


In [20]:
# With three arguments: conditional replacement
# np.where(condition, value_if_true, value_if_false)
# Builds a new array with the same length as arr: the element of arr is kept where the
# condition holds and 0 is used elsewhere. arr itself is not modified.
result = np.where(arr > 5, arr, 0)
print(f"Keep if > 5 else 0: {result}")

Keep if > 5 else 0: [0 0 0 8 0 9 0 7]


In [21]:
# Create labels: the two value arguments can be scalars, here strings,
# so the result is an array of strings with one label per element of arr.
labels = np.where(arr > 5, 'high', 'low')
print(f"Labels: {labels}")

Labels: ['low' 'low' 'low' 'high' 'low' 'high' 'low' 'high']


In [22]:
# 2D np.where
arr2d = np.arange(12).reshape(3, 4)
print(f"Array:\n{arr2d}")

# For a 2-D input the returned tuple has two arrays: row indices and column indices.
# Position k of both arrays together gives the coordinates of the k-th match.
rows, cols = np.where(arr2d > 5)
print(f"Row indices: {rows}")
print(f"Col indices: {cols}")
# Indexing with the paired index arrays picks out exactly the matching elements
print(f"Values: {arr2d[rows, cols]}")

Array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Row indices: [1 1 2 2 2 2]
Col indices: [2 3 0 1 2 3]
Values: [ 6  7  8  9 10 11]


---
## 6. np.nonzero() and np.argwhere()

In [23]:
arr = np.array([0, 5, 0, 8, 0, 9, 0, 7])

# np.nonzero: indices of non-zero elements
# Like one-argument np.where, it returns a tuple with one index array per dimension.
indices = np.nonzero(arr)
print(f"Non-zero indices: {indices}")
print(f"Non-zero values: {arr[indices]}")

Non-zero indices: (array([1, 3, 5, 7]),)
Non-zero values: [5 8 9 7]


In [24]:
# Works with boolean too: True counts as non-zero, so this returns
# the positions where the condition holds.
arr = np.array([1, 5, 3, 8, 2])
indices = np.nonzero(arr > 3)
print(f"Indices > 3: {indices}")

Indices > 3: (array([1, 3]),)


In [25]:
# np.argwhere: returns coordinates as rows
arr2d = np.array([[1, 0, 3], [0, 5, 0], [7, 0, 9]])
print(f"Array:\n{arr2d}")

# One row per matching element; each row is that element's [row, col] position.
coords = np.argwhere(arr2d > 4)
print(f"Coordinates > 4:\n{coords}")

Array:
[[1 0 3]
 [0 5 0]
 [7 0 9]]
Coordinates > 4:
[[1 1]
 [2 0]
 [2 2]]


In [26]:
# Difference: where vs argwhere
# where: returns tuple of arrays (one per dimension)
#        -> can be used directly as an index, e.g. arr2d[np.where(arr2d > 4)]
# argwhere: returns array of [row, col] pairs
#        -> one coordinate row per match; convenient for iterating over positions

---
## 7. Special Boolean Functions

In [27]:
# np.isin: check membership
arr = np.array([1, 2, 3, 4, 5, 6])
valid = [2, 4, 6]

# True at each position of arr whose value appears in `valid`
mask = np.isin(arr, valid)
print(f"In {valid}: {mask}")
print(f"Filtered: {arr[mask]}")

In [2, 4, 6]: [False  True False  True False  True]
Filtered: [2 4 6]


In [28]:
# Invert isin: ~ flips the membership mask, keeping the elements NOT in `valid`
mask = ~np.isin(arr, valid)
print(f"Not in {valid}: {arr[mask]}")

Not in [2, 4, 6]: [1 3 5]


In [29]:
# np.isnan, np.isinf, np.isfinite
arr = np.array([1.0, np.nan, np.inf, -np.inf, 5.0])

print(f"Array: {arr}")
# isnan: True only for NaN
print(f"isnan: {np.isnan(arr)}")
# isinf: True for +inf and -inf
print(f"isinf: {np.isinf(arr)}")
# isfinite: True for ordinary numbers, i.e. neither NaN nor +/-inf
print(f"isfinite: {np.isfinite(arr)}")

Array: [  1.  nan  inf -inf   5.]
isnan: [False  True False False False]
isinf: [False False  True  True False]
isfinite: [ True False False False  True]


In [30]:
# Filter out NaN (note: +/-inf values are kept by this mask)
clean = arr[~np.isnan(arr)]
print(f"Without NaN: {clean}")

# Keep only finite (drops NaN and +/-inf in one step)
finite = arr[np.isfinite(arr)]
print(f"Only finite: {finite}")

Without NaN: [  1.  inf -inf   5.]
Only finite: [1. 5.]


In [31]:
# np.isclose: approximate equality
# Element-wise test |a - b| <= atol + rtol * |b|; per the NumPy docs the defaults
# are rtol=1e-05 and atol=1e-08.
a = np.array([1.0, 2.0, 3.0])
b = np.array([1.0 + 1e-9, 2.1, 3.0])

# Default tolerances: the 1e-9 difference passes, the 0.1 difference (2.0 vs 2.1) does not
print(f"isclose: {np.isclose(a, b)}")
# rtol=0.1 allows roughly a 10% relative difference, so 2.0 vs 2.1 now passes
print(f"isclose with rtol=0.1: {np.isclose(a, b, rtol=0.1)}")

isclose: [ True False  True]
isclose with rtol=0.1: [ True  True  True]


---
## 8. 2D Boolean Indexing

In [32]:
# 3x4 array holding 0..11 in row-major order; used by the 2D examples in this section
arr = np.arange(12).reshape(3, 4)
print(f"Array:\n{arr}")

Array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [33]:
# Filter elements (returns 1D)
# The mask has the same shape as arr; the matching elements are returned as a flat
# 1-D array because the matches need not form a rectangular block.
filtered = arr[arr > 5]
print(f"Elements > 5: {filtered}")

Elements > 5: [ 6  7  8  9 10 11]


In [34]:
# Filter rows based on condition
# A 1-D mask with one entry per row selects whole rows, so the result stays 2-D.
row_mask = arr[:, 0] > 2  # First column > 2
print(f"Row mask: {row_mask}")
print(f"Filtered rows:\n{arr[row_mask]}")

Row mask: [False  True  True]
Filtered rows:
[[ 4  5  6  7]
 [ 8  9 10 11]]


In [35]:
# Filter columns based on condition
# A 1-D mask with one entry per column, applied on the second axis, selects whole columns.
col_mask = arr[0, :] < 2  # First row < 2
print(f"Col mask: {col_mask}")
# `:` keeps every row; col_mask picks the columns
print(f"Filtered columns:\n{arr[:, col_mask]}")

Col mask: [ True  True False False]
Filtered columns:
[[0 1]
 [4 5]
 [8 9]]


In [36]:
# Rows where any element > 8
# axis=1 reduces across the columns, giving one True/False per row
row_mask = np.any(arr > 8, axis=1)
print(f"Rows with any > 8:\n{arr[row_mask]}")

# Rows where all elements > 2
# row_mask is reassigned here; the previous mask is no longer needed
row_mask = np.all(arr > 2, axis=1)
print(f"Rows with all > 2:\n{arr[row_mask]}")

Rows with any > 8:
[[ 8  9 10 11]]
Rows with all > 2:
[[ 4  5  6  7]
 [ 8  9 10 11]]


---
## Key Points Summary

**Creating Masks:**
- Comparison operators create boolean arrays
- Use `&`, `|`, `~` (not `and`, `or`, `not`)
- Always use parentheses around conditions

**Filtering:**
- `arr[mask]` returns COPY of matching elements
- `arr[mask] = value` modifies original in-place
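- Indexing a 2D array with a same-shape boolean mask returns a 1D array; a 1D row or column mask (`arr[row_mask]`, `arr[:, col_mask]`) keeps the result 2D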

**Key Functions:**
- `np.where(cond)` - indices
- `np.where(cond, a, b)` - conditional values
- `np.isin()` - membership test
- `np.isnan()`, `np.isfinite()` - special values

---
## Interview Tips

**Q1: Why use & instead of 'and' for array conditions?**
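> Python's `and` asks for the truthiness of each whole operand, which is ambiguous for a multi-element array (NumPy raises `ValueError`). We need element-wise boolean logic, and `&` is overloaded by NumPy for element-wise AND. Because `&` binds more tightly than comparison operators, each condition must be wrapped in parentheses.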

**Q2: How do you filter rows where at least one column meets a condition?**
> `arr[np.any(condition, axis=1)]` - any() along axis 1 gives row-wise result.

**Q3: What's the difference between np.where with 1 vs 3 arguments?**
> - 1 arg: returns tuple of indices where True
> - 3 args: returns array with value_if_true or value_if_false

**Q4: How do you remove NaN values from an array?**
> `arr[~np.isnan(arr)]` removes NaN only; `arr[np.isfinite(arr)]` removes NaN and ±Inf in one step.

---
## Practice Exercises

### Exercise 1: Filter outliers using IQR

In [37]:
# Fixed seed so the generated sample is the same on every run
np.random.seed(42)
# 100 draws from a normal distribution (mean 50, std 10) plus two injected extreme values
data = np.concatenate([np.random.normal(50, 10, 100), [200, -50]])  # Some outliers
# Remove outliers using IQR method
# (IQR = Q3 - Q1; a common rule treats values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] as outliers)


In [38]:
# Solution
# Rebuild the exercise data: same seed, so the same 100 normal draws
# followed by the two injected extreme values.
np.random.seed(42)
data = np.concatenate([np.random.normal(50, 10, 100), [200, -50]])

# First and third quartile from a single percentile call;
# the IQR is the spread of the middle 50% of the data.
q1, q3 = np.percentile(data, [25, 75])
iqr = q3 - q1

# Fences placed 1.5 * IQR outside the quartiles.
lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr

# Inlier mask (fences inclusive); its complement selects the outliers.
mask = (lower <= data) & (data <= upper)
clean_data = data[mask]

print(f"Original count: {len(data)}")
print(f"After removing outliers: {len(clean_data)}")
print(f"Removed: {data[~mask]}")

Original count: 102
After removing outliers: 99
Removed: [ 23.8 200.  -50. ]


### Exercise 2: Replace multiple values at once

In [39]:
# Replace values [1, 3, 5] with [10, 30, 50] respectively
# (i.e. 1 -> 10, 3 -> 30, 5 -> 50; every other value stays as it is)
arr = np.array([1, 2, 3, 4, 5, 1, 2, 3])


In [40]:
# Solution
def replace_values(values, old_vals, new_vals):
    """Return a copy of `values` with each old_vals[i] replaced by new_vals[i].

    Every mask is computed against the untouched input rather than the
    partially updated result. A replacement value that also appears in
    `old_vals` (e.g. 1 -> 3 together with 3 -> 5) is therefore never
    replaced a second time.

    Parameters:
        values: array-like of values to process; it is not modified.
        old_vals: sequence of values to look for.
        new_vals: sequence of replacements, paired with `old_vals` by position.

    Returns:
        A new NumPy array with the same shape and dtype as `values`.
    """
    values = np.asarray(values)
    replaced = values.copy()
    for old, new in zip(old_vals, new_vals):
        # Mask on the original values, write into the copy.
        replaced[values == old] = new
    return replaced


arr = np.array([1, 2, 3, 4, 5, 1, 2, 3])
print(f"Original: {arr}")

old_vals = [1, 3, 5]
new_vals = [10, 30, 50]

result = replace_values(arr, old_vals, new_vals)

print(f"Replaced: {result}")

Original: [1 2 3 4 5 1 2 3]
Replaced: [10  2 30  4 50 10  2 30]


### Exercise 3: Clip values within percentile range

In [41]:
# Clip array values to [5th percentile, 95th percentile]
# (values below the 5th percentile are raised to it, values above the 95th are lowered to it)
np.random.seed(42)  # fixed seed so the sample is the same on every run
data = np.random.randn(1000)  # 1000 draws from the standard normal distribution


In [42]:
# Solution
# Regenerate the exercise data (same seed, same 1000 standard-normal draws).
np.random.seed(42)
data = np.random.randn(1000)

# Both bounds from one percentile call.
p5, p95 = np.percentile(data, [5, 95])

# Values below p5 become p5, values above p95 become p95; the rest are unchanged.
# `data` itself is left as is; `clipped` is a new array.
clipped = np.clip(data, p5, p95)

print(f"Original range: [{data.min():.2f}, {data.max():.2f}]")
print(f"Clipped range: [{clipped.min():.2f}, {clipped.max():.2f}]")
print(f"P5, P95: [{p5:.2f}, {p95:.2f}]")

Original range: [-3.24, 3.85]
Clipped range: [-1.53, 1.68]
P5, P95: [-1.53, 1.68]


---
## Next Notebook
**03_advanced_slicing_techniques.ipynb** - Structured arrays, record arrays, and advanced memory techniques.