# Vectorization Techniques

**Module 04 | Notebook 02**

---

## Objective
By the end of this notebook, you will master:
- Replacing loops with vectorized operations
- Using np.vectorize and np.frompyfunc
- Conditional operations without loops
- Performance comparison techniques
- Common loop-replacement patterns

In [2]:
import numpy as np
import time
np.set_printoptions(precision=3)

---
## 1. Why Vectorization?

Python loops are slow due to:
- Dynamic typing (type checking each iteration)
- Interpreter overhead
- Poor cache utilization

NumPy vectorized operations:
- Compiled C/Fortran (fast)
- Type homogeneity (no checking)
- Cache-friendly memory access

In [3]:
# Performance comparison: loop vs vectorized
def timer(func, *args, n_runs=5):
    """Time a function over multiple runs.

    Parameters
    ----------
    func : callable
        Function to benchmark; it is called as ``func(*args)``.
    *args
        Positional arguments forwarded to ``func`` on every run.
    n_runs : int, default 5
        Number of timed calls. Must be at least 1.

    Returns
    -------
    tuple
        ``(mean_seconds, result)``: the mean wall-clock duration of one call
        and the return value of the last call.

    Raises
    ------
    ValueError
        If ``n_runs`` is less than 1.
    """
    if n_runs < 1:
        # With zero runs the loop body never executes: `result` would be
        # unbound and the mean would be taken over an empty list.
        raise ValueError("n_runs must be at least 1")
    times = []
    for _ in range(n_runs):
        start = time.perf_counter()
        result = func(*args)
        times.append(time.perf_counter() - start)
    return np.mean(times), result

In [4]:
# Sum of squares: loop vs vectorized
n = 1_000_000
# dtype=float makes every element a floating-point value, so both
# implementations below accumulate a floating-point total.
arr = np.arange(n, dtype=float)

def loop_sum_squares(arr):
    """Return the sum of squared elements of ``arr`` using a Python loop.

    This is the deliberately slow baseline: every element is squared and
    added to the running total one at a time by the interpreter.
    """
    total = 0
    for value in arr:
        total += value * value
    return total

def vectorized_sum_squares(arr):
    """Return the sum of squared elements of ``arr`` using NumPy operations.

    The squaring and the reduction each run as a single array operation
    instead of a per-element Python loop.
    """
    squared = arr ** 2
    return np.sum(squared)

# timer returns (mean seconds per call, last result); only the mean is kept.
loop_time, _ = timer(loop_sum_squares, arr)
vec_time, _ = timer(vectorized_sum_squares, arr)

# Times are reported in milliseconds; the speedup is the ratio of the means.
print(f"Loop: {loop_time*1000:.2f} ms")
print(f"Vectorized: {vec_time*1000:.2f} ms")
print(f"Speedup: {loop_time/vec_time:.1f}x")

Loop: 101.56 ms
Vectorized: 2.72 ms
Speedup: 37.3x


---
## 2. Basic Loop Replacement Patterns

In [5]:
# Pattern 1: Element-wise operations
arr = np.array([1, 2, 3, 4, 5])

# Loop version (DON'T DO THIS)
# zeros_like allocates an output with the same shape and dtype as arr.
result_loop = np.zeros_like(arr)
for i in range(len(arr)):
    result_loop[i] = arr[i] * 2 + 1

# Vectorized (DO THIS)
# The scalars 2 and 1 are applied to every element in a single expression.
result_vec = arr * 2 + 1

print(f"Loop: {result_loop}")
print(f"Vectorized: {result_vec}")

Loop: [ 3  5  7  9 11]
Vectorized: [ 3  5  7  9 11]


In [6]:
# Pattern 2: Aggregations
arr = np.array([1, 2, 3, 4, 5])

# Loop (DON'T)
# Accumulates one element at a time; `total` is shown for contrast only
# and is not printed below.
total = 0
for x in arr:
    total += x

# Vectorized (DO)
total_vec = np.sum(arr)

print(f"Sum: {total_vec}")

Sum: 15


In [7]:
# Pattern 3: Conditionals with np.where
arr = np.array([-2, -1, 0, 1, 2])

# Loop (DON'T)
result_loop = np.zeros_like(arr)
for i in range(len(arr)):
    if arr[i] > 0:
        result_loop[i] = arr[i]
    else:
        result_loop[i] = 0

# Vectorized (DO)
# np.where(cond, a, b) takes the element from a where cond is True and
# from b otherwise; the scalar 0 is broadcast to every position.
result_vec = np.where(arr > 0, arr, 0)

print(f"ReLU: {result_vec}")

ReLU: [0 0 0 1 2]


In [8]:
# Pattern 4: Boolean indexing
arr = np.array([1, 5, 3, 8, 2, 9, 4])

# Loop (DON'T)
result_loop = []
for x in arr:
    if x > 4:
        result_loop.append(x)
result_loop = np.array(result_loop)

# Vectorized (DO)
# `arr > 4` builds a boolean mask; indexing with it keeps the elements at
# True positions, in their original order.
result_vec = arr[arr > 4]

print(f"Elements > 4: {result_vec}")

Elements > 4: [5 8 9]


---
## 3. Conditional Operations

In [9]:
# np.where for if-else
arr = np.array([-3, -1, 0, 2, 5])

# Ternary: positive -> keep, else -> 0
# The condition `arr > 0` is evaluated element-wise, so 0 itself maps to 0.
result = np.where(arr > 0, arr, 0)
print(f"np.where: {result}")

np.where: [0 0 0 2 5]


In [10]:
# Nested np.where for if-elif-else
arr = np.array([-5, -2, 0, 3, 7])

# if x < 0: -1, elif x == 0: 0, else: 1
# The inner np.where plays the role of the elif/else branch. Both branch
# arrays are computed in full; np.where only selects between them.
result = np.where(arr < 0, -1, np.where(arr == 0, 0, 1))
print(f"Sign-like: {result}")

# Or use np.sign
print(f"np.sign: {np.sign(arr)}")

Sign-like: [-1 -1  0  1  1]
np.sign: [-1 -1  0  1  1]


In [11]:
# np.select for multiple conditions
arr = np.array([15, 25, 45, 65, 85])

# Order matters: np.select uses the first condition that is True for each
# element, so the thresholds are listed in ascending order.
conditions = [
    arr < 20,
    arr < 40,
    arr < 60,
    arr < 80
]
choices = ['F', 'D', 'C', 'B']
# Used for elements that match none of the conditions (here: scores >= 80).
default = 'A'

grades = np.select(conditions, choices, default=default)
print(f"Scores: {arr}")
print(f"Grades: {grades}")

Scores: [15 25 45 65 85]
Grades: ['F' 'D' 'C' 'B' 'A']


In [12]:
# np.clip for range limiting
arr = np.array([-5, 0, 5, 10, 15])

# Values below 0 become 0, values above 10 become 10; the rest are unchanged.
clipped = np.clip(arr, 0, 10)
print(f"Original: {arr}")
print(f"Clipped [0, 10]: {clipped}")

Original: [-5  0  5 10 15]
Clipped [0, 10]: [ 0  0  5 10 10]


In [13]:
# np.minimum and np.maximum
# Both compare element-wise against the (broadcast) scalar, unlike
# np.min/np.max, which reduce an array to a single value.
arr = np.array([1, 5, 3, 8, 2])

# Cap at 4
capped = np.minimum(arr, 4)
print(f"Capped at 4: {capped}")

# Floor at 3
floored = np.maximum(arr, 3)
print(f"Floored at 3: {floored}")

Capped at 4: [1 4 3 4 2]
Floored at 3: [3 5 3 8 3]


---
## 4. Advanced Vectorization Patterns

In [14]:
# Pattern: Running/Cumulative operations
arr = np.array([1, 2, 3, 4, 5])

# Running sum
cumsum = np.cumsum(arr)
print(f"Cumulative sum: {cumsum}")

# Running product
cumprod = np.cumprod(arr)
print(f"Cumulative product: {cumprod}")

# Running max
# ufunc.accumulate applies np.maximum cumulatively, the same way cumsum
# applies addition.
cummax = np.maximum.accumulate(arr)
print(f"Cumulative max: {cummax}")

Cumulative sum: [ 1  3  6 10 15]
Cumulative product: [  1   2   6  24 120]
Cumulative max: [1 2 3 4 5]


In [15]:
# Pattern: Differences and gradients
arr = np.array([1, 4, 6, 8, 15])

# First difference
# out[i] = arr[i + 1] - arr[i], so the result is one element shorter.
diff1 = np.diff(arr)
print(f"Array: {arr}")
print(f"1st difference: {diff1}")

# Second difference
# n=2 applies the difference twice, shortening the result by two elements.
diff2 = np.diff(arr, n=2)
print(f"2nd difference: {diff2}")

Array: [ 1  4  6  8 15]
1st difference: [3 2 2 7]
2nd difference: [-1  0  5]


In [16]:
# Pattern: Sliding window operations using stride tricks
from numpy.lib.stride_tricks import sliding_window_view

arr = np.array([1, 2, 3, 4, 5, 6])
window_size = 3

# Create sliding windows
# One row per window position: len(arr) - window_size + 1 rows in total.
windows = sliding_window_view(arr, window_size)
print(f"Array: {arr}")
print(f"Windows:\n{windows}")

# Moving average
# axis=1 averages within each window (row).
moving_avg = windows.mean(axis=1)
print(f"Moving average: {moving_avg}")

Array: [1 2 3 4 5 6]
Windows:
[[1 2 3]
 [2 3 4]
 [3 4 5]
 [4 5 6]]
Moving average: [2. 3. 4. 5.]


In [17]:
# Pattern: Binning with np.digitize
data = np.array([0.5, 1.2, 2.5, 3.8, 4.1, 5.5, 6.9])
bins = np.array([2, 4, 6])

# Get bin indices
# Index i means bins[i-1] <= x < bins[i]; values below the first edge get 0
# and values at or above the last edge get len(bins).
indices = np.digitize(data, bins)
print(f"Data: {data}")
print(f"Bins: {bins}")
print(f"Bin indices: {indices}")

# Map to labels
# Three edges produce four possible indices, hence four labels.
labels = np.array(['low', 'medium', 'high', 'very high'])
result = labels[indices]
print(f"Labels: {result}")

Data: [0.5 1.2 2.5 3.8 4.1 5.5 6.9]
Bins: [2 4 6]
Bin indices: [0 0 1 1 2 2 3]
Labels: ['low' 'low' 'medium' 'medium' 'high' 'high' 'very high']


---
## 5. np.vectorize (Use with Caution)

In [18]:
# np.vectorize: convenience, NOT performance
# It's basically a loop wrapper - not truly vectorized!

def my_func(x):
    """Return 0 for x < 0, x itself for 0 <= x < 10, and 10 otherwise.

    Scalar-only helper: the comparisons are not meaningful for whole arrays,
    which is why it has to be wrapped before being applied to one.
    """
    return 0 if x < 0 else (x if x < 10 else 10)

# Vectorize it
# With no otypes argument, np.vectorize infers the output dtype from the
# result of the first call (here my_func(-5) -> 0, an int).
vectorized_func = np.vectorize(my_func)

arr = np.array([-5, 0, 5, 10, 15])
result = vectorized_func(arr)
print(f"Result: {result}")

Result: [ 0  0  5 10 10]


In [19]:
# Better: true vectorization with np.clip
arr = np.array([-5, 0, 5, 10, 15])

# Same logic as my_func (below 0 -> 0, 10 and above -> 10) but truly
# vectorized. np.clip enforces both bounds in one call, so wrapping the input
# in np.maximum(arr, 0) first is redundant.
result = np.clip(arr, 0, 10)
print(f"True vectorized: {result}")

True vectorized: [ 0  0  5 10 10]


In [20]:
# Performance comparison
arr = np.random.randn(100000)

# np.vectorize (slow): the Python lambda is called once per element.
# otypes is given explicitly because np.vectorize otherwise infers the output
# dtype from the first call; whenever arr[0] <= 0 the lambda returns the int 0
# and every positive value would be truncated to an integer.
vec_func = np.vectorize(lambda x: x if x > 0 else 0, otypes=[float])
time_vec, _ = timer(lambda: vec_func(arr))

# True vectorization (fast)
time_true, _ = timer(lambda: np.maximum(arr, 0))

# Times are reported in milliseconds; the speedup is the ratio of the means.
print(f"np.vectorize: {time_vec*1000:.2f} ms")
print(f"True vectorized: {time_true*1000:.2f} ms")
print(f"Speedup: {time_vec/time_true:.1f}x")

np.vectorize: 18.43 ms
True vectorized: 0.08 ms
Speedup: 236.6x


---
## 6. Common Vectorized Functions

In [21]:
# Distance calculations
# Euclidean distance without loop
point1 = np.array([0, 0, 0])
point2 = np.array([3, 4, 0])

# Square the per-coordinate differences, sum them, then take the square root.
distance = np.sqrt(np.sum((point2 - point1) ** 2))
# Or using linalg
# (computed for comparison; only `distance` is printed below)
distance_linalg = np.linalg.norm(point2 - point1)
print(f"Distance: {distance}")

Distance: 5.0


In [22]:
# Batch distance: many points at once
points = np.random.rand(1000, 3)  # 1000 3D points
origin = np.zeros(3)

# All distances at once
# origin (shape (3,)) is broadcast across every row; axis=1 takes the norm
# over the coordinates, giving one distance per point.
distances = np.linalg.norm(points - origin, axis=1)
print(f"Shape: {distances.shape}")
print(f"First 5 distances: {distances[:5]}")

Shape: (1000,)
First 5 distances: [0.751 1.041 0.895 0.733 0.82 ]


In [23]:
# Softmax (vectorized)
def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating, which
    leaves the result unchanged but keeps ``np.exp`` from overflowing on
    large inputs. ``keepdims=True`` keeps the reduced axis so the
    intermediate results broadcast back against ``x``.
    """
    peak = np.max(x, axis=axis, keepdims=True)
    weights = np.exp(x - peak)
    normalizer = np.sum(weights, axis=axis, keepdims=True)
    return weights / normalizer

# The two rows differ only by a constant offset (+3), and softmax is
# unchanged by adding a constant to a row, so both rows print identically.
logits = np.array([[1, 2, 3], [4, 5, 6]])
probs = softmax(logits, axis=1)
print(f"Softmax:\n{probs}")
print(f"Row sums: {probs.sum(axis=1)}")

Softmax:
[[0.09  0.245 0.665]
 [0.09  0.245 0.665]]
Row sums: [1. 1.]


In [24]:
# One-hot encoding (vectorized)
labels = np.array([0, 2, 1, 0, 2])
n_classes = 3

# Create one-hot matrix
one_hot = np.zeros((len(labels), n_classes))
# Paired integer indexing: for each row i, set column labels[i] to 1.
one_hot[np.arange(len(labels)), labels] = 1

print(f"Labels: {labels}")
print(f"One-hot:\n{one_hot}")

Labels: [0 2 1 0 2]
One-hot:
[[1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]]


In [25]:
# Polynomial evaluation (Horner's method)
# p(x) = c0 + c1*x + c2*x^2 + ...
# Coefficients are stored lowest degree first, matching the formula above.
coefficients = np.array([1, 2, 3])  # 1 + 2x + 3x^2
x = np.array([0, 1, 2, 3])

# Vectorized polynomial evaluation
# The polynomial is evaluated at every entry of x in one call.
result = np.polyval(coefficients[::-1], x)  # polyval expects highest degree first
print(f"p(x) = 1 + 2x + 3x^2")
print(f"p({x}) = {result}")

p(x) = 1 + 2x + 3x^2
p([0 1 2 3]) = [ 1  6 17 34]


---
## 7. Avoiding Loops: Real Examples

In [26]:
# Example 1: Normalize each row (common in ML)
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)

# Bad: loop
normalized_loop = np.zeros_like(data)
for i in range(len(data)):
    row = data[i]
    normalized_loop[i] = (row - row.mean()) / row.std()

# Good: vectorized
# keepdims=True keeps mean/std as column vectors (one value per row) so
# they broadcast across the columns of data.
# NOTE: a constant row has std 0, which makes this a division by zero.
mean = data.mean(axis=1, keepdims=True)
std = data.std(axis=1, keepdims=True)
normalized_vec = (data - mean) / std

print(f"Vectorized:\n{normalized_vec}")

Vectorized:
[[-1.225  0.     1.225]
 [-1.225  0.     1.225]
 [-1.225  0.     1.225]]


In [27]:
# Example 2: Count occurrences of each value
arr = np.array([0, 1, 1, 2, 2, 2, 3])

# Bad: loop
counts_loop = {}
for x in arr:
    counts_loop[x] = counts_loop.get(x, 0) + 1

# Good: vectorized
# np.unique returns the sorted distinct values; return_counts=True adds
# how many times each one occurs, in matching order.
unique, counts = np.unique(arr, return_counts=True)
print(f"Values: {unique}")
print(f"Counts: {counts}")

Values: [0 1 2 3]
Counts: [1 2 3 1]


In [28]:
# Example 3: Apply function to groups
values = np.array([1, 2, 3, 4, 5, 6])
groups = np.array([0, 0, 1, 1, 2, 2])

# Bad: loop
group_means_loop = []
for g in np.unique(groups):
    group_means_loop.append(values[groups == g].mean())

# Good: np.bincount with weights
# With weights, bincount sums the values per group id; without, it counts
# the members. Group ids must be non-negative integers.
# NOTE: an id with no members (a gap in the ids) has count 0 and yields
# 0/0 here, whereas the loop above simply skips it.
sums = np.bincount(groups, weights=values)
counts = np.bincount(groups)
group_means_vec = sums / counts

print(f"Group means: {group_means_vec}")

Group means: [1.5 3.5 5.5]


In [29]:
# Example 4: Pairwise operations
a = np.array([1, 2, 3])
b = np.array([10, 20])

# Bad: nested loops
# np.zeros defaults to float, so result_loop is a float array.
result_loop = np.zeros((len(a), len(b)))
for i in range(len(a)):
    for j in range(len(b)):
        result_loop[i, j] = a[i] + b[j]

# Good: broadcasting
# Shapes (3, 1) and (1, 2) broadcast to (3, 2): every a[i] meets every b[j].
result_vec = a[:, np.newaxis] + b[np.newaxis, :]

print(f"Pairwise sum:\n{result_vec}")

Pairwise sum:
[[11 21]
 [12 22]
 [13 23]]


---
## Key Points Summary

**Core Principles:**
1. Use built-in NumPy functions (sum, mean, etc.)
2. Use broadcasting instead of loops
3. Use boolean indexing for filtering
4. Use np.where/np.select for conditionals

**Common Replacements:**
| Loop Pattern | Vectorized |
|-------------|------------|
| Sum of elements | `np.sum(arr)` |
| Element-wise ops | Direct operators `+, -, *, /` |
| if-else per element | `np.where(cond, a, b)` |
| Filter elements | `arr[condition]` |
| Running sum/product | `np.cumsum`, `np.cumprod` |

**Caution:**
- `np.vectorize` is NOT truly vectorized (it still calls the Python function once per element)
- Profile before optimizing
- Sometimes loops are more readable

---
## Interview Tips

**Q1: How would you speed up a Python loop that processes an array?**
> Replace with NumPy vectorized operations. Element-wise ops use built-in operators, aggregations use np.sum/mean/etc., conditionals use np.where.

**Q2: What is np.vectorize and when should you use it?**
> np.vectorize is a convenience wrapper that makes a scalar function work on arrays. However, it's NOT truly vectorized: it still calls the Python function once per element, so the loop runs in the interpreter rather than in compiled code. Use it for convenience with complex logic, but prefer true vectorization for performance.

**Q3: How do you implement ReLU without loops?**
> `np.maximum(arr, 0)` or `np.where(arr > 0, arr, 0)`

**Q4: Why is NumPy faster than Python loops?**
> - Compiled C loops (no per-element interpreter overhead)
> - Homogeneous data types (no type checking per element)
> - Contiguous memory (cache-friendly)

---
## Practice Exercises

### Exercise 1: Replace loop with vectorization

In [30]:
# Replace this loop with vectorized code:
# The loop squares the positive entries and writes 0 for everything else.
arr = np.array([1, -2, 3, -4, 5])
result = np.zeros_like(arr)
for i in range(len(arr)):
    if arr[i] > 0:
        result[i] = arr[i] ** 2
    else:
        result[i] = 0
print(f"Loop result: {result}")

# Your vectorized version:


Loop result: [ 1  0  9  0 25]


In [31]:
# Solution
arr = np.array([1, -2, 3, -4, 5])
# arr ** 2 is computed for every element; np.where then keeps it only
# where arr > 0 and substitutes 0 elsewhere.
result = np.where(arr > 0, arr ** 2, 0)
print(f"Vectorized result: {result}")

Vectorized result: [ 1  0  9  0 25]


### Exercise 2: Vectorized moving average

In [32]:
# Compute 3-element moving average without explicit loops
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# Expected: [2, 3, 4, 5, 6, 7, 8, 9] (averages of consecutive triplets)
# A length-10 input with a window of 3 gives 10 - 3 + 1 = 8 averages.


In [33]:
# Solution
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# Method 1: Using convolution
# A uniform kernel of weight 1/3 turns the convolution into a 3-point mean;
# mode='valid' keeps only positions where the kernel fully overlaps arr.
kernel = np.ones(3) / 3
moving_avg = np.convolve(arr, kernel, mode='valid')
print(f"Moving average: {moving_avg}")

# Method 2: Using sliding window view
from numpy.lib.stride_tricks import sliding_window_view
# Each row of `windows` is one length-3 window; average within each row.
windows = sliding_window_view(arr, 3)
moving_avg2 = windows.mean(axis=1)
print(f"Moving average (window): {moving_avg2}")

Moving average: [2. 3. 4. 5. 6. 7. 8. 9.]
Moving average (window): [2. 3. 4. 5. 6. 7. 8. 9.]


### Exercise 3: Batch dot product

In [34]:
# Compute dot product of each row in A with vector b
# Without explicit loops
# The answer has one value per row of A (three values here).
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = np.array([1, 0, -1])


In [35]:
# Solution
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = np.array([1, 0, -1])

# Method 1: Matrix-vector multiplication
result = A @ b
print(f"Using @: {result}")

# Method 2: Element-wise multiply and sum
# b is broadcast across the rows of A; axis=1 sums within each row.
result2 = (A * b).sum(axis=1)
print(f"Using multiply+sum: {result2}")

# Method 3: einsum
# 'ij,j->i': multiply A[i, j] by b[j] and sum over the repeated index j.
result3 = np.einsum('ij,j->i', A, b)
print(f"Using einsum: {result3}")

Using @: [-2 -2 -2]
Using multiply+sum: [-2 -2 -2]
Using einsum: [-2 -2 -2]


---
## Module 04 Complete!

You have mastered Broadcasting and Vectorization:
- Broadcasting Fundamentals
- Vectorization Techniques

**Next Module:** 05_advanced_indexing - Fancy indexing, boolean indexing, and advanced selection techniques!