# CPU Kernels 1D Basics - Understanding Element-wise Operations
This notebook demonstrates how array operations work element by element.

Each operation inside the loop is a "kernel" - the core computation
that will later be parallelised on the GPU.

In [None]:
import numpy as np

## Create Random Data

In [None]:
# Fixed seed so that every fresh run of the notebook produces the same data.
rng = np.random.default_rng(seed=831)
array_size = 20

# Draw random integers, then store them as floating-point values.
data = rng.integers(low=1, high=10, size=array_size).astype(np.float64)
print(f"Original array (first 10): {data[:10]}...")

## Element-wise Operations
### Method 1: Using NumPy
NumPy hides the loop.

In [None]:
# Vectorised form: a single call adds 5 to every element of `data`.
numpy_result = np.add(data, 5)
print(f"Result: {numpy_result[:10]}...")

### Method 2: Explicit loop
This demonstrates the concept of a kernel.

In [None]:
def kernel_add_constant(input_array, constant):
    """
    Add a constant to each element.

    This function shows explicitly what happens inside NumPy.
    Each iteration of the loop is independent - this is key!
    On a GPU, each iteration would run on a different thread.

    Parameters
    ----------
    input_array : np.ndarray
        Array of numbers; the loop indexes it with a single index, so it
        is intended for one-dimensional input.
    constant : scalar
        Value added to every element.

    Returns
    -------
    np.ndarray
        New array with the summed values. ``input_array`` is not modified.
    """
    # Allocate the output with the promoted dtype so that, for example, an
    # integer array plus 0.5 is not truncated back to integers. This keeps
    # the result in line with `input_array + constant`.
    output_dtype = np.result_type(input_array, constant)
    output_array = np.zeros_like(input_array, dtype=output_dtype)

    # this loop will become parallel threads on gpu
    for i in range(input_array.size):
        # the kernel: what each thread does independently
        output_array[i] = input_array[i] + constant

    return output_array

In [None]:
loop_result = kernel_add_constant(data, 5)
print(f"Result: {loop_result[:10]}...")

# Sanity check: the explicit loop must reproduce the vectorised result
# computed in the "Method 1" cell (`numpy_result = data + 5`).
np.testing.assert_array_equal(loop_result, numpy_result)

## Different Operations
Multiplication, squaring and thresholding.

In [None]:
def kernel_multiply(input_array, factor):
    """
    Multiply each element by a factor.

    Parameters
    ----------
    input_array : np.ndarray
        Array of numbers; the loop indexes it with a single index, so it
        is intended for one-dimensional input.
    factor : scalar
        Value every element is multiplied by.

    Returns
    -------
    np.ndarray
        New array with the scaled values. ``input_array`` is not modified.
    """
    # Allocate the output with the promoted dtype so that, for example, an
    # integer array times 0.5 is not truncated back to integers. This keeps
    # the result in line with `input_array * factor`.
    output_dtype = np.result_type(input_array, factor)
    output_array = np.zeros_like(input_array, dtype=output_dtype)

    for i in range(input_array.size):
        # kernel: multiply operation
        output_array[i] = input_array[i] * factor

    return output_array

def kernel_square(input_array):
    """Return a new array holding the square of every element."""
    n_elements = input_array.size
    squared = np.zeros_like(input_array)

    # One pass per element; each pass reads and writes only its own slot.
    for idx in range(n_elements):
        value = input_array[idx]
        squared[idx] = value ** 2

    return squared

def kernel_threshold(input_array, threshold):
    """Binarise the array: 1 where an element exceeds ``threshold``, else 0."""
    flags = np.zeros_like(input_array)

    for idx in range(input_array.size):
        # kernel: strict comparison, so a value equal to the threshold maps to 0
        flags[idx] = 1.0 if input_array[idx] > threshold else 0.0

    return flags

In [None]:
# Run every kernel on the same input first ...
multiplied = kernel_multiply(data, 2)
squared = kernel_square(data)
thresholded = kernel_threshold(data, 5)

# ... then report the results one under the other for easy comparison.
print("Applying different kernels to the same data:")
print(f"Original:    {data[:10]}...")
print(f"Multiplied:  {multiplied[:10]}...")
print(f"Squared:     {squared[:10]}...")
print(f"Thresholded: {thresholded[:10]}...")

## Independent Operations
This demonstrates that each iteration only needs `input_array[i]` and does not need the result of any other iteration (or, on a GPU, any other thread).

This is sometimes called _embarrassingly parallel_.

In [None]:
def demonstrate_independence(input_array, max_elements=10):
    """
    Show that each element is processed independently.

    Applies ``2 * x + 3`` to the leading elements of ``input_array`` and
    prints one line per element, labelled with the "thread" that would
    handle it.

    Parameters
    ----------
    input_array : np.ndarray
        Array of numbers; the loop indexes it with a single index, so it
        is intended for one-dimensional input.
    max_elements : int, optional
        Upper bound on how many leading elements are processed and
        printed (default 10). Values below zero are treated as zero.

    Returns
    -------
    np.ndarray
        Float array holding the transformed values, one per processed
        element.
    """
    # only process and return what we demonstrate
    demo_size = max(0, min(max_elements, input_array.size))
    output_array = np.zeros(demo_size)

    print("Processing each element independently:")
    for i in range(demo_size):
        # reads only input_array[i]; no other iteration's result is needed
        output_array[i] = input_array[i] * 2 + 3
        print(f"  Thread {i}: input[{i}] = {input_array[i]:.1f} "
              f"-> output[{i}] = {output_array[i]:.1f}")

    return output_array

In [None]:
# Bare last expression: the notebook displays the returned array below the printed trace.
demonstrate_independence(input_array=data)

## Dependent Operations

Each output depends on the previous output, and this dependency prevents the simple one-thread-per-element parallelisation used above.

In [None]:
def kernel_cumulative_sum(input_array):
    """
    Cumulative summing depends on the previous result.

    Parameters
    ----------
    input_array : np.ndarray
        Array of numbers; the loop indexes it with a single index, so it
        is intended for one-dimensional input. May be empty.

    Returns
    -------
    np.ndarray
        New array of the same dtype where element ``i`` is the sum of
        ``input_array[0]`` through ``input_array[i]``. An empty input
        gives an empty result.
    """
    output_array = np.zeros_like(input_array)

    # nothing to accumulate; also avoids indexing element 0 of an empty array
    if input_array.size == 0:
        return output_array

    # the first element is the same
    output_array[0] = input_array[0]

    for i in range(1, input_array.size):
        # this operation depends on the previous iteration
        # thread i needs the result from thread i-1
        output_array[i] = output_array[i-1] + input_array[i]

    return output_array

In [None]:
# Run the sequential kernel, then show the input and its running total one above the other.
cumulative_result = kernel_cumulative_sum(input_array=data)

print(f"Input:           {data}")
print(f"Cumulative sum:  {cumulative_result}")

In [None]:
def kernel_sequential_random(size, seed):
    """
    Generate ``size`` pseudo-random values from a linear recurrence.

    Every value is computed from the state left behind by the previous
    step, so the steps have to run one after another. For an integer
    ``seed`` the returned values lie in the range [0, 1).
    """
    multiplier = 1103515245
    increment = 831721
    modulus = 2**31

    samples = np.zeros(size)
    current = seed

    for idx in range(size):
        # advance the state first, then scale it down by the modulus
        current = (multiplier * current + increment) % modulus
        samples[idx] = current / modulus

    return samples

In [None]:
# Ten draws from a fixed seed; only the first five are printed.
random_sequence = kernel_sequential_random(size=10, seed=20250701)
print(f"Random sequence: {random_sequence[:5]}...")