# Cobra vs. Pandas: The Power of Kernel Fusion

This notebook provides a simple, visual demonstration of the massive performance advantage of Cobra's JIT-compiled kernel fusion engine compared to a standard pandas/NumPy workflow.

In [None]:
import numpy as np
import pandas as pd
import cobra
import time

## 1. The Data Processing Pipeline

We will define a simple, multi-step data transformation pipeline. The Cobra version, defined later in Section 3, uses exactly the same logic.

In [None]:
def pipeline(df):
    """Add three derived columns to ``df`` and return it.

    The frame is modified in place; the returned object is the same one
    that was passed in.

    Columns written:
        col_c: ``col_a`` plus 10.0
        col_d: ``col_b`` times 2.5
        col_e: ``col_c`` divided by ``col_d``
    """
    # Step 1: shift col_a by a constant.
    shifted_a = df['col_a'] + 10.0
    df['col_c'] = shifted_a

    # Step 2: scale col_b by a constant.
    scaled_b = df['col_b'] * 2.5
    df['col_d'] = scaled_b

    # Step 3: ratio of the two columns created above.
    ratio = df['col_c'] / df['col_d']
    df['col_e'] = ratio
    return df

## 2. Setup

Let's create a large dataset and prepare both a pandas DataFrame and a CobraFrame.

In [None]:
size = 20_000_000

# Seeded generator so that every Restart & Run All benchmarks the same data
# instead of a fresh draw from NumPy's unseeded global RNG.
rng = np.random.default_rng(42)

# Pandas DataFrame: two float columns drawn uniformly from [1, 10).
# The lower bound of 1 keeps col_b (and therefore col_d) away from zero,
# so the division in the pipeline never divides by zero.
df_pandas = pd.DataFrame({
    'col_a': rng.uniform(1, 10, size),
    'col_b': rng.uniform(1, 10, size),
})

# CobraFrame built from the same values as the pandas frame.
# NOTE(review): `to_dict('list')` materialises every value as a Python float
# in a list (2 x `size` objects), which is slow and memory-hungry at this
# size. If CobraFrame accepts NumPy arrays directly, passing them would be
# cheaper — confirm against Cobra's constructor documentation.
cf_cobra = cobra.CobraFrame(df_pandas.to_dict('list'))

## 3. Benchmarking

### Pandas/NumPy Execution
pandas evaluates each step eagerly, so the three operations make three separate passes over the data and each one allocates a new full-length result array in memory.

In [None]:
# Work on a private copy so `df_pandas` itself is not mutated by the pipeline
# and the cell can be re-run. The copy is taken *before* the timer starts:
# it is benchmark setup, not part of the pipeline, and the Cobra measurement
# does not include a copy either.
pandas_input = df_pandas.copy()

start = time.perf_counter()
result_pandas = pipeline(pandas_input)
duration_pandas = (time.perf_counter() - start) * 1000  # seconds -> milliseconds
print(f"Pandas execution time: {duration_pandas:.2f} ms")

### Cobra JIT Execution
The `@cobra.jit` decorator compiles the entire pipeline into a single kernel, eliminating all intermediate memory copies.

In [None]:
# JIT-wrapped counterpart of `pipeline`: the body repeats the same three
# column assignments, but the function is passed through `cobra.jit`.
# NOTE(review): assumes `df` is a CobraFrame that supports reading and
# assigning columns by name, as the pandas version does — confirm against
# Cobra's documentation.
@cobra.jit
def pipeline_jit(df):
    # The exact same logic as the plain pandas `pipeline`
    df['col_c'] = df['col_a'] + 10.0  # col_a shifted by a constant
    df['col_d'] = df['col_b'] * 2.5  # col_b scaled by a constant
    df['col_e'] = df['col_c'] / df['col_d']  # ratio of the two derived columns
    return df

# Warm-up call, executed under the profiler. This is the first invocation of
# `pipeline_jit`, which the notebook treats as the JIT-compilation run.
# NOTE(review): only this first call is inside the `cobra.profile()` context,
# so the report printed below presumably describes the warm-up call rather
# than the timed call — confirm that this is the intended scope.
with cobra.profile() as p:
    pipeline_jit(cf_cobra)

# Second call, measured with a wall-clock timer outside the profiler context.
start = time.perf_counter()
result_cobra = pipeline_jit(cf_cobra)
elapsed_seconds = time.perf_counter() - start
duration_cobra = elapsed_seconds * 1000  # seconds -> milliseconds
print(f"Cobra JIT execution time: {duration_cobra:.2f} ms")

# Show whatever the profiler recorded while its context was active.
print("\n--- Profiler Report for Cobra Execution ---")
p.print_report()

## 4. Results

In [None]:
# Ratio of the two measured wall-clock durations. Both are in milliseconds,
# so the result is a unitless "times faster" factor.
speedup = duration_pandas / duration_cobra
speedup_line = f"Speedup Factor: {speedup:.2f}x"
print(speedup_line)