In [4]:
import concurrent.futures
from functools import partial

def compare_swap(arr, i):
    """
    Put the adjacent pair arr[i], arr[i + 1] into ascending order in place.

    Returns True when the two elements were exchanged, False when they
    were already in order (equal elements are left untouched).
    """
    j = i + 1
    left, right = arr[i], arr[j]
    if not left > right:
        # Already ordered: nothing to do.
        return False
    arr[i], arr[j] = right, left
    return True

def odd_even_sort_parallel(input_list):
    """
    Sort a sequence with a threaded odd-even transposition sort.

    Phases alternate between the even pairs (0,1), (2,3), ... and the odd
    pairs (1,2), (3,4), ...  The pairs inside one phase never overlap, so
    their compare-swaps are handed to a thread pool (at most 8 workers).

    Args:
        input_list: Iterable of mutually comparable elements (e.g. integers).
            It is copied and never modified.

    Returns:
        A new list holding the elements in ascending order.
    """
    arr = list(input_list)  # Real copy, also for tuples and other sequences
    n = len(arr)
    if n <= 1:
        return arr

    swap_at = partial(compare_swap, arr)

    # An even phase and an odd phase inspect disjoint sets of neighbours, so
    # one swap-free phase proves nothing (e.g. [1, 3, 2, 4] has no swaps in
    # the even phase). Stop only after two consecutive swap-free phases.
    quiet_phases = 0
    phase = 0

    # A phase has at most n // 2 pairs; the pool is created once and reused
    # instead of being rebuilt for every phase.
    with concurrent.futures.ThreadPoolExecutor(max_workers=min(n // 2, 8)) as executor:
        while quiet_phases < 2:
            # Even phase starts at index 0, odd phase at index 1
            indices = range(phase % 2, n - 1, 2)
            # list() waits for every task, so all swaps of this phase are
            # finished before the next phase touches the same elements.
            swaps = list(executor.map(swap_at, indices))
            quiet_phases = 0 if any(swaps) else quiet_phases + 1
            phase += 1

    return arr

# Demo: sort a small sample and print it next to the untouched input
if __name__ == '__main__':
    test_list = [5, 3, 8, 6, 1, 9, 2, 7, 4]
    sorted_list = odd_even_sort_parallel(test_list)
    for label, values in (("Original:", test_list), ("Sorted:", sorted_list)):
        print(label, values)

Original: [5, 3, 8, 6, 1, 9, 2, 7, 4]
Sorted: [1, 2, 3, 4, 5, 6, 7, 8, 9]


In [None]:
# Benchmark and plot runtime vs. number of threads for parallel odd-even sort
# Notes:
# - Uses matplotlib (no seaborn, no custom colors/styles)
# - Generates three separate charts: (1) Runtime vs Threads, (2) Speedup vs Threads, (3) Observed vs Fitted runtime
# - Shows a results table you can download if needed

import random
import time
import statistics
from functools import partial
import concurrent.futures
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

try:
    from caas_jupyter_tools import display_dataframe_to_user
except Exception:
    display_dataframe_to_user = None


def compare_swap(arr, i):
    """Swap arr[i] and arr[i + 1] in place if out of order; return True on a swap."""
    nxt = i + 1
    if not arr[i] > arr[nxt]:
        return False
    arr[i], arr[nxt] = arr[nxt], arr[i]
    return True


def odd_even_sort_parallel(input_list, max_workers=1):
    """
    Sort a sequence with an odd-even transposition sort, optionally threaded.

    Phases alternate between the even pairs (0,1), (2,3), ... and the odd
    pairs (1,2), (3,4), ...  The pairs inside one phase never overlap, so
    with max_workers > 1 their compare-swaps are run on a thread pool.

    Args:
        input_list: Iterable of mutually comparable elements. It is copied
            and never modified.
        max_workers: Number of worker threads to use per phase. Values of 1
            or less run every phase sequentially in the calling thread.

    Returns:
        A new list holding the elements in ascending order.
    """
    arr = list(input_list)  # Real copy, also for tuples and other sequences
    n = len(arr)
    if n <= 1:
        return arr

    swap_at = partial(compare_swap, arr)

    # One pool for the whole sort instead of a new one per phase. A phase has
    # at most n // 2 pairs, so more workers than that would sit idle.
    executor = None
    if max_workers > 1:
        executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=min(n // 2, max_workers)
        )

    # An even phase and an odd phase inspect disjoint sets of neighbours, so
    # one swap-free phase proves nothing (e.g. [1, 3, 2, 4] has no swaps in
    # the even phase). Stop only after two consecutive swap-free phases.
    quiet_phases = 0
    phase = 0

    try:
        while quiet_phases < 2:
            indices = range(phase % 2, n - 1, 2)

            if executor is not None:
                # Threaded path; list() waits for every task so the phase is
                # complete before the next one touches the same elements.
                swaps = list(executor.map(swap_at, indices))
            else:
                # Single-threaded path
                swaps = [swap_at(i) for i in indices]

            quiet_phases = 0 if any(swaps) else quiet_phases + 1
            phase += 1
    finally:
        if executor is not None:
            executor.shutdown()

    return arr


def benchmark_once(n, max_workers, seed=None):
    """
    Time one sort of n random integers drawn from [0, 10**6].

    Args:
        n: Number of elements to generate and sort.
        max_workers: Forwarded to odd_even_sort_parallel.
        seed: When given, the input data is generated from a private RNG
            seeded with this value, so the same seed always yields the same
            data. When None, the module-level RNG is used as-is.

    Returns:
        Elapsed wall-clock seconds for the sort alone (data generation is
        not included), measured with time.perf_counter.
    """
    # A private generator keeps the benchmark from reseeding the process-wide
    # random state as a side effect; it produces the same sequence that
    # random.seed(seed) followed by random.randint would.
    rng = random.Random(seed) if seed is not None else random
    data = [rng.randint(0, 10**6) for _ in range(n)]

    t0 = time.perf_counter()
    odd_even_sort_parallel(data, max_workers=max_workers)
    return time.perf_counter() - t0


def benchmark(n=800, trials=5, thread_list=(1, 2, 4, 8, 16)):
    """
    Run benchmark_once `trials` times for every thread count in thread_list.

    Trial k is seeded with 1000 + k, so each thread count is measured with
    the same seeds.

    Returns:
        A list with one dict per thread count, in thread_list order, holding
        the raw trial times plus their median, mean and population standard
        deviation.
    """

    def summarize(workers):
        # Time all trials for one thread count and condense them.
        timings = [
            benchmark_once(n, workers, seed=1000 + trial)
            for trial in range(trials)
        ]
        return {
            "threads": workers,
            "n": n,
            "trial_times": timings,
            "time_median": statistics.median(timings),
            "time_mean": statistics.mean(timings),
            "time_std": statistics.pstdev(timings),
        }

    return [summarize(workers) for workers in thread_list]


# Configure benchmark
N = 800            # problem size (odd-even sort is O(n^2) in comparisons)
TRIALS = 5
THREADS = (1, 2, 4, 8, 16)

results = benchmark(n=N, trials=TRIALS, thread_list=THREADS)

# Prepare DataFrame: one row per thread count (raw per-trial times are left out)
df = pd.DataFrame([
    {
        "threads": r["threads"],
        "n": r["n"],
        "time_mean_sec": r["time_mean"],
        "time_median_sec": r["time_median"],
        "time_std_sec": r["time_std"],
    }
    for r in results
]).sort_values("threads")

# Compute speedup vs 1 thread (using mean).
# The scalar is extracted with .iloc[0] before converting: calling float() on
# a one-element Series is deprecated in pandas.
t1 = float(df.loc[df["threads"] == 1, "time_mean_sec"].iloc[0])
df["speedup_vs_1thread"] = t1 / df["time_mean_sec"]

# Compute an "estimated complexity constant" ~ time / n^2 for each thread count
df["C_est_time_per_n2"] = df["time_mean_sec"] / (N**2)

# Show table to user (interactive); skipped when the helper could not be imported
if display_dataframe_to_user is not None:
    display_dataframe_to_user("Odd-Even Sort Parallel Benchmark", df)

# Save CSV (the target directory must already exist; to_csv does not create it)
csv_path = "/mnt/data/odd_even_parallel_benchmark.csv"
df.to_csv(csv_path, index=False)

# Plot 1: Runtime vs Threads
plt.figure()
plt.title("Odd-Even Sort (Parallel) — Runtime vs Threads")
plt.xlabel("Threads")
plt.ylabel("Mean Runtime (s)")
plt.xticks(df["threads"].tolist())
plt.plot(df["threads"].to_numpy(), df["time_mean_sec"].to_numpy(), marker="o")
plt.grid(True, linestyle="--", alpha=0.4)
plt.show()

# Plot 2: Speedup vs Threads
plt.figure()
plt.title("Odd-Even Sort (Parallel) — Speedup vs Threads")
plt.xlabel("Threads")
plt.ylabel("Speedup (vs 1 thread)")
plt.xticks(df["threads"].tolist())
plt.plot(df["threads"].to_numpy(), df["speedup_vs_1thread"].to_numpy(), marker="o")
plt.grid(True, linestyle="--", alpha=0.4)
plt.show()

# Fit a simple inverse model: time ≈ a / threads + b  (not theoretically exact, but illustrative)
# polyfit of degree 1 on x = 1/threads returns [slope, intercept] = [a, b]
p = np.polyfit(1.0 / df["threads"].to_numpy(), df["time_mean_sec"].to_numpy(), deg=1)
a_hat, b_hat = p[0], p[1]

# Evaluate the fitted model at the measured thread counts only
tt = np.array(sorted(df["threads"].unique().tolist()))
pred_time = a_hat * (1.0 / tt) + b_hat

# Plot 3: Observed vs Fitted (time ≈ a/threads + b)
plt.figure()
plt.title("Observed Runtime vs Fitted Model (a/threads + b)")
plt.xlabel("Threads")
plt.ylabel("Mean Runtime (s)")
plt.xticks(tt.tolist())
plt.plot(df["threads"].to_numpy(), df["time_mean_sec"].to_numpy(), marker="o", label="Observed")
plt.plot(tt, pred_time, marker="x", label="Fitted")
plt.legend()
plt.grid(True, linestyle="--", alpha=0.4)
plt.show()

# Save a small README-style text summary.
# The text contains the non-ASCII character "≈", so the encoding is fixed to
# UTF-8 instead of relying on the platform default.
summary_path = "Benchmark_summary.txt"
with open(summary_path, "w", encoding="utf-8") as f:
    f.write("Odd-Even Sort Parallel Benchmark Summary\n")
    f.write(f"Array size N = {N}, Trials = {TRIALS}\n\n")
    f.write(df.to_string(index=False))
    f.write("\n\nFitted model: time ≈ a/threads + b\n")
    f.write(f"a = {a_hat:.6f}, b = {b_hat:.6f}\n")

# Last expression of the cell: the notebook displays both output paths
csv_path, summary_path
