# Performance Analysis of Parallel N-Body Simulation Using CPU and GPU Acceleration

**Project Team:** Partha Pratim Das, Manash Das  
**Submission:** CDAC CINE & IIT Guwahati Campus

---

## 1. Introduction
In this notebook, we implement and benchmark a classic N-Body simulation. We compare a sequential CPU implementation (NumPy) against a parallel GPU implementation (CuPy) to demonstrate the power of High Performance Computing (HPC).

In [None]:
# Setup and Imports
import sys
import os
import time
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Ensure local modules can be imported
sys.path.append(os.path.abspath('../src'))

# Safe defaults for the optional GPU backend. If the import below fails before
# these names are bound, later cells can still evaluate `HAS_GPU` (and skip
# the GPU benchmark) instead of failing with a NameError.
NBodyGPU = None
HAS_GPU = False

try:
    from cpu_nbody import NBodyCPU
    from gpu_nbody import NBodyGPU, HAS_GPU
except ImportError as e:
    # Report the problem but keep the notebook usable; any name imported
    # before the failure stays bound, the GPU names fall back to the defaults.
    print(f"Error importing modules: {e}")
    print("Make sure you are running this from the efficient directory structure.")

# Plotting Style
# Select matplotlib's 'dark_background' style sheet and seaborn's "talk"
# plotting context; both calls change global plotting defaults, so they
# affect the figures created by the later cells.
plt.style.use('dark_background')
sns.set_context("talk")

## 2. Benchmarking Configuration
We will test the simulation performance across a range of particle counts ($N$).

- **N values**: 500, 1000, 2000, 5000, 10000 (the CPU benchmark is skipped for N > 5000 so the demo does not take too long)
- **Steps per run**: 10 (to average out fluctuations)
- **Repetitions**: 3 independent runs per N

In [None]:
# Particle counts to benchmark.
N_VALUES = [500, 1_000, 2_000, 5_000, 10_000]

# Arguments handed to run_simulation(): number of steps and time-step size.
STEPS, DT = 10, 0.01

# Result accumulators; the benchmark cell appends one entry per N to each.
cpu_times, gpu_times = [], []

## 3. Running the Benchmarks

In [None]:
# Benchmark driver: for every N in N_VALUES, time the CPU and GPU simulations,
# record the averages in cpu_times / gpu_times and print one table row.
#
# Conventions for the recorded values (relied on by the plotting cell):
#   None -> the benchmark was deliberately skipped
#   NaN  -> the benchmark failed, or no GPU is available

_REPEATS = 3        # independent runs averaged per N
_CPU_MAX_N = 5000   # CPU runs above this N are skipped (too slow for a demo)


def _mean_runtime(sim_cls, n):
    """Average ``run_simulation`` results over ``_REPEATS`` fresh simulations.

    Args:
        sim_cls: Simulation class; instantiated as ``sim_cls(n)`` for each run.
        n: Number of particles.

    Returns:
        Mean of the values returned by ``run_simulation(STEPS, DT)``, which
        this notebook treats as elapsed seconds.
    """
    return np.mean([sim_cls(n).run_simulation(STEPS, DT) for _ in range(_REPEATS)])


def _is_valid_time(t):
    """Return True only for a real, finite timing (not None, NaN or inf)."""
    return t is not None and bool(np.isfinite(t))


def _format_time(t):
    """Render a timing as a table cell: '(Skipped)', 'N/A' or seconds to 4 d.p."""
    if t is None:
        return "(Skipped)"
    return f"{t:.4f}" if np.isfinite(t) else "N/A"


# Start from empty result lists so that re-running this cell does not append a
# second set of timings that no longer lines up with N_VALUES.
cpu_times.clear()
gpu_times.clear()

print(f"{'N Particles':<15} | {'CPU Time (s)':<15} | {'GPU Time (s)':<15} | {'Speedup':<10}")
print("-" * 65)

for n in N_VALUES:
    # --- CPU Benchmark ---
    try:
        # Limit CPU runs for very large N to avoid hanging forever during demo
        cpu_avg = None if n > _CPU_MAX_N else _mean_runtime(NBodyCPU, n)
    except Exception as e:
        # Report the failure (as the GPU branch does) instead of hiding it.
        print(f"CPU Error: {e}")
        cpu_avg = float('nan')

    # --- GPU Benchmark ---
    try:
        gpu_avg = _mean_runtime(NBodyGPU, n) if HAS_GPU else float('nan')
    except Exception as e:
        print(f"GPU Error: {e}")
        gpu_avg = float('nan')

    # Store results
    cpu_times.append(cpu_avg)
    gpu_times.append(gpu_avg)

    # Speedup calc: NaN is truthy, so test validity explicitly rather than
    # relying on `if cpu_avg and gpu_avg`; also avoid dividing by zero.
    if _is_valid_time(cpu_avg) and _is_valid_time(gpu_avg) and gpu_avg > 0:
        speedup_cell = f"{cpu_avg / gpu_avg:.2f}x"
    else:
        speedup_cell = "N/A"

    # Exactly one row per N, including skipped or failed measurements.
    print(f"{n:<15} | {_format_time(cpu_avg):<15} | {_format_time(gpu_avg):<15} | {speedup_cell}")

## 4. Visualizing Results
We generate the comparison plots to verify our hypothesis.

In [None]:
# Plot the benchmark results: execution time per backend, then GPU speedup.
# A timing is usable only if it is neither None (skipped) nor NaN/inf
# (failed run or no GPU available).


def _is_valid_time(t):
    """Return True only for a real, finite timing (not None, NaN or inf)."""
    return t is not None and bool(np.isfinite(t))


# savefig() does not create missing directories, so make sure the target exists.
os.makedirs('../results', exist_ok=True)

# zip() stops at the shortest input, which keeps N and timings aligned even if
# the result lists hold more entries than N_VALUES.
timings = list(zip(N_VALUES, cpu_times, gpu_times))

# Filter each series on its own so that a skipped CPU run does not also drop
# the GPU measurement taken at the same N.
cpu_series = [(n, c) for n, c, _ in timings if _is_valid_time(c)]
gpu_series = [(n, g) for n, _, g in timings if _is_valid_time(g)]

# 1. Performance Comparison Plot
plt.figure(figsize=(10, 6))
for series, fmt, color, label in (
    (cpu_series, 'o-', '#FF5252', 'CPU (NumPy)'),
    (gpu_series, 's-', '#69F0AE', 'GPU (CuPy)'),
):
    if series:
        xs, ys = zip(*series)
        plt.plot(xs, ys, fmt, color=color, label=label, linewidth=2)
plt.xlabel('Number of Particles (N)')
plt.ylabel('Execution Time (seconds)')
plt.title('CPU vs GPU Performance: N-Body Simulation')
if cpu_series or gpu_series:
    # Only request a legend when at least one line was drawn.
    plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('../results/performance_comparison.png')
plt.show()

# 2. Speedup Plot
# Speedup needs both timings at the same N, and a non-zero GPU time.
paired = [
    (n, c, g)
    for n, c, g in timings
    if _is_valid_time(c) and _is_valid_time(g) and g > 0
]
if paired:
    plot_n = [n for n, _, _ in paired]
    speedups = [c / g for _, c, g in paired]
    plt.figure(figsize=(10, 6))
    plt.plot(plot_n, speedups, '^-', color='#448AFF', linewidth=2)
    plt.xlabel('Number of Particles (N)')
    plt.ylabel('Speedup Factor (CPU/GPU)')
    plt.title('GPU Speedup Scaling')
    plt.grid(True, alpha=0.3)
    plt.savefig('../results/speedup_curve.png')
    plt.show()

## 5. Animation (Verification)
Finally, we run a short visual simulation to ensure the physics looks correct (particles clustering).

In [None]:
import matplotlib.animation as animation

# Animation parameters
_ANIM_PARTICLES = 200   # small N keeps the CPU simulation responsive
_ANIM_DT = 0.05         # value passed to update_particles() for each frame
_ANIM_FRAMES = 50
_ANIM_LIMIT = 3         # every axis is drawn over [-_ANIM_LIMIT, _ANIM_LIMIT]

# Use CPU for small N visualization (easier to handle locally)
sim = NBodyCPU(n_particles=_ANIM_PARTICLES)
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')


def update(frame):
    """Advance the simulation once and redraw the 3D scatter plot.

    Args:
        frame: Frame index supplied by FuncAnimation; only used in the title.
    """
    sim.update_particles(_ANIM_DT)
    # clear() resets the axes, so the title, limits and hidden axes are
    # re-applied on every frame.
    ax.clear()
    ax.scatter(sim.pos[:, 0], sim.pos[:, 1], sim.pos[:, 2], s=10, c='cyan', alpha=0.6)
    ax.set_title(f"N-Body Simulation (Frame {frame})")
    ax.set_xlim(-_ANIM_LIMIT, _ANIM_LIMIT)
    ax.set_ylim(-_ANIM_LIMIT, _ANIM_LIMIT)
    ax.set_zlim(-_ANIM_LIMIT, _ANIM_LIMIT)
    ax.set_axis_off()


ani = animation.FuncAnimation(fig, update, frames=_ANIM_FRAMES, interval=50)

# The writer does not create missing directories, so make sure the output
# folder exists before saving.
os.makedirs('../results', exist_ok=True)
ani.save('../results/nbody_simulation.gif', writer='pillow', fps=20)
plt.show()
print("Animation saved to results/nbody_simulation.gif")