In [None]:
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np
import time
import matplotlib.pyplot as plt

In [None]:
# Kernel for left rotation
# CUDA C source for `left_rotate(input, output, n)`, kept as a string so it can
# be compiled at runtime.
# Each thread derives a global index from its thread and block indices. Threads
# whose index is below n copy input[idx] to output[(idx + n - 1) % n], so every
# element moves one slot toward the front and input[0] lands in output[n - 1].
# Threads with idx >= n do nothing, so a launch may cover more than n threads.
# The kernel reads from `input` and writes to `output`; it is written for two
# separate buffers.
kernel_code = """
__global__ void left_rotate(int *input, int *output, int n) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;

    if (idx < n) {
        // Calculate the new index after left rotation
        int new_idx = (idx + n - 1) % n;
        output[new_idx] = input[idx];
    }
}
"""

In [None]:
# Compile the kernel
mod = SourceModule(kernel_code)
left_rotate = mod.get_function("left_rotate")

# Input array
input_array = np.random.randint(0, 100, size=1024, dtype=np.int32)
array_size = input_array.size

# Host buffer that receives the rotated result. It has to exist before
# memcpy_dtoh writes into it, and must match the input's shape and dtype.
output_array = np.empty_like(input_array)

# Allocate memory for input and output arrays on the GPU
input_gpu = cuda.mem_alloc(input_array.nbytes)
output_gpu = cuda.mem_alloc(input_array.nbytes)

# Launch configuration: fixed-size blocks and enough of them to cover the whole
# array. The kernel's `idx < n` guard makes the surplus threads in the last
# block harmless, so the array length no longer has to fit in a single block.
threads_per_block = 256
blocks_per_grid = max(1, (array_size + threads_per_block - 1) // threads_per_block)

# Measure data transfer time (Host to Device)
# perf_counter() is a monotonic high-resolution clock, better suited to these
# very short intervals than the wall-clock time.time().
start_transfer_htod = time.perf_counter()
cuda.memcpy_htod(input_gpu, input_array)
end_transfer_htod = time.perf_counter()
htod_transfer_time = end_transfer_htod - start_transfer_htod

# Measure kernel execution time
start_kernel = time.perf_counter()
left_rotate(
    input_gpu,
    output_gpu,
    np.int32(array_size),
    block=(threads_per_block, 1, 1),
    grid=(blocks_per_grid, 1),
)
# The launch returns before the GPU has finished; wait for completion so the
# timer covers the kernel itself and its cost is not attributed to the
# device-to-host copy below.
cuda.Context.synchronize()
end_kernel = time.perf_counter()
kernel_execution_time = end_kernel - start_kernel

# Measure data transfer time (Device to Host)
start_transfer_dtoh = time.perf_counter()
cuda.memcpy_dtoh(output_array, output_gpu)
end_transfer_dtoh = time.perf_counter()
dtoh_transfer_time = end_transfer_dtoh - start_transfer_dtoh

# Total data transfer time
gpu_data_transfer_time = htod_transfer_time + dtoh_transfer_time

# Total GPU execution time
gpu_execution_time = gpu_data_transfer_time + kernel_execution_time

# Sequential CPU implementation (reference result for validation)
start_cpu = time.perf_counter()
cpu_output = np.roll(input_array, -1)
end_cpu = time.perf_counter()
cpu_execution_time = end_cpu - start_cpu

In [None]:
# Save plots
# Graph 1: total CPU time next to total GPU time (transfers + kernel)
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(
    ["CPU", "GPU"],
    [cpu_execution_time, gpu_execution_time],
    color=["blue", "orange"],
)
ax.set_ylabel("Execution Time (seconds)")
ax.set_title("Execution Time Comparison: CPU vs GPU")
fig.savefig("execution_time_comparison.png")

In [None]:
# Graph 2: Overhead Breakdown for GPU
# Bar chart splitting the GPU total into its two measured parts: host<->device
# copies and the kernel run. The kernel time is read from
# `kernel_execution_time`, the name the timing cell actually defines.
plt.figure(figsize=(8, 6))
plt.bar(
    ["Data Transfer", "Kernel Execution"],
    [gpu_data_transfer_time, kernel_execution_time],
    color=["green", "red"]
)
plt.ylabel("Time (seconds)")
plt.title("GPU Execution Time Breakdown")
plt.savefig("gpu_execution_time_breakdown.png")

In [None]:
# Graph 3: Relative Speedup
# Ratio of CPU time to total GPU time; values above 1 mean the GPU path was faster.
relative_speedup = cpu_execution_time / gpu_execution_time

fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(["Relative Speedup"], [relative_speedup], color="purple")
ax.set(
    ylabel="Speedup Factor (CPU/GPU)",
    title="Relative Speedup (GPU vs CPU)",
)
# Dashed reference line at the break-even point
ax.axhline(y=1, color="black", linestyle="--", label="Equal Performance")
ax.legend()
fig.savefig("relative_speedup.png")

In [None]:
# Graph 4: Performance Overhead
# Pie chart of the share of total GPU time spent on data transfer versus the
# kernel. The kernel time is read from `kernel_execution_time`, the name the
# timing cell actually defines.
plt.figure(figsize=(8, 6))
labels = ["Data Transfer", "Kernel Execution"]
sizes = [gpu_data_transfer_time, kernel_execution_time]
colors = ["green", "red"]
plt.pie(sizes, labels=labels, colors=colors, autopct="%1.1f%%", startangle=140)
plt.title("GPU Performance Overhead")
plt.savefig("gpu_performance_overhead.png")

In [None]:
# Print results
# Input, then the GPU and CPU outputs with their timings
print("Input Array:", input_array)

print("Output Array (Left Rotated) [GPU]:", output_array)
print("GPU Execution Time:", gpu_execution_time, "seconds")

print("Output Array (Left Rotated) [CPU]:", cpu_output)
print("CPU Execution Time:", cpu_execution_time, "seconds")

print("Relative Speedup:", relative_speedup)

# Validation: the GPU result must be element-for-element equal to the CPU reference
print(
    "Validation Passed: GPU and CPU results match."
    if np.array_equal(output_array, cpu_output)
    else "Validation Failed: GPU and CPU results do not match."
)