In [15]:
import time

import plotly.graph_objs as go
import plotly.io as pio
import torch
import tqdm
import tqdm.notebook  # `import tqdm` alone does not load the notebook submodule

# Benchmark configuration shared by the cells below.
device = torch.device("cuda")  # all measurements run on the GPU
# Vector lengths to benchmark: 5,000 up to 2,495,000 in steps of 5,000 (499 sizes).
sizes = list(range(5000, 500 * 5000, 5000))
num_iterations = 10  # repetitions per measurement (warm-up counts are derived from it)

In [16]:
# Benchmark torch.relu on `device` for every vector length in `sizes`.
# Produces two lists aligned with `sizes`:
#   avg_times[i]  - mean time of one relu call, in milliseconds
#   bandwidths[i] - size * 4 bytes divided by the mean time, in GB/s
avg_times = []
bandwidths = []

# Global warm-up on the largest vector so one-off start-up costs are paid
# before any measurement. The tensor is created once, directly on the
# device, instead of being regenerated and copied on every iteration.
x = torch.randn(sizes[-1], device=device)
for _ in range(10 * num_iterations):
    torch.relu(x)
torch.cuda.synchronize()

for size in tqdm.notebook.tqdm(sizes):
    # Random fp32 vector of length `size`, allocated directly on the device.
    x = torch.randn(size, device=device)

    # Per-size warm-up; its results are discarded.
    for _ in range(num_iterations):
        torch.relu(x)
    torch.cuda.synchronize()

    total_time_ns = 0
    for _ in range(num_iterations):
        # perf_counter_ns is monotonic, unlike the wall clock from time_ns.
        start_ns = time.perf_counter_ns()
        torch.relu(x)
        # Wait for the GPU to finish before stopping the timer.
        torch.cuda.synchronize()
        total_time_ns += time.perf_counter_ns() - start_ns

    avg_time_ns = total_time_ns / num_iterations
    # Nanoseconds -> milliseconds, matching the "Avg Time (ms)" labels used
    # when the results are saved and plotted.
    avg_times.append(avg_time_ns / 1e6)

    # bytes / ns is numerically GB/s. Only the size * 4 input bytes are
    # counted; NOTE(review): relu also writes an output of the same size,
    # so total memory traffic is presumably about twice this figure.
    bandwidths.append(size * 4 / avg_time_ns)

  0%|          | 0/499 [00:00<?, ?it/s]

In [17]:
# Write one CSV row per benchmarked size: the size, its average time and its
# bandwidth, under the column names given in the header line.
csv_header = "Size,Avg Time (ms),Bandwidth GB/s\n"
with open("relu_performance.csv", mode="w") as csv_file:
    csv_file.write(csv_header)
    for idx, n_entries in enumerate(sizes):
        csv_file.write(f"{n_entries},{avg_times[idx]},{bandwidths[idx]}\n")

In [18]:
# Plot average time (left axis) and bandwidth (right axis) against vector
# size, save the figure as a PNG, then display it in the notebook.
time_trace = go.Scatter(
    x=sizes,
    y=avg_times,
    mode="lines",
    name="Avg Time (ms)",
)
# The bandwidth trace is bound to the secondary y-axis configured below.
bandwidth_trace = go.Scatter(
    x=sizes,
    y=bandwidths,
    mode="lines",
    name="Bandwidth GB/s",
    yaxis="y2",
)

fig = go.Figure()
fig.add_trace(time_trace)
fig.add_trace(bandwidth_trace)

# Secondary axis drawn over the primary one, on the right, without a grid.
bandwidth_axis = {
    "title": "Bandwidth GB/s",
    "showgrid": False,
    "overlaying": "y",
    "side": "right",
}
fig.update_layout(
    title="ReLU performance on GPU",
    xaxis_title="Number of fp32 entries",
    yaxis_title="Avg Time (ms)",
    yaxis2=bandwidth_axis,
    legend={"x": 0.05, "y": 0.95},
)

pio.write_image(fig, file="relu_performance.png", format="png")
fig.show()