Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Benchmarks: Add Benchmark - Add the source code of rocm kernel launch overhead benchmark. #136

Merged
merged 6 commits into from
Jul 27, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,57 @@
# Licensed under the MIT License.

cmake_minimum_required(VERSION 3.18)
project(kernel_launch_overhead LANGUAGES CUDA CXX)

include(../cuda_common.cmake)
project(kernel_launch_overhead LANGUAGES CXX)
cp5555 marked this conversation as resolved.
Show resolved Hide resolved

find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
# Cuda environment
message(STATUS "Found CUDA: " ${CUDAToolkit_VERSION})
enable_language(CUDA)

include(../cuda_common.cmake)

add_executable(kernel_launch_overhead cuda_kernel_launch.cu)
set_property(TARGET kernel_launch_overhead PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
else()
# ROCm environment
# Set ROCM_PATH
if(NOT DEFINED ENV{ROCM_PATH})
set(ROCM_PATH /opt/rocm)
else()
set(ROCM_PATH $ENV{ROCM_PATH})
endif()
# Set HIP_PATH
if(NOT DEFINED ENV{HIP_PATH})
set(HIP_PATH ${ROCM_PATH}/hip)
else()
set(HIP_PATH $ENV{HIP_PATH})
endif()
# Check if ROCm environment
if(EXISTS ${HIP_PATH})
# Search for hip in common locations
list(APPEND CMAKE_PREFIX_PATH ${HIP_PATH} ${ROCM_PATH})
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
set(CMAKE_MODULE_PATH "${HIP_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH})
guoshzhao marked this conversation as resolved.
Show resolved Hide resolved

# Find hip
find_package(HIP QUIET)
if(HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})

# Add HIP targets
set_source_files_properties(rocm_kernel_launch.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(kernel_launch_overhead rocm_kernel_launch.cc)
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
else()
message(FATAL_ERROR "No HIP found. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.")
endif()
else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
endif()
endif()


add_executable(kernel_launch_overhead cuda_kernel_launch.cu)
set_property(TARGET kernel_launch_overhead PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// Kernel launch benchmark which will launch one empty kernel and record the cost in event mode and wall mode.
// event mode: using hip event to record the elapsed time of kernel launch on device.
// wall mode: using host timer to record the elapsed time kernel launch on both host and device.

#include <algorithm>
#include <chrono>
#include <stdio.h>
#include <string>
#include <sys/time.h>
#include <thread>

#include "hip/hip_runtime.h"

__global__ void EmptyKernel() {}

double test_rocm_kernel_launch_event_time(int num_warmups, int num_steps) {
float time = 0.f;
double total_time = 0.0;

hipEvent_t start, stop;
hipEventCreate(&start);
hipEventCreate(&stop);

for (int i = 0; i < num_warmups; i++) {
hipEventRecord(start, 0);
EmptyKernel<<<1, 1>>>();
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
}

for (int i = 0; i < num_steps; i++) {
hipEventRecord(start, 0);
EmptyKernel<<<1, 1>>>();
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&time, start, stop);
total_time += time;
}

hipEventDestroy(start);
hipEventDestroy(stop);

return total_time;
}

double test_rocm_kernel_launch_wall_time(int num_warmups, int num_steps) {
double total_time = 0.0;

for (int i = 0; i < num_warmups; i++) {
EmptyKernel<<<1, 1>>>();
hipDeviceSynchronize();
}

struct timeval begin_tv, end_tv;
for (int i = 0; i < num_steps; i++) {
gettimeofday(&begin_tv, NULL);
EmptyKernel<<<1, 1>>>();
hipDeviceSynchronize();
gettimeofday(&end_tv, NULL);
total_time += (((end_tv.tv_sec) * 1000 + (end_tv.tv_usec) / 1000) -
((begin_tv.tv_sec) * 1000 + (begin_tv.tv_usec) / 1000));
}

return total_time;
}

char *getCmdOption(char **begin, char **end, const std::string &option) {
char **itr = std::find(begin, end, option);
if (itr != end && ++itr != end) {
return *itr;
}
return 0;
}

int main(int argc, char *argv[]) {
int num_warmups = 100;
int num_steps = 2000000;
int interval = 2000;

if (char *value = getCmdOption(argv, argv + argc, "-w")) {
num_warmups = std::stoi(value);
}

if (char *value = getCmdOption(argv, argv + argc, "-n")) {
num_steps = std::stoi(value);
}

if (char *value = getCmdOption(argv, argv + argc, "-i")) {
interval = std::stoi(value);
}

// Test the kernel launch event time.
double event_total_time = test_rocm_kernel_launch_event_time(num_warmups, num_steps);
printf("Kernel launch overhead - event time: %3.5f ms \n", event_total_time / num_steps);

// Sleep for interval milliseconds and run the next test.
std::this_thread::sleep_for(std::chrono::milliseconds(interval));

// Test the kernel launch wall time.
double wall_total_time = test_rocm_kernel_launch_wall_time(num_warmups, num_steps);
printf("Kernel launch overhead - wall time: %3.5f ms \n", wall_total_time / num_steps);

return 0;
}