In [1]:
# Write CUDA C++ style code
code = r'''
#include <iostream>
#include <cuda_runtime.h>

// Kernel: every launched thread prints one greeting line via device-side printf.
// Takes no arguments and touches no memory, so any grid/block shape is valid;
// the message is emitted once per thread.
// Device printf output is buffered and only reaches stdout when the host
// synchronizes with the device, so the caller must synchronize before exiting.
__global__ void helloFromGPU() {
    // Trailing newline terminates the line so later terminal output starts cleanly.
    printf("Hello, World from GPU!\n");
}

// Host entry point: prints a CPU greeting, launches helloFromGPU on a single
// thread, and reports any CUDA failure.
// Returns 0 on success and 1 if the launch or the kernel execution failed, so
// that callers (shell, notebook, CI) can detect the failure from the exit code.
int main() {
    std::cout << "Hello, World from CPU!" << std::endl;

    // One block containing one thread: the greeting is printed exactly once.
    helloFromGPU<<<1, 1>>>();

    // A kernel launch returns nothing, so errors are collected in two steps:
    //  - cudaGetLastError() reports launch-time problems (e.g. bad configuration);
    //  - cudaDeviceSynchronize() waits for the kernel to finish, reports faults
    //    raised while it ran, and flushes the device-side printf buffer.
    const cudaError_t launchStatus = cudaGetLastError();
    const cudaError_t execStatus   = cudaDeviceSynchronize();

    int exitCode = 0;

    if (launchStatus != cudaSuccess) {
        std::cerr << "Sync kernel error: " << cudaGetErrorString(launchStatus) << std::endl;
        exitCode = 1;
    }
    if (execStatus != cudaSuccess) {
        std::cerr << "Async kernel error: " << cudaGetErrorString(execStatus) << std::endl;
        exitCode = 1;
    }

    // Propagate failure instead of unconditionally reporting success.
    return exitCode;
}
'''

# Save to file
with open("hello_cpp.cu", "w") as f:
    f.write(code)

# Compile using nvcc with C++11 support (and correct GPU arch)
!nvcc -arch=sm_75 -std=c++11 hello_cpp.cu -o hello_cpp

# Run it
!./hello_cpp

Hello, World from CPU!
Hello, World from GPU!