In [1]:
# Print the version of the nvcc CUDA compiler available in this runtime.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


In [2]:
# Install the nvcc4jupyter plugin from GitHub.
# The install is pinned to the commit this notebook was originally run against
# (the one pip reported resolving to), because an unpinned git install follows
# the repository's default branch and can change underneath the notebook.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install git+https://github.com/andreinechaev/nvcc4jupyter.git@aac710a35f52bb78ab34d2e52517237941399eff

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-vc4m8im6
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-vc4m8im6
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit aac710a35f52bb78ab34d2e52517237941399eff
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... [?25l[?25hdone
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4304 sha256=a13531e6a6fbbca0d8578f715f29a185d8f43b80583d2cb02e215c9d334016bd
  Stored in directory: /tmp/pip-ephem-wheel-cache-ewcgnush/wheels/db/c1/1f/a2bb07bbb4a1ce3c43921252aeafaa6205f08637e292496f04
Successfully built NVCCPlugin
Installing collecte

In [3]:
# Load the nvcc_plugin IPython extension installed in the previous cell.
# NOTE(review): this is presumably what registers the %%cuda cell magic used
# further down - confirm against the plugin's documentation.
%load_ext nvcc_plugin

created output directory at /content/src
Out bin /content/result.out


In [5]:
%%cuda --name testGoogleColab.cu

#include <cuda_runtime_api.h>
#include <iostream>

// Host-only function: the __host__ qualifier restricts it to CPU code.
// Writes a fixed message to standard output and flushes the stream.
__host__ void HostOnly()
{
    const char* const message = "This function may only be called from the host";
    std::cout << message << std::endl;
}

// Define a function that will only be compiled for and called from device.
// The __device__ qualifier makes it callable from device code only; in this
// file it is called from the RunGPU kernel.
__device__ void DeviceOnly()
{
    // printf is used here instead of std::cout, which the host-side
    // functions in this file use for their output.
    printf("This function may only be called from the device\n");
}

// Portable function: the combined __host__ __device__ qualifiers make it
// callable from both CPU and GPU code.
// Returns x multiplied by itself.
__host__ __device__ float SquareAnywhere(float x)
{
    const float squared = x * x;
    return squared;
}

// Kernel (__global__): launched from the host, executed on the device.
// Calls the device-only function, then prints the square of x using the
// portable function.
__global__ void RunGPU(float x)
{
    DeviceOnly();

    const float squared = SquareAnywhere(x);
    printf("%f\n", squared);
}

/*
 Host-side counterpart of RunGPU: calls the host-only function and the
 portable function from ordinary CPU code (this is not a kernel).
 Note that, by default, if a function has no architecture
 specified, it is assumed to be __host__ by NVCC.
*/
void RunCPU(float x)
{
    HostOnly();

    const float squared = SquareAnywhere(x);
    std::cout << squared << std::endl;
}

/*
 Entry point: runs the host path, then launches the kernel on a single thread
 (one block of one thread) and waits for it to finish.

 Returns 0 on success and 1 if the kernel launch or its execution reports a
 CUDA error; the error description is written to standard error.
*/
int main()
{
    std::cout << "==== Sample 02 - Host / Device Functions ====\n" << std::endl;
    /*
     Expected output:
     "This function may only be called from the host"
     1764
     "This function may only be called from the device"
     1764.000000
    */

    RunCPU(42);

    RunGPU<<<1, 1>>>(42);

    // A kernel launch has no return value, so a failed launch (for example a
    // binary built for an architecture the GPU cannot run) would otherwise
    // pass silently and simply produce no device output.
    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess)
    {
        std::cerr << "RunGPU launch failed: "
                  << cudaGetErrorString(launchStatus) << std::endl;
        return 1;
    }

    // Wait for the kernel to finish before exiting so its printf output is
    // emitted; the returned status also reports errors raised while it ran.
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    if (syncStatus != cudaSuccess)
    {
        std::cerr << "RunGPU execution failed: "
                  << cudaGetErrorString(syncStatus) << std::endl;
        return 1;
    }

    return 0;
}

/*
Exercises:
1) Write a function that prints a message and can run on both the device and host
2) Revise the function from 1 so that the CPU version uses std::cout. Use the
__CUDA_ARCH__ macro to write separate, architecture-specific code paths.
*/

'File written in /content/src/testGoogleColab.cu'

In [6]:
# Compile the source file written by the %%cuda cell above.
# NOTE(review): the output is named "*.o", but no -c flag is passed and the
# next cell executes it directly, so this is presumably a linked executable
# rather than an object file.
# NOTE(review): -arch=sm_75 hard-codes the target GPU architecture - confirm it
# matches the GPU attached to this runtime (e.g. with nvidia-smi).
!nvcc -arch=sm_75 -o "/content/src/testGoogleColab.o" /content/src/testGoogleColab.cu

In [7]:
# Set rwxr-xr-x permissions on the compiled binary, then execute it.
!chmod 755 /content/src/testGoogleColab.o
!/content/src/testGoogleColab.o

==== Sample 02 - Host / Device Functions ====

This function may only be called from the host
1764
This function may only be called from the device
1764.000000
