Notebook for running the code examples from the book [CUDA by Example](https://developer.nvidia.com/cuda-example) in Google Colab.

# Setup

In [1]:
!git clone https://github.com/tpn/cuda-by-example book

fatal: destination path 'book' already exists and is not an empty directory.


In [2]:
!apt install freeglut3-dev xvfb ffmpeg
!pip install pyvirtualdisplay


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
freeglut3-dev is already the newest version (2.8.1-6).
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.11).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.


In [3]:
!pip install nvcc4jupyter
%load_ext nvcc4jupyter

Detected platform "Colab". Running its setup...
Source files will be saved in "/tmp/tmpm4aqstvr".


In [4]:
import os
import subprocess

import pyvirtualdisplay
import moviepy.editor

# Build and run examples

## Basic console-only examples

In [5]:
!nvcc book/chapter03/enum_gpu.cu
!time ./a.out

   --- General Information for device 0 ---
Name:  Tesla T4
Compute capability:  7.5
Clock rate:  1590000
Device copy overlap:  Enabled
Kernel execution timeout :  Disabled
   --- Memory Information for device 0 ---
Total global mem:  15835660288
Total constant Mem:  65536
Max mem pitch:  2147483647
Texture Alignment:  512
   --- MP Information for device 0 ---
Multiprocessor count:  40
Shared mem per mp:  49152
Registers per mp:  65536
Threads in warp:  32
Max threads per block:  1024
Max thread dimensions:  (1024, 1024, 64)
Max grid dimensions:  (2147483647, 65535, 65535)


real	0m0.064s
user	0m0.001s
sys	0m0.032s


## Examples with OpenGL visual output

In [6]:
!nvcc -lglut -lGLdispatch -lGL book/chapter05/ripple.cu


          char* dummy = "";
                        ^


          char* dummy = "";
                        ^




In [7]:
TIMEOUT=10
W=1024
H=768
RECORDING_PATH="output.mp4"

display = pyvirtualdisplay.Display(visible=False, size=(W, H))
display.start()
print(f"{os.environ['DISPLAY']=}")

if os.path.exists(RECORDING_PATH):
  os.remove(RECORDING_PATH)
ffmpeg_proc = subprocess.Popen(f"ffmpeg -f x11grab -s {W}x{H} -i {os.environ['DISPLAY']} -r 30 -t {TIMEOUT} {RECORDING_PATH}", shell=True)
!timeout {TIMEOUT} ./a.out
ffmpeg_proc.wait()
!ls -lh {RECORDING_PATH}

display.stop()
moviepy.editor.ipython_display(RECORDING_PATH)

os.environ['DISPLAY']=':0'
-rw-r--r-- 1 root root 103K Aug  3 18:50 output.mp4


# Run CUDA and book samples inline

In [8]:
%%cuda -c "-I book/common"
// Inline sample: launch a one-thread kernel that adds two integers on the GPU
// and copy the result back to the host. Compiled with book/common on the
// include path so that "book.h" (which supplies HANDLE_ERROR) is found.
#include "book.h"

// Stores a + b into the int that c points to.
__global__ void add( int a, int b, int *c ) {
    *c = a + b;
}

int main( void ) {
    int c;
    int *dev_c;
    HANDLE_ERROR( cudaMalloc( (void**)&dev_c, sizeof(int) ) );

    add<<<1,1>>>( 2, 7, dev_c );
    // A kernel launch returns no status itself; query it explicitly so that a
    // failed launch is reported instead of printing whatever ends up in c.
    HANDLE_ERROR( cudaGetLastError() );

    HANDLE_ERROR( cudaMemcpy( &c, dev_c, sizeof(int),
                              cudaMemcpyDeviceToHost ) );
    printf( "2 + 7 = %d\n", c );
    HANDLE_ERROR( cudaFree( dev_c ) );

    return 0;
}

2 + 7 = 9

