# cuda-base

## 编译简化demo

In [36]:
import os
import shutil
import subprocess
class CudaAutoBuild(object):
    """Throwaway build directory for compiling and running small CUDA programs.

    Creating an instance makes ``root_dir`` (if needed) and deletes its
    non-hidden contents, so every build starts from a clean slate. The object
    can be used as a context manager; leaving the ``with`` block performs no
    cleanup.
    """

    # The constructor deletes files, so it only accepts directories that
    # resolve to somewhere strictly below this location.
    _SAFE_ROOT = "/tmp"

    def __init__(self, root_dir = "/tmp/cuda-base"):
        """Create ``root_dir`` if necessary and empty it.

        Args:
            root_dir: Directory to build in. Must resolve to a path strictly
                inside ``/tmp``.

        Raises:
            ValueError: If ``root_dir`` is ``/tmp`` itself or resolves outside
                of it (for example ``/tmp/../home`` or ``/tmpfoo``).
        """
        safe_root = os.path.realpath(self._SAFE_ROOT)
        resolved = os.path.realpath(root_dir)
        # Compare resolved paths instead of string prefixes: a prefix test lets
        # "/tmp/../etc" and "/tmpfoo" through, and an `assert` disappears
        # entirely when Python runs with -O.
        inside = os.path.commonpath([safe_root, resolved]) == safe_root
        if not inside or resolved == safe_root:
            raise ValueError(
                f"root_dir must be a directory inside {self._SAFE_ROOT}, got {root_dir!r}"
            )
        self.root_dir = root_dir
        os.makedirs(self.root_dir, exist_ok=True)
        self._clear_root()

    def _clear_root(self):
        """Delete every non-hidden entry directly inside ``root_dir``."""
        with os.scandir(self.root_dir) as scan:
            entries = list(scan)
        for entry in entries:
            # Dot-entries are kept, matching what the shell glob `root_dir/*`
            # selects.
            if entry.name.startswith("."):
                continue
            # Never follow symlinks: remove the link, not what it points to.
            if entry.is_dir(follow_symlinks=False):
                shutil.rmtree(entry.path)
            else:
                os.unlink(entry.path)

    def __enter__(self):
        """Return the builder itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *args, **kws):
        """Do nothing on exit; exceptions propagate and files stay on disk."""
        pass

    def add_file(self, filename, filestr):
        """Write ``filestr`` to ``filename`` inside the build directory.

        Surrounding whitespace is stripped from ``filestr`` and the file is
        terminated with two newlines.

        Args:
            filename: File name relative to ``root_dir``.
            filestr: Text content of the file.
        """
        # Explicit encoding so non-ASCII source text does not depend on the locale.
        with open(os.path.join(self.root_dir, filename), "w", encoding="utf-8") as f:
            f.write(f"{filestr.strip()}\n\n")

    def build(self, cmd=None):
        """Run a shell build command with ``root_dir`` as working directory.

        Args:
            cmd: Shell command line. Defaults to compiling ``hello.cu`` into
                ``hello`` with nvcc for sm_50.

        Returns:
            The exit status of the command (0 on success).
        """
        if cmd is None:
            cmd = "nvcc hello.cu --generate-code arch=compute_50,code=sm_50 -o hello"
        # `cmd` is a shell command line by contract; the working directory is
        # passed via `cwd` so `root_dir` is never interpolated into shell text.
        return subprocess.run(cmd, shell=True, cwd=self.root_dir).returncode

    def exec(self, cmd=None):
        """Mark a file in ``root_dir`` as executable and run it.

        Args:
            cmd: Name of the program inside ``root_dir``. Defaults to ``hello``.

        Returns:
            The exit status of the program, or the non-zero status of ``chmod``
            when the file could not be made executable (e.g. it does not exist).
        """
        if cmd is None:
            cmd = "hello"
        chmod = subprocess.run(["chmod", "+x", "--", cmd], cwd=self.root_dir)
        if chmod.returncode != 0:
            return chmod.returncode
        # A relative program path is looked up relative to `cwd`.
        program = os.path.join(os.curdir, cmd)
        return subprocess.run([program], cwd=self.root_dir).returncode

In [37]:
# Compile and run a minimal program whose kernel prints from the device.
with CudaAutoBuild() as cab:
    cab.add_file("hello.cu", r"""
#include <stdio.h>

__global__ void print_HelloWorld(void) {
    printf("Hello World! from thread [%d, %d] From device.\n", threadIdx.x, blockIdx.x);
}

int main() {
    printf("Hello World from host!\n");
    print_HelloWorld<<<1, 1>>>();

    // Launch-time failures (e.g. no kernel image built for this GPU) are only
    // reported by cudaGetLastError(); failures while the kernel runs are
    // reported when the host synchronizes with the device.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}
    """)
    cab.build()
    cab.exec()



Hello World from host!
Hello World! from thread [0, 0] From device.


## e01-HelloWorld

In [21]:
# create hello-world file
custr_HelloWorld = r"""
#include <stdio.h>

__global__ void print_HelloWorld(void) {
    printf("Hello World! from thread [%d, %d] From device.\n", threadIdx.x, blockIdx.x);
}

int main() {
    printf("Hello World from host!\n");
    print_HelloWorld<<<1, 1>>>();

    cudaDeviceSynchronize();
    return 0;
}
    """

!rm -rf build/*
!mkdir build
with open("build/hello-world.cu", "w") as f:
    f.write(custr_HelloWorld)


mkdir: cannot create directory ‘build’: File exists


In [23]:
# build: compile the CUDA source with nvcc, generating sm_50 machine code from
# the compute_50 virtual architecture, and write the binary to build/hello-world
!nvcc build/hello-world.cu \
    --generate-code arch=compute_50,code=sm_50 \
    -o build/hello-world


In [24]:
# exec: make sure the compiled binary has its executable bit set, then run it
!chmod +x build/hello-world 
!build/hello-world

Hello World from host!
Hello World! from thread [0, 0] From device.


## e02-add

In [44]:
# Add two integers on the GPU and copy the result back to the host.
with CudaAutoBuild() as cab:
    cab.add_file("hello.cu", r"""
#include <stdio.h>
#include <stdlib.h>


// Stop with a readable message when a CUDA runtime call reports an error.
static void check(cudaError_t err, const char *what) {
    if (err != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

// Kernel that adds two values and stores the sum in device memory
__global__ void gpu_add(int d_a, int d_b, int *d_c) {
    *d_c = d_a + d_b;
}

// main function
int main() {
    // Host inputs and the host variable that receives the answer
    int h_a = 125, h_b = 236;
    int h_c = 0;

    // Device pointer for the result
    int *d_c = NULL;

    // Allocate device memory for the result
    check(cudaMalloc((void**)&d_c, sizeof(int)), "cudaMalloc");

    // Kernel call; a failed launch is only visible through cudaGetLastError()
    gpu_add<<<1, 1>>>(h_a, h_b, d_c);
    check(cudaGetLastError(), "gpu_add launch");

    // Copy result from device memory to host memory
    check(cudaMemcpy(&h_c, d_c, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy");

    printf("%d + %d = %d\n", h_a, h_b, h_c);

    // Free up memory
    check(cudaFree(d_c), "cudaFree");

    return 0;
}
    """)
    cab.build()
    cab.exec()

125 + 236 = 361


## device count

In [46]:
# List every CUDA device together with its compute capability.
with CudaAutoBuild() as cab:
    cab.add_file("hello.cu", r"""
#include <stdio.h>

// main function
int main() {

    int deviceCount = 0;
    // Without this check a failing query leaves the loop with nothing to do
    // and the program exits successfully without printing anything.
    cudaError_t err = cudaGetDeviceCount(&deviceCount);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
        return 1;
    }
    for (int device = 0; device < deviceCount; ++device) {
        cudaDeviceProp deviceProp;
        err = cudaGetDeviceProperties(&deviceProp, device);
        if (err != cudaSuccess) {
            fprintf(stderr, "cudaGetDeviceProperties(%d) failed: %s\n",
                device, cudaGetErrorString(err)
            );
            return 1;
        }
        printf("Device %d has compute capability %d.%d. \n",
            device, deviceProp.major, deviceProp.minor
        );
    }
    return 0;

}
    """)
    cab.build()
    cab.exec()

Device 0 has compute capability 5.0.
