In [1]:
import pycuda
import pycuda.driver as cuda
import pycuda.autoinit

from pycuda.compiler import SourceModule

import numpy as np
import time

print(pycuda.VERSION)

(2019, 1, 1)


In [2]:
# Create a tensor and copy it to gpu memory
# Initialize a zero vector
a = np.zeros(shape=(16,16))
# Cast it to float32 type
a = a.astype(np.float32)
# Allocate memory at device
a_gpu = cuda.mem_alloc(a.nbytes)
# Copy the cpu vector to gpu
cuda.memcpy_htod(a_gpu, a)

In [3]:
# mod = SourceModule("""
#     __global__ void cudaTest(float *a, float *b, float *c)
#     {
#         // For x axis
#         int idx = blockIdx.x * blockDim.x + threadIdx.x;
#         // For y axis
#         idx += (blockDim.y * gridDim.x) * (blockDim.y * blockIdx.y + threadIdx.y);
#         // Do your own operations
#         a[idx] += gridDim.x;
#         b[idx] += gridDim.y;
#         c[idx] += threadIdx.z;
#     }
# """)
mod = SourceModule("""
    __global__ void cudaTest2D(float *a)
    {
        int blockId = blockIdx.x + blockIdx.y * gridDim.x;
        int idx = blockId * (blockDim.x * blockDim.y * blockDim.z)
        + (threadIdx.z * (blockDim.x * blockDim.y))
        + (threadIdx.y * blockDim.x) + threadIdx.x;
        // Do your own operations
        a[idx] += 1;
    }
""")

In [4]:
# Let's compare operating time
# GPU
startTime = time.time()
func = mod.get_function("cudaTest2D")
func(a_gpu, b_gpu, c_gpu, block=(8,2,1), grid =(2,8))
consumedTime = time.time() - startTime
print("Time for gpu operation : ", consumedTime)

Time for gpu operation :  0.0005726814270019531


In [5]:
# CPU
startTime = time.time()
aNp = a + 1
consumedTime = time.time() - startTime
print("Time for cpu operation : ", consumedTime)

Time for cpu operation :  0.0002925395965576172


In [6]:
# Copy the result from device to host
a_result = np.empty_like(a)
cuda.memcpy_dtoh(a_result, a_gpu)
a_gpu.free()

In [7]:
print(a_result.astype(np.int))

[[0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]]
