In [6]:
# !pip3 install pycuda cupy-cuda11x pynvjpeg line_profiler 

In [4]:
import cupy as cp
import numpy as np
import cv2 

# Decode the test image with OpenCV. `img` is not referenced again in this
# cell; the same file is read a second time into `x_cpu_src` further down.
img = cv2.imread('/py/crop/9/3.jpg')

# Create a pinned-memory pool and register its malloc as CuPy's pinned memory
# allocator.
# NOTE(review): presumably this makes cp.cuda.alloc_pinned_memory() (used by
# _pin_memory below) draw from this pool - confirm against the CuPy docs.
pinned_memory_pool = cp.cuda.PinnedMemoryPool()
cp.cuda.set_pinned_memory_allocator(pinned_memory_pool.malloc)


def _pin_memory(array):
    """Copy ``array`` into a freshly allocated pinned host buffer.

    Args:
        array: Source array; its ``nbytes``, ``dtype``, ``size`` and ``shape``
            are used to size and shape the pinned buffer.

    Returns:
        A NumPy array with the same shape and dtype as ``array`` whose storage
        is the buffer returned by ``cp.cuda.alloc_pinned_memory`` and whose
        contents are a copy of ``array``.
    """
    pinned_buf = cp.cuda.alloc_pinned_memory(array.nbytes)
    # View the raw pinned buffer as a flat array of the right dtype, then give
    # it the source array's shape before copying the data in.
    flat_view = np.frombuffer(pinned_buf, dtype=array.dtype, count=array.size)
    pinned_array = flat_view.reshape(array.shape)
    pinned_array[...] = array
    return pinned_array

# SIZE = 1920 * 1080 *3
# x_cpu_src = np.arange(SIZE, dtype=np.uint8)
# Host-side source data: the decoded test image. cv2.imread returns None
# (instead of raising) when the file is missing or cannot be decoded, so fail
# early with a clear error rather than an AttributeError on `.shape`.
x_cpu_src = cv2.imread('/py/crop/9/3.jpg')
if x_cpu_src is None:
    raise FileNotFoundError("cv2.imread could not read '/py/crop/9/3.jpg'")
print(x_cpu_src.shape,x_cpu_src.size)
# Device-side source data: an arange pattern with the same shape as the image.
x_gpu_src = cp.arange(x_cpu_src.size, dtype=np.uint8).reshape(x_cpu_src.shape)

# # synchronous
# stream = cp.cuda.Stream.null
# start = stream.record()
# x_gpu_dst = cp.empty(x_cpu_src.shape, x_cpu_src.dtype)
# x_gpu_dst.set(x_cpu_src)
# x_cpu_dst = x_gpu_src.get()
# end = stream.record()

# print('Synchronous Device to Host / Host to Device (ms)')
# print(cp.cuda.get_elapsed_time(start, end))

# asynchronous
# Destination buffers: one on the device (for host -> device) and one on the
# host (for device -> host).
x_gpu_dst = cp.empty(x_cpu_src.shape, x_cpu_src.dtype)
x_cpu_dst = np.empty(x_gpu_src.shape, x_gpu_src.dtype)

# Pinned copies of the host-side buffers used for the transfers below.
x_pinned_cpu_src = _pin_memory(x_cpu_src)
x_pinned_cpu_dst = _pin_memory(x_cpu_dst)

# Issue the host -> device copy on one stream and the device -> host copy on a
# second, nested stream; time the whole thing with events on the outer stream.
with cp.cuda.stream.Stream() as stream_htod:
    start = stream_htod.record()
    x_gpu_dst.set(x_pinned_cpu_src)
    with cp.cuda.stream.Stream() as stream_dtoh:
        x_gpu_src.get(out=x_pinned_cpu_dst)
        stream_dtoh.synchronize()
    stream_htod.synchronize()
    end = stream_htod.record()
    # record() only enqueues the event on the stream. Wait for it to complete,
    # otherwise get_elapsed_time() can fail because `end` is not ready yet.
    end.synchronize()

print('Asynchronous Device to Host / Host to Device (ms)')
print(cp.cuda.get_elapsed_time(start, end))

In [5]:
import numpy as np

import cupy as cp
from nvjpeg import NvJpeg

# Read the test image through the NvJpeg wrapper.
# NOTE(review): nj.read presumably returns a host-side NumPy array (the code
# below uses its .shape, .dtype and .nbytes) - confirm against pynvjpeg.
nj = NvJpeg()
img_host = nj.read('/py/crop/9/3.jpg') 

# Create a pinned-memory pool and register its malloc as CuPy's pinned memory
# allocator. This rebinds `pinned_memory_pool` if an earlier cell already
# created one.
pinned_memory_pool = cp.cuda.PinnedMemoryPool()
cp.cuda.set_pinned_memory_allocator(pinned_memory_pool.malloc)

def _pin_memory(array):
    """Copy ``array`` into a freshly allocated pinned host buffer.

    Args:
        array: Source array; its ``nbytes``, ``dtype``, ``size`` and ``shape``
            are used to size and shape the pinned buffer.

    Returns:
        A NumPy array with the same shape and dtype as ``array`` whose storage
        is the buffer returned by ``cp.cuda.alloc_pinned_memory`` and whose
        contents are a copy of ``array``.
    """
    # The buffer comes from alloc_pinned_memory, i.e. it is pinned host memory
    # (not device memory), and NumPy can wrap it directly.
    pinned_buf = cp.cuda.alloc_pinned_memory(array.nbytes)
    flat_view = np.frombuffer(pinned_buf, dtype=array.dtype, count=array.size)
    pinned_array = flat_view.reshape(array.shape)
    pinned_array[...] = array
    return pinned_array

# Destination buffer on the device with the image's shape and dtype, plus a
# pinned host copy of the image to transfer from.
img_device = cp.empty(img_host.shape, img_host.dtype)
img_pinned_host = _pin_memory(img_host)

# Time the host -> device copy with events recorded on a dedicated stream.
with cp.cuda.stream.Stream() as stream_htod:
    start = stream_htod.record()
    img_device.set(img_pinned_host)
    stream_htod.synchronize()
    end = stream_htod.record()
    # record() only enqueues the event on the stream. Wait for it to complete,
    # otherwise get_elapsed_time() can fail because `end` is not ready yet.
    end.synchronize()
print('Asynchronous Host to Device (ms)')
print(cp.cuda.get_elapsed_time(start, end))


Asynchronous Host to Device (ms)
0.09881599992513657


In [8]:
# import cv2
# from nvjpeg import NvJpeg
# from line_profiler import LineProfiler

# nj = NvJpeg()
# profile = LineProfiler()

# @profile
# def inner_nj():
#     img = nj.read('/py/crop/9/3.jpg')
#     nj_jpg = nj.encode(img)
    
# @profile
# def inner_cv2():
#     img = cv2.imread('/py/crop/9/3.jpg')
#     cv2_jpg = cv2.imencode('.jpg',img)[1]

# for _ in range(1000):
#     inner_nj()
#     inner_cv2()
# profile.print_stats()

In [9]:
# Record the GPU, driver version and CUDA version of the machine the cells above were run on.
!nvidia-smi

Tue Sep 27 22:18:49 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.63.01    Driver Version: 470.63.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:09:00.0 Off |                  N/A |
| 31%   32C    P2    54W / 300W |    380MiB / 11016MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [10]:
# List /usr/local to see which CUDA toolkit directories are present.
!ls /usr/local/

bin   cuda-11	 cuda-11.4  games    lib  mpi  sbin   src
cuda  cuda-11.3  etc	    include  man  nvm  share  ucx
