___
<a href='https://cafe.naver.com/jmhonglab'><p style="text-align:center;"><img src='https://lh3.googleusercontent.com/lY3ySXooSmwsq5r-mRi7uiypbo0Vez6pmNoQxMFhl9fmZJkRHu5lO2vo7se_0YOzgmDyJif9fi4_z0o3ZFdwd8NVSWG6Ea80uWaf3pOHpR4GHGDV7kaFeuHR3yAjIJjDgfXMxsvw=w2400'  class="center" width="50%" height="50%"/></p></a>
___
<center><em>Content Copyright by HongLab, Inc.</em></center>

# GPU 가속 소개

CPU 메모리 -> GPU 메모리 -> GPU로 연산 -> CPU 메모리

### [CuPy](https://cupy.dev/)

NVIDIA 그래픽 카드 필요

[CUDA Toolkit 설치](https://developer.nvidia.com/cuda-11.3.0-download-archive?target_os=Windows&target_arch=x86_64&target_version=10&target_type=exe_local) 후, 설치한 CUDA Toolkit 버전에 맞춰서 CuPy를 설치 (여기서는 11.3)  
```conda install -c conda-forge cupy cudatoolkit=11.3```



In [1]:
import time


class Timer:
    """Context manager that prints the wall-clock time spent in its ``with`` body.

    The clock is restarted every time the ``with`` block is entered, so an
    instance that was created ahead of time (or is reused for several blocks)
    reports only the time spent inside each block.

    Usage::

        with Timer():
            do_work()

    On exit it prints ``Elapsed time =  <seconds>`` and stores the same value
    in ``elapsed``. ``__exit__`` returns ``None``, so an exception raised in
    the body still propagates after the elapsed time has been printed.
    """

    def __init__(self):
        # Kept so that ``start`` exists right after construction, as before;
        # ``__enter__`` overwrites it with the real start of the timed block.
        self.start = time.perf_counter()
        # Seconds measured for the most recently finished block (None until then).
        self.elapsed = None

    def __enter__(self):
        # Start timing here rather than in ``__init__`` so that time between
        # constructing the object and entering the block is not counted.
        self.start = time.perf_counter()
        return self  # returned so the timer can be bound with ``as``

    def __exit__(self, *args):
        self.elapsed = time.perf_counter() - self.start
        print("Elapsed time = ", self.elapsed)

In [2]:
import numpy as np

# Fixed seed so that every run works on the same input values.
np.random.seed(0)

# Build the input up front; generating the data is deliberately not timed.
data_np = np.random.rand(100_000_000)

print(data_np)
print(data_np.dtype)

with Timer():
    # Five rounds of sqrt -> exp -> x ** 0.1 over the whole array,
    # followed by a single reduction to a scalar.
    for step in range(5):
        data_np = np.power(np.exp(np.sqrt(data_np)), 0.1)
    result = np.sum(data_np)

print(result)

# Output recorded from an earlier run:
# Elapsed time =  14.9029647000134
# 111116795.31966303

[0.5488135  0.71518937 0.60276338 ... 0.08870192 0.45256207 0.17769845]
float64
Elapsed time =  15.251978100044653
111116795.31966303


In [3]:
# Restart 후 측정
import cupy as cp
import numpy as np

# Same seed and size as the NumPy cell so both runs see identical input.
np.random.seed(0)
data_np = np.random.rand(100_000_000)

# Copy the data to the GPU up front; creating/transferring it is not timed.
data_cp = cp.asarray(data_np)

print(type(data_cp))
print(data_cp.device)
print(data_cp.dtype)
print(data_cp.shape)
print(data_cp)

with Timer():

    for r in range(5):
        data_cp = cp.sqrt(data_cp)
        data_cp = cp.exp(data_cp)
        data_cp = cp.power(data_cp, 0.1)
    result = data_cp.sum()
    # CuPy queues GPU kernels asynchronously, and ``sum()`` returns a device
    # array without waiting. Block until the queued work has finished so the
    # timer covers the computation itself and not only the kernel launches.
    cp.cuda.get_current_stream().synchronize()

print(result)

# Output recorded from an earlier run, before the explicit synchronize
# above was added:
# Elapsed time =  0.36881319992244244
# 111116795.31966344

<class 'cupy._core.core.ndarray'>
<CUDA Device 0>
float64
(100000000,)
[0.5488135  0.71518937 0.60276338 ... 0.08870192 0.45256207 0.17769845]
Elapsed time =  0.3724399001803249
111116795.31966344


### [PyTorch](https://pytorch.org/get-started/locally/)

CUDA Toolkit과 cuDNN을 설치  
NVIDIA GPU가 없어도 CPU로 사용 가능


In [4]:
import torch
import numpy as np

# Same seed and size as the NumPy cell so both runs see identical input.
np.random.seed(0)
data_np = np.random.rand(100_000_000)

# Wrap the NumPy array as a CPU tensor.
data_torch = torch.from_numpy(data_np)

print(data_torch.device)

# Move the data to the GPU up front; the transfer is not part of the timing.
data_cuda = data_torch.cuda()

print(data_cuda.device)

# Make sure nothing queued before this point is still running on the GPU
# when the timer starts.
torch.cuda.synchronize()

with Timer():
    for r in range(5):
        data_cuda = torch.sqrt(data_cuda)
        data_cuda = torch.exp(data_cuda)
        data_cuda = torch.pow(data_cuda, 0.1)
    result = data_cuda.sum()
    # CUDA kernels are launched asynchronously and ``sum()`` returns a GPU
    # tensor without waiting. Block until the GPU has finished so the timer
    # covers the computation itself and not only the kernel launches.
    torch.cuda.synchronize()

# ``.cpu()`` copies the scalar back to host memory for printing.
print(result.cpu().numpy())

# Output recorded from an earlier run, before the explicit synchronize
# calls above were added:
# Elapsed time =  1.0253177001141012
# 111116795.31966342

cpu
cuda:0
Elapsed time =  1.0253177001141012
111116795.31966342
