In [1]:
!nvidia-smi

Thu May  1 15:27:09 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 Ti     Off | 00000000:02:00.0  On |                  N/A |
|  0%   46C    P5              18W / 200W |   1235MiB /  8192MiB |     29%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [5]:
import numpy as np
import cupy as cp
import time

In [6]:
def numpy_test(n):
    a = np.random.rand(n,n)
    b = np.random.rand(n,n)
    result1 = np.matmul(a,b)

def cupy_test(n):
    a = cp.random.rand(n,n)
    b = cp.random.rand(n,n)
    result1 = cp.matmul(a,b)

In [7]:
stime = time.time()
numpy_test(100)
print(time.time()-stime)

stime = time.time()
cupy_test(100)
print(time.time()-stime)

0.03816032409667969
0.41429758071899414


# 교수님이랑 수업에소~~

## 1. 구냥 for문

In [34]:
## 99 페이지
# 이건 구냥 for문 돌릴때 !
import time
stime = time.time()
x =   np.random.normal(0,1,1000000)
sum = 0
for i in range (len(x)):
    sum += x[i]*x[i]
print(time.time()-stime)
sum

0.20935368537902832


1000705.6947982503

## 2. cupy - for 문

In [37]:
# 쿠파이로 하는 방법~
stime = time.time()
x_gpu = cp.asarray(x)
sum = 0
for i in range(len(x_gpu)):
    sum += x_gpu[i]*x_gpu[i]
sum

print(time.time()-stime)

14.363718271255493


## 3. cupy - x.T * x

In [None]:
# 쿠파이로 하는 방법~
stime = time.time()
x_gpu = cp.asarray(x)
cp.dot(x_gpu.T, x_gpu)
# for i in range(len(x_gpu)):
    # sum += x_gpu[i]*x_gpu[i]

print(time.time()-stime)


0.003134489059448242


## 4. numpy - x.T * x

In [None]:
# 쿠파이로 하는 방법~
stime = time.time()

np.dot(x.T, x)
# for i in range(len(x_gpu)):
    # sum += x_gpu[i]*x_gpu[i]
sum
print(time.time()-stime)

0.007008790969848633


In [8]:
stime = time.time()
numpy_test(1000)
print(time.time()-stime)

stime = time.time()
cupy_test(1000)
print(time.time()-stime)

0.0206301212310791
0.0017046928405761719


In [9]:
stime = time.time()
numpy_test(10000)
print(time.time()-stime)

stime = time.time()
cupy_test(10000)
print(time.time()-stime)

5.790710210800171
0.0010285377502441406


In [10]:
stime = time.time()
x = np.random.normal(0,1,1000)
summ = 0.
for i in range(x.shape[0]):
    summ += x[i]
etime = time.time() - stime
print(summ, etime)

32.22710801550753 0.0052487850189208984


In [11]:
stime = time.time()
x = cp.random.normal(0,1,1000)
summ = 0.
for i in range(x.shape[0]):
    summ += x[i]
etime = time.time() - stime
print(summ, etime)


-0.20879929846965112 0.2813432216644287


In [12]:
squared_diff = cp.ElementwiseKernel(
...    'float32 x, float32 y',
...    'float32 z',
...    'z = (x - y) * (x - y)',
...    'squared_diff')

In [13]:
xx = cp.array([[0., 1., 2., 3., 4.],
              [5., 6., 7., 8., 9.]]).astype('float32')

In [14]:
yy = cp.array([0., 1., 2., 3., 4.]).astype('float32')

In [15]:
zz = squared_diff(xx, yy)
zz

array([[ 0.,  0.,  0.,  0.,  0.],
       [25., 25., 25., 25., 25.]], dtype=float32)

In [16]:
xx = cp.array([0., 1., 2., 3., 4.]).astype('float32')
yy = cp.array([0., 1., 2., 3., 4.]).astype('float32')

In [17]:
add_reverse = cp.ElementwiseKernel(
     'float32 x, raw float32 y',
     'float32 z',
     'z = x + y[_ind.size() - i - 1]',
     'add_reverse')

In [18]:
zz = add_reverse(xx, yy)
zz

array([4., 4., 4., 4., 4.], dtype=float32)

In [19]:
l2norm_kernel = cp.ReductionKernel(
    'T x',  # input params
    'T y',  # output params
    'x * x',  # map
    'a + b',  # reduce
    'y = sqrt(a)',  # post-reduction map
    '0',  # identity value   <- 뭔지 모르겠음
    'l2norm'  # kernel name
)
x = cp.arange(10, dtype=np.float32)
l2norm_kernel(x)

array(16.881943, dtype=float32)

In [20]:
x

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32)

In [21]:
x_cpu = x.get()

In [20]:
x_cpu

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32)

In [21]:
np.sqrt(np.sum(x_cpu**2))

16.881943

In [22]:
from numpy import linalg as la
la.norm(x_cpu)

16.881943

In [None]:
@cp.fuse()
def squared_diff(x, y):
    return (x - y) * (x - y)

In [None]:
x_cp = cp.arange(10)
y_cp = cp.arange(10)[::-1]
squared_diff(x_cp, y_cp)



array([81, 49, 25,  9,  1,  1,  9, 25, 49, 81])

In [None]:
x_cp, y_cp

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]))

In [None]:
@cp.fuse()
def l2norm(x):
    return cp.sqrt(cp.sum(x * x))
l2norm(x)

array(16.881943, dtype=float32)