In [1]:
import numpy as np
import cupy as cp

In [2]:
def get_w_np(x, t):
    xx = np.dot(x.T, x)
    xx_inv = np.linalg.inv(xx)
    xt = np.dot(x.T, t)
    w = np.dot(xx_inv, xt)
    return w

In [3]:
# 一番小さいサイズの行列の準備
N = 10

x = np.random.rand(N, N)
t = np.random.rand(N, 1)
w = get_w_np(x, t)

# 求めた w を表示
print(w)

[[-0.70975636]
 [ 0.55478865]
 [-0.89382007]
 [ 0.88947015]
 [ 0.19210896]
 [-0.93472751]
 [ 0.79495975]
 [-2.29963564]
 [ 2.20434969]
 [ 0.80323226]]


In [4]:
import time

In [5]:
time_start = time.time()

# - - - 処理 - - -
w = get_w_np(x, t)
# - - - - - - - - -

time_end = time.time()

elapsed_time = time_end - time_start  # 経過時間

print('{:.5f} sec'.format(elapsed_time))

0.00000 sec


In [6]:
times_cpu = []  # CPUの計算時間保存用

for N in [10, 100, 1000, 10000]:
    np.random.seed(0)
    x = np.random.rand(N, N)
    t = np.random.rand(N, 1)

    time_start = time.time()

    # - - - 処理 - - -
    w = get_w_np(x, t)
    # - - - - - - - - -

    time_end = time.time()

    elapsed_time = time_end - time_start  # 経過時間

    print('N={:>5}:{:>8.5f} sec'.format(N, elapsed_time))

    times_cpu.append(elapsed_time)

N=   10: 0.00000 sec
N=  100: 0.01300 sec
N= 1000: 0.06000 sec
N=10000:26.81429 sec


In [7]:
def get_w_cp(x, t):
    xx = cp.dot(x.T, x)
    xx_inv = cp.linalg.inv(xx)
    xt = cp.dot(x.T, t)
    w = cp.dot(xx_inv, xt)
    return w

In [8]:
# NumPy を用いた乱数生成
N = 10
x_np = np.random.rand(N, N)
t_np = np.random.rand(N, 1)

In [9]:
# NumPy の ndarray から CuPy の ndarray へ変換
x_cp = cp.asarray(x_np)
t_cp = cp.asarray(t_np)

In [10]:
# NumPy
w_np = get_w_np(x_np, t_np)

# CuPy
w_cp = get_w_cp(x_cp, t_cp)

In [11]:
print('NumPy:\n', w_np)
print('\nCuPy:\n', w_cp)

NumPy:
 [[ 3.10913241]
 [-4.32028319]
 [ 1.09894125]
 [ 1.63321226]
 [ 1.25977854]
 [-0.89789306]
 [-0.87023945]
 [ 1.09654016]
 [ 1.19753311]
 [-1.3647516 ]]

CuPy:
 [[ 3.10913241]
 [-4.32028319]
 [ 1.09894125]
 [ 1.63321226]
 [ 1.25977854]
 [-0.89789306]
 [-0.87023945]
 [ 1.09654016]
 [ 1.19753311]
 [-1.3647516 ]]


In [12]:
times_gpu = []  # GPUの計算時間保存用

for N in [10, 100, 1000, 10000]:
    cp.random.seed(0)
    x = cp.random.rand(N, N)
    t = cp.random.rand(N, 1)

    # GPU 上での処理が終わるのを待機
    cp.cuda.Stream.null.synchronize()

    time_start = time.time()

    # - - - 処理 - - -
    w = get_w_cp(x, t)
    # - - - - - - - - -

    # GPU 上での処理が終わるのを待機
    cp.cuda.Stream.null.synchronize()

    time_end = time.time()

    elapsed_time = time_end - time_start  # 経過時間

    print('N={:>5}:{:>8.5f} sec'.format(N, elapsed_time))

    times_gpu.append(elapsed_time)

N=   10: 0.00099 sec
N=  100: 0.00200 sec
N= 1000: 0.05400 sec
N=10000:22.26084 sec


In [14]:
import tabulate

# N ごとの実行時間の差
N = [10, 100, 1000, 10000]
times_cpu = np.asarray(times_cpu)
times_gpu = np.asarray(times_gpu)
ratio = ['{:.2f} x'.format(r) for r in times_cpu / times_gpu]

# tabulate を用いてテーブルを作成
table = tabulate.tabulate(
    zip(N, times_cpu, times_gpu, ratio),
    headers=['N', 'NumPyでの実行時間 (sec)', 'CuPy での実行時間 (sec)', '高速化倍率'])

print(table)

N    NumPyでの実行時間 (sec)    CuPy での実行時間 (sec)  高速化倍率
-----  -------------------------  -------------------------  ------------
   10                  0                        0.000993729  0.00 x
  100                  0.0129998                0.002002     6.49 x
 1000                  0.0600016                0.0539973    1.11 x
10000                 26.8143                  22.2608       1.20 x
