<a href="https://colab.research.google.com/github/kosei-s/MachineLearningPractice/blob/master/chainer_tutorial_note5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 10章

In [0]:
import numpy as np
import cupy as cp

In [0]:
def get_w_np(x, t):
  """Solve the normal equations w = (x^T x)^-1 x^T t with NumPy.

  Args:
    x: 2-D array; the notebook passes an (N, N) design matrix.
    t: 2-D array of targets; the notebook passes an (N, 1) column.

  Returns:
    The weight array produced by inverting x^T x explicitly and
    multiplying it by x^T and then by t.
  """
  x_transposed = x.T
  # Gram matrix x^T x, inverted explicitly (same operations as the CuPy version).
  gram_inverse = np.linalg.inv(x_transposed.dot(x))
  pseudo_inverse = gram_inverse.dot(x_transposed)
  return pseudo_inverse.dot(t)

In [0]:
# Small random problem: an N x N design matrix and an N x 1 target column.
N = 10
x, t = np.random.rand(N, N), np.random.rand(N, 1)

# Solve for the weights on the CPU and show them.
w = get_w_np(x, t)
print(w)

[[-1.38109302]
 [-0.44175243]
 [-0.77751913]
 [ 2.00281533]
 [-2.13349892]
 [-1.45528867]
 [ 0.7220475 ]
 [ 0.72234294]
 [ 1.55890271]
 [ 1.94650841]]


In [0]:
import time

In [0]:
# Time one CPU solve of the problem defined above.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() follows the system clock, which can be adjusted
# mid-measurement and may have coarse resolution.
time_start = time.perf_counter()

w = get_w_np(x, t)

time_end = time.perf_counter()

elapsed_time = time_end - time_start

print('{:.5f} sec'.format(elapsed_time))

0.00326 sec


In [0]:
# Elapsed seconds of the NumPy solve for each problem size, in loop order.
times_cpu = []

for N in [10, 100, 1000, 10000]:
  # Re-seed for every size so each measurement uses reproducible inputs.
  np.random.seed(0)
  x = np.random.rand(N, N)
  t = np.random.rand(N, 1)

  # Only the solve is timed; input generation happens before the clock starts.
  # perf_counter() is monotonic and high-resolution, unlike time.time(), which
  # tracks the (adjustable) system clock.
  time_start = time.perf_counter()

  w = get_w_np(x, t)

  time_end = time.perf_counter()

  elapsed_time = time_end - time_start

  print('N={:<5}:{:>8.5f} sec'.format(N, elapsed_time))

  times_cpu.append(elapsed_time)

N=10   : 0.00414 sec
N=100  : 0.00946 sec
N=1000 : 0.15164 sec
N=10000:142.21193 sec


In [0]:
def get_w_cp(x, t):
  """Solve the normal equations w = (x^T x)^-1 x^T t with CuPy.

  Mirrors get_w_np step for step, but uses cp.linalg.inv for the inverse.

  Args:
    x: 2-D array; the notebook passes an (N, N) CuPy array.
    t: 2-D array of targets; the notebook passes an (N, 1) CuPy array.

  Returns:
    The weight array produced by inverting x^T x explicitly and
    multiplying it by x^T and then by t.
  """
  x_transposed = x.T
  # Gram matrix x^T x, inverted explicitly (same operations as the NumPy version).
  gram_inverse = cp.linalg.inv(x_transposed.dot(x))
  pseudo_inverse = gram_inverse.dot(x_transposed)
  return pseudo_inverse.dot(t)

In [0]:
# Host-side (NumPy) inputs that are later copied to the GPU for comparison.
N = 10
x_np, t_np = np.random.rand(N, N), np.random.rand(N, 1)

In [0]:
# Convert the NumPy inputs to CuPy arrays so both backends see the same data.
x_cp, t_cp = (cp.asarray(host_array) for host_array in (x_np, t_np))

In [0]:
# Solve the same system once with NumPy and once with CuPy.
w_np, w_cp = get_w_np(x_np, t_np), get_w_cp(x_cp, t_cp)

In [0]:
# Print the two solutions one after the other so they can be compared by eye.
for heading, weights in (('NumPy:\n', w_np), ('\nCuPy:\n', w_cp)):
  print(heading, weights)

NumPy:
 [[ 3.10913241]
 [-4.32028319]
 [ 1.09894125]
 [ 1.63321226]
 [ 1.25977854]
 [-0.89789306]
 [-0.87023945]
 [ 1.09654016]
 [ 1.19753311]
 [-1.3647516 ]]

CuPy:
 [[ 3.10913241]
 [-4.32028319]
 [ 1.09894125]
 [ 1.63321226]
 [ 1.25977854]
 [-0.89789306]
 [-0.87023945]
 [ 1.09654016]
 [ 1.19753311]
 [-1.3647516 ]]


In [0]:
# Elapsed seconds of the CuPy solve for each problem size, in loop order.
times_gpu = []

for N in [10, 100, 1000, 10000]:
  # Re-seed for every size so each measurement uses reproducible inputs.
  cp.random.seed(0)
  x = cp.random.rand(N, N)
  t = cp.random.rand(N, 1)

  # Wait for work already queued on the default stream (the input generation
  # above) to finish, so it is not counted in the measurement.
  cp.cuda.Stream.null.synchronize()

  # perf_counter() is monotonic and high-resolution, unlike time.time(), which
  # tracks the (adjustable) system clock.
  time_start = time.perf_counter()

  w = get_w_cp(x, t)

  # Wait for the solve itself to finish on the default stream before stopping
  # the clock; otherwise only the time to enqueue the work would be measured.
  cp.cuda.Stream.null.synchronize()

  time_end = time.perf_counter()

  elapsed_time = time_end - time_start

  print('N={:<5}:{:>8.5f} sec'.format(N, elapsed_time))

  times_gpu.append(elapsed_time)

N=10   : 0.00078 sec
N=100  : 0.00312 sec
N=1000 : 0.11783 sec
N=10000:27.31598 sec


In [0]:
import tabulate

# Problem sizes, listed in the same order the two benchmark loops ran them.
N = [10, 100, 1000, 10000]

# Arrays make the CPU/GPU speed-up a single element-wise division.
times_cpu = np.asarray(times_cpu)
times_gpu = np.asarray(times_gpu)
ratio = list(map('{:.2f} x'.format, times_cpu / times_gpu))

# One row per problem size: size, CPU seconds, GPU seconds, formatted speed-up.
table_rows = zip(N, times_cpu, times_gpu, ratio)
column_titles = ['N', 'NumPyでの実行時間 (sec)', 'CuPy での実行時間 (sec)', '高速化倍率']
table = tabulate.tabulate(table_rows, headers=column_titles)

print(table)

    N    NumPyでの実行時間 (sec)    CuPy での実行時間 (sec)  高速化倍率
-----  -------------------------  -------------------------  ------------
   10                 0.00414228                0.000777483  5.33 x
  100                 0.00946474                0.00312018   3.03 x
 1000                 0.151636                  0.117833     1.29 x
10000               142.212                    27.316        5.21 x
