In [1]:
%load_ext Cython

In [2]:
from numba import jit
@jit
def Num_NpDot(a, b):
    return np.dot(a, b)
@jit    
def Num_Dot(a, b):
    c = np.zeros((a.shape[0], b.shape[1]))
    for i in range(a.shape[0]):
        for j in range(b.shape[1]):
            for k in range(a.shape[1]):
                c[i][j] += a[i][k] * b[k][j]
    return c

In [25]:
%%cython -a

import numpy as np
cimport numpy as cnp
cimport cython
from cython.view cimport array

@cython.boundscheck(False) # turn off bounds-checking for entire function
@cython.wraparound(False)  # turn off negative index wrapping for entire function

cpdef cnp.ndarray Cy_NpDot(cnp.ndarray a, cnp.ndarray b):
    return np.dot(a, b)

cpdef cnp.ndarray Cy_Dot(cnp.ndarray a, cnp.ndarray b):
    cdef cnp.ndarray c = np.zeros((a.shape[0], b.shape[0]),dtype=np.float32)
    cdef int i,j,k
    for i in range(a.shape[0]):
        for j in range(b.shape[1]):
            for k in range(a.shape[1]):
                c[i, j] += a[i, k] * b[k, j]
                #c[i][j] += a.item(i,k) * b.item(k,j)
    return c

cpdef array  Cy_Naive(double[:,:] a, double[:,:] b):

    c = array(shape=(a.shape[0], b.shape[0]), itemsize=sizeof(double), format='d')

    for i in range(a.shape[0]):
        for j in range(b.shape[1]):
            for k in range(a.shape[1]):
                c[i][j] += a[i][k] * b[k][j]
    return c

In [5]:
N = 100 # 下の表のNと一致
# Generate NxN randomized matrix
ma = np.random.rand(N, N).astype(np.float32)
mb = np.random.rand(N, N).astype(np.float32)

In [22]:
%timeit Cy_NpDot(ma,mb)

120 µs ± 3.77 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [26]:
%timeit Cy_Dot(ma,mb)

5.92 s ± 864 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [24]:
ma = np.random.rand(N, N).astype(np.double)
mb = np.random.rand(N, N).astype(np.double)
%timeit Cy_Naive(ma,mb)

4.89 s ± 104 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [29]:
%timeit Num_NpDot(ma,mb)

114 µs ± 4.27 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [30]:
%timeit Num_Dot(ma,mb)

3.93 ms ± 64.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
