In [1]:
import numpy as np

In [2]:
def create_data_file_from_list(lst, out_filename, dtype, shape):
    """Write a list to a raw binary file through a numpy memory map.

    The file is created (or truncated if it already exists), grown to hold
    ``shape`` elements of ``dtype``, filled with the contents of ``lst`` and
    flushed to disk. A one-line summary with the size in MB is printed.

    Args:
        lst: The list (or array-like) that will be written in the file. It
            must support slicing and be assignable to an array of shape
            ``shape``.
        out_filename: The path of the binary file. Its parent directory must
            already exist; it is not created here.
        dtype: The type of the numpy array.
        shape: The shape of the numpy array.
    """
    # Open the file exactly once: the context manager owns this handle and
    # closes it on exit. Re-opening the file inside the block would rebind
    # the name and leave a second, never-closed handle behind.
    with open(out_filename, 'wb+') as out_file:
        # np.memmap defaults to mode 'r+', which extends the freshly
        # truncated file to the size implied by dtype and shape.
        dat_file = np.memmap(out_file, dtype=dtype, shape=shape)
        dat_file[:] = lst[:]
        dat_file.flush()
        size = float(dat_file.nbytes) / (1024 ** 2)
        print('written %s : %.3f MB' % (out_filename, size))


def load_data(file_path, dtype='int32', shape=None):
    """Map a raw binary file from the filesystem as a numpy array.

    Args:
        file_path: Path of the binary file to map.
        dtype: Element type used to interpret the bytes.
        shape: Shape of the resulting array; when None, numpy derives a
            one-dimensional shape from the file size.

    Returns:
        The ``np.memmap`` built over the file, opened with numpy's default
        mode.
    """
    mapped = np.memmap(file_path, dtype=dtype, shape=shape)
    return mapped

In [3]:
# Dimensions used to view the flat uint8 feature file as an (n, p) matrix;
# the reshape fails if the file does not hold exactly n * p bytes.
n, p = 4459542, 156
m = load_data('./dataset/mrh_ddea/features.dat', dtype='uint8').reshape((n, p))

In [4]:
# Display the shape of the mapped feature matrix (set by the reshape to (n, p)).
m.shape

(4459542, 156)

In [5]:
# Persist the transpose of m as a (p, n) uint8 file.
create_data_file_from_list(
    lst=m.T,
    out_filename='./dataset/test/mt.dat',
    dtype='uint8',
    shape=(p, n),
)

written ./dataset/test/mt.dat : 663.460 MB


In [66]:
# Compile ./src/test/gemm.cpp together with ./src/cpp/Array.cpp as C++11 with
# all warnings enabled and -O3 optimisation; the executable is written to
# ./bin/gemm.
!c++ -Wall -std=c++11 -O3 ./src/test/gemm.cpp ./src/cpp/Array.cpp -o ./bin/gemm

    float d[p];
[0;1;32m          ^


In [67]:
# Run the compiled ./bin/gemm executable under the shell's `time` so that the
# real/user/sys durations are reported after the program's own output.
!time ./bin/gemm

Loading data file : ./dataset/test/mt.dat
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

real	0m3.710s
user	0m3.653s
sys	0m0.047s


In [65]:
# Back-of-envelope figure: n * 40 * 40 divided by 3.6e9.
# NOTE(review): presumably an operation count for 40 columns divided by a
# 3.6 GHz rate, giving seconds to compare with the timing above - confirm.
n * 40 ** 2 / (3.6 * 10 ** 9)

1.9820186666666666

In [6]:
# Map the transposed file as a (p, n) uint8 array.
Xt = load_data('./dataset/test/mt.dat', dtype='uint8', shape=(p, n))

In [7]:
# Largest value stored in row 1 of Xt.
Xt[1].max()

13

In [15]:
# Float 0/1 vector marking the positions of row 1 of Xt that equal 1.
a = (Xt[1] == 1).astype(float)

In [17]:
%%timeit
# Timed statement: inner product of the vector `a` with itself.
np.dot(a, a)

1.81 ms ± 37.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [19]:
# Back-of-envelope arithmetic: 1800 * (1800 * 0.012).
# NOTE(review): the meaning and units of 1800 and 0.012 are not recorded in
# the notebook - TODO document what is being estimated.
1800*(1800 * 0.012)

38880.0

In [22]:
# Per-row maximum of Xt (one entry per row).
mi = Xt.max(axis=1)
# Random vector of length n; unseeded, so its values differ between runs.
Y = np.random.rand(n)
# Zero matrix with mi.max() rows and n columns of floats.
# NOTE(review): with the values shown in this notebook this is roughly
# 130 x 4459542 float64 entries (several GB) - confirm that is intended.
P = np.zeros(shape=(mi.max(), n), dtype='float')
mi

array([  7,  13,  20,  11,  20,  12,  16,  20,  20,  13,  20,  11,  15,
        16,  14,  20,  20,   8,   8,  20,  13,  17,  19,  10,   6,   6,
         1,   6,   6,  44,   6,   2,   5,   1,   2,  10,   2,   3,   1,
         1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
         1,   2,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   5,
        15,  24,  27,  33,   5,  30, 130,   4,  50,  46,  26,  21,  33,
        50,  15,  29,  12,  10,  15,  13,  14,   2,   1,   1,   1,   1,
         1,   1,   1,   1,   1,   2,   1,   1,   6,   1,   1,   1,   1,
         1,  10,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
         1,   1,   1,   8,   8,   9,   9,   8,  24,   5,  11,  11,  16,
         9,   3,   3,   6,   1,   1,   1,  80,  20,  10,  20,  20,  20,
        20,  20,  20,  20,  10,  20,  29,   3,   3,   7,  20,  15,  20],
      dtype=uint8)

In [29]:
%%timeit
# Timed block: for every row j of Xt and every value i in range(mi[j]), sum
# the entries of Y at the positions where Xt[j, :] == i, and store the result
# in the flat buffer d at offset j * mimax + i (p rows of mimax slots each).
# mimax is cast to a Python int - presumably so that j * mimax is not
# evaluated in mi's small integer dtype.
mimax = int(mi.max())
d = np.zeros(p * mimax)
for j in range(p):
    # NOTE(review): range(mi[j]) stops at mi[j] - 1, so if mi holds the row
    # maxima of Xt, the largest value of each row is never accumulated -
    # confirm that this is intended.
    for i in range(mi[j]):
        # The row Xt[j, :] is re-read and compared on every inner iteration.
        d[j * mimax + i] = Y[Xt[j, :] == i].sum()

8.34 s ± 192 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [30]:
%%timeit
# Timed statement: matrix product of P with its own transpose.
R = P @ P.T

117 ms ± 15.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [45]:
# The original cell bound this value to the name `np`, which replaced the
# numpy module alias and broke every later `np.` call in the same kernel.
# NOTE(review): what the 10 counts is not documented - TODO pick a more
# descriptive name.
count = 10
0.32 * count * count / 2

16.0