In [1]:
import os
# Switch to the project directory so that the relative file names used by the
# benchmark helpers below (e.g. "zero.npy", "random.bl") are created there.
# NOTE(review): the path looks like a Google Drive mount inside Colab — confirm
# the drive is mounted before running this cell.
os.chdir('/content/drive/MyDrive/python-performance')

In [2]:
!pip install blosc



In [3]:
import os

import blosc
import numpy as np
# sync; echo 3 > /proc/sys/vm/drop_caches
# https://www.tecmint.com/clear-ram-memory-cache-buffer-and-swap-space-on-linux/

# Three 1 GiB uint8 arrays with very different compressibility. Each one is
# created directly as uint8: building it with the default dtype and casting
# afterwards would allocate a temporary eight times larger than the result.
ARRAY_SHAPE = (1024, 1024, 1024)

# Uniformly random bytes in [0, 256).
random_arr = np.random.randint(256, size=ARRAY_SHAPE, dtype=np.uint8)

# All zeros.
zero_arr = np.zeros(shape=ARRAY_SHAPE, dtype=np.uint8)

# The byte sequence 0..255 repeated 4 * 1024 * 1024 times (256 * 4Mi == 1024**3
# elements). Both names are bound to the same array object.
rep_tile_arr = rep_cycle_arr = np.tile(
    np.arange(256, dtype=np.uint8),
    4 * 1024 * 1024).reshape(ARRAY_SHAPE)



def write_numpy(arr, prefix):
    """Save ``arr`` in NumPy's ``.npy`` format and then run ``sync``.

    Args:
        arr: Array to store.
        prefix: Path without extension; the file written is ``<prefix>.npy``.

    Returns:
        None.
    """
    target = f"{prefix}.npy"
    np.save(target, arr)
    # Invoke the shell `sync` command after saving (presumably so the measured
    # time includes flushing buffered writes to storage).
    os.system("sync")


def write_blosc(arr, prefix, cname="lz4"):
    """Compress ``arr`` with blosc, write it to ``<prefix>.bl``, then run ``sync``.

    Args:
        arr: Array to compress; it is passed to ``blosc.pack_array``.
        prefix: Path without extension; the file written is ``<prefix>.bl``.
        cname: Compressor name forwarded to ``blosc.pack_array``
            (defaults to ``"lz4"``).

    Returns:
        None.
    """
    packed = blosc.pack_array(arr, cname=cname)
    # The context manager guarantees the handle is closed even if the write
    # fails part-way (e.g. disk full), instead of leaking it.
    with open(f"{prefix}.bl", "wb") as out_file:
        out_file.write(packed)
    # Invoke the shell `sync` command after writing (presumably so the measured
    # time includes flushing buffered writes to storage).
    os.system("sync")


def read_numpy(prefix):
    """Load and return the array stored in ``<prefix>.npy``.

    Args:
        prefix: Path without extension, matching the one given to
            ``write_numpy``.

    Returns:
        The value returned by ``np.load`` for that file.
    """
    source = f"{prefix}.npy"
    loaded = np.load(source)
    return loaded


def read_blosc(prefix):
    """Read ``<prefix>.bl`` and return the array decompressed by blosc.

    Args:
        prefix: Path without extension, matching the one given to
            ``write_blosc``.

    Returns:
        The result of ``blosc.unpack_array`` on the file's full contents.
    """
    # The context manager guarantees the handle is closed even if the read
    # raises, instead of leaking it.
    with open(f"{prefix}.bl", "rb") as in_file:
        packed = in_file.read()
    return blosc.unpack_array(packed)


# Write benchmark: for each test array, time a plain .npy save against a blosc
# save (write_blosc's default cname="lz4"). Both helpers end with `sync`.
# Run `sync` once up front (presumably so writes pending from earlier work are
# not charged to the first measurement).
os.system("sync")
%time write_numpy(zero_arr, "zero")
%time write_blosc(zero_arr, "zero")
%time write_numpy(rep_tile_arr, "rep_tile")
%time write_blosc(rep_tile_arr, "rep_tile")
%time write_numpy(random_arr, "random")
%time write_blosc(random_arr, "random")

# Cache drop
# NOTE(review): nothing in this cell drops the OS page cache; presumably it was
# done by hand at this point (see the drop_caches command noted under the
# imports) — confirm, otherwise the reads below may be served from RAM.

# Read benchmark: time loading the .npy file against loading + decompressing
# the .bl file for the same data.
%time _ = read_numpy("zero")
%time _ = read_blosc("zero")
# Disabled: the write benchmark above never creates "one.npy" / "one.bl".
#%time _ = read_numpy("one")
#%time _ = read_blosc("one")
%time _ = read_numpy("random")
%time _ = read_blosc("random")


# size fraction



# different algorithms
# Compare two blosc compressors on the periodic array: write_blosc's default
# cname ("lz4") versus "zstd". Each write is followed by a read of the same
# file. The first line overwrites the "rep_tile.bl" produced by the write
# benchmark earlier in this cell.
%time write_blosc(rep_tile_arr, "rep_tile")
%time _ = read_blosc("rep_tile")
%time write_blosc(rep_tile_arr, "rep_tile_zstd", "zstd")
%time _ = read_blosc("rep_tile_zstd")

# ^^^ size fraction (again)


# shuffle: print the packed size in bytes of the periodic array for each blosc
# shuffle setting, in the order SHUFFLE, BITSHUFFLE, NOSHUFFLE.
for shuffle in [blosc.SHUFFLE, blosc.BITSHUFFLE, blosc.NOSHUFFLE]:
    a = blosc.pack_array(rep_tile_arr, shuffle=shuffle)
    print(len(a))
# The loop ends on NOSHUFFLE, so `a` still holds the NOSHUFFLE-packed bytes.
# The former extra BITSHUFFLE / NOSHUFFLE repacks were dropped: their bare
# `len(a)` expressions were not the last expression of the cell, so they
# displayed nothing and only repeated the compression work.

# timeit of pack: time in-memory compression only (no file I/O).
# NOTE(review): 16 threads are requested here, while the recorded output of
# detect_number_of_cores() at the end of this cell appears to be 2 — confirm
# the oversubscription is intended.
blosc.set_nthreads(16)
%timeit blosc.pack_array(rep_tile_arr, shuffle=blosc.SHUFFLE)
%timeit blosc.pack_array(rep_tile_arr, shuffle=blosc.NOSHUFFLE)
%timeit blosc.pack_array(rep_tile_arr, cname="zstd")
# Baseline for comparison: a plain in-memory copy of the same array (memcpy).
%timeit rep_tile_arr.copy()

# Last expression of the cell, so its value is what the cell displays.
blosc.detect_number_of_cores()
# memcpy baseline: see the rep_tile_arr.copy() timing above

CPU times: user 26.5 ms, sys: 495 ms, total: 522 ms
Wall time: 4.61 s
CPU times: user 450 ms, sys: 560 ms, total: 1.01 s
Wall time: 945 ms
CPU times: user 58.7 ms, sys: 560 ms, total: 619 ms
Wall time: 17.9 s
CPU times: user 472 ms, sys: 748 ms, total: 1.22 s
Wall time: 1.13 s
CPU times: user 18.5 ms, sys: 658 ms, total: 677 ms
Wall time: 8.82 s
CPU times: user 1.07 s, sys: 2.32 s, total: 3.39 s
Wall time: 17.8 s
CPU times: user 77.8 ms, sys: 1.46 s, total: 1.54 s
Wall time: 4.24 s
CPU times: user 585 ms, sys: 1.48 s, total: 2.06 s
Wall time: 1.51 s
CPU times: user 55.6 ms, sys: 1.35 s, total: 1.41 s
Wall time: 5.22 s
CPU times: user 603 ms, sys: 2.71 s, total: 3.32 s
Wall time: 36.5 s
CPU times: user 513 ms, sys: 1.17 s, total: 1.68 s
Wall time: 1.63 s
CPU times: user 448 ms, sys: 1.38 s, total: 1.83 s
Wall time: 1.63 s
CPU times: user 4.43 s, sys: 1.11 s, total: 5.54 s
Wall time: 35.9 s
CPU times: user 471 ms, sys: 1.43 s, total: 1.9 s
Wall time: 1.57 s
5345503
4600030
5345503
1.11 s

2

In [4]:
import numexpr as ne

# Operands for the NumPy vs. numexpr comparison: 10000 x 10000 matrices of
# uniform random float64 values. `a` and `b` use the default C (row-major)
# layout; `f` is copied into Fortran (column-major) layout.
a = np.random.rand(10000, 10000)
b = np.random.rand(10000, 10000)
f = np.random.rand(10000, 10000).copy(order='F')
# Each pair times the same expression in plain NumPy and then via numexpr.

# Same-layout addition, C-ordered operands.
%timeit a + a
%timeit ne.evaluate('a + a')
# Same-layout addition, Fortran-ordered operands.
%timeit f + f
%timeit ne.evaluate('f + f')
# Mixed-layout addition: one C-ordered and one Fortran-ordered operand.
%timeit a + f
%timeit ne.evaluate('a + f')
# Compound expression: a power plus an addition.
%timeit a**5 + b
%timeit ne.evaluate('a**5 + b')
# Compound expression that also includes sin and cos of `a`.
%timeit a**5 + b + np.sin(a) + np.cos(a)
%timeit ne.evaluate('a**5 + b + sin(a) + cos(a)')


300 ms ± 15.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
230 ms ± 6.26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
284 ms ± 2.11 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
233 ms ± 7.94 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.75 s ± 123 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.4 s ± 234 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
3.1 s ± 371 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
441 ms ± 12.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
6.61 s ± 603 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
3.26 s ± 682 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
