In [1]:
import ctypes
import math
import os
import subprocess
import sys
import timeit

import numba as nb
import numpy as np


In [2]:
# Seed the legacy global generator so the sample array is the same on every run.
np.random.seed(0)
# 20 non-negative int32 values; `high` is exclusive, so every draw is below 2147483646.
x = np.random.randint(low=0, high=2147483646, size=20, dtype=np.int32)
x

array([ 209652396,  398764591,  924231285, 1478610112,  441365315,
       1537364731,  192771779, 1491434855, 1819583497,  530702035,
        626610453, 1650906866, 1879422756, 1277901399, 1682652230,
        243580376, 1991416408, 1171049868, 1646868794, 2051556033])

In [3]:
# Registry of benchmark entries; later cells append dicts with 'method', 'func' and 'times' keys.
# NOTE(review): the name is a misspelling of "all_methods"; it is kept because later cells refer to it.
all_mehods = []

In [4]:
def sumSqrtPurePython(x):
    """Return the sum of the square roots of the items of ``x``.

    Each root is computed with the ``** 0.5`` operator inside a plain Python
    loop. An empty iterable yields the integer ``0``.
    """
    total = 0
    for value in x:
        total += value ** 0.5
    return total

# Register the interpreted implementation; 'times' is filled by the speed-test cell.
all_mehods.append(dict(method='Pure Python', func=sumSqrtPurePython, times=[]))

# quick test:
sumSqrtPurePython(x)

647829.590943377

In [5]:
def sumSqrtMath(x):
    """Return the sum of the square roots of the items of ``x``.

    Same Python-level loop as the ``** 0.5`` variant, but each root comes from
    ``math.sqrt``. An empty iterable yields the integer ``0``.
    """
    total = 0
    for value in x:
        total += math.sqrt(value)
    return total

# Register the math.sqrt implementation; 'times' is filled by the speed-test cell.
all_mehods.append(dict(method='Math Module', func=sumSqrtMath, times=[]))

# quick test:
sumSqrtMath(x)

647829.590943377

In [6]:
def sumSqrtNumpy(x):
    """Return the sum of the element-wise square roots of ``x`` using NumPy only."""
    roots = np.sqrt(x)
    return roots.sum()

# Register the vectorised NumPy implementation; 'times' is filled by the speed-test cell.
all_mehods.append(dict(method='Numpy', func=sumSqrtNumpy, times=[]))

# quick test:
sumSqrtNumpy(x)

647829.5909433769

In [7]:
@nb.njit(parallel=True)
def sumSqrtNumbaParallel(x):
    """Return the sum of the square roots of ``x``, compiled by Numba with ``parallel=True``.

    The loop runs over ``nb.prange`` and accumulates into ``total`` with ``+=``,
    the accumulation form Numba's documentation describes as a supported
    reduction inside ``prange``.
    """
    total = 0
    for idx in nb.prange(len(x)):
        total += x[idx] ** 0.5
    return total

# Register the parallel Numba implementation; 'times' is filled by the speed-test cell.
all_mehods.append(dict(method='Numba parallel', func=sumSqrtNumbaParallel, times=[]))

# quick test:
# This first call also triggers Numba's compilation for this argument type.
sumSqrtNumbaParallel(x)

647829.590943377

In [8]:
@nb.njit
def sumSqrtNumba(x):
    """Return the sum of the square roots of ``x`` from a single-threaded Numba-compiled loop."""
    total = 0
    for idx in range(len(x)):
        total += x[idx] ** 0.5
    return total

# Register the serial Numba implementation; 'times' is filled by the speed-test cell.
all_mehods.append(dict(method='Numba', func=sumSqrtNumba, times=[]))

# quick test:
# This first call also triggers Numba's compilation for this argument type.
sumSqrtNumba(x)

647829.590943377

In [9]:
# compiled functions
# Compiler location. The GCC_PATH environment variable overrides it so the notebook
# is not tied to one machine; the default is the path the notebook was written with.
gcc_path = os.environ.get("GCC_PATH", "C:\\__APP__\\mingw64\\bin\\gcc")
current_path = os.getcwd()

options = ['-O0', '-O1', '-O2', '-O3', '-Os', '-Ofast', '-Og', '-Oz']
sum_sqrt_libs = []
names = []


def _compile_sum_sqrt(opt):
    """Compile sumSqrt.c into the shared library ``sumSqrt{opt}.so``.

    Parameters:
        opt: gcc optimisation flag, e.g. ``'-O2'``.

    Raises:
        subprocess.CalledProcessError: if gcc exits with a non-zero status.
        Without this check a failed compile would return almost instantly, be
        timed as if it had worked, and be repeated many times by the loop below.
    """
    subprocess.run(
        [gcc_path, '-shared', opt, '-o', f'sumSqrt{opt}.so', 'sumSqrt.c'],
        check=True,
    )


compilation_times = {}
for opt in options:
    # One compile to estimate the cost; if it took under 0.5 s, repeat enough
    # times to fill roughly 0.5 s and keep the per-compile average.
    t = timeit.timeit(lambda: _compile_sum_sqrt(opt), number=1)
    if t < 0.5:
        repeat = int(0.5 / t) + 1
        final_t = timeit.timeit(lambda: _compile_sum_sqrt(opt), number=repeat) / repeat
    else:
        repeat = 1
        final_t = t
    compilation_times[opt] = final_t

    # The timing runs above already left sumSqrt{opt}.so on disk, so it is
    # loaded directly instead of being compiled one more time.
    lib = ctypes.cdll.LoadLibrary(os.path.join(current_path, f'sumSqrt{opt}.so'))
    # Declare the C signature so ctypes converts the arguments and the double result.
    lib.sumSqrtC.argtypes = (ctypes.POINTER(ctypes.c_int), ctypes.c_int)
    lib.sumSqrtC.restype = ctypes.c_double
    sum_sqrt_libs.append(lib)
    names.append(f'compiled{opt}')


def np_to_c(arr: np.ndarray) -> tuple[ctypes.POINTER(ctypes.c_int), ctypes.c_int]:
    """Return ``(pointer, length)`` ctypes arguments describing ``arr``.

    The pointer is the array's data buffer viewed as ``int*``; no copy or dtype
    conversion is made, so ``arr`` is assumed to hold C ``int``-sized elements
    (the notebook passes int32 arrays). The length is ``len(arr)`` as a ``c_int``.
    """
    int_pointer = ctypes.POINTER(ctypes.c_int)
    data_ptr = arr.ctypes.data_as(int_pointer)
    length = ctypes.c_int(len(arr))
    return data_ptr, length

def sumSqrtC(x, lib):
    """Call ``lib.sumSqrtC`` on the array ``x`` and return its result.

    ``x`` is converted to a (pointer, length) pair with ``np_to_c``; ``lib`` is
    one of the ctypes libraries loaded by the compilation cell.
    """
    data_ptr, length = np_to_c(x)
    return lib.sumSqrtC(data_ptr, length)

def _c_method(index):
    """Return the one-argument benchmark callable for the library built with ``options[index]``.

    The library is looked up in ``sum_sqrt_libs`` when the callable runs, the
    same way the hand-written per-flag lambdas did.
    """
    return lambda x: sumSqrtC(x, lib=sum_sqrt_libs[index])


# One registry entry per optimisation flag, generated from `options` so the label
# and the library index cannot drift apart when a flag is added or removed.
for _index, _flag in enumerate(options):
    all_mehods.append({
        'method': f'C {_flag}',
        'func': _c_method(_index),
        'times': []
    })

print('Compilation times:')
for opt, t in compilation_times.items():
    print(f'{opt}: {t:.3f} s')

# quick test:
sumSqrtC(x, lib=sum_sqrt_libs[0])

Compilation times:
-O0: 0.254 s
-O1: 0.272 s
-O2: 0.540 s
-O3: 0.261 s
-Os: 0.265 s
-Ofast: 0.255 s
-Og: 0.266 s
-Oz: 0.262 s


647829.590943377

In [10]:
# Build the Cython extension in place. sys.executable is used so the extension is
# built by the interpreter running this kernel rather than whichever `python`
# is first on PATH, and check=True stops the cell if the build fails instead of
# falling through to a missing or stale module.
subprocess.run([sys.executable, 'setup.py', 'build_ext', '--inplace'], check=True)

# Imported here rather than in the first cell: the module only exists after the build above.
import sumSqrtCython

# Register the Cython implementation; 'times' is filled by the speed-test cell.
all_mehods.append({
    'method': 'Cython',
    'func': sumSqrtCython.sumSqrtCython,
    'times': []
})

# quick test:
sumSqrtCython.sumSqrtCython(x)

647829.5909433769

In [11]:
all_mehods

[{'method': 'Pure Python',
  'func': <function __main__.sumSqrtPurePython(x)>,
  'times': []},
 {'method': 'Math Module',
  'func': <function __main__.sumSqrtMath(x)>,
  'times': []},
 {'method': 'Numpy', 'func': <function __main__.sumSqrtNumpy(x)>, 'times': []},
 {'method': 'Numba parallel',
  'func': CPUDispatcher(<function sumSqrtNumbaParallel at 0x000002B1CB7F4360>),
  'times': []},
 {'method': 'Numba',
  'func': CPUDispatcher(<function sumSqrtNumba at 0x000002B1CB7F45E0>),
  'times': []},
 {'method': 'C -O0', 'func': <function __main__.<lambda>(x)>, 'times': []},
 {'method': 'C -O1', 'func': <function __main__.<lambda>(x)>, 'times': []},
 {'method': 'C -O2', 'func': <function __main__.<lambda>(x)>, 'times': []},
 {'method': 'C -O3', 'func': <function __main__.<lambda>(x)>, 'times': []},
 {'method': 'C -Os', 'func': <function __main__.<lambda>(x)>, 'times': []},
 {'method': 'C -Ofast', 'func': <function __main__.<lambda>(x)>, 'times': []},
 {'method': 'C -Og', 'func': <function __m

In [12]:
%%timeit
# Time the interpreted ** 0.5 loop on the current x.
sumSqrtPurePython(x)

49.5 µs ± 4.3 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [13]:
%%timeit
# Time the math.sqrt loop on the current x.
sumSqrtMath(x)

6 µs ± 847 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [14]:
%%timeit
# Time the vectorised NumPy version on the current x.
sumSqrtNumpy(x)

6.98 µs ± 921 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [15]:
%%timeit
# Time the C library at index 0 of sum_sqrt_libs (built with options[0], '-O0'), including the ctypes call overhead.
sumSqrtC(x, lib=sum_sqrt_libs[0])

5.62 µs ± 565 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [16]:
%%timeit
# Time the C library at index 1 of sum_sqrt_libs (built with options[1], '-O1'), including the ctypes call overhead.
sumSqrtC(x, lib=sum_sqrt_libs[1])

4.99 µs ± 352 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [17]:
%%timeit
# Time the Cython extension function on the current x.
sumSqrtCython.sumSqrtCython(x)

11.7 µs ± 908 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [18]:
%%timeit
# Time the serial Numba version; it was already called once in its definition cell.
sumSqrtNumba(x)

426 ns ± 12.3 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [19]:
%%timeit
# Time the parallel Numba version; it was already called once in its definition cell.
sumSqrtNumbaParallel(x)

18.1 µs ± 1.51 µs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [20]:
# Largest exponent i (array size 10^i) at which each slow interpreted method is
# still timed; methods not listed here run at every size.
max_exponent = {'Pure Python': 6, 'Math Module': 7}

# Start from empty result lists so that re-running this cell replaces the
# measurements instead of appending a second series to each method.
for method in all_mehods:
    method['times'].clear()

print('Speed tests:')
for i in range(1,9):
    n = 10**i
    # Same seed at every size, so each array is drawn from the same generator state.
    np.random.seed(0)
    x = np.random.randint(0, 2147483646, n, dtype=np.int32)
    print(f'Taille 10^{i}:')
    for method in all_mehods:
        if i > max_exponent.get(method['method'], i):
            continue
        func = method['func']
        # One call to estimate the cost; if it took under 0.5 s, repeat enough
        # times to fill roughly 0.5 s and keep the per-call average.
        t = timeit.timeit(lambda: func(x), number=1)
        if t<0.5:
            repeat = int(0.5/t)+1
            final_t = timeit.timeit(lambda: func(x), number=repeat)/repeat
        else:
            repeat = 1
            final_t = t
        method['times'].append(final_t)
        print(f'\t{method["method"]}: {final_t:0.7f}s (repeats: {repeat})')

Speed tests:
Taille 10^1:
	Pure Python: 0.0000318s (repeats: 7279)
	Math Module: 0.0000041s (repeats: 41667)
	Numpy: 0.0000070s (repeats: 10331)
	Numba parallel: 0.0000174s (repeats: 1311)
	Numba: 0.0000011s (repeats: 70424)
	C -O0: 0.0000058s (repeats: 8117)
	C -O1: 0.0000042s (repeats: 21368)
	C -O2: 0.0000049s (repeats: 14327)
	C -O3: 0.0000048s (repeats: 20081)
	C -Os: 0.0000053s (repeats: 11186)
	C -Ofast: 0.0000048s (repeats: 15385)
	C -Og: 0.0000046s (repeats: 18249)
	C -Oz: 0.0000051s (repeats: 16287)
	Cython: 0.0000117s (repeats: 7397)
Taille 10^2:
	Pure Python: 0.0002157s (repeats: 2243)
	Math Module: 0.0000205s (repeats: 20081)
	Numpy: 0.0000062s (repeats: 8475)
	Numba parallel: 0.0000160s (repeats: 6143)
	Numba: 0.0000011s (repeats: 106383)
	C -O0: 0.0000063s (repeats: 13737)
	C -O1: 0.0000052s (repeats: 16557)
	C -O2: 0.0000049s (repeats: 17544)
	C -O3: 0.0000048s (repeats: 16026)
	C -Os: 0.0000072s (repeats: 17858)
	C -Ofast: 0.0000054s (repeats: 17483)
	C -Og: 0.0000055s