MAINT: Performance improvement of polyutils.as_series #25299
Conversation
This small patch provides a (small) performance improvement: for the normal (straight / no-error) case the improvement is between 0% and 5%; for the error case it is between 3% and 30%. The improvement depends strongly on the value of the parameter. The original code always runs through all of the input arrays, even though it could stop at the first array found with size zero.

Signed-off-by: Andreas Florath <andreas@florath.net>
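To illustrate the difference described above, here is a simplified sketch with made-up helper names; it is not the actual numpy source or the diff in this PR. The first variant mirrors the "convert everything, then check" pattern, the second raises on the first empty input:

```python
import numpy as np

def check_all_then_raise(alist):
    # Pattern of the original code (illustrative only, not the numpy source):
    # every input is converted first, then all sizes are scanned.
    arrays = [np.array(a, ndmin=1) for a in alist]
    if min(a.size for a in arrays) == 0:
        raise ValueError("Coefficient array is empty")
    return arrays

def raise_on_first_empty(alist):
    # Early-exit pattern described in this PR: stop as soon as an
    # empty array is encountered instead of converting everything.
    arrays = []
    for a in alist:
        a = np.array(a, ndmin=1)
        if a.size == 0:
            raise ValueError("Coefficient array is empty")
        arrays.append(a)
    return arrays
```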
Short description of how I measured: I created a virtualenv and installed numpy. In polyutils.py I added the new function side by side with the original one:

```python
def as_series_orig(alist, trim=True):
    ...

def as_series(alist, trim=True):
    ...
```

The tests are divided into two groups: 1) the normal (straight) use case and 2) the error case, where an exception is thrown. The benchmark script:

```python
import numpy as np
from numpy.polynomial import polyutils as pu
from numpy.testing import (
    assert_raises, assert_equal, assert_,
)
import functools
import timeit

def func_tests_docu(testf):
    '''Functional tests from the documentation'''
    a = np.arange(4)
    assert_equal(testf(a), [np.array([0.]), np.array([1.]), np.array([2.]), np.array([3.])])
    b = np.arange(6).reshape((2,3))
    assert_equal(testf(b), [np.array([0., 1., 2.]), np.array([3., 4., 5.])])
    assert_equal(testf((1, np.arange(3), np.arange(2, dtype=np.float16))),
                 [np.array([1.]), np.array([0., 1., 2.]), np.array([0., 1.])])
    assert_equal(testf([2, [1.1, 0.]]),
                 [np.array([2.]), np.array([1.1])])
    assert_equal(testf([2, [1.1, 0.]], trim=False),
                 [np.array([2.]), np.array([1.1, 0.])])

def func_test_unit(testf):
    '''Functional tests from the unit tests which come with numpy'''
    # check exceptions
    assert_raises(ValueError, testf, [[]])
    assert_raises(ValueError, testf, [[[1, 2]]])
    assert_raises(ValueError, testf, [[1], ['a']])
    # check common types
    types = ['i', 'd', 'O']
    for i in range(len(types)):
        for j in range(i):
            ci = np.ones(1, types[i])
            cj = np.ones(1, types[j])
            [resi, resj] = testf([ci, cj])
            assert_(resi.dtype.char == resj.dtype.char)
            assert_(resj.dtype.char == types[i])

def generate_test_data_for_as_series(num_elements=10, chunk_size=5):
    """
    Generate test data for the numpy.polynomial.polyutils.as_series function.
    Parameters:
        num_elements (int): Total number of elements to generate.
        chunk_size (int): Size of each chunk in the list.
    Returns:
        list: A list of numpy arrays, each of size `chunk_size`.
    """
    # Generate a numpy array with the specified number of elements
    data = np.random.rand(num_elements)
    # Split the array into chunks of specified size
    return [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]

def exec_time(testf):
    tdata = [
        np.arange(4),
        np.arange(6).reshape((2,3)),
        (1, np.arange(3), np.arange(2, dtype=np.float16)),
        [2, [1.1, 0.]],
        [0.1, 0.2, 0.3],
        np.array([[0.1, 0.2, 0.3], [1, 2, 3]])
    ]
    res_times = []
    print("Tests from documentation and test cases")
    for td in tdata:
        res = timeit.timeit(functools.partial(testf, td), number=100000)
        res_times.append(res)
    print("Random tests")
    np.random.seed(0)
    for num_elements in (1, 2, 4, 8, 16, 32, 64, 128):
        td = generate_test_data_for_as_series(num_elements)
        res = timeit.timeit(functools.partial(testf, td), number=100000)
        res_times.append(res)
    return res_times

def test_wrapper(testf, td):
    try:
        testf(td)
        assert False
    except ValueError:
        pass

def exec_time_exceptions(testf):
    tdata = [
        [[]],
        [[[1, 2]]],
        [[1], ['a']],
        [[1], [], [2], [3]] + [ [7] * 100 ],
    ]
    res_times = []
    for td in tdata:
        res = timeit.timeit(functools.partial(test_wrapper, testf, td), number=1000)
        res_times.append(res)
    return res_times

def main():
    print("Running functional tests")
    func_tests_docu(pu.as_series_orig)
    func_test_unit(pu.as_series_orig)
    func_tests_docu(pu.as_series)
    func_test_unit(pu.as_series)
    print("Running performance tests")
    res_orig = exec_time(pu.as_series_orig)
    # print("Orig exec time", res_orig)
    res_opt = exec_time(pu.as_series)
    # print("Opt exec time", res_opt)
    diff = []
    for i in range(len(res_orig)):
        diff.append("%5.3f" % (res_orig[i] / res_opt[i]))
    print("Improvement normal", diff)
    res_orig_ex = exec_time_exceptions(pu.as_series_orig)
    # print("Orig exception exec time", res_orig_ex)
    res_opt_ex = exec_time_exceptions(pu.as_series)
    # print("Opt exception exec time", res_opt_ex)
    diff = []
    for i in range(len(res_orig_ex)):
        diff.append("%5.3f" % (res_orig_ex[i] / res_opt_ex[i]))
    print("Improvement exception", diff)

if __name__ == '__main__':
    main()
```

A typical run on an Intel i9-9880H using Python 3.11.2 (Debian):
A typical run on an Intel Xeon 6438M using Python 3.10.12 (Ubuntu):

The numbers are the improvement of the proposed version vs. the original version (the original is 1.0) for the different test cases.
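To make the ratio concrete (hypothetical figures, not measurements from this PR): if a test case takes 0.50 s with the original implementation and 0.46 s with the patched one, the reported value is 0.50 / 0.46 ≈ 1.087, i.e. roughly a 9% improvement; a value below 1.0 would indicate a slowdown.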
You could use …
This small patch provides a (small) performance improvement: for the normal (straight / no-error) case the improvement is between 1% and 9%; for the error case it is between 3% and 30%. The improvement depends strongly on the value of the parameter.
The original code always runs through all of the input arrays, even though it could stop at the first array found with size zero.