From a12d456f2b429360dc83a97c76412075e315a814 Mon Sep 17 00:00:00 2001 From: emekaokoli19 Date: Fri, 14 Nov 2025 15:30:52 +0100 Subject: [PATCH 1/3] enhanced RV RNG performance --- pytensor/tensor/random/op.py | 4 ++-- pytensor/tensor/random/utils.py | 9 +++++++ tests/tensor/random/test_utils.py | 40 +++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/pytensor/tensor/random/op.py b/pytensor/tensor/random/op.py index 6891823576..8e9448fa20 100644 --- a/pytensor/tensor/random/op.py +++ b/pytensor/tensor/random/op.py @@ -1,7 +1,6 @@ import abc import warnings from collections.abc import Sequence -from copy import deepcopy from typing import Any, cast import numpy as np @@ -23,6 +22,7 @@ from pytensor.tensor.random.type import RandomGeneratorType, RandomType from pytensor.tensor.random.utils import ( compute_batch_shape, + custom_deepcopy, explicit_expand_dims, normalize_size_param, ) @@ -421,7 +421,7 @@ def perform(self, node, inputs, outputs): # Draw from `rng` if `self.inplace` is `True`, and from a copy of `rng` otherwise. if not self.inplace: - rng = deepcopy(rng) + rng = custom_deepcopy(rng) outputs[0][0] = rng outputs[1][0] = np.asarray( diff --git a/pytensor/tensor/random/utils.py b/pytensor/tensor/random/utils.py index 86628a81cb..7ae49b9f58 100644 --- a/pytensor/tensor/random/utils.py +++ b/pytensor/tensor/random/utils.py @@ -1,10 +1,12 @@ from collections.abc import Callable, Sequence +from copy import deepcopy from functools import wraps from itertools import zip_longest from types import ModuleType from typing import TYPE_CHECKING import numpy as np +from numpy.random import Generator from pytensor.compile.sharedvalue import shared from pytensor.graph.basic import Constant, Variable @@ -201,6 +203,13 @@ def normalize_size_param( return shape +def custom_deepcopy(rng): + old_bitgen = rng.bit_generator + new_bitgen = type(old_bitgen)(deepcopy(old_bitgen._seed_seq)) + new_bitgen.state = old_bitgen.state + return Generator(new_bitgen) + + class RandomStream: """Module component with similar interface to `numpy.random.Generator`. diff --git a/tests/tensor/random/test_utils.py b/tests/tensor/random/test_utils.py index aa761d2922..7a0c7e3958 100644 --- a/tests/tensor/random/test_utils.py +++ b/tests/tensor/random/test_utils.py @@ -1,3 +1,6 @@ +import timeit +from copy import deepcopy + import numpy as np import pytest @@ -7,6 +10,7 @@ from pytensor.tensor.random.utils import ( RandomStream, broadcast_params, + custom_deepcopy, supp_shape_from_ref_param_shape, ) from pytensor.tensor.type import matrix, tensor @@ -327,3 +331,39 @@ def test_supp_shape_from_ref_param_shape(): ref_param_idx=1, ) assert res == (3, 4) + + +def test_custom_deepcopy_matches_deepcopy(): + rng = np.random.default_rng(123) + + dp = deepcopy(rng).bit_generator + fc = custom_deepcopy(rng).bit_generator + + # Same state + assert dp.state == fc.state + # Same seed sequence + assert dp.seed_seq.state == fc.seed_seq.state + + +def test_custom_deepcopy_output_identical(): + rng = np.random.default_rng(123) + + rng1 = deepcopy(rng) + rng2 = custom_deepcopy(rng) + + # Generate numbers from each + x1 = rng1.normal(size=10) + x2 = rng2.normal(size=10) + + assert np.allclose(x1, x2) + + +@pytest.mark.performance +def test_custom_deepcopy_faster_than_deepcopy(): + rng = np.random.default_rng() + + t_dp = timeit.timeit(lambda: deepcopy(rng), number=2000) + t_fc = timeit.timeit(lambda: custom_deepcopy(rng), number=2000) + + # Fast copy should be at least 20% faster + assert t_fc < t_dp * 0.8 From 0c9e6044abd04e607173250b981c18bc5929d35b Mon Sep 17 00:00:00 2001 From: emekaokoli19 Date: Sun, 16 Nov 2025 21:07:00 +0100 Subject: [PATCH 2/3] addressed reviews --- pytensor/link/numba/dispatch/random.py | 5 +++-- pytensor/tensor/random/op.py | 4 ++-- pytensor/tensor/random/utils.py | 6 +++++- tests/tensor/random/test_utils.py | 14 +++++++------- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pytensor/link/numba/dispatch/random.py b/pytensor/link/numba/dispatch/random.py index 36618ceb26..ba714acbc2 100644 --- a/pytensor/link/numba/dispatch/random.py +++ b/pytensor/link/numba/dispatch/random.py @@ -1,5 +1,5 @@ from collections.abc import Callable -from copy import copy, deepcopy +from copy import copy from functools import singledispatch from textwrap import dedent @@ -25,6 +25,7 @@ ) from pytensor.tensor import get_vector_length from pytensor.tensor.random.op import RandomVariable, RandomVariableWithCoreShape +from pytensor.tensor.random.utils import custom_rng_deepcopy from pytensor.tensor.type_other import NoneTypeT from pytensor.tensor.utils import _parse_gufunc_signature @@ -34,7 +35,7 @@ def copy_NumPyRandomGenerator(rng): def impl(rng): # TODO: Open issue on Numba? with numba.objmode(new_rng=types.npy_rng): - new_rng = deepcopy(rng) + new_rng = custom_rng_deepcopy(rng) return new_rng diff --git a/pytensor/tensor/random/op.py b/pytensor/tensor/random/op.py index 8e9448fa20..ab15ca5649 100644 --- a/pytensor/tensor/random/op.py +++ b/pytensor/tensor/random/op.py @@ -22,7 +22,7 @@ from pytensor.tensor.random.type import RandomGeneratorType, RandomType from pytensor.tensor.random.utils import ( compute_batch_shape, - custom_deepcopy, + custom_rng_deepcopy, explicit_expand_dims, normalize_size_param, ) @@ -421,7 +421,7 @@ def perform(self, node, inputs, outputs): # Draw from `rng` if `self.inplace` is `True`, and from a copy of `rng` otherwise. if not self.inplace: - rng = custom_deepcopy(rng) + rng = custom_rng_deepcopy(rng) outputs[0][0] = rng outputs[1][0] = np.asarray( diff --git a/pytensor/tensor/random/utils.py b/pytensor/tensor/random/utils.py index 7ae49b9f58..c34530eb0d 100644 --- a/pytensor/tensor/random/utils.py +++ b/pytensor/tensor/random/utils.py @@ -203,7 +203,11 @@ def normalize_size_param( return shape -def custom_deepcopy(rng): +# NOTE: +# This helper exists because copying numpy.random.Generator via deepcopy is slow. +# NumPy may implement a faster clone/copy API in the future: +# https://github.com/numpy/numpy/issues/24086 +def custom_rng_deepcopy(rng): old_bitgen = rng.bit_generator new_bitgen = type(old_bitgen)(deepcopy(old_bitgen._seed_seq)) new_bitgen.state = old_bitgen.state diff --git a/tests/tensor/random/test_utils.py b/tests/tensor/random/test_utils.py index 7a0c7e3958..be28dc4c2d 100644 --- a/tests/tensor/random/test_utils.py +++ b/tests/tensor/random/test_utils.py @@ -10,7 +10,7 @@ from pytensor.tensor.random.utils import ( RandomStream, broadcast_params, - custom_deepcopy, + custom_rng_deepcopy, supp_shape_from_ref_param_shape, ) from pytensor.tensor.type import matrix, tensor @@ -333,11 +333,11 @@ def test_supp_shape_from_ref_param_shape(): assert res == (3, 4) -def test_custom_deepcopy_matches_deepcopy(): +def test_custom_rng_deepcopy_matches_deepcopy(): rng = np.random.default_rng(123) dp = deepcopy(rng).bit_generator - fc = custom_deepcopy(rng).bit_generator + fc = custom_rng_deepcopy(rng).bit_generator # Same state assert dp.state == fc.state @@ -345,11 +345,11 @@ def test_custom_deepcopy_matches_deepcopy(): assert dp.seed_seq.state == fc.seed_seq.state -def test_custom_deepcopy_output_identical(): +def test_custom_rng_deepcopy_output_identical(): rng = np.random.default_rng(123) rng1 = deepcopy(rng) - rng2 = custom_deepcopy(rng) + rng2 = custom_rng_deepcopy(rng) # Generate numbers from each x1 = rng1.normal(size=10) @@ -359,11 +359,11 @@ def test_custom_deepcopy_output_identical(): @pytest.mark.performance -def test_custom_deepcopy_faster_than_deepcopy(): +def test_custom_rng_deepcopy_faster_than_deepcopy(): rng = np.random.default_rng() t_dp = timeit.timeit(lambda: deepcopy(rng), number=2000) - t_fc = timeit.timeit(lambda: custom_deepcopy(rng), number=2000) + t_fc = timeit.timeit(lambda: custom_rng_deepcopy(rng), number=2000) # Fast copy should be at least 20% faster assert t_fc < t_dp * 0.8 From c411aaf3a918b0a5199f0d123f4f19556ffd59f4 Mon Sep 17 00:00:00 2001 From: emekaokoli19 Date: Mon, 17 Nov 2025 22:05:22 +0100 Subject: [PATCH 3/3] removed performance test --- tests/tensor/random/test_utils.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/tensor/random/test_utils.py b/tests/tensor/random/test_utils.py index be28dc4c2d..d6ecec1d16 100644 --- a/tests/tensor/random/test_utils.py +++ b/tests/tensor/random/test_utils.py @@ -1,4 +1,3 @@ -import timeit from copy import deepcopy import numpy as np @@ -356,14 +355,3 @@ def test_custom_rng_deepcopy_output_identical(): x2 = rng2.normal(size=10) assert np.allclose(x1, x2) - - -@pytest.mark.performance -def test_custom_rng_deepcopy_faster_than_deepcopy(): - rng = np.random.default_rng() - - t_dp = timeit.timeit(lambda: deepcopy(rng), number=2000) - t_fc = timeit.timeit(lambda: custom_rng_deepcopy(rng), number=2000) - - # Fast copy should be at least 20% faster - assert t_fc < t_dp * 0.8