Skip to content

Commit

Permalink
Merge pull request #351 from mdekstrand/tweak/drop-deprecated-random
Browse files (browse the repository at this point in the history)
Remove legacy random code
  • Loading branch information
mdekstrand committed Dec 13, 2023
2 parents e21ca83 + 572cdbd commit 7ab675c
Show file tree
Hide file tree
Showing 13 changed files with 64 additions and 272 deletions.
48 changes: 12 additions & 36 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,35 @@
import logging
from pytest import fixture

import numpy as np
from seedbank import numpy_rng, initialize

from lenskit import util
logging.getLogger("numba").setLevel(logging.INFO)

logging.getLogger('numba').setLevel(logging.INFO)

_log = logging.getLogger('lenskit.tests')
_log = logging.getLogger("lenskit.tests")


@fixture
def rng():
return util.rng(42)


@fixture
def legacy_rng():
return util.rng(42, legacy_rng=True)
return numpy_rng(42)


@fixture(autouse=True)
def init_rng(request):
util.init_rng(42)
initialize(42)


@fixture(autouse=True)
def log_test(request):
modname = request.module.__name__ if request.module else '<unknown>'
funcname = request.function.__name__ if request.function else '<unknown>'
_log.info('running test %s:%s', modname, funcname)


@fixture(autouse=True, scope='session')
def carbon(request):
try:
from codecarbon import EmissionsTracker
except ImportError:
yield True # we do nothing
return

tracker = EmissionsTracker("lkpy-tests", 5)
tracker.start()
try:
yield True
finally:
emissions = tracker.stop()
_log.info('test suite used %.3f kgCO2eq', emissions)
modname = request.module.__name__ if request.module else "<unknown>"
funcname = request.function.__name__ if request.function else "<unknown>"
_log.info("running test %s:%s", modname, funcname)


def pytest_collection_modifyitems(items):
# add 'slow' to all 'eval' tests
for item in items:
evm = item.get_closest_marker('eval')
slm = item.get_closest_marker('slow')
evm = item.get_closest_marker("eval")
slm = item.get_closest_marker("slow")
if evm is not None and slm is None:
_log.debug('adding slow mark to %s', item)
item.add_marker('slow')
_log.debug("adding slow mark to %s", item)
item.add_marker("slow")
8 changes: 4 additions & 4 deletions lenskit/algorithms/als.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import logging
from collections import namedtuple
import warnings

import numpy as np
from numba import njit, prange

from csr import CSR
from seedbank import numpy_rng

from .bias import Bias
from .mf_common import MFPredictor
Expand Down Expand Up @@ -337,7 +337,7 @@ class BiasedMF(MFPredictor):
damping ``damping``.
method(str): the solver to use (see above).
rng_spec:
Random number generator or state (see :func:`lenskit.util.random.rng`).
Random number generator or state (see :func:`seedbank.numpy_rng`).
progress: a :func:`tqdm.tqdm`-compatible progress bar function
"""

Expand Down Expand Up @@ -366,7 +366,7 @@ def __init__(
else:
self.bias = bias
self.progress = progress if progress is not None else util.no_progress
self.rng = util.rng(rng_spec)
self.rng = numpy_rng(rng_spec)
self.save_user_features = save_user_features

def fit(self, ratings, **kwargs):
Expand Down Expand Up @@ -600,7 +600,7 @@ def __init__(
self.weight = weight
self.use_ratings = use_ratings
self.method = method
self.rng = util.rng(rng_spec)
self.rng = numpy_rng(rng_spec)
self.progress = progress if progress is not None else util.no_progress
self.save_user_features = save_user_features

Expand Down
2 changes: 1 addition & 1 deletion lenskit/algorithms/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def __init__(self, selector=None, rng_spec=None):
else:
self.selector = UnratedItemCandidateSelector()
# Get a Pandas-compatible RNG
self.rng_source = derivable_rng(rng_spec, legacy=True)
self.rng_source = derivable_rng(rng_spec)
self.items = None

def fit(self, ratings, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions lenskit/algorithms/funksvd.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
import numpy as np
import numba as n
from pandas.core.series import Series
from seedbank import numpy_rng

try:
from numba.experimental import jitclass
Expand Down Expand Up @@ -242,7 +242,7 @@ def __init__(
self.bias = Bias(damping=damping)
else:
self.bias = bias
self.random = util.rng(random_state)
self.random = numpy_rng(random_state)

def fit(self, ratings, **kwargs):
"""
Expand Down
27 changes: 13 additions & 14 deletions lenskit/crossfold.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import numpy as np
import pandas as pd
from . import util
from seedbank import numpy_rng

TTPair = namedtuple("TTPair", ["train", "test"])
TTPair.__doc__ = "Train-test pair (named tuple)."
Expand All @@ -29,7 +29,7 @@ def partition_rows(data, partitions, *, rng_spec=None):
partitions(int):
The number of partitions to produce.
rng_spec:
The random number generator or seed (see :py:func:`lenskit.util.rng`).
The random number generator or seed (see :py:func:`seedbank.numpy_rng`).
Returns:
iterator: an iterator of train-test pairs
Expand All @@ -41,7 +41,7 @@ def partition_rows(data, partitions, *, rng_spec=None):
# create an array of indexes
rows = np.arange(len(data))
# shuffle the indices & split into partitions
rng = util.rng(rng_spec)
rng = numpy_rng(rng_spec)
rng.shuffle(rows)
test_sets = np.array_split(rows, partitions)

Expand Down Expand Up @@ -90,15 +90,16 @@ def sample_rows(data, partitions, size, disjoint=True, *, rng_spec=None):
disjoint(bool):
If ``True``, force samples to be disjoint.
rng_spec:
The random number generator or seed (see :py:func:`lenskit.util.rng`).
The random number generator or seed (see :py:func:`seedbank.numpy_rng`).
Returns:
iterator: An iterator of train-test pairs.
"""

confirm_unique_index(data)
rng = numpy_rng(rng_spec)
if partitions is None:
test = data.sample(n=size)
test = data.sample(n=size, random_state=rng)
tr_mask = pd.Series(True, index=data.index)
tr_mask.loc[test.index] = False
train = data[tr_mask]
Expand All @@ -111,13 +112,11 @@ def sample_rows(data, partitions, size, disjoint=True, *, rng_spec=None):
size,
len(data),
)
return partition_rows(data, partitions)
return partition_rows(data, partitions, rng_spec=rng)

# create an array of indexes
rows = np.arange(len(data))

rng = util.rng(rng_spec)

if disjoint:
_logger.info("creating %d disjoint samples of size %d", partitions, size)
ips = _disjoint_sample(rows, partitions, size, rng)
Expand Down Expand Up @@ -181,7 +180,7 @@ class SampleN(PartitionMethod):

def __init__(self, n, rng_spec=None):
self.n = n
self.rng = util.rng(rng_spec, legacy=True)
self.rng = numpy_rng(rng_spec)

def __call__(self, udf):
return udf.sample(n=self.n, random_state=self.rng)
Expand All @@ -197,7 +196,7 @@ class SampleFrac(PartitionMethod):

def __init__(self, frac, rng_spec=None):
self.fraction = frac
self.rng = util.rng(rng_spec, legacy=True)
self.rng = numpy_rng(rng_spec)

def __call__(self, udf):
return udf.sample(frac=self.fraction, random_state=self.rng)
Expand Down Expand Up @@ -247,7 +246,7 @@ def partition_users(data, partitions: int, method: PartitionMethod, *, rng_spec=
data(pandas.DataFrame): a data frame containing ratings or other data you wish to partition.
partitions(int): the number of partitions to produce
method(PartitionMethod): The method for selecting test rows for each user.
rng_spec: The random number generator or seed (see :py:func:`lenskit.util.rng`).
rng_spec: The random number generator or seed (see :py:func:`seedbank.numpy_rng`).
Returns:
iterator: an iterator of train-test pairs
Expand All @@ -263,7 +262,7 @@ def partition_users(data, partitions: int, method: PartitionMethod, *, rng_spec=
# create an array of indexes into user row
rows = np.arange(len(users))
# shuffle the indices & split into partitions
rng = util.rng(rng_spec, legacy=True)
rng = numpy_rng(rng_spec)
rng.shuffle(rows)
test_sets = np.array_split(rows, partitions)

Expand Down Expand Up @@ -303,14 +302,14 @@ def sample_users(
method(PartitionMethod):
The method for obtaining user test ratings.
rng_spec:
The random number generator or seed (see :py:func:`lenskit.util.rng`).
The random number generator or seed (see :py:func:`seedbank.numpy_rng`).
Returns:
iterator: An iterator of train-test pairs (as :class:`TTPair` objects).
"""

confirm_unique_index(data)
rng = util.rng(rng_spec, legacy=True)
rng = numpy_rng(rng_spec)

user_col = data["user"]
users = user_col.unique()
Expand Down
4 changes: 1 addition & 3 deletions lenskit/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from ..algorithms import Algorithm
from .log import log_to_notebook, log_to_stderr # noqa: F401
from .timing import Stopwatch # noqa: F401
from .random import rng, init_rng, derivable_rng # noqa: F401
from .random import derivable_rng
from .parallel import proc_count # noqa: F401

try:
Expand All @@ -24,8 +24,6 @@
"log_to_notebook",
"Stopwatch",
"read_df_detect",
"rng",
"init_rng",
"derivable_rng",
"proc_count",
"clone",
Expand Down
14 changes: 9 additions & 5 deletions lenskit/util/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
import pickle
from threadpoolctl import threadpool_limits

import seedbank

from lenskit.sharing import persist, PersistedModel
from lenskit.util.log import log_queue
from lenskit.util.random import derive_seed, init_rng, get_root_seed

_log = logging.getLogger(__name__)
__work_model = None
Expand Down Expand Up @@ -91,7 +92,7 @@ def _initialize_worker(log_queue, seed):
__is_worker = True
faulthandler.enable()
if seed is not None:
init_rng(seed)
seedbank.initialize(seed)
if log_queue is not None:
h = logging.handlers.QueueHandler(log_queue)
root = logging.getLogger()
Expand All @@ -101,7 +102,7 @@ def _initialize_worker(log_queue, seed):


def _initialize_mp_worker(mkey, func, threads, log_queue, seed):
seed = derive_seed(mp.current_process().name, base=seed)
seed = seedbank.derive_seed(mp.current_process().name, base=seed)
_initialize_worker(log_queue, seed)
global __work_model, __work_func

Expand Down Expand Up @@ -183,7 +184,7 @@ def run_sp(func, *args, **kwargs):
"""
ctx = LKContext.INSTANCE
rq = ctx.SimpleQueue()
seed = derive_seed()
seed = seedbank.derive_seed()
worker_args = (log_queue(), seed, rq, func, args, kwargs)
_log.debug("spawning subprocess to run %s", func)
proc = ctx.Process(target=_sp_worker, args=worker_args)
Expand Down Expand Up @@ -300,7 +301,10 @@ def __init__(self, model, func, n_jobs, persist_method):
os.environ["_LK_IN_MP"] = "yes"
kid_tc = proc_count(level=1)
self.executor = ProcessPoolExecutor(
n_jobs, ctx, _initialize_mp_worker, (key, func, kid_tc, log_queue(), get_root_seed())
n_jobs,
ctx,
_initialize_mp_worker,
(key, func, kid_tc, log_queue(), seedbank.root_seed()),
)

def map(self, *iterables):
Expand Down

0 comments on commit 7ab675c

Please sign in to comment.