Skip to content

Commit

Permalink
REFACTOR-#3257: Move logging and caching to the gen_data internal function (#7046)
Browse files Browse the repository at this point in the history

Signed-off-by: arunjose696 <arunjose696@gmail.com>
Co-authored-by: Anatoly Myachev <anatoliimyachev@mail.com>
  • Loading branch information
arunjose696 and anmyachev committed Mar 13, 2024
1 parent eb740b9 commit 8710994
Showing 1 changed file with 13 additions and 45 deletions.
58 changes: 13 additions & 45 deletions asv_bench/benchmarks/utils/common.py
Expand Up @@ -114,11 +114,7 @@ def gen_nan_data(nrows: int, ncols: int) -> dict:

def gen_int_data(nrows: int, ncols: int, rand_low: int, rand_high: int) -> dict:
"""
Generate int data with caching.
The generated data are saved in the dictionary and on a subsequent call,
if the keys match, saved data will be returned. Therefore, we need
to carefully monitor the changing of saved data and make its copy if needed.
Generate int data.
Parameters
----------
Expand All @@ -136,30 +132,16 @@ def gen_int_data(nrows: int, ncols: int, rand_low: int, rand_high: int) -> dict:
dict
Number of keys - `ncols`, each of them stores np.ndarray of `nrows` length.
"""
cache_key = ("int", nrows, ncols, rand_low, rand_high)
if cache_key in data_cache:
return data_cache[cache_key]

logging.info(
"Generating int data {} rows and {} columns [{}-{}]".format(
nrows, ncols, rand_low, rand_high
)
)
data = {
"col{}".format(i): np.random.randint(rand_low, rand_high, size=(nrows))
for i in range(ncols)
}
data_cache[cache_key] = weakdict(data)
return data


def gen_str_int_data(nrows: int, ncols: int, rand_low: int, rand_high: int) -> dict:
"""
Generate int data and string data with caching.
The generated data are saved in the dictionary and on a subsequent call,
if the keys match, saved data will be returned. Therefore, we need
to carefully monitor the changing of saved data and make its copy if needed.
Generate int data and string data.
Parameters
----------
Expand All @@ -178,30 +160,16 @@ def gen_str_int_data(nrows: int, ncols: int, rand_low: int, rand_high: int) -> d
Number of keys - `ncols`, each of them stores np.ndarray of `nrows` length.
One of the columns with string values.
"""
cache_key = ("str_int", nrows, ncols, rand_low, rand_high)
if cache_key in data_cache:
return data_cache[cache_key]

logging.info(
"Generating str_int data {} rows and {} columns [{}-{}]".format(
nrows, ncols, rand_low, rand_high
)
)
data = gen_int_data(nrows, ncols, rand_low, rand_high).copy()
# convert values in arbitrary column to string type
key = list(data.keys())[0]
data[key] = [f"str_{x}" for x in data[key]]
data_cache[cache_key] = weakdict(data)
return data


def gen_true_false_int_data(nrows, ncols, rand_low, rand_high):
"""
Generate int data and string data "true" and "false" values with caching.
The generated data are saved in the dictionary and on a subsequent call,
if the keys match, saved data will be returned. Therefore, we need
to carefully monitor the changing of saved data and make its copy if needed.
Generate int data and string data "true" and "false" values.
Parameters
----------
Expand All @@ -221,15 +189,6 @@ def gen_true_false_int_data(nrows, ncols, rand_low, rand_high):
One half of the columns with integer values, another half - with "true" and
"false" string values.
"""
cache_key = ("true_false_int", nrows, ncols, rand_low, rand_high)
if cache_key in data_cache:
return data_cache[cache_key]

logging.info(
"Generating true_false_int data {} rows and {} columns [{}-{}]".format(
nrows, ncols, rand_low, rand_high
)
)
data = gen_int_data(nrows // 2, ncols // 2, rand_low, rand_high)

data_true_false = {
Expand All @@ -239,7 +198,6 @@ def gen_true_false_int_data(nrows, ncols, rand_low, rand_high):
for i in range(ncols - ncols // 2)
}
data.update(data_true_false)
data_cache[cache_key] = weakdict(data)
return data


Expand Down Expand Up @@ -289,10 +247,20 @@ def gen_data(
"str_int": gen_str_int_data,
"true_false_int": gen_true_false_int_data,
}
cache_key = (data_type, nrows, ncols, rand_low, rand_high)
if cache_key in data_cache:
return data_cache[cache_key]

logging.info(
"Generating {} data {} rows and {} columns [{}-{}]".format(
data_type, nrows, ncols, rand_low, rand_high
)
)
assert data_type in type_to_generator
data_generator = type_to_generator[data_type]

data = data_generator(nrows, ncols, rand_low, rand_high)
data_cache[cache_key] = weakdict(data)

return data

Expand Down

0 comments on commit 8710994

Please sign in to comment.