
Commit

add cache_file options for the benchmark
sdpython committed Mar 4, 2017
1 parent c0f85d6 commit 78154c5
Showing 2 changed files with 94 additions and 4 deletions.
18 changes: 16 additions & 2 deletions _unittests/ut_benchhelper/test_benchmark.py
@@ -56,8 +56,10 @@ def test_benchmark(self):
link = local_graph.to_html()
self.assertEqual(link, '<img src="zzz/g.png" alt="agraph"/>')

bench = TestBenchMark_("TestName", fLOG=fLOG, clog=temp)
params = [dict(value=random.randint(10, 20)) for i in range(0, 100)]
params = [dict(value=random.randint(10, 20)) for i in range(0, 20)]

bench = TestBenchMark_("TestName", fLOG=fLOG, clog=temp,
cache_file=os.path.join(temp, "cache.pickle"))
bench.run(params)
df = bench.to_df()
ht = df.to_html(float_format="%1.3f", index=False)
@@ -71,6 +73,18 @@ def test_benchmark(self):
assert os.path.exists(csv)
assert os.path.exists(rst)

df1 = bench.to_df()

fLOG("NEW RUN")
bench = TestBenchMark_("TestName", fLOG=fLOG, clog=temp,
cache_file=os.path.join(temp, "cache.pickle"))
bench.run(params)
meta = bench.Metadata
fLOG(meta)
self.assertEqual(meta[0]["nb_cached"], 20)
df2 = bench.to_df()
self.assertEqual(df1.shape, df2.shape)


if __name__ == "__main__":
unittest.main()
80 changes: 78 additions & 2 deletions src/pyquickhelper/benchhelper/benchmark.py
@@ -8,6 +8,7 @@
import sys
from datetime import datetime
from time import clock
import pickle
from ..loghelper import noLOG, CustomLog
from ..texthelper import apply_template
from ..pandashelper import df2rst
@@ -24,7 +25,8 @@ class BenchMark:
*init*, *bench*, *end*, *graphs*.
"""

def __init__(self, name, clog=None, fLOG=noLOG, path_to_images=".", **params):
def __init__(self, name, clog=None, fLOG=noLOG, path_to_images=".",
cache_file=None, **params):
"""
initialisation
@@ -33,6 +35,11 @@ def __init__(self, name, clog=None, fLOG=noLOG, path_to_images=".", **params):
@param fLOG logging function
@param params extra parameters
@param path_to_images path to images
@param cache_file cache file
If *cache_file* is specified, the class stores the results of
method @see me bench. On a second run, it loads the cache
and only runs the modified or new configurations (in *params_list*).
"""
self._fLOG = fLOG
self._name = name
@@ -45,6 +52,11 @@ def __init__(self, name, clog=None, fLOG=noLOG, path_to_images=".", **params):
self._clog = CustomLog(clog)
self._params = params
self._path_to_images = path_to_images
self._cache_file = cache_file

##
# methods to overwrite
##

def init(self):
"""
@@ -58,6 +70,8 @@ def bench(self, **params):
@param params parameters
@return metrics as a dictionary, appendix as a dictionary
The results of this method will be cached if a *cache_file* was specified in the constructor.
"""
raise NotImplementedError("It should be overwritten.")

@@ -67,6 +81,16 @@ def end(self):
"""
raise NotImplementedError("It should be overwritten.")

def uncache(self, cache):
"""
Overwrite this method to remove some previous runs from the cache.
"""
pass

##
# end of methods to overwrite
##

class LocalGraph:
"""
Information about graphs.
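
The hunk above groups the methods a concrete benchmark overrides: *init*, *bench*, *end* and, for the cache added in this commit, *uncache*; *bench* returns two dictionaries, metrics and appendix. A minimal sketch, assuming *BenchMark* is importable from pyquickhelper.benchhelper.benchmark and using a purely illustrative parameter *value*:

    from pyquickhelper.benchhelper.benchmark import BenchMark

    class DummyBenchMark(BenchMark):
        # illustrative subclass, not part of the commit
        # (the class docstring also mentions graphs, omitted in this sketch)
        def init(self):
            # prepare shared resources before the runs
            pass

        def bench(self, **params):
            # one run: returns (metrics, appendix), both dictionaries
            value = params.get("value", 0)
            return dict(square=value ** 2), dict(inputs=params)

        def end(self):
            # clean up after all runs
            pass

        def uncache(self, cache):
            # optionally drop cached entries which must be recomputed
            pass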
@@ -153,19 +177,61 @@ def run(self, params_list):
if not isinstance(di, dict):
raise TypeError("params_list must be a list of dictionaries")

# cache: reload previous results if a cache file exists

if self._cache_file is not None and os.path.exists(self._cache_file):
self.fLOG("[BenchMark.run] retrieve cache '{0}'".format(
self._cache_file))
with open(self._cache_file, "rb") as f:
cached = pickle.load(f)
self.fLOG("[BenchMark.run] number of cached run: {0}".format(
len(cached["params_list"])))
else:
if self._cache_file is not None:
self.fLOG("[BenchMark.run] cache not found '{0}'".format(
self._cache_file))
cached = dict(metrics=[], appendix=[], params_list=[])
self.uncache(cached)

# run

self._metrics = []
self._metadata = []
self._appendix = []

meta = dict(level="BenchMark", name=self.Name, nb=len(
params_list), time_begin=datetime.now())
self._metadata.append(meta)

self.fLOG("[BenchMark.run] init {0} do".format(self.Name))
self.init()
self.fLOG("[BenchMark.run] init {0} done".format(self.Name))

self.fLOG("[BenchMark.run] start {0}".format(self.Name))
nb_cached = 0

for i, di in enumerate(params_list):

# check the cache
if i < len(cached["params_list"]) and cached["params_list"][i] == di:
can = True
for k, v in cached.items():
if i >= len(v):
# the cached entry is incomplete and cannot be reused
can = False
break

if can:
# reuse the cached metrics and appendix
self._metrics.append(cached["metrics"][i])
self._appendix.append(cached["appendix"][i])
self.fLOG(
"[BenchMark.run] retrieved cached {0}/{1}: {2}".format(i + 1, len(params_list), di))
nb_cached += 1
continue

# no usable cache entry, run the benchmark
self.fLOG(
"[BenchMark.run] {0}/{1}: {2}".format(i + 1, len(params_list), di))
dt = datetime.now()
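
For reference, the cache handled in this hunk is a plain pickle holding three parallel lists (*metrics*, *appendix*, *params_list*); run *i* is reused only when the parameter dictionary at position *i* is unchanged. A small sketch of inspecting such a file, the path being only an example:

    import pickle

    with open("cache.pickle", "rb") as f:  # example path
        cached = pickle.load(f)

    # three parallel lists, one entry per previous run
    print(len(cached["params_list"]), len(cached["metrics"]), len(cached["appendix"]))

    # a new run i can reuse the cache only if the parameters at position i are identical
    params_list = [dict(value=15)]  # illustrative new parameters
    reused = [i for i, di in enumerate(params_list)
              if i < len(cached["params_list"]) and cached["params_list"][i] == di]
    print(reused)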
@@ -216,6 +282,16 @@ def run(self, params_list):
self.end()
self.fLOG("[BenchMark.run] end {0} done".format(self.Name))
meta["time_end"] = datetime.now()
meta["nb_cached"] = nb_cached

if self._cache_file is not None:
self.fLOG("[BenchMark.run] save cache '{0}'".format(
self._cache_file))
cached = dict(metrics=self._metrics,
appendix=self._appendix, params_list=params_list)
with open(self._cache_file, "wb") as f:
pickle.dump(cached, f)
self.fLOG("[BenchMark.run] done.")

@property
def Metrics(self):
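Putting the two changed files together, the intended usage mirrors the unit test at the top of the commit: running the same benchmark twice with the same *cache_file* should report every run as cached the second time. A sketch, assuming the illustrative DummyBenchMark defined earlier:

    import os
    import random

    cache = os.path.join(".", "cache.pickle")  # example location
    params = [dict(value=random.randint(10, 20)) for i in range(0, 20)]

    bench = DummyBenchMark("TestName", cache_file=cache)
    bench.run(params)            # first run, everything is computed
    df1 = bench.to_df()

    bench = DummyBenchMark("TestName", cache_file=cache)
    bench.run(params)            # second run, results come from the pickle
    meta = bench.Metadata
    print(meta[0]["nb_cached"])  # 20 in the unit test above
    df2 = bench.to_df()
    assert df1.shape == df2.shape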
