forked from datalad/datalad
-
Notifications
You must be signed in to change notification settings - Fork 0
/
common.py
159 lines (134 loc) · 5.23 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Helpers for benchmarks of DataLad"""
import os
import sys
import tarfile
import tempfile
import timeit
import os.path as op
from glob import glob
from datalad.utils import (
getpwd,
get_tempfile_kwargs,
rmtree,
)
from datalad.api import (
Dataset,
create_test_dataset,
)
############
# Monkey patches
# Robust is_interactive. Should not be needed since 0.11.4
# https://github.com/datalad/datalad/pull/3268
def _is_stream_tty(stream):
try:
# TODO: check on windows if hasattr check would work correctly and
# add value:
return stream.isatty()
except ValueError as exc:
# Who knows why it is a ValueError, but let's try to be specific
# If there is a problem with I/O - non-interactive, otherwise reraise
if "I/O" in str(exc):
return False
raise
def is_interactive():
    """Return True if all in/outs are tty"""
    streams = (sys.stdin, sys.stdout, sys.stderr)
    for stream in streams:
        if not _is_stream_tty(stream):
            return False
    return True
class SuprocBenchmarks(object):
    """Base class for benchmarks that spawn subprocesses.

    Responsibilities visible here:

    - provide a timer suitable for subprocess-heavy benchmarks
    - monkey patch older DataLad (< 0.12.1) with a robust
      ``is_interactive`` (ASV may close std streams; see ``__init__``)
    - remove any temporary paths a benchmark registered in
      ``self.remove_paths`` during ``teardown``/``__del__``
    """
    # manually set a number since otherwise takes way too long!
    # see https://github.com/spacetelescope/asv/issues/497
    #number = 3
    # although seems to work ok with a timer which accounts for subprocesses

    # custom timer so we account for subprocess times
    timer = timeit.default_timer

    # class-level guard so the monkey patching in __init__ runs once per process
    _monkey_patched = False

    def __init__(self):
        if not self._monkey_patched:
            # monkey patch things if needed
            # ASV started to close one of the std streams since some point
            # which caused our is_interactive to fail.  We need to provide
            # more robust version
            from datalad.support.external_versions import external_versions
            # comparing to 0.12.1 since the returned version is "loose"
            # so fails correctly identify rc as pre .0
            if external_versions['datalad'] < '0.12.1':
                from datalad import utils
                from datalad.interface import ls
                utils.is_interactive = is_interactive
                ls.is_interactive = is_interactive
            SuprocBenchmarks._monkey_patched = True
        # paths scheduled for removal in _cleanup
        self.remove_paths = []

    def _cleanup(self):
        """Remove every registered path, draining ``self.remove_paths``."""
        if not self.remove_paths:
            return  # Nothing TODO
        self.log("Cleaning up %d paths", len(self.remove_paths))
        while self.remove_paths:
            path = self.remove_paths.pop()
            if op.lexists(path):
                rmtree(path)

    def teardown(self):
        self._cleanup()

    def __del__(self):
        # We will at least try
        try:
            self._cleanup()
        except Exception:
            # FIX: was a bare ``except:`` which also swallowed SystemExit
            # and KeyboardInterrupt; keep the destructor best-effort but
            # only for ordinary exceptions
            pass

    def log(self, msg, *args):
        """Consistent benchmarks logging"""
        print("BM: " + str(msg % tuple(args)))
class SampleSuperDatasetBenchmarks(SuprocBenchmarks):
    """
    Setup a sample hierarchy of datasets to be used
    """
    # generous asv per-benchmark timeout (seconds)
    timeout = 3600
    # need to assure that we are working in a different repository now
    # see https://github.com/datalad/datalad/issues/1512
    # might not be sufficient due to side effects between tests and
    # thus getting into the same situation
    ds_count = 0
    # Creating in CWD so things get removed when ASV is done
    # https://asv.readthedocs.io/en/stable/writing_benchmarks.html
    # that is where it would be run and cleaned up after
    # NOTE(review): the ``tarfile`` attribute below shadows the stdlib module
    # of the same name, but only as a class attribute -- the bare
    # ``tarfile.open(...)`` calls in the methods still resolve to the module
    # through the global scope.  ``setup_cache`` later rebinds ``self.tarfile``
    # to the tarball's absolute path.
    dsname = 'testds1'
    tarfile = 'testds1.tar'
    def setup_cache(self):
        # Generate the sample 2/-2/-2 hierarchy of test datasets once per
        # session and pack it into a tarball that setup() unpacks per run.
        ds_path = create_test_dataset(
            self.dsname
            , spec='2/-2/-2'
            , seed=0
        )[0]
        self.log("Setup cache ds path %s. CWD: %s", ds_path, getpwd())
        # Will store into a tarfile since otherwise install -r is way too slow
        # to be invoked for every benchmark
        # Store full path since apparently setup is not ran in that directory
        self.tarfile = op.realpath(SampleSuperDatasetBenchmarks.tarfile)
        with tarfile.open(self.tarfile, "w") as tar:
            # F.CK -- Python tarfile can't later extract those because key dirs are
            # read-only. For now just a workaround - make it all writeable
            from datalad.utils import rotree
            rotree(self.dsname, ro=False, chmod_files=False)
            tar.add(self.dsname, recursive=True)
        # the original hierarchy is no longer needed once tarred up
        rmtree(self.dsname)
    def setup(self):
        # Unpack the cached tarball into a fresh temporary directory for each
        # benchmark run and expose the dataset as self.ds / self.repo.
        self.log("Setup ran in %s, existing paths: %s", getpwd(), glob('*'))
        tempdir = tempfile.mkdtemp(
            **get_tempfile_kwargs({}, prefix="bm")
        )
        # registered here so the inherited teardown()/_cleanup() removes it
        self.remove_paths.append(tempdir)
        with tarfile.open(self.tarfile) as tar:
            tar.extractall(tempdir)
        # TODO -- remove this abomination after https://github.com/datalad/datalad/issues/1512 is fixed
        # rename to a per-run unique path so each benchmark gets a distinct
        # repository; the class-level counter makes the suffix unique
        epath = op.join(tempdir, 'testds1')
        epath_unique = epath + str(self.__class__.ds_count)
        os.rename(epath, epath_unique)
        self.__class__.ds_count += 1
        self.ds = Dataset(epath_unique)
        self.repo = self.ds.repo
        self.log("Finished setup for %s", tempdir)