From e6909400a6d7ffa73e8e6cc4c770340b0dff25a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 12 Oct 2021 00:39:58 +0200 Subject: [PATCH 1/8] Implements new rendring for a profiling --- _unittests/ut_pycode/test_profiling.py | 84 +++++- src/pyquickhelper/pycode/_pylint_common.py | 2 +- src/pyquickhelper/pycode/profiling.py | 284 +++++++++++++++++- src/pyquickhelper/pycode/unittestclass.py | 3 +- .../server/documentation_server.py | 2 +- 5 files changed, 360 insertions(+), 15 deletions(-) diff --git a/_unittests/ut_pycode/test_profiling.py b/_unittests/ut_pycode/test_profiling.py index 58b8311d..bf6f0c94 100644 --- a/_unittests/ut_pycode/test_profiling.py +++ b/_unittests/ut_pycode/test_profiling.py @@ -5,11 +5,14 @@ import os import unittest import warnings +import time +from pstats import SortKey import pandas from pyquickhelper.pycode import ExtTestCase from pyquickhelper.pandashelper import df2rst from pyquickhelper import __file__ as rootfile -from pyquickhelper.pycode.profiling import profile, profile2df +from pyquickhelper.pycode.profiling import ( + profile, profile2df, profile2graph, ProfileNode) class TestProfiling(ExtTestCase): @@ -48,7 +51,7 @@ def simple2(): ps, df = profile(simple, rootrem=rootrem, as_df=True) # pylint: disable=W0632 self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(df.loc[0, 'namefct'].split('-')[-1], 'simple2') + self.assertEqual(df.loc[0, 'namefct'].split('-')[-1], 'simple') self.assertNotEmpty(ps) df = profile2df(ps, False) self.assertIsInstance(df, list) @@ -56,6 +59,38 @@ def simple2(): df = profile2df(ps, True) self.assertIsInstance(df, pandas.DataFrame) + def test_profile_df_verbose(self): + calls = [0] + + def f0(t): + calls[0] += 1 + time.sleep(t) + + def f1(t): + calls[0] += 1 + time.sleep(t) + + def f2(): + calls[0] += 1 + f1(0.1) + f1(0.01) + + def f3(): + calls[0] += 1 + f0(0.2) + f1(0.5) + + def f4(): + calls[0] += 1 + f2() + f3() + + ps = profile(f4)[0] # pylint: disable=W0632 + df = self.capture(lambda: profile2df(ps, verbose=True))[0] + dfi = df.set_index('fct') + self.assertEqual(dfi.loc['f4', 'ncalls1'], 1) + self.assertEqual(dfi.loc['f4', 'ncalls2'], 1) + def test_profile_pyinst(self): def simple(): df = pandas.DataFrame([{"A": "x", "AA": "xx", "AAA": "xxx"}, @@ -83,6 +118,51 @@ def simple(): self.assertIn('"start_time"', res) self.assertNotEmpty(ps) + def test_profile_graph(self): + calls = [0] + + def f0(t): + calls[0] += 1 + time.sleep(t) + + def f1(t): + calls[0] += 1 + time.sleep(t) + + def f2(): + calls[0] += 1 + f1(0.1) + f1(0.01) + + def f3(): + calls[0] += 1 + f0(0.2) + f1(0.5) + + def f4(): + calls[0] += 1 + f2() + f3() + + ps = profile(f4)[0] # pylint: disable=W0632 + profile2df(ps, verbose=False, clean_text=lambda x: x.split('/')[-1]) + root, nodes = profile2graph(ps, clean_text=lambda x: x.split('/')[-1]) + self.assertEqual(len(nodes), 6) + self.assertIsInstance(nodes, dict) + self.assertIsInstance(root, ProfileNode) + self.assertIn("(", str(root)) + dicts = root.as_dict() + self.assertEqual(10, len(dicts)) + text = root.to_text() + self.assertIn("1 1", text) + self.assertIn(' f1', text) + text = root.to_text(fct_width=20) + self.assertIn('...', text) + root.to_text(sort_key=SortKey.CUMULATIVE) + root.to_text(sort_key=SortKey.TIME) + self.assertRaise(lambda: root.to_text(sort_key=SortKey.NAME), + NotImplementedError) + if __name__ == "__main__": unittest.main() diff --git a/src/pyquickhelper/pycode/_pylint_common.py b/src/pyquickhelper/pycode/_pylint_common.py index e8165f34..949afc5d 100644 --- a/src/pyquickhelper/pycode/_pylint_common.py +++ b/src/pyquickhelper/pycode/_pylint_common.py @@ -30,7 +30,7 @@ def _private_test_style_src(fLOG, run_lint, verbose=False, pattern=".*[.]py$"): 'W0143', 'W0107', 'C0415', 'W1202', 'W0707', 'R1725', 'R1732', 'W1514', 'C0123', 'C0208', 'W1514', 'R1735', - 'R1734', 'C0209'), + 'R1734', 'C0209', 'W0602'), skip=["windows_scripts.py", "Redefining built-in 'format'", "bokeh_plot.py", diff --git a/src/pyquickhelper/pycode/profiling.py b/src/pyquickhelper/pycode/profiling.py index db916863..d0f37b92 100644 --- a/src/pyquickhelper/pycode/profiling.py +++ b/src/pyquickhelper/pycode/profiling.py @@ -5,23 +5,218 @@ import os from io import StringIO import cProfile -import pstats +from pstats import SortKey, Stats import site +from collections import deque -def _process_pstats(ps, clean_text): +class ProfileNode: + """ + Graph structure to represent a profiling. + + :param filename: filename + :param line: line number + :param func_name: function name + :param nc1: number of calls 1 + :param nc2: number of calls 2 + :param tin: time spent in the function + :param tout: time spent in the function and in the sub functions + + .. versionadded:: 1.11 + """ + + def __init__(self, filename, line, func_name, nc1, nc2, tin, tall): + if "method 'disable' of '_lsprof.Profiler'" in func_name: + raise RuntimeError( + "Function not allowed in the profiling: %r." % func_name) + self.filename = filename + self.line = line + self.func_name = func_name + self.nc1 = nc1 + self.nc2 = nc2 + self.tin = tin + self.tall = tall + self.called_by = [] + self.calls_to = [] + self.calls_to_elements = [] + + def add_called_by(self, pnode): + "This function is called by these lines." + self.called_by.append(pnode) + + def add_calls_to(self, pnode, time_elements): + "This function calls this node." + self.calls_to.append(pnode) + self.calls_to_elements.append(time_elements) + + @staticmethod + def _key(filename, line, fct): + key = "%s:%d" % (filename, line) + if key == "~:0": + key += ":%s" % fct + return key + + @property + def key(self): + "Returns `file:line`." + return ProfileNode._key(self.filename, self.line, + self.func_name) + + def get_root(self): + "Returns the root of the graph." + node = self + while len(node.called_by) > 0: + node = node.called_by[0].get_root() + return node + + def __repr__(self): + "usual" + return "%s(%r, %r, %r, %r, %r, %r, %r) # %d-%d" % ( + self.__class__.__name__, + self.filename, self.line, self.func_name, + self.nc1, self.nc2, self.tin, self.tall, + len(self.called_by), len(self.calls_to)) + + def __iter__(self): + "Returns all nodes in the graph." + done = set() + stack = deque() + stack.append(self) + while len(stack) > 0: + node = stack.popleft() + if node.key in done: + continue + yield node + done.add(node.key) + stack.extend(node.calls_to) + + def as_dict(self, filter_node=None, sort_key=SortKey.LINE): + """ + Renders the results of a profiling interpreted with + function @fn profile2graph. It can then be loaded with + a dataframe. + + :param filter_node: display only the nodes for which + this function returns True + :param sort_key: sort sub nodes by... + :return: rows + """ + def sort_key_line(dr): + return dr[0].line + + def sort_key_tin(dr): + return -dr[1][2] + + def sort_key_tall(dr): + return -dr[1][3] + + if sort_key == SortKey.LINE: + sortk = sort_key_line + elif sort_key == SortKey.CUMULATIVE: + sortk = sort_key_tall + elif sort_key == SortKey.TIME: + sortk = sort_key_tin + else: + raise NotImplementedError( + "Unable to sort subcalls with this key %r." % sort_key) + + SortKey.CUMULATIVE + + def depth_first(node, roots_keys, indent=0): + text = {'fct': node.func_name, 'where': node.key, + 'nc1': node.nc1, 'nc2': node.nc2, 'tin': node.tin, + 'tall': node.tall, 'indent': indent, + 'ncalls': len(node.calls_to), 'debug': 'A'} + yield text + for n, nel in sorted(zip(node.calls_to, + node.calls_to_elements), + key=sortk): + if filter_node is not None and not filter_node(n): + continue + if n.key in roots_keys: + text = {'fct': n.func_name, 'where': n.key, + 'nc1': nel[0], 'nc2': nel[1], 'tin': nel[2], + 'tall': nel[3], 'indent': indent + 1, + 'ncalls': len(n.calls_to), 'more': '+', + 'debug': 'B'} + yield text + else: + for t in depth_first(n, roots_keys, indent + 1): + yield t + + nodes = list(self) + roots = [node for node in nodes if len(node.called_by) != 1] + roots_key = {r.key: r for r in roots} + rows = [] + for root in roots: + if filter_node is not None and not filter_node(root): + continue + rows.extend(depth_first(root, roots_key)) + return rows + + def to_text(self, filter_node=None, sort_key=SortKey.LINE, + fct_width=50): + """ + Prints the profiling to text. + + :param filter_node: display only the nodes for which + this function returns True + :param sort_key: sort sub nodes by... + :return: rows + """ + def align_text(text, size): + if size <= 0: + return text + if len(text) <= size: + return text + " " * (size - len(text)) + h = size // 2 - 1 + return text[:h] + "..." + text[-h + 1:] + + dicts = self.as_dict(filter_node=filter_node, sort_key=sort_key) + text = [] + for row in dicts: + line = ("{indent}{fct} -- {nc1: 3d} {nc2: 3d} -- {tin:1.4f} {tall:1.4f}" + " -- {name} ({fct2})").format( + indent=" " * (row['indent'] * 4), + fct=align_text(row['fct'], fct_width - row['indent'] * 4), + nc1=row['nc1'], nc2=row['nc2'], tin=row['tin'], + tall=row['tall'], name=row['where'], + fct2=row['fct']) + if row.get('more', '') == '+': + line += " +++" + if row['fct'] == 'f0': + raise AssertionError("Unexpected %r." % row) + text.append(line) + return "\n".join(text) + + +def _process_pstats(ps, clean_text=None, verbose=False): """ Converts class `Stats `_ into something readable for a dataframe. + + :param ps: instance of type :func:`pstats.Stats` + :param clean_text: function to clean function names + :param verbose: change verbosity + :return: list of rows """ + if clean_text is None: + clean_text = lambda x: x + def add_rows(rows, d): tt1, tt2 = 0, 0 for k, v in d.items(): stin = 0 stall = 0 + if verbose: # pragma: no cover + print("[pstats] %s=%r" % ( + (clean_text(k[0].replace("\\", "/")), ) + k[1:], + v)) + if len(v) < 5: + continue row = { - 'file': "%s:%d" % (clean_text(k[0]), k[1]), + 'file': "%s:%d" % (clean_text(k[0].replace("\\", "/")), k[1]), 'fct': k[2], 'ncalls1': v[0], 'ncalls2': v[1], @@ -46,7 +241,7 @@ def add_rows(rows, d): return rows -def profile2df(ps, as_df=True): +def profile2df(ps, as_df=True, clean_text=None, verbose=False): """ Converts profiling statistics into a Dataframe. @@ -54,6 +249,8 @@ def profile2df(ps, as_df=True): `_ :param as_df: returns the results as a dataframe (True) or a list of dictionaries (False) + :param clean_text: function to clean function names + :param verbose: verbosity :return: a DataFrame :: @@ -61,12 +258,11 @@ def profile2df(ps, as_df=True): import pstats from pyquickhelper.pycode.profiling import profile2df - ps = pstats.Stats('c:/temp/bench_ortmodule_nn_gpu6') - print(ps.strip_dirs().sort_stats(SortKey.TIME).print_stats()) + ps = pstats.Stats('bench_ortmodule_nn_gpu6.prof') df = profile2df(pd) print(df) """ - rows = _process_pstats(ps, lambda x: x) + rows = _process_pstats(ps, clean_text, verbose=verbose) if not as_df: return rows @@ -124,7 +320,7 @@ def fctm(): fct_res = fct() pr.disable() s = StringIO() - ps = pstats.Stats(pr, stream=s).sort_stats(sort) + ps = Stats(pr, stream=s).sort_stats(sort) ps.print_stats() res = s.getvalue() try: @@ -150,7 +346,8 @@ def clean_text(res): res = res.replace(sub[0], sub[1]) else: raise TypeError( - "rootrem must contains strings or tuple not {0}".format(rootrem)) + "rootrem must contains strings or tuple not {0}" + ".".format(rootrem)) return res if as_df: @@ -212,3 +409,72 @@ def better_name(row): return fct_res, profiler, profiler.output_html() return profiler, profiler.output_html() raise ValueError("Unknown format '{}'.".format(pyinst_format)) + + +def profile2graph(ps, clean_text=None, verbose=False): + """ + Converts profiling statistics into a graphs. + + :param ps: an instance of `pstats + `_ + :param clean_text: function to clean function names + :param verbose: verbosity + :return: a DataFrame + + :: + + import pstats + from pyquickhelper.pycode.profiling import profile2graph + + ps = pstats.Stats('bench_ortmodule_nn_gpu6.prof') + gr = profile2graph(pd) + print(df) + + .. versionadded:: 1.11 + """ + if clean_text is None: + clean_text = lambda x: x + + nodes = {} + for k, v in ps.stats.items(): + if verbose: + print("[pstats] %s=%r" % (k, v)) + if len(v) < 5: + continue + if k[0] == '~' and len(v) == 0: + # raw function never called by another + continue + if "method 'disable' of '_lsprof.Profiler'" in k[2]: + continue + node = ProfileNode( + filename=clean_text(k[0].replace("\\", "/")), + line=k[1], func_name=k[2], + nc1=v[0], nc2=v[1], tin=v[2], tall=v[3]) + nodes[node.key] = node + + for k, v in ps.stats.items(): + if "method 'disable' of '_lsprof.Profiler'" in k[2]: + continue + filename = clean_text(k[0].replace("\\", "/")) + ks = ProfileNode._key(filename, k[1], k[2]) + node = nodes[ks] + sublist = v[4] + for f, vv in sublist.items(): + if "method 'disable' of '_lsprof.Profiler'" in f[2]: + continue + name = clean_text(f[0].replace("\\", "/")) + key = ProfileNode._key(name, f[1], f[2]) + if key not in nodes: + raise RuntimeError( + "Unable to find key %r into\n%s" % ( + key, "\n".join(sorted(nodes)))) + if k[0] == '~' and len(v) == 0: + continue + child = nodes[key] + node.add_called_by(child) + child.add_calls_to(node, vv) + + for k, v in nodes.items(): + root = v.get_root() + break + return root, nodes diff --git a/src/pyquickhelper/pycode/unittestclass.py b/src/pyquickhelper/pycode/unittestclass.py index 19428ffc..05fa0ad1 100644 --- a/src/pyquickhelper/pycode/unittestclass.py +++ b/src/pyquickhelper/pycode/unittestclass.py @@ -427,8 +427,7 @@ def profile(fct, sort='cumulative', rootrem=None, :func:`profile `. :param fct: function to profile - :param sort: see `sort_stats < - https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats>`_ + :param sort: see :meth:`pstats.Stats.sort_stats` :param rootrem: root to remove in filenames :param return_results: return the results as well :return: statistics text dump diff --git a/src/pyquickhelper/server/documentation_server.py b/src/pyquickhelper/server/documentation_server.py index 3072ae8c..dc87e84c 100644 --- a/src/pyquickhelper/server/documentation_server.py +++ b/src/pyquickhelper/server/documentation_server.py @@ -352,7 +352,7 @@ def serve_content(self, cpath, method="GET"): @param cpath ParseResult @param method GET or POST """ - if cpath.path == "" or cpath.path == "/": # pragma: no cover + if cpath.path in ("", "/"): # pragma: no cover params = parse_qs(cpath.query) self.serve_main_page() else: From 14182e25e4fc45eb14f36f13473c8f2c51365fd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 12 Oct 2021 02:29:46 +0200 Subject: [PATCH 2/8] documentation --- _doc/sphinxdoc/source/index.rst | 2 +- src/pyquickhelper/pycode/profiling.py | 48 +++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/_doc/sphinxdoc/source/index.rst b/_doc/sphinxdoc/source/index.rst index 844c4a9b..6c4bf4fe 100644 --- a/_doc/sphinxdoc/source/index.rst +++ b/_doc/sphinxdoc/source/index.rst @@ -7,7 +7,7 @@ pyquickhelper: various automations in python :alt: Build Status Linux .. image:: https://ci.appveyor.com/api/projects/status/t2g9olcgqgdvqq3l?svg=true - :target: https://app.travis-ci.com/github/sdpython/pyquickhelper/ + :target: https://ci.appveyor.com/project/sdpython/pyquickhelper :alt: Build Status Windows .. image:: https://circleci.com/gh/sdpython/pyquickhelper/tree/master.svg?style=svg diff --git a/src/pyquickhelper/pycode/profiling.py b/src/pyquickhelper/pycode/profiling.py index d0f37b92..24f0fbbe 100644 --- a/src/pyquickhelper/pycode/profiling.py +++ b/src/pyquickhelper/pycode/profiling.py @@ -419,16 +419,50 @@ def profile2graph(ps, clean_text=None, verbose=False): `_ :param clean_text: function to clean function names :param verbose: verbosity - :return: a DataFrame + :return: an instance of class @see cl ProfileNode - :: + .. exref:: + :title: Hierarchical display for a profiling - import pstats - from pyquickhelper.pycode.profiling import profile2graph + :epkg:`pyinstrument` has a nice display to show + time spent and call stack at the same time. This function + tries to replicate that display based on the results produced + by module :mod:`cProfile`. Here is an example. - ps = pstats.Stats('bench_ortmodule_nn_gpu6.prof') - gr = profile2graph(pd) - print(df) + .. runpython:: + :showcode: + + import time + from pyquickhelper.pycode.profiling import profile, profile2graph + + + def fct0(t): + time.sleep(t) + + + def fct1(t): + time.sleep(t) + + + def fct2(): + fct1(0.1) + fct1(0.01) + + + def fct3(): + fct0(0.2) + fct1(0.5) + + + def fct4(): + fct2() + fct3() + + + ps = profile(fct4)[0] + root, nodes = profile2graph(ps, clean_text=lambda x: x.split('/')[-1]) + text = root.to_text() + print(text) .. versionadded:: 1.11 """ From d34f96158c1cb4666292389ba4d000807484bbcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 12 Oct 2021 10:26:50 +0200 Subject: [PATCH 3/8] fix issue with latest pip --- _unittests/ut_pycode/test_pip_helper.py | 39 +++++++------ _unittests/ut_pycode/test_pip_helper2.py | 50 ---------------- src/pyquickhelper/pycode/pip_helper.py | 73 +++++++++++++++++++++++- 3 files changed, 94 insertions(+), 68 deletions(-) delete mode 100644 _unittests/ut_pycode/test_pip_helper2.py diff --git a/_unittests/ut_pycode/test_pip_helper.py b/_unittests/ut_pycode/test_pip_helper.py index 79befbdb..959445bc 100644 --- a/_unittests/ut_pycode/test_pip_helper.py +++ b/_unittests/ut_pycode/test_pip_helper.py @@ -1,37 +1,23 @@ """ @brief test tree node (time=2s) """ - -import sys -import os import unittest import pandas - -from pyquickhelper.loghelper import fLOG -from pyquickhelper.pycode.pip_helper import get_packages_list, package2dict from pyquickhelper.pycode import ExtTestCase -from pyquickhelper.pycode.pip_helper import PQPipError +from pyquickhelper.pycode.pip_helper import ( + get_packages_list, package2dict, get_package_info, + PQPipError) class TestPipHelper(ExtTestCase): def test_exc(self): - fLOG( - __file__, - self._testMethodName, - OutputPrint=__name__ == "__main__") - exc = PQPipError('cmd', 'out', 'err') msg = str(exc) self.assertEqual([msg.replace('\n', '')], [ 'CMD:cmdOUT:out[piperror]err']) def test_pip_list(self): - fLOG( - __file__, - self._testMethodName, - OutputPrint=__name__ == "__main__") - li = get_packages_list() dt = package2dict(li[0]) avoid = {'py_version'} @@ -43,6 +29,25 @@ def test_pip_list(self): self.assertEmpty(empty) self.assertNotEmpty(li) + def test_pip_show(self): + info = get_package_info("pandas") + if "version" not in str(info): + raise AssertionError(str(info)) + + info = get_package_info("sphinx") + if "version" not in str(info): + raise Exception(str(info)) + + def test_pip_show_all(self): + info = get_package_info(start=0, end=2) + df = pandas.DataFrame(info) + self.assertNotEmpty(info) + + if __name__ == "__main__": + info = get_package_info() + df = pandas.DataFrame(info) + df.to_excel("out_packages.xlsx") + if __name__ == "__main__": unittest.main() diff --git a/_unittests/ut_pycode/test_pip_helper2.py b/_unittests/ut_pycode/test_pip_helper2.py deleted file mode 100644 index 541e48b4..00000000 --- a/_unittests/ut_pycode/test_pip_helper2.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -@brief test tree node (time=2s) -""" - -import sys -import os -import unittest -import pandas - -from pyquickhelper.loghelper import fLOG -from pyquickhelper.pycode.pip_helper import get_package_info -from pyquickhelper.pycode import ExtTestCase - - -class TestPipHelper2(ExtTestCase): - - def test_pip_show(self): - fLOG( - __file__, - self._testMethodName, - OutputPrint=__name__ == "__main__") - - info = get_package_info("pandas") - # if "license" not in info: - # raise Exception(str(info)) - if "version" not in str(info): - raise Exception(str(info)) - - info = get_package_info("sphinx") - if "version" not in str(info): - raise Exception(str(info)) - - def test_pip_show_all(self): - fLOG( - __file__, - self._testMethodName, - OutputPrint=__name__ == "__main__") - - info = get_package_info(start=0, end=2) - df = pandas.DataFrame(info) - self.assertNotEmpty(info) - - if __name__ == "__mahin__": - info = get_package_info() - df = pandas.DataFrame(info) - df.to_excel("out_packages.xlsx") - - -if __name__ == "__main__": - unittest.main() diff --git a/src/pyquickhelper/pycode/pip_helper.py b/src/pyquickhelper/pycode/pip_helper.py index 03db127f..e21968f1 100644 --- a/src/pyquickhelper/pycode/pip_helper.py +++ b/src/pyquickhelper/pycode/pip_helper.py @@ -26,11 +26,82 @@ def __init__(self, *args): Exception.__init__(self, mes) +class Distribution: + """ + Common interface for old and recent pip packages. + + .. versionadded:: 1.5 + """ + + def __init__(self, dist): + self.dist = dist + + def __getattr__(self, attr): + if attr == 'key': + if hasattr(self.__dict__['dist'], 'key'): + return self.__dict__['dist'].key + return self.__dict__['dist'].canonical_name + if attr == 'dist': + return self.__dict__['dist'] + if attr in {'_get_metadata', 'requires', 'PKG_INFO', 'project_name', + 'py_version', 'platform', 'extras'}: + if hasattr(self.__dict__['dist'], attr): + return getattr(self.__dict__['dist'], attr) + return getattr(self.__dict__['dist']._dist, attr) + return getattr(self.__dict__['dist'], attr) + + +def get_installed_distributions(local_only=True, skip=None, + include_editables=True, editables_only=False, + user_only=False, use_cmd=False): + """ + Directs call to function *get_installed_distributions* from :epkg:`pip`. + + Return a list of installed Distribution objects. + + :param local_only: if True (default), only return installations + local to the current virtualenv, if in a virtualenv. + :param skip: argument is an iterable of lower-case project names to + ignore; defaults to ``pip.compat.stdlib_pkgs`` (if *skip* is None) + :param editables: if False, don't report editables. + :param editables_only: if True , only report editables. + :param user_only: if True , only report installations in the user + site directory. + :param use_cmd: if True, use a different process (updated package list) + :return: list of installed Distribution objects. + + .. versionadded:: 1.5 + """ + if use_cmd: + raise NotImplementedError("use_cmd should be False") + if skip is None: + try: + from pip._internal.utils.compat import stdlib_pkgs + skip = stdlib_pkgs + except ImportError: + pass + try: + from pip._internal.metadata import get_default_environment + return list(map(Distribution, + get_default_environment().iter_installed_distributions( + local_only=local_only, skip=skip, + include_editables=include_editables, + editables_only=editables_only, + user_only=user_only))) + + except ImportError: + from pip._internal.utils.misc import get_installed_distributions as getd + return list(map(Distribution, getd( + local_only=local_only, skip=skip, + include_editables=include_editables, + editables_only=editables_only, + user_only=user_only, use_cmd=use_cmd))) + + def get_packages_list(): """ calls ``pip list`` to retrieve the list of packages """ - from pip._internal.utils.misc import get_installed_distributions return get_installed_distributions(local_only=True) From a3f65004b83ab43597e7b02145181edc78e76b2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 12 Oct 2021 11:26:58 +0200 Subject: [PATCH 4/8] add cli --- _unittests/ut_cli/test_cli_profile.py | 120 ++++++++++++++++++++++++- _unittests/ut_pycode/test_profiling.py | 3 +- src/pyquickhelper/__main__.py | 4 +- src/pyquickhelper/cli/profile_cli.py | 111 +++++++++++++++++++++++ src/pyquickhelper/pycode/profiling.py | 19 ++-- 5 files changed, 243 insertions(+), 14 deletions(-) create mode 100644 src/pyquickhelper/cli/profile_cli.py diff --git a/_unittests/ut_cli/test_cli_profile.py b/_unittests/ut_cli/test_cli_profile.py index f416a527..37878655 100644 --- a/_unittests/ut_cli/test_cli_profile.py +++ b/_unittests/ut_cli/test_cli_profile.py @@ -1,15 +1,13 @@ """ @brief test tree node (time=7s) """ - - import os import unittest -from io import StringIO - +import time from pyquickhelper.loghelper import fLOG, BufferedPrint from pyquickhelper.pycode import ExtTestCase from pyquickhelper.__main__ import main +from pyquickhelper.pycode.profiling import profile def to_profile(args): @@ -25,6 +23,120 @@ def test_profile(self): prof = self.profile(lambda: to_profile(["clean_files", "--help"]))[1] self.assertNotIn("bokeh", prof.lower()) + def test_profile_stat_help(self): + st = BufferedPrint() + main(args=['profile_stat', '--help'], fLOG=st.fprint) + res = str(st) + self.assertIn("usage: profile_stat", res) + + def test_profile_stat(self): + + def f0(t): + time.sleep(t) + + def f1(t): + time.sleep(t) + + def f2(): + f1(0.1) + f1(0.01) + + def f3(): + f0(0.2) + f1(0.5) + + def f4(): + f2() + f3() + + ps = profile(f4)[0] # pylint: disable=W0632 + ps.dump_stats("temp_stat.prof") + + with self.subTest(calls=False, output=None): + st = BufferedPrint() + main(args=['profile_stat', '-f', "temp_stat.prof", + '--calls', '0'], fLOG=st.fprint) + self.assertIn('percall', str(st)) + + with self.subTest(calls=False, output="txt"): + st = BufferedPrint() + main(args=['profile_stat', '-f', "temp_stat.prof", + '--calls', '0', '-o', 'temp_output.txt'], fLOG=st.fprint) + with open("temp_output.txt", "r", encoding='utf-8') as f: + content = f.read() + self.assertIn('percall', str(st)) + self.assertIn('percall', content) + + with self.subTest(calls=False, output='csv'): + st = BufferedPrint() + main(args=['profile_stat', '-f', "temp_stat.prof", + '--calls', '0', '-o', 'temp_output.csv'], fLOG=st.fprint) + with open("temp_output.csv", "r", encoding='utf-8') as f: + content = f.read() + self.assertIn('percall', str(st)) + self.assertIn('ncalls1,', content) + + with self.subTest(calls=False, output='xlsx'): + st = BufferedPrint() + main(args=['profile_stat', '-f', "temp_stat.prof", + '--calls', '0', '-o', 'temp_output.xlsx'], fLOG=st.fprint) + self.assertExists('temp_output.xlsx') + self.assertIn('percall', str(st)) + + def test_profile_stat_gr(self): + + def f0(t): + time.sleep(t) + + def f1(t): + time.sleep(t) + + def f2(): + f1(0.1) + f1(0.01) + + def f3(): + f0(0.2) + f1(0.5) + + def f4(): + f2() + f3() + + ps = profile(f4)[0] # pylint: disable=W0632 + ps.dump_stats("temp_gr_stat.prof") + + with self.subTest(calls=False, output=None): + st = BufferedPrint() + main(args=['profile_stat', '-f', "temp_gr_stat.prof", + '--calls', '1'], fLOG=st.fprint) + self.assertIn('+++', str(st)) + + with self.subTest(calls=False, output="txt"): + st = BufferedPrint() + main(args=['profile_stat', '-f', "temp_gr_stat.prof", + '--calls', '1', '-o', 'temp_gr_output.txt'], fLOG=st.fprint) + with open("temp_gr_output.txt", "r", encoding='utf-8') as f: + content = f.read() + self.assertIn('+++', str(st)) + self.assertIn('+++', content) + + with self.subTest(calls=False, output='csv'): + st = BufferedPrint() + main(args=['profile_stat', '-f', "temp_gr_stat.prof", + '--calls', '1', '-o', 'temp_gr_output.csv'], fLOG=st.fprint) + with open("temp_gr_output.csv", "r", encoding='utf-8') as f: + content = f.read() + self.assertIn('+++', str(st)) + self.assertIn(',+', content) + + with self.subTest(calls=False, output='xlsx'): + st = BufferedPrint() + main(args=['profile_stat', '-f', "temp_gr_stat.prof", + '--calls', '1', '-o', 'temp_gr_output.xlsx'], fLOG=st.fprint) + self.assertIn('+++', str(st)) + self.assertExists('temp_gr_output.xlsx') + if __name__ == "__main__": unittest.main() diff --git a/_unittests/ut_pycode/test_profiling.py b/_unittests/ut_pycode/test_profiling.py index bf6f0c94..94b18f17 100644 --- a/_unittests/ut_pycode/test_profiling.py +++ b/_unittests/ut_pycode/test_profiling.py @@ -86,7 +86,8 @@ def f4(): f3() ps = profile(f4)[0] # pylint: disable=W0632 - df = self.capture(lambda: profile2df(ps, verbose=True))[0] + df = self.capture( + lambda: profile2df(ps, verbose=True, fLOG=print))[0] dfi = df.set_index('fct') self.assertEqual(dfi.loc['f4', 'ncalls1'], 1) self.assertEqual(dfi.loc['f4', 'ncalls2'], 1) diff --git a/src/pyquickhelper/__main__.py b/src/pyquickhelper/__main__.py index f9d9dc61..5685b38d 100644 --- a/src/pyquickhelper/__main__.py +++ b/src/pyquickhelper/__main__.py @@ -30,6 +30,7 @@ def main(args, fLOG=print): from .loghelper import set_password from .filehelper.download_urls_helper import download_urls_in_folder_content from .cli.uvicorn_cli import uvicorn_app + from .cli.profile_cli import profile_stat except ImportError: # pragma: no cover from pyquickhelper.cli.pyq_sync_cli import pyq_sync from pyquickhelper.cli.encryption_file_cli import encrypt_file, decrypt_file @@ -47,6 +48,7 @@ def main(args, fLOG=print): from pyquickhelper.loghelper import set_password from pyquickhelper.filehelper.download_urls_helper import download_urls_in_folder_content from pyquickhelper.cli.uvicorn_cli import uvicorn_app + from pyquickhelper.cli.profile_cli import profile_stat fcts = dict(synchronize_folder=pyq_sync, encrypt_file=encrypt_file, decrypt_file=decrypt_file, encrypt=encrypt, @@ -59,7 +61,7 @@ def main(args, fLOG=print): repeat_script=repeat_script, ftp_upload=ftp_upload, set_password=set_password, download_urls_in_folder_content=download_urls_in_folder_content, - uvicorn_app=uvicorn_app) + uvicorn_app=uvicorn_app, profile_stat=profile_stat) return cli_main_helper(fcts, args=args, fLOG=fLOG) diff --git a/src/pyquickhelper/cli/profile_cli.py b/src/pyquickhelper/cli/profile_cli.py new file mode 100644 index 00000000..05937ede --- /dev/null +++ b/src/pyquickhelper/cli/profile_cli.py @@ -0,0 +1,111 @@ +""" +@file +@brief Command line about transfering files. +""" +import os +import io +from pstats import Stats, SortKey + + +def profile_stat(file_stat, output=None, calls=True, verbose=False, + clean_prefixes="", sort_key='line', + fct_width=50, fLOG=print): + """ + Analyses the output of a profiling measured by module + :mod:`cProfile`. + + :param file_stat: filename, profiling statistics + :param output: output file, the extension determines the format, + `.txt` for a text output, `.csv` for a comma separated value, + `.xlsx` for excel output + :param calls: flat output (False) or hierchical output (True), + the hierarchical output shows the call stack + :param clean_prefixes: prefixes to clean from the output, + separated by `;` + :param sort_key: `line` or `cumulative` or `time` (if calls is True) + :param fct_width: number of character dedicatedd to the function name + (if calls is True) + :param verbose: more verbosity + :param fLOG: logging function + :return: status + + .. cmdref:: + :title: Analyses profiling results produced by module cProfile + :cmd: -m pyquickhelper profile_stat --help + + The command line produces a flat output like method `print_stats` + or a hierchical output showing function calls. + """ + from ..pycode.profiling import profile2graph, profile2df + prefixes = '' if clean_prefixes is None else clean_prefixes.split(';') + verbose = verbose in (True, 'True', '1', 1) + calls = calls in (True, 'True', '1', 1) + if sort_key == 'line': + sort_key = SortKey.LINE + elif sort_key == 'cumulative': + sort_key = SortKey.CUMULATIVE + elif sort_key == 'line': + sort_key = SortKey.TIME + else: + raise ValueError( + "Unexpected value for sort_key=%r." % sort_key) + + def clean_text(text): + for pref in prefixes: + text = text.replace(pref, '') + return text + + if calls: + stats = Stats(file_stat) + fct_width = int(fct_width) + gr = profile2graph(stats, clean_text=clean_text, + verbose=verbose) + if output is None: + fLOG(gr[0].to_text(fct_width=fct_width, sort_key=sort_key)) + res = None + else: + ext = os.path.splitext(output)[-1] + text = gr[0].to_text(fct_width=fct_width, sort_key=sort_key) + if ext == '.txt': + with open(output, 'w', encoding='utf-8') as f: + f.write(text) + res = text + elif ext in {'.csv', '.xlsx'}: + import pandas + dicts = gr[0].as_dict(sort_key=sort_key) + df = pandas.DataFrame(dicts) + if ext == '.csv': + df.to_csv(output, index=False) + else: + df.to_excel(output, index=False) + res = text + else: + raise ValueError( + "Unexpected file extension %r." % output) + else: + st = io.StringIO() + stats = Stats(file_stat, stream=st) + stats.strip_dirs().sort_stats(sort_key).print_stats() + text = st.getvalue() + if output is None: + fLOG(text) + res = None + else: + df = profile2df(stats, clean_text=clean_text, verbose=verbose, + fLOG=print) + ext = os.path.splitext(output)[-1] + if ext == '.txt': + with open(output, 'w', encoding='utf-8') as f: + f.write(text) + res = text + elif ext in {'.csv', '.xlsx'}: + df = profile2df(stats, clean_text=clean_text, as_df=True) + if ext == '.csv': + df.to_csv(output, index=False) + else: + df.to_excel(output, index=False) + res = text + else: + raise ValueError( + "Unexpected file extension %r." % output) + return res diff --git a/src/pyquickhelper/pycode/profiling.py b/src/pyquickhelper/pycode/profiling.py index 24f0fbbe..d70c9857 100644 --- a/src/pyquickhelper/pycode/profiling.py +++ b/src/pyquickhelper/pycode/profiling.py @@ -190,7 +190,7 @@ def align_text(text, size): return "\n".join(text) -def _process_pstats(ps, clean_text=None, verbose=False): +def _process_pstats(ps, clean_text=None, verbose=False, fLOG=None): """ Converts class `Stats `_ into something @@ -199,6 +199,7 @@ def _process_pstats(ps, clean_text=None, verbose=False): :param ps: instance of type :func:`pstats.Stats` :param clean_text: function to clean function names :param verbose: change verbosity + :param fLOG: logging function :return: list of rows """ if clean_text is None: @@ -209,8 +210,8 @@ def add_rows(rows, d): for k, v in d.items(): stin = 0 stall = 0 - if verbose: # pragma: no cover - print("[pstats] %s=%r" % ( + if verbose and fLOG is not None: + fLOG("[pstats] %s=%r" % ( (clean_text(k[0].replace("\\", "/")), ) + k[1:], v)) if len(v) < 5: @@ -241,7 +242,7 @@ def add_rows(rows, d): return rows -def profile2df(ps, as_df=True, clean_text=None, verbose=False): +def profile2df(ps, as_df=True, clean_text=None, verbose=False, fLOG=None): """ Converts profiling statistics into a Dataframe. @@ -251,6 +252,7 @@ def profile2df(ps, as_df=True, clean_text=None, verbose=False): or a list of dictionaries (False) :param clean_text: function to clean function names :param verbose: verbosity + :param fLOG: logging function :return: a DataFrame :: @@ -262,7 +264,7 @@ def profile2df(ps, as_df=True, clean_text=None, verbose=False): df = profile2df(pd) print(df) """ - rows = _process_pstats(ps, clean_text, verbose=verbose) + rows = _process_pstats(ps, clean_text, verbose=verbose, fLOG=fLOG) if not as_df: return rows @@ -411,7 +413,7 @@ def better_name(row): raise ValueError("Unknown format '{}'.".format(pyinst_format)) -def profile2graph(ps, clean_text=None, verbose=False): +def profile2graph(ps, clean_text=None, verbose=False, fLOG=None): """ Converts profiling statistics into a graphs. @@ -419,6 +421,7 @@ def profile2graph(ps, clean_text=None, verbose=False): `_ :param clean_text: function to clean function names :param verbose: verbosity + :param fLOG: logging function :return: an instance of class @see cl ProfileNode .. exref:: @@ -471,8 +474,8 @@ def fct4(): nodes = {} for k, v in ps.stats.items(): - if verbose: - print("[pstats] %s=%r" % (k, v)) + if verbose and fLOG is not None: + fLOG("[pstats] %s=%r" % (k, v)) if len(v) < 5: continue if k[0] == '~' and len(v) == 0: From f89f8019fea8e19a91843eb4f644c795a31167f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 12 Oct 2021 11:44:20 +0200 Subject: [PATCH 5/8] lint --- src/pyquickhelper/pycode/profiling.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pyquickhelper/pycode/profiling.py b/src/pyquickhelper/pycode/profiling.py index d70c9857..49f71a77 100644 --- a/src/pyquickhelper/pycode/profiling.py +++ b/src/pyquickhelper/pycode/profiling.py @@ -120,8 +120,6 @@ def sort_key_tall(dr): raise NotImplementedError( "Unable to sort subcalls with this key %r." % sort_key) - SortKey.CUMULATIVE - def depth_first(node, roots_keys, indent=0): text = {'fct': node.func_name, 'where': node.key, 'nc1': node.nc1, 'nc2': node.nc2, 'tin': node.tin, From 186d776d8a80b8148829a347948e0499fd63584f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 12 Oct 2021 16:45:48 +0200 Subject: [PATCH 6/8] add json rendering --- _doc/notebooks/profiling.ipynb | 687 +++++++++++++++++++++++++ _unittests/ut_pycode/test_profiling.py | 52 +- src/pyquickhelper/pycode/profiling.py | 190 ++++++- 3 files changed, 908 insertions(+), 21 deletions(-) create mode 100644 _doc/notebooks/profiling.ipynb diff --git a/_doc/notebooks/profiling.ipynb b/_doc/notebooks/profiling.ipynb new file mode 100644 index 00000000..c2fa8c46 --- /dev/null +++ b/_doc/notebooks/profiling.ipynb @@ -0,0 +1,687 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "786b1b5c", + "metadata": {}, + "source": [ + "# Hierchical profiling\n", + "\n", + "[pyinstrument](https://github.com/joerick/pyinstrument) has a nice rendering when it comes to display the profiling of a function in a notebook. Let's see how to get the same with this library." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "2dca537b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
run previous cell, wait for 2 seconds
\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from jyquickhelper import add_notebook_menu\n", + "add_notebook_menu()" + ] + }, + { + "cell_type": "markdown", + "id": "16c0b3cf", + "metadata": {}, + "source": [ + "## The code to profile" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "33d6b822", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "def f0(t):\n", + " time.sleep(t)\n", + "\n", + "def f1(t):\n", + " time.sleep(t)\n", + "\n", + "def f2():\n", + " f1(0.1)\n", + " f1(0.01)\n", + "\n", + "def f3():\n", + " f0(0.2)\n", + " f1(0.5)\n", + "\n", + "def f4():\n", + " f2()\n", + " f3()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f23ed5bd", + "metadata": {}, + "outputs": [], + "source": [ + "from pyquickhelper.pycode.profiling import profile\n", + "stat, text = profile(f4)" + ] + }, + { + "cell_type": "markdown", + "id": "475a18ab", + "metadata": {}, + "source": [ + "This is the default rendering for module [cProfile](https://docs.python.org/3/library/profile.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "45870d14", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 12 function calls in 0.829 seconds\n", + "\n", + " Ordered by: cumulative time\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 1 0.000 0.000 0.829 0.829 :17(f4)\n", + " 4 0.829 0.207 0.829 0.207 {built-in method time.sleep}\n", + " 1 0.000 0.000 0.711 0.711 :13(f3)\n", + " 3 0.000 0.000 0.622 0.207 :6(f1)\n", + " 1 0.000 0.000 0.207 0.207 :3(f0)\n", + " 1 0.000 0.000 0.118 0.118 :9(f2)\n", + " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(text)" + ] + }, + { + "cell_type": "markdown", + "id": "d8026683", + "metadata": {}, + "source": [ + "## Renders a profile as a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "40d5b65e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fctfilencalls1ncalls2tincum_tintallcum_tall
0f4<ipython-input-2-61d7d6b6a99d>:17110.0000260.0000260.8288290.828829
1<built-in method time.sleep>~:0440.8286240.8286240.8286240.828624
2f3<ipython-input-2-61d7d6b6a99d>:13110.0000300.0000300.7110660.711066
3f1<ipython-input-2-61d7d6b6a99d>:6330.0001130.0001130.6218870.621887
4f0<ipython-input-2-61d7d6b6a99d>:3110.0000140.0000140.2068640.206864
5f2<ipython-input-2-61d7d6b6a99d>:9110.0000220.0000220.1177360.117736
6<method 'disable' of '_lsprof.Profiler' objects>~:0110.0000040.0000040.0000040.000004
\n", + "
" + ], + "text/plain": [ + " fct \\\n", + "0 f4 \n", + "1 \n", + "2 f3 \n", + "3 f1 \n", + "4 f0 \n", + "5 f2 \n", + "6 \n", + "\n", + " file ncalls1 ncalls2 tin cum_tin \\\n", + "0 :17 1 1 0.000026 0.000026 \n", + "1 ~:0 4 4 0.828624 0.828624 \n", + "2 :13 1 1 0.000030 0.000030 \n", + "3 :6 3 3 0.000113 0.000113 \n", + "4 :3 1 1 0.000014 0.000014 \n", + "5 :9 1 1 0.000022 0.000022 \n", + "6 ~:0 1 1 0.000004 0.000004 \n", + "\n", + " tall cum_tall \n", + "0 0.828829 0.828829 \n", + "1 0.828624 0.828624 \n", + "2 0.711066 0.711066 \n", + "3 0.621887 0.621887 \n", + "4 0.206864 0.206864 \n", + "5 0.117736 0.117736 \n", + "6 0.000004 0.000004 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pyquickhelper.pycode.profiling import profile2df\n", + "\n", + "df = profile2df(stat)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "d6ebb890", + "metadata": {}, + "source": [ + "## Renders a profile as indented text" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "41ec6a8f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " -- 4 4 -- 0.82862 0.82862 -- ~:0: ()\n", + "f1 -- 3 3 -- 0.00011 0.62189 -- :6 (f1)\n", + " -- 3 3 -- 0.62177 0.62177 -- ~:0: () +++\n", + "f4 -- 1 1 -- 0.00003 0.82883 -- :17 (f4)\n", + " f2 -- 1 1 -- 0.00002 0.11774 -- :9 (f2)\n", + " f1 -- 2 2 -- 0.00007 0.11771 -- :6 (f1) +++\n", + " f3 -- 1 1 -- 0.00003 0.71107 -- :13 (f3)\n", + " f0 -- 1 1 -- 0.00001 0.20686 -- :3 (f0)\n", + " -- 1 1 -- 0.20685 0.20685 -- ~:0: () +++\n", + " f1 -- 1 1 -- 0.00004 0.50417 -- :6 (f1) +++\n" + ] + } + ], + "source": [ + "from pyquickhelper.pycode.profiling import profile2graph\n", + "\n", + "gr = profile2graph(stat)\n", + "print(gr[0].to_text(fct_width=20))" + ] + }, + { + "cell_type": "markdown", + "id": "ea5f2491", + "metadata": {}, + "source": [ + "## Renders a profile as json" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "26f4239c", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"profile\": {\n", + " \"4-0.82862:::\": {\n", + " \"details\": {\n", + " \"fct\": \"\",\n", + " \"where\": \"~:0:\",\n", + " \"nc1\": 4,\n", + " \"nc2\": 4,\n", + " \"tin\": 0.8286236,\n", + " \"tall\": 0.8286236,\n", + " \"indent\": 0,\n", + " \"ncalls\": 0\n", + " }\n", + " },\n", + " \"3-0.62189:::f1\": {\n", + " \"details\": {\n", + " \"fct\": \"f1\",\n", + " \"where\": \":6\",\n", + " \"nc1\": 3,\n", + " \"nc2\": 3,\n", + " \"tin\": 0.00011260000000000001,\n", + " \"tall\": 0.6218869,\n", + " \"indent\": 0,\n", + " \"ncalls\": 1\n", + " },\n", + " \"calls\": {\n", + " \"3-0.62177:\": {\n", + " \"details\": {\n", + " \"fct\": \"\",\n", + " \"where\": \"~:0:\",\n", + " \"nc1\": 3,\n", + " \"nc2\": 3,\n", + " \"tin\": 0.6217743,\n", + " \"tall\": 0.6217743,\n", + " \"indent\": 0,\n", + " \"ncalls\": 1\n", + " }\n", + " }\n", + " }\n", + " },\n", + " \"1-0.82883:::f4\": {\n", + " \"details\": {\n", + " \"fct\": \"f4\",\n", + " \"where\": \":17\",\n", + " \"nc1\": 1,\n", + " \"nc2\": 1,\n", + " \"tin\": 2.6300000000000002e-05,\n", + " \"tall\": 0.8288287000000001,\n", + " \"indent\": 0,\n", + " \"ncalls\": 2\n", + " },\n", + " \"calls\": {\n", + " \"1-0.11774:f2\": {\n", + " \"details\": {\n", + " \"fct\": \"f2\",\n", + " \"where\": \":9\",\n", + " \"nc1\": 1,\n", + " \"nc2\": 1,\n", + " \"tin\": 2.19e-05,\n", + " \"tall\": 0.1177363,\n", + " \"indent\": 1,\n", + " \"ncalls\": 1\n", + " },\n", + " \"calls\": {\n", + " \"2-0.11771:f1\": {\n", + " \"details\": {\n", + " \"fct\": \"f1\",\n", + " \"where\": \":6\",\n", + " \"nc1\": 2,\n", + " \"nc2\": 2,\n", + " \"tin\": 7.25e-05,\n", + " \"tall\": 0.11771440000000001,\n", + " \"indent\": 1,\n", + " \"ncalls\": 1\n", + " }\n", + " }\n", + " }\n", + " },\n", + " \"1-0.71107:f3\": {\n", + " \"details\": {\n", + " \"fct\": \"f3\",\n", + " \"where\": \":13\",\n", + " \"nc1\": 1,\n", + " \"nc2\": 1,\n", + " \"tin\": 3.0100000000000003e-05,\n", + " \"tall\": 0.7110661,\n", + " \"indent\": 1,\n", + " \"ncalls\": 2\n", + " },\n", + " \"calls\": {\n", + " \"1-0.20686:f0\": {\n", + " \"details\": {\n", + " \"fct\": \"f0\",\n", + " \"where\": \":3\",\n", + " \"nc1\": 1,\n", + " \"nc2\": 1,\n", + " \"tin\": 1.4200000000000001e-05,\n", + " \"tall\": 0.2068635,\n", + " \"indent\": 2,\n", + " \"ncalls\": 1\n", + " },\n", + " \"calls\": {\n", + " \"1-0.20685:\": {\n", + " \"details\": {\n", + " \"fct\": \"\",\n", + " \"where\": \"~:0:\",\n", + " \"nc1\": 1,\n", + " \"nc2\": 1,\n", + " \"tin\": 0.2068493,\n", + " \"tall\": 0.2068493,\n", + " \"indent\": 2,\n", + " \"ncalls\": 1\n", + " }\n", + " }\n", + " }\n", + " },\n", + " \"1-0.50417:f1\": {\n", + " \"details\": {\n", + " \"fct\": \"f1\",\n", + " \"where\": \":6\",\n", + " \"nc1\": 1,\n", + " \"nc2\": 1,\n", + " \"tin\": 4.0100000000000006e-05,\n", + " \"tall\": 0.5041725,\n", + " \"indent\": 1,\n", + " \"ncalls\": 2\n", + " }\n", + " }\n", + " }\n", + " }\n", + " }\n", + " }\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "from pyquickhelper.pycode.profiling import profile2graph\n", + "\n", + "gr = profile2graph(stat)\n", + "js = gr[0].to_json(indent=4)\n", + "print(js)" + ] + }, + { + "cell_type": "markdown", + "id": "e48ecde6", + "metadata": {}, + "source": [ + "Or in a better way:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5c36b36a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "from pstats import SortKey\n", + "from jyquickhelper import JSONJS\n", + "\n", + "JSONJS(gr[0].to_json(as_str=False, sort_key=SortKey.CUMULATIVE))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "167058a0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/_unittests/ut_pycode/test_profiling.py b/_unittests/ut_pycode/test_profiling.py index 94b18f17..211c1663 100644 --- a/_unittests/ut_pycode/test_profiling.py +++ b/_unittests/ut_pycode/test_profiling.py @@ -155,7 +155,7 @@ def f4(): dicts = root.as_dict() self.assertEqual(10, len(dicts)) text = root.to_text() - self.assertIn("1 1", text) + self.assertIn("1 1", text) self.assertIn(' f1', text) text = root.to_text(fct_width=20) self.assertIn('...', text) @@ -163,6 +163,56 @@ def f4(): root.to_text(sort_key=SortKey.TIME) self.assertRaise(lambda: root.to_text(sort_key=SortKey.NAME), NotImplementedError) + js = root.to_json(indent=2) + self.assertIn('"details"', js) + js = root.to_json(as_str=False) + self.assertIsInstance(js, dict) + + def test_profile_graph_recursive2(self): + + def f0(t): + if t < 0.2: + time.sleep(t) + else: + f1(t - 0.1) + + def f1(t): + if t < 0.1: + time.sleep(t) + else: + f0(t) + + def f4(): + f1(0.3) + + ps = profile(f4)[0] # pylint: disable=W0632 + profile2df(ps, verbose=False, clean_text=lambda x: x.split('/')[-1]) + root, nodes = profile2graph(ps, clean_text=lambda x: x.split('/')[-1]) + self.assertEqual(len(nodes), 4) + text = root.to_text() + self.assertIn(" f1", text) + js = root.to_json(indent=2) + self.assertIn('"details"', js) + + def test_profile_graph_recursive1(self): + + def f0(t): + if t < 0.1: + time.sleep(t) + else: + f0(t - 0.1) + + def f4(): + f0(0.15) + + ps = profile(f4)[0] # pylint: disable=W0632 + profile2df(ps, verbose=False, clean_text=lambda x: x.split('/')[-1]) + root, nodes = profile2graph(ps, clean_text=lambda x: x.split('/')[-1]) + self.assertEqual(len(nodes), 3) + text = root.to_text() + self.assertIn(" f0", text) + js = root.to_json(indent=2) + self.assertIn('"details"', js) if __name__ == "__main__": diff --git a/src/pyquickhelper/pycode/profiling.py b/src/pyquickhelper/pycode/profiling.py index 49f71a77..257dbce7 100644 --- a/src/pyquickhelper/pycode/profiling.py +++ b/src/pyquickhelper/pycode/profiling.py @@ -2,12 +2,14 @@ @file @brief Profiling helpers """ -import os +from collections import deque, OrderedDict from io import StringIO +import json +import math +import os +import site import cProfile from pstats import SortKey, Stats -import site -from collections import deque class ProfileNode: @@ -64,10 +66,27 @@ def key(self): def get_root(self): "Returns the root of the graph." - node = self - while len(node.called_by) > 0: - node = node.called_by[0].get_root() - return node + done = set() + + def _get_root(node): + if len(node.called_by) == 0: + return node + if len(node.called_by) == 1: + return _get_root(node.called_by[0]) + res = None + for ct in node.called_by: + k = id(node), id(ct) + if k in done: + continue + res = ct + break + if res is None: + raise RuntimeError( # pragma: no cover + "All paths have been explored and no entry point was found.") + done.add((id(node), id(res))) + return res + + return _get_root(self) def __repr__(self): "usual" @@ -90,6 +109,38 @@ def __iter__(self): done.add(node.key) stack.extend(node.calls_to) + _modules_ = { + '~', 'subprocess.py', 'posixpath.py', 'os.py', + '', 'inspect.py', + 'version.py', 'typing.py', 'warnings.py', 'errors.py', + 'numbers.py', 'ast.py', 'threading.py', '_collections_abc.py', + 'datetime.py', 'abc.py', 'argparse.py', '__future__.py', + 'functools.py', 'six.py', 'sre_parse.py', 'contextlib.py', + ' _globals.py', '_ios.py', 'types.py'} + + @staticmethod + def filter_node_(node, info=None): + """ + Filters out node to be displayed by default. + + :param node: node + :param info: if the node is called by a function, + this dictionary can be used to overwrite the attributes + held by the node + :return: boolean (True to keep, False to forget) + """ + if node.filename in ProfileNode._modules_: + if info is None: + if (node.nc1 <= 10 and node.nc2 <= 10 and + node.tall <= 1e-4): + return False + else: + if (info['nc1'] <= 10 and info['nc2'] <= 10 and + info['tall'] <= 1e-4): + return False + + return True + def as_dict(self, filter_node=None, sort_key=SortKey.LINE): """ Renders the results of a profiling interpreted with @@ -97,18 +148,25 @@ def as_dict(self, filter_node=None, sort_key=SortKey.LINE): a dataframe. :param filter_node: display only the nodes for which - this function returns True + this function returns True, if None, the default function + removes built-in function with small impact :param sort_key: sort sub nodes by... :return: rows """ def sort_key_line(dr): - return dr[0].line + if isinstance(dr, tuple): + return dr[0].line + return dr.line def sort_key_tin(dr): - return -dr[1][2] + if isinstance(dr, tuple): + return -dr[1][2] + return -dr.tin def sort_key_tall(dr): - return -dr[1][3] + if isinstance(dr, tuple): + return -dr[1][3] + return -dr.tall if sort_key == SortKey.LINE: sortk = sort_key_line @@ -129,36 +187,42 @@ def depth_first(node, roots_keys, indent=0): for n, nel in sorted(zip(node.calls_to, node.calls_to_elements), key=sortk): - if filter_node is not None and not filter_node(n): - continue if n.key in roots_keys: text = {'fct': n.func_name, 'where': n.key, 'nc1': nel[0], 'nc2': nel[1], 'tin': nel[2], 'tall': nel[3], 'indent': indent + 1, 'ncalls': len(n.calls_to), 'more': '+', 'debug': 'B'} + if (filter_node is not None and + not filter_node(n, info=text)): + continue yield text else: + if filter_node is not None and not filter_node(n): + continue for t in depth_first(n, roots_keys, indent + 1): yield t + if filter_node is None: + filter_node = ProfileNode.filter_node_ nodes = list(self) roots = [node for node in nodes if len(node.called_by) != 1] roots_key = {r.key: r for r in roots} rows = [] - for root in roots: + for root in sorted(roots, key=sortk): if filter_node is not None and not filter_node(root): continue rows.extend(depth_first(root, roots_key)) return rows def to_text(self, filter_node=None, sort_key=SortKey.LINE, - fct_width=50): + fct_width=60): """ Prints the profiling to text. :param filter_node: display only the nodes for which - this function returns True + this function returns True, if None, the default function + removes built-in function with small impact :param sort_key: sort sub nodes by... :return: rows """ @@ -171,10 +235,14 @@ def align_text(text, size): return text[:h] + "..." + text[-h + 1:] dicts = self.as_dict(filter_node=filter_node, sort_key=sort_key) + max_nc = max(max(_['nc1'] for _ in dicts), + max(_['nc2'] for _ in dicts)) + dg = int(math.log(max_nc) / math.log(10) + 1.5) + line_format = ("{indent}{fct} -- {nc1: %dd} {nc2: %dd} -- {tin:1.5f} {tall:1.5f}" + " -- {name} ({fct2})" % (dg, dg)) text = [] for row in dicts: - line = ("{indent}{fct} -- {nc1: 3d} {nc2: 3d} -- {tin:1.4f} {tall:1.4f}" - " -- {name} ({fct2})").format( + line = line_format.format( indent=" " * (row['indent'] * 4), fct=align_text(row['fct'], fct_width - row['indent'] * 4), nc1=row['nc1'], nc2=row['nc2'], tin=row['tin'], @@ -182,11 +250,93 @@ def align_text(text, size): fct2=row['fct']) if row.get('more', '') == '+': line += " +++" - if row['fct'] == 'f0': - raise AssertionError("Unexpected %r." % row) text.append(line) return "\n".join(text) + def to_json(self, filter_node=None, sort_key=SortKey.LINE, as_str=True, + **kwargs): + """ + Renders the results of a profiling interpreted with + function @fn profile2graph as :epkg:`JSON`. + + :param filter_node: display only the nodes for which + this function returns True, if None, the default function + removes built-in function with small impact + :param sort_key: sort sub nodes by... + :param as_str: converts the json into a string + :param kwargs: see :func:`json.dumps` + :return: rows + """ + def sort_key_line(dr): + if isinstance(dr, tuple): + return dr[0].line + return dr.line + + def sort_key_tin(dr): + if isinstance(dr, tuple): + return -dr[1][2] + return -dr.tin + + def sort_key_tall(dr): + if isinstance(dr, tuple): + return -dr[1][3] + return -dr.tall + + if sort_key == SortKey.LINE: + sortk = sort_key_line + elif sort_key == SortKey.CUMULATIVE: + sortk = sort_key_tall + elif sort_key == SortKey.TIME: + sortk = sort_key_tin + else: + raise NotImplementedError( + "Unable to sort subcalls with this key %r." % sort_key) + + def walk(node, roots_keys, indent=0): + item = {'details': { + 'fct': node.func_name, 'where': node.key, + 'nc1': node.nc1, 'nc2': node.nc2, 'tin': node.tin, + 'tall': node.tall, 'indent': indent, + 'ncalls': len(node.calls_to)}} + + child = OrderedDict() + for n, nel in sorted(zip(node.calls_to, + node.calls_to_elements), + key=sortk): + key = "%d-%1.5f:%s" % (nel[0], nel[3], n.func_name) + if n.key in roots_keys: + details = {'fct': n.func_name, 'where': n.key, + 'nc1': nel[0], 'nc2': nel[1], 'tin': nel[2], + 'tall': nel[3], 'indent': indent, + 'ncalls': len(node.calls_to)} + if (filter_node is not None and + not filter_node(n, info=details)): + continue + child[key] = {'details': details} + else: + if filter_node is not None and not filter_node(n): + continue + child[key] = walk(n, roots_key, indent + 1) + + if len(child) > 0: + item['calls'] = child + return item + + if filter_node is None: + filter_node = ProfileNode.filter_node_ + nodes = list(self) + roots = [node for node in nodes if len(node.called_by) != 1] + roots_key = {r.key: r for r in roots} + rows = OrderedDict() + for root in sorted(roots, key=sortk): + if filter_node is not None and not filter_node(root): + continue + key = "%d-%1.5f:::%s" % (root.nc1, root.tall, root.func_name) + rows[key] = walk(root, roots_key) + if as_str: + return json.dumps({'profile': rows}, **kwargs) + return {'profile': rows} + def _process_pstats(ps, clean_text=None, verbose=False, fLOG=None): """ From 707bdc39a0e9f8a74735be5398fac288a6115100 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 12 Oct 2021 21:12:51 +0200 Subject: [PATCH 7/8] fixes root --- _unittests/ut_helpgen/test_stat_helper.py | 2 +- src/pyquickhelper/pycode/profiling.py | 26 +++++++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/_unittests/ut_helpgen/test_stat_helper.py b/_unittests/ut_helpgen/test_stat_helper.py index 4eb4c593..6a43e7e4 100644 --- a/_unittests/ut_helpgen/test_stat_helper.py +++ b/_unittests/ut_helpgen/test_stat_helper.py @@ -30,7 +30,7 @@ def test_enumerate_notebooks_link(self): counts["title"] += 1 nbfound.add(rl[1]) self.assertTrue(counts.get("ref", 0) > 0) - self.assertIn(counts.get(None, 0), (0, 11)) + self.assertIn(counts.get(None, 0), (0, 12)) self.assertTrue(counts["title"] > 0) self.assertTrue(len(nbfound) > 8) # self.assertTrue(counts.get("refn", 0) > 0) diff --git a/src/pyquickhelper/pycode/profiling.py b/src/pyquickhelper/pycode/profiling.py index 257dbce7..05d3b7a7 100644 --- a/src/pyquickhelper/pycode/profiling.py +++ b/src/pyquickhelper/pycode/profiling.py @@ -84,7 +84,7 @@ def _get_root(node): raise RuntimeError( # pragma: no cover "All paths have been explored and no entry point was found.") done.add((id(node), id(res))) - return res + return _get_root(res) return _get_root(self) @@ -155,8 +155,8 @@ def as_dict(self, filter_node=None, sort_key=SortKey.LINE): """ def sort_key_line(dr): if isinstance(dr, tuple): - return dr[0].line - return dr.line + return (dr[0].filename, dr[0].line) + return (dr.filename, dr.line) def sort_key_tin(dr): if isinstance(dr, tuple): @@ -269,8 +269,8 @@ def to_json(self, filter_node=None, sort_key=SortKey.LINE, as_str=True, """ def sort_key_line(dr): if isinstance(dr, tuple): - return dr[0].line - return dr.line + return (dr[0].filename, dr[0].line) + return (dr.filename, dr.line) def sort_key_tin(dr): if isinstance(dr, tuple): @@ -303,7 +303,7 @@ def walk(node, roots_keys, indent=0): for n, nel in sorted(zip(node.calls_to, node.calls_to_elements), key=sortk): - key = "%d-%1.5f:%s" % (nel[0], nel[3], n.func_name) + key = (nel[0], "%1.5f:%s" % (nel[3], n.func_name)) if n.key in roots_keys: details = {'fct': n.func_name, 'where': n.key, 'nc1': nel[0], 'nc2': nel[1], 'tin': nel[2], @@ -319,6 +319,10 @@ def walk(node, roots_keys, indent=0): child[key] = walk(n, roots_key, indent + 1) if len(child) > 0: + mx = max(_[0] for _ in child) + dg = int(math.log(mx) / math.log(10) + 1.5) + form = "%-{}d-%s".format(dg) + child = OrderedDict((form % k, v) for k, v in child.items()) item['calls'] = child return item @@ -331,8 +335,12 @@ def walk(node, roots_keys, indent=0): for root in sorted(roots, key=sortk): if filter_node is not None and not filter_node(root): continue - key = "%d-%1.5f:::%s" % (root.nc1, root.tall, root.func_name) + key = (root.nc1, "%1.5f:::%s" % (root.tall, root.func_name)) rows[key] = walk(root, roots_key) + mx = max(_[0] for _ in rows) + dg = int(math.log(mx) / math.log(10) + 1.5) + form = "%-{}d-%s".format(dg) + rows = OrderedDict((form % k, v) for k, v in rows.items()) if as_str: return json.dumps({'profile': rows}, **kwargs) return {'profile': rows} @@ -635,6 +643,9 @@ def fct4(): filename=clean_text(k[0].replace("\\", "/")), line=k[1], func_name=k[2], nc1=v[0], nc2=v[1], tin=v[2], tall=v[3]) + if node.key in nodes: + raise RuntimeError( + "Key %r is already present, node=%r." % (node.key, node)) nodes[node.key] = node for k, v in ps.stats.items(): @@ -662,4 +673,5 @@ def fct4(): for k, v in nodes.items(): root = v.get_root() break + return root, nodes From 1dd3d5075fc9a57912636faf9f229f60a516a4af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 12 Oct 2021 21:42:03 +0200 Subject: [PATCH 8/8] Update test_notebook_runner_report.py --- _unittests/ut_ipythonhelper/test_notebook_runner_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_unittests/ut_ipythonhelper/test_notebook_runner_report.py b/_unittests/ut_ipythonhelper/test_notebook_runner_report.py index ff5f1953..82a33ec0 100644 --- a/_unittests/ut_ipythonhelper/test_notebook_runner_report.py +++ b/_unittests/ut_ipythonhelper/test_notebook_runner_report.py @@ -18,7 +18,7 @@ def test_notebook_runner_report(self): if len(cov) <= 9: raise Exception("too few found notebooks") - if cov.shape[0] != 14: + if cov.shape[0] != 15: raise AssertionError("NB={0}\n----\n{1}".format(cov.shape, cov)) self.assertIn("last_name", cov.columns) cols = ['notebooks', 'last_name', 'date', 'etime',