Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #407: Added capability to monitor progress with a logger #408

Merged
merged 4 commits into from
Aug 6, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion petl/test/util/test_timing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


from petl.util.counting import nrows
from petl.util.timing import progress
from petl.util.timing import progress, log_progress


def test_progress():
Expand All @@ -13,3 +13,10 @@ def test_progress():
('b', 3))
nrows(progress(table))

def test_log_progress():
    # Smoke test: iterating a table wrapped by log_progress must not raise.
    # The last data row is intentionally shorter than the header.
    header = ('foo', 'bar', 'baz')
    rows = (
        ('a', 1, True),
        ('b', 2, True),
        ('b', 3),
    )
    table = (header,) + rows
    nrows(log_progress(table))
2 changes: 1 addition & 1 deletion petl/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from petl.util.materialise import listoflists, listoftuples, tupleoflists, \
tupleoftuples, columns, facetcolumns

from petl.util.timing import progress, clock
from petl.util.timing import progress, log_progress, clock

from petl.util.statistics import limits, stats

Expand Down
89 changes: 78 additions & 11 deletions petl/util/timing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import absolute_import, print_function, division


import abc
import logging
import sys
import time

Expand All @@ -9,9 +11,9 @@
from petl.util.statistics import onlinestats


def progress(table, batchsize=1000, prefix="", out=sys.stderr):
def progress(table, batchsize=1000, prefix="", out=None):
"""
Report progress on rows passing through. E.g.::
Report progress on rows passing through to a log. E.g.::
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest 'Report progress on rows passing through to a file or file-like object (defaults to sys.stderr).'


>>> import petl as etl
>>> table = etl.dummytable(100000)
Expand All @@ -35,16 +37,49 @@ def progress(table, batchsize=1000, prefix="", out=sys.stderr):
return ProgressView(table, batchsize, prefix, out)


def log_progress(table, batchsize=1000, prefix="", out=None, level=logging.INFO):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest to rename 'out' to 'logger' just to help make it clear that a logger is expected here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. Left it out so that it agreed with the progress() function, but this makes sense, too.

"""
Report progress on rows passing through. E.g.::
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest 'Report progress on rows passing through to a logger.'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, no problem


>>> import petl as etl
>>> table = etl.dummytable(100000)
>>> table.log_progress(10000).tocsv('example.csv')
10000 rows in 0.13s (78363 row/s); batch in 0.13s (78363 row/s)
20000 rows in 0.22s (91679 row/s); batch in 0.09s (110448 row/s)
30000 rows in 0.31s (96573 row/s); batch in 0.09s (108114 row/s)
40000 rows in 0.40s (99535 row/s); batch in 0.09s (109625 row/s)
50000 rows in 0.49s (101396 row/s); batch in 0.09s (109591 row/s)
60000 rows in 0.59s (102245 row/s); batch in 0.09s (106709 row/s)
70000 rows in 0.68s (103221 row/s); batch in 0.09s (109498 row/s)
80000 rows in 0.77s (103810 row/s); batch in 0.09s (108126 row/s)
90000 rows in 0.90s (99465 row/s); batch in 0.13s (74516 row/s)
100000 rows in 1.02s (98409 row/s); batch in 0.11s (89821 row/s)
100000 rows in 1.02s (98402 row/s); batches in 0.10 +/- 0.02s [0.09-0.13] (100481 +/- 13340 rows/s [74516-110448])

See also :func:`petl.util.timing.clock`.

"""

return LoggingProgressView(table, batchsize, prefix, out, level=level)


Table.progress = progress
Table.log_progress = log_progress


class ProgressView(Table):
class ProgressViewBase(Table):
"""
Abstract base class for reporting on processing status
"""

def __init__(self, inner, batchsize, prefix, out):
def __init__(self, inner, batchsize, prefix):
self.inner = inner
self.batchsize = batchsize
self.prefix = prefix
self.out = out

@abc.abstractmethod
def print_message(self, message):
pass

def __iter__(self):
start = time.time()
Expand Down Expand Up @@ -76,9 +111,7 @@ def __iter__(self):
message = self.prefix + \
'%s rows in %.2fs (%s row/s); ' \
'batch in %.2fs (%s row/s)' % v
print(message, file=self.out)
if hasattr(self.out, 'flush'):
self.out.flush()
self.print_message(message)
batchstart = batchend
batchtimemean, batchtimevar = \
onlinestats(batchtime, batchn, mean=batchtimemean,
Expand Down Expand Up @@ -120,9 +153,43 @@ def __iter__(self):
v = (n, elapsedtime, rate)
message = self.prefix + '%s rows in %.2fs (%s row/s)' % v

print(message, file=self.out)
if hasattr(self.out, 'flush'):
self.out.flush()
self.print_message(message)


class ProgressView(ProgressViewBase):
    """
    Progress reporter that writes each message to a file-like object,
    e.g. sys.stdout or an open file.

    If `out` is None, messages are written to sys.stderr.
    """

    def __init__(self, inner, batchsize, prefix, out):
        # Resolve the default at construction time rather than in the
        # signature, so the sys.stderr in effect at that moment is used.
        self.file_object = sys.stderr if out is None else out
        super(ProgressView, self).__init__(inner, batchsize, prefix)

    def print_message(self, message):
        stream = self.file_object
        print(message, file=stream)
        # Not every file-like object offers flush(); only call it if present.
        if not hasattr(stream, 'flush'):
            return
        stream.flush()


class LoggingProgressView(ProgressViewBase):
    """
    Progress reporter that emits each message through a logger (or any
    object exposing a compatible ``log(level, message)`` method, such as a
    logger adapter).

    If `out` is None, the logger named after this module is used and its
    level is set to `level`. A logger supplied by the caller is used as-is
    and its level is not modified.
    """

    def __init__(self, inner, batchsize, prefix, out, level=logging.INFO):
        self.level = level
        if out is not None:
            self.logger = out
        else:
            # No logger supplied: fall back to this module's logger and set
            # its level to the reporting level.
            fallback = logging.getLogger(__name__)
            fallback.setLevel(level)
            self.logger = fallback
        super(LoggingProgressView, self).__init__(inner, batchsize, prefix)

    def print_message(self, message):
        self.logger.log(self.level, message)


def clock(table):
Expand Down