-
Notifications
You must be signed in to change notification settings - Fork 192
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Issue #407: Added capability to monitor progress with a logger #408
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
from __future__ import absolute_import, print_function, division | ||
|
||
|
||
import abc | ||
import logging | ||
import sys | ||
import time | ||
|
||
|
@@ -9,9 +11,9 @@ | |
from petl.util.statistics import onlinestats | ||
|
||
|
||
def progress(table, batchsize=1000, prefix="", out=sys.stderr): | ||
def progress(table, batchsize=1000, prefix="", out=None): | ||
""" | ||
Report progress on rows passing through. E.g.:: | ||
Report progress on rows passing through to a file or file-like object (defaults to sys.stderr). E.g.:: | ||
|
||
>>> import petl as etl | ||
>>> table = etl.dummytable(100000) | ||
|
@@ -35,16 +37,49 @@ def progress(table, batchsize=1000, prefix="", out=sys.stderr): | |
return ProgressView(table, batchsize, prefix, out) | ||
|
||
|
||
def log_progress(table, batchsize=1000, prefix="", out=None, level=logging.INFO): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggest to rename 'out' to 'logger' just to help make it clear that a logger is expected here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Left it out so that it agreed with the progress() function, but this makes sense, too. |
||
""" | ||
Report progress on rows passing through to a logger. E.g.:: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggest 'Report progress on rows passing through to a logger.' There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, no problem |
||
|
||
>>> import petl as etl | ||
>>> table = etl.dummytable(100000) | ||
>>> table.progress(10000).tocsv('example.csv') | ||
10000 rows in 0.13s (78363 row/s); batch in 0.13s (78363 row/s) | ||
20000 rows in 0.22s (91679 row/s); batch in 0.09s (110448 row/s) | ||
30000 rows in 0.31s (96573 row/s); batch in 0.09s (108114 row/s) | ||
40000 rows in 0.40s (99535 row/s); batch in 0.09s (109625 row/s) | ||
50000 rows in 0.49s (101396 row/s); batch in 0.09s (109591 row/s) | ||
60000 rows in 0.59s (102245 row/s); batch in 0.09s (106709 row/s) | ||
70000 rows in 0.68s (103221 row/s); batch in 0.09s (109498 row/s) | ||
80000 rows in 0.77s (103810 row/s); batch in 0.09s (108126 row/s) | ||
90000 rows in 0.90s (99465 row/s); batch in 0.13s (74516 row/s) | ||
100000 rows in 1.02s (98409 row/s); batch in 0.11s (89821 row/s) | ||
100000 rows in 1.02s (98402 row/s); batches in 0.10 +/- 0.02s [0.09-0.13] (100481 +/- 13340 rows/s [74516-110448]) | ||
|
||
See also :func:`petl.util.timing.clock`. | ||
|
||
""" | ||
|
||
return LoggingProgressView(table, batchsize, prefix, out, level=level) | ||
|
||
|
||
Table.progress = progress | ||
Table.log_progress = log_progress | ||
|
||
|
||
class ProgressView(Table): | ||
class ProgressViewBase(Table): | ||
""" | ||
Abstract base class for reporting on processing status | ||
""" | ||
|
||
def __init__(self, inner, batchsize, prefix, out): | ||
def __init__(self, inner, batchsize, prefix): | ||
self.inner = inner | ||
self.batchsize = batchsize | ||
self.prefix = prefix | ||
self.out = out | ||
|
||
@abc.abstractmethod | ||
def print_message(self, message): | ||
pass | ||
|
||
def __iter__(self): | ||
start = time.time() | ||
|
@@ -76,9 +111,7 @@ def __iter__(self): | |
message = self.prefix + \ | ||
'%s rows in %.2fs (%s row/s); ' \ | ||
'batch in %.2fs (%s row/s)' % v | ||
print(message, file=self.out) | ||
if hasattr(self.out, 'flush'): | ||
self.out.flush() | ||
self.print_message(message) | ||
batchstart = batchend | ||
batchtimemean, batchtimevar = \ | ||
onlinestats(batchtime, batchn, mean=batchtimemean, | ||
|
@@ -120,9 +153,43 @@ def __iter__(self): | |
v = (n, elapsedtime, rate) | ||
message = self.prefix + '%s rows in %.2fs (%s row/s)' % v | ||
|
||
print(message, file=self.out) | ||
if hasattr(self.out, 'flush'): | ||
self.out.flush() | ||
self.print_message(message) | ||
|
||
|
||
class ProgressView(ProgressViewBase):
    """
    Progress reporter that prints each message to a file-like object.

    The destination is any object accepted by ``print(..., file=...)``,
    e.g. ``sys.stdout`` or an open file. When ``out`` is ``None`` the
    messages are written to ``sys.stderr``.
    """

    def __init__(self, inner, batchsize, prefix, out):
        # Pick the destination before delegating the shared setup to the base.
        self.file_object = sys.stderr if out is None else out
        super(ProgressView, self).__init__(inner, batchsize, prefix)

    def print_message(self, message):
        """Write ``message`` to the destination and flush it if possible."""
        target = self.file_object
        print(message, file=target)
        # Not every destination provides flush(); only call it when present.
        if hasattr(target, 'flush'):
            target.flush()
|
||
|
||
class LoggingProgressView(ProgressViewBase):
    """
    Progress reporter that emits each message through a logger.

    ``out`` may be any object exposing a ``log(level, message)`` method,
    such as a ``logging.Logger`` or ``logging.LoggerAdapter``. When ``out``
    is ``None`` the logger named after this module is used, and its level is
    set to ``level``. Every message is emitted at ``level``.
    """

    def __init__(self, inner, batchsize, prefix, out, level=logging.INFO):
        if out is not None:
            # Caller-supplied logger: use as-is, its level is left untouched.
            self.logger = out
        else:
            # No logger supplied: fall back to this module's logger and set
            # its level to the requested one.
            default_logger = logging.getLogger(__name__)
            default_logger.setLevel(level)
            self.logger = default_logger
        self.level = level
        super(LoggingProgressView, self).__init__(inner, batchsize, prefix)

    def print_message(self, message):
        """Emit ``message`` through the logger at the configured level."""
        self.logger.log(self.level, message)
|
||
|
||
def clock(table): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggest 'Report progress on rows passing through to a file or file-like object (defaults to
sys.stderr
).'