Skip to content

Commit

Permalink
Merge correct columnization of CJK tag excerpts.
Browse files Browse the repository at this point in the history
  • Loading branch information
erikrose committed Mar 24, 2020
2 parents b51a5c9 + 372cf5c commit adcea50
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 5 deletions.
13 changes: 8 additions & 5 deletions cli/fathom_web/accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from click import get_terminal_size, style
import torch

from .utils import tensors_from
from .utils import tensors_from, fit_unicode


def accuracy_per_tag(y, y_pred, cutoff):
Expand Down Expand Up @@ -76,7 +76,7 @@ def print_per_tag_report(metricses):
template_width_minus_tag = max_filename_len + 2 + 3 + 2 + 3 + 10
tag_max_width = min(get_terminal_size()[0] - template_width_minus_tag, max_tag_len)

template = '{file_style}{file: >' + str(max_filename_len) + '}{style_reset} {tag_style}{tag: <' + str(tag_max_width) + '} {error_type: >2}{style_reset} {score}'
template = '{file_style}{file: >' + str(max_filename_len) + '}{style_reset} {tag_style}{tag_and_padding} {error_type: >2}{style_reset} {score}'
style_reset = style('', reset=True)
for metrics in sorted(metricses, key=lambda m: m['filename']):
first = True
Expand All @@ -89,7 +89,7 @@ def print_per_tag_report(metricses):
file=metrics['filename'] if first else '',
file_style=style('', **FAT_COLORS[file_color], reset=False),
style_reset=style_reset,
tag=tag['markup'][:tag_max_width],
tag_and_padding=fit_unicode(tag['markup'], tag_max_width),
tag_style=style('', **THIN_COLORS[not bool(tag['error_type'])], reset=False),
error_type=tag['error_type'],
score=thermometer(tag['score'])))
Expand All @@ -100,7 +100,7 @@ def print_per_tag_report(metricses):
file=metrics['filename'],
file_style=style('', **FAT_COLORS['good'], reset=False),
style_reset=style_reset,
tag='No targets found.',
tag_and_padding=fit_unicode('No targets found.', tag_max_width),
tag_style=style('', fg='green', reset=False),
error_type='',
score=''))
Expand All @@ -111,7 +111,10 @@ def print_per_tag_report(metricses):
file='',
file_style=style('', **FAT_COLORS[file_color], reset=False),
style_reset=style_reset,
tag=f' ...and {true_negative_count} correct negative' + ('s' if true_negative_count > 1 else ''),
tag_and_padding=fit_unicode(
f' ...and {true_negative_count} correct negative'
+ ('s' if true_negative_count > 1 else ''),
tag_max_width),
tag_style=style('', fg='green', reset=False),
error_type='',
score=''))
Expand Down
13 changes: 13 additions & 0 deletions cli/fathom_web/test/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from ..utils import fit_unicode


def test_fit_unicode():
    """Check that fit_unicode truncates or pads to an exact display width.

    The CJK character used here occupies two terminal columns, so a string
    like 'a母' is three columns wide.
    """
    # Pure ASCII: exact fit and plain truncation.
    assert fit_unicode('abc', 3) == 'abc'
    assert fit_unicode('abc', 2) == 'ab'
    # A double-width char that would straddle the limit is dropped and the
    # leftover column is filled with a space.
    assert fit_unicode('a母', 2) == 'a '
    assert fit_unicode('a母', 3) == 'a母'
    assert fit_unicode('a母母母s', 7) == 'a母母母'
    assert fit_unicode('a母母母s', 6) == 'a母母 '
    assert fit_unicode('a母母母s', 5) == 'a母母'
    assert fit_unicode('a母母', 4) == 'a母 '
    # Input narrower than the target: 3 columns of text + 3 columns of padding.
    assert fit_unicode('a母', 6) == 'a母   '
16 changes: 16 additions & 0 deletions cli/fathom_web/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@


from random import sample
from unicodedata import east_asian_width

from click import style
from more_itertools import pairwise
Expand Down Expand Up @@ -79,3 +80,18 @@ def speed_readout(pages):
average = sum(p['time'] for p in pages) / sum(len(p['nodes']) for p in pages)
histogram = mini_histogram([p['time'] for p in pages])
return f'\nTime per page (ms): {histogram} Average per tag: {average:.1f}'


def fit_unicode(string, width):
    """Truncate or pad a string to exactly ``width`` terminal columns.

    Characters whose East Asian Width class is Wide ('W') or Fullwidth ('F')
    are counted as two columns; every other character is counted as one.

    :arg string: The text to fit
    :arg width: The number of columns the result should occupy
    :return: The longest prefix of ``string`` that fits within ``width``
        columns, right-padded with spaces to ``width``. If a double-width
        character would straddle the limit, it is dropped and a space takes
        its place. A ``width`` of zero or less yields an empty string.

    """
    width_so_far = 0
    num_chars = 0  # Bound up front so empty input doesn't hit an unbound name.
    for char in string:
        char_width = 2 if east_asian_width(char) in ('W', 'F') else 1
        # Check before consuming the char so we never have to undo an
        # overshoot (which went wrong when the overshooting char was narrow).
        if width_so_far + char_width > width:
            break
        width_so_far += char_width
        num_chars += 1
    # A negative repeat count yields '', so width <= 0 needs no special case.
    return string[:num_chars] + (' ' * (width - width_so_far))

0 comments on commit adcea50

Please sign in to comment.