Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion prose_wc/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_output_json(self, mock_dump, mock_print):
self.assertTrue(mock_print.called_once)

@patch.object(wc, '_mockable_print')
@patch.object(yaml, 'dump')
@patch.object(yaml, 'safe_dump')
def test_output_yaml(self, mock_dump, mock_print):
wc.prose_wc(wc.setup(['-f', 'yaml', self.plaintext]))
mock_dump.assert_called_once_with({
Expand Down
22 changes: 14 additions & 8 deletions prose_wc/wc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#!/usr/bin/python

from __future__ import print_function
from __future__ import (
print_function,
unicode_literals,
)
import argparse
from bs4 import BeautifulSoup
import json
Expand All @@ -23,6 +26,9 @@
apostrophes
"""

NEWLINE_PATTERN = re.compile(r'[\r|\n|\r\n]')
NEWPARA_PATTERN = re.compile(r'[\r|\n|\r\n]{2}')


def _mockable_print(arg):
"""A print function that can be mocked in tests.
Expand Down Expand Up @@ -62,8 +68,8 @@ def setup(argv):
help='output format.')
parser.add_argument('-i', '--indent', type=int, nargs='?', default=4,
help='indentation depth (default: 4).')
parser.add_argument('file', type=argparse.FileType('r'),
help='file to count (or - for STDIN)')
parser.add_argument('file', type=argparse.FileType('rb'),
help='file to parse (or - for STDIN)')
return parser.parse_args(argv)


Expand All @@ -77,7 +83,7 @@ def prose_wc(args):
return 1
if args.split_hyphens:
INTERSTITIAL_PUNCTUATION.append(re.compile(r'-'))
content = args.file.read()
content = args.file.read().decode('utf-8')
filename = args.file.name
body = strip_frontmatter(content)
parsed = markdown_to_text(body)
Expand All @@ -89,7 +95,7 @@ def prose_wc(args):
update_file(filename, result, content, args.indent)
else:
_mockable_print({
'yaml': yaml.dump(result, default_flow_style=False,
'yaml': yaml.safe_dump(result, default_flow_style=False,
indent=args.indent),
'json': json.dumps(result, indent=args.indent),
'default': default_dump(result),
Expand Down Expand Up @@ -151,7 +157,7 @@ def wc(filename, contents, parsed=None, is_jekyll=False):
body = parsed.strip() if parsed else contents.strip()

# Strip the body down to just words
words = re.sub(r'\n', ' ', body)
words = NEWLINE_PATTERN.sub(' ', body)
words = re.sub(r'\s+', ' ', words)
for punctuation in INTERSTITIAL_PUNCTUATION:
words = re.sub(punctuation, ' ', words)
Expand All @@ -164,7 +170,7 @@ def wc(filename, contents, parsed=None, is_jekyll=False):
'counts': {
'file': filename,
'type': fmt,
'paragraphs': len(contents.strip().split('\n\n')),
'paragraphs': len(NEWPARA_PATTERN.split(contents.strip())),
'words': len(re.split('\s+', words)),
'characters_real': len(real_characters),
'characters_total': len(words),
Expand Down Expand Up @@ -195,7 +201,7 @@ def update_file(filename, result, content, indent):
# Set the frontmatter part back to the stringified version of the
# frontmatter object
parts[1] = '\n{}'.format(
yaml.dump(frontmatter, default_flow_style=False, indent=indent))
yaml.safe_dump(frontmatter, default_flow_style=False, indent=indent))
result = '---'.join(parts)

# Write everything back to the file
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

setup(
name='prose-wc',
version='0.2.1',
version='0.3.0',
description='Jekyll-aware prose wordcount utility',
long_description=long_description,
author='Madison Scott-Clary',
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tox]
envlist = 2.7,3.4,3.5
envlist = py27, py35

[testenv]
deps = nose
Expand Down