Skip to content

Commit

Permalink
fix python 2/3 unicode inconsistency (part 2)
Browse files: browse the repository at this point in the history
  • Loading branch information
Gransk committed Dec 20, 2016
1 parent c64e128 commit 6ecef67
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 10 deletions.
2 changes: 1 addition & 1 deletion gransk/core/document.py
Expand Up @@ -168,7 +168,7 @@ def get_document(path, parent=None):
doc.docid = digest.hexdigest()

_, ext = os.path.splitext(doc.path)
doc.ext = ext.lower() or 'none'
doc.ext = ext.lstrip('.').lower() or 'none'

doc.parent = parent

Expand Down
2 changes: 1 addition & 1 deletion gransk/core/magic.py
Expand Up @@ -19,7 +19,7 @@ class Subscriber(abstract_subscriber.Subscriber):

def setup(self, _):
"""Compile file headers for all magic extractors into a regex pattern."""
self.pattern = re.compile('|'.join(list(self.pipeline.magic.keys())))
self.pattern = re.compile(b'|'.join(list(self.pipeline.magic.keys())))

def consume(self, doc, payload):
"""
Expand Down
6 changes: 3 additions & 3 deletions gransk/plugins/extractors/tests/tika_extractor_test.py
Expand Up @@ -27,8 +27,8 @@ def test_simple(self):
extractor = tika_extractor.Subscriber(mock_pipeline)

expected = (
'This is an unstructured document containing the \nidentifier '
'"193.34.2.1" (ip address), stored as a PDF document.')
b'This is an unstructured document containing the \nidentifier '
b'"193.34.2.1" (ip address), stored as a PDF document.')

with open('config.yml') as inp:
config = yaml.load(inp.read())
Expand All @@ -48,7 +48,7 @@ def test_simple(self):

actual = doc.text

self.assertEqual(expected, actual)
self.assertEqual(expected.decode('utf-8'), actual)


if __name__ == '__main__':
Expand Down
3 changes: 2 additions & 1 deletion gransk/plugins/storage/store_text.py
Expand Up @@ -5,6 +5,7 @@

import os
import logging
import io

from werkzeug import secure_filename

Expand Down Expand Up @@ -54,7 +55,7 @@ def consume(self, doc, payload):

new_path = os.path.join(self.root, new_filename)

with open(new_path, 'wb') as out:
with io.open(new_path, 'w', encoding='utf-8') as out:
out.write(doc.text)

doc.meta['text_file'] = new_path
Empty file modified: run_tests.py (file mode changed 100644 → 100755)
Empty file.
12 changes: 8 additions & 4 deletions utils/helper_usage.py
@@ -1,3 +1,7 @@
#!/usr/bin/env python

from __future__ import print_function

import os
import re
import sys
Expand Down Expand Up @@ -26,10 +30,10 @@
direct[m].update([p])

for m in sorted(use.keys()):
#print "%s = '%s' # %s" % (m, m.lower(), ', '.join(use[m]))
print "%s = '%s'" % (m, m.lower())
#print("%s = '%s' # %s" % (m, m.lower(), ', '.join(use[m])))
print("%s = '%s'" % (m, m.lower()))

print "\n\n__DIRECT__"
print("\n\n__DIRECT__")

for m in sorted(direct.keys()):
print "%s = '%s' # %s" % (m.upper(), m, ', '.join(direct[m]))
print("%s = '%s' # %s" % (m.upper(), m, ', '.join(direct[m])))

0 comments on commit 6ecef67

Please sign in to comment.