From 27ebd699061195671e64f6c325d4a3e2554ff4cd Mon Sep 17 00:00:00 2001 From: Tobias Litherland Date: Sun, 27 Oct 2019 20:48:53 +0100 Subject: [PATCH] Refactored from parsing and readers to parse and read --- README.md | 19 ++++++++------- fileparse/{parsing.py => parse.py} | 2 +- fileparse/{readers.py => read.py} | 0 fileparse/test/test_parsing.py | 37 +++++++++++------------------- 4 files changed, 24 insertions(+), 34 deletions(-) rename fileparse/{parsing.py => parse.py} (99%) rename fileparse/{readers.py => read.py} (100%) diff --git a/README.md b/README.md index 54a2611..09d26dc 100644 --- a/README.md +++ b/README.md @@ -33,27 +33,26 @@ You can then define some simple classes defining this content structure and patt ```python import re -import fileparse.parsing as model -import fileparse.readers as readers +from fileparse import parse, read -class Text(model.Content): +class Text(parse.Content): pass text_match = re.compile('^(?P[^#].+)$') -text_finder = model.ContentFinder(start_pattern=text_match, +text_finder = parse.ContentFinder(start_pattern=text_match, content_type=Text) -class SubTitle(model.Content): +class SubTitle(parse.Content): pass subtitle_match = re.compile('^## ?(?P[^#].+)$') -subtitle_finder = model.ContentFinder(start_pattern=subtitle_match, +subtitle_finder = parse.ContentFinder(start_pattern=subtitle_match, content_type=SubTitle, sub_content_finders=[text_finder] ) -class Title(model.Content): +class Title(parse.Content): pass title_match = re.compile('^# ?(?P[^#].+)$') -title_finder = model.ContentFinder(start_pattern=title_match, +title_finder = parse.ContentFinder(start_pattern=title_match, content_type=Title, sub_content_finders=[subtitle_finder, text_finder]) ``` @@ -64,7 +63,7 @@ Notice two things: Finally, we define the Parser. ````python - file_finder = model.Parser(finders=[title_finder]) + file_finder = parse.Parser(finders=[title_finder]) ```` The file_finder is now ready to parse text content. 
@@ -72,7 +71,7 @@ The file_finder is now ready to parse text content. For this specific content, we need a text stream able to parse a string. We define it like this: ````python -stream = readers.TextStream(reader=readers.StringReader(string=nested_text)) +stream = read.TextStream(reader=read.StringReader(string=nested_text)) ```` We can now parse the text with the rules defined in file_finder, and see what comes out of it. To get information out of a file-object, use the `file.get_contents_by_type(content_type)` method. diff --git a/fileparse/parsing.py b/fileparse/parse.py similarity index 99% rename from fileparse/parsing.py rename to fileparse/parse.py index a122530..ae11c0b 100644 --- a/fileparse/parsing.py +++ b/fileparse/parse.py @@ -3,7 +3,7 @@ import re import logging -from fileparse.readers import TextStream +from fileparse.read import TextStream class ParsingError(Exception): diff --git a/fileparse/readers.py b/fileparse/read.py similarity index 100% rename from fileparse/readers.py rename to fileparse/read.py diff --git a/fileparse/test/test_parsing.py b/fileparse/test/test_parsing.py index cc587a7..4ca2444 100644 --- a/fileparse/test/test_parsing.py +++ b/fileparse/test/test_parsing.py @@ -2,8 +2,7 @@ import re import os -import fileparse.parsing as model -import fileparse.readers as readers +from fileparse import parse, read FILENAME = os.path.join(os.path.split(__file__)[0], 'bin', 'context.md') @@ -25,7 +24,7 @@ def test_text_stream_previous(): - stream = readers.TextStream(reader=readers.StringReader(string=SIMPLE_TEXT)) + stream = read.TextStream(reader=read.StringReader(string=SIMPLE_TEXT)) assert stream.get_line() == 'This is a test.' 
assert stream.get_line() == '# this is content' @@ -36,7 +35,7 @@ def test_text_stream_previous(): def test_text_stream_backtrack(): - stream = readers.TextStream(reader=readers.StringReader(string=SIMPLE_TEXT)) + stream = read.TextStream(reader=read.StringReader(string=SIMPLE_TEXT)) assert stream.get_line() == 'This is a test.' assert stream.get_line() == '# this is content' @@ -54,16 +53,16 @@ def test_file_reader(): # with codecs.open(FILENAME) as file: # for line in file.readlines(): # print(line) - stream = readers.TextStream(reader=readers.FileReader(filepath=FILENAME, encoding='utf-8')) + stream = read.TextStream(reader=read.FileReader(filepath=FILENAME, encoding='utf-8')) line = '' while line is not None: line = stream.get_line() def test_content_finder_simple(): - stream = readers.TextStream(reader=readers.StringReader(string=SIMPLE_TEXT)) + stream = read.TextStream(reader=read.StringReader(string=SIMPLE_TEXT)) - c = model.ContentFinder(start_pattern=re.compile('^#(?P<stuff>.+)$'), + c = parse.ContentFinder(start_pattern=re.compile('^#(?P<stuff>.+)$'), end_pattern=re.compile('^ยค')) content = c.search_stream(stream) @@ -72,31 +71,31 @@ def test_content_finder_simple(): def test_content_finder_nested(): - class Text(model.Content): + class Text(parse.Content): pass - class Title(model.Content): + class Title(parse.Content): pass - class SubTitle(model.Content): + class SubTitle(parse.Content): pass text_match = re.compile('^(?P<text>[^#].+)$') title_match = re.compile('^# ?(?P<title>[^#].+)$') subtitle_match = re.compile('^## ?(?P<subtitle>[^#].+)$') - stream = readers.TextStream(reader=readers.StringReader(string=NESTED_TEXT)) + stream = read.TextStream(reader=read.StringReader(string=NESTED_TEXT)) - text_finder = model.ContentFinder(start_pattern=text_match, + text_finder = parse.ContentFinder(start_pattern=text_match, content_type=Text) - subtitle_finder = model.ContentFinder(start_pattern=subtitle_match, + subtitle_finder = 
parse.ContentFinder(start_pattern=subtitle_match, content_type=SubTitle, sub_content_finders=[text_finder] ) - title_finder = model.ContentFinder(start_pattern=title_match, + title_finder = parse.ContentFinder(start_pattern=title_match, content_type=Title, sub_content_finders=[subtitle_finder, text_finder]) - file_finder = model.Parser(finders=[title_finder]) + file_finder = parse.Parser(finders=[title_finder]) file = file_finder.parse_stream(stream) # TODO: Figure out why i can't find subtitles. @@ -104,12 +103,4 @@ class SubTitle(model.Content): assert file.get_contents_by_type(SubTitle)[0].contents[0].text == 'with subtitle contents.' -def test_content_finder_integration(): - """Test using the actual format of the old entries.""" - path = os.path.split(__file__)[0] - file = os.path.join(path, 'bin', 'context.md') - - reader = readers.FileReader(filepath=file, encoding='utf-8') - -