Skip to content

Commit

Permalink
Refactored from parsing and readers to parse and read
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasli committed Oct 27, 2019
1 parent dd5cba2 commit 27ebd69
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 34 deletions.
19 changes: 9 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,27 +33,26 @@ You can then define some simple classes defining this content structure and patt
```python
import re

import fileparse.parsing as model
import fileparse.readers as readers
from fileparse import parse, read

class Text(model.Content):
class Text(parse.Content):
pass
text_match = re.compile('^(?P<text>[^#].+)$')
text_finder = model.ContentFinder(start_pattern=text_match,
text_finder = parse.ContentFinder(start_pattern=text_match,
content_type=Text)

class SubTitle(model.Content):
class SubTitle(parse.Content):
pass
subtitle_match = re.compile('^## ?(?P<subtitle>[^#].+)$')
subtitle_finder = model.ContentFinder(start_pattern=subtitle_match,
subtitle_finder = parse.ContentFinder(start_pattern=subtitle_match,
content_type=SubTitle,
sub_content_finders=[text_finder]
)

class Title(model.Content):
class Title(parse.Content):
pass
title_match = re.compile('^# ?(?P<title>[^#].+)$')
title_finder = model.ContentFinder(start_pattern=title_match,
title_finder = parse.ContentFinder(start_pattern=title_match,
content_type=Title,
sub_content_finders=[subtitle_finder, text_finder])
```
Expand All @@ -64,15 +63,15 @@ Notice two things:
Finally, we define the Parser.

````python
file_finder = model.Parser(finders=[title_finder])
file_finder = parse.Parser(finders=[title_finder])
````

The file_finder is now ready to parse text content.

For this specific content, we need a text stream able to parse a string. We define it like this:

````python
stream = readers.TextStream(reader=readers.StringReader(string=nested_text))
stream = read.TextStream(reader=read.StringReader(string=nested_text))
````

We can now parse the text with the rules defined in file_finder, and see what comes out of it. To get information out of a file-object, use the `file.get_contents_by_type(content_type)` method.
Expand Down
2 changes: 1 addition & 1 deletion fileparse/parsing.py → fileparse/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
import logging

from fileparse.readers import TextStream
from fileparse.read import TextStream


class ParsingError(Exception):
Expand Down
File renamed without changes.
37 changes: 14 additions & 23 deletions fileparse/test/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import re
import os

import fileparse.parsing as model
import fileparse.readers as readers
from fileparse import parse, read

FILENAME = os.path.join(os.path.split(__file__)[0], 'bin', 'context.md')

Expand All @@ -25,7 +24,7 @@


def test_text_stream_previous():
stream = readers.TextStream(reader=readers.StringReader(string=SIMPLE_TEXT))
stream = read.TextStream(reader=read.StringReader(string=SIMPLE_TEXT))

assert stream.get_line() == 'This is a test.'
assert stream.get_line() == '# this is content'
Expand All @@ -36,7 +35,7 @@ def test_text_stream_previous():


def test_text_stream_backtrack():
stream = readers.TextStream(reader=readers.StringReader(string=SIMPLE_TEXT))
stream = read.TextStream(reader=read.StringReader(string=SIMPLE_TEXT))

assert stream.get_line() == 'This is a test.'
assert stream.get_line() == '# this is content'
Expand All @@ -54,16 +53,16 @@ def test_file_reader():
# with codecs.open(FILENAME) as file:
# for line in file.readlines():
# print(line)
stream = readers.TextStream(reader=readers.FileReader(filepath=FILENAME, encoding='utf-8'))
stream = read.TextStream(reader=read.FileReader(filepath=FILENAME, encoding='utf-8'))
line = ''
while line is not None:
line = stream.get_line()


def test_content_finder_simple():
stream = readers.TextStream(reader=readers.StringReader(string=SIMPLE_TEXT))
stream = read.TextStream(reader=read.StringReader(string=SIMPLE_TEXT))

c = model.ContentFinder(start_pattern=re.compile('^#(?P<stuff>.+)$'),
c = parse.ContentFinder(start_pattern=re.compile('^#(?P<stuff>.+)$'),
end_pattern=re.compile('^¤'))

content = c.search_stream(stream)
Expand All @@ -72,44 +71,36 @@ def test_content_finder_simple():


def test_content_finder_nested():
class Text(model.Content):
class Text(parse.Content):
pass

class Title(model.Content):
class Title(parse.Content):
pass

class SubTitle(model.Content):
class SubTitle(parse.Content):
pass

text_match = re.compile('^(?P<text>[^#].+)$')
title_match = re.compile('^# ?(?P<title>[^#].+)$')
subtitle_match = re.compile('^## ?(?P<subtitle>[^#].+)$')
stream = readers.TextStream(reader=readers.StringReader(string=NESTED_TEXT))
stream = read.TextStream(reader=read.StringReader(string=NESTED_TEXT))

text_finder = model.ContentFinder(start_pattern=text_match,
text_finder = parse.ContentFinder(start_pattern=text_match,
content_type=Text)
subtitle_finder = model.ContentFinder(start_pattern=subtitle_match,
subtitle_finder = parse.ContentFinder(start_pattern=subtitle_match,
content_type=SubTitle,
sub_content_finders=[text_finder]
)
title_finder = model.ContentFinder(start_pattern=title_match,
title_finder = parse.ContentFinder(start_pattern=title_match,
content_type=Title,
sub_content_finders=[subtitle_finder, text_finder])

file_finder = model.Parser(finders=[title_finder])
file_finder = parse.Parser(finders=[title_finder])

file = file_finder.parse_stream(stream)
# TODO: Figure out why I can't find subtitles.
assert file.get_contents_by_type(SubTitle)[0].subtitle == 'This is a subtitle.'
assert file.get_contents_by_type(SubTitle)[0].contents[0].text == 'with subtitle contents.'


def test_content_finder_integration():
"""Test using the actual format of the old entries."""
path = os.path.split(__file__)[0]
file = os.path.join(path, 'bin', 'context.md')

reader = readers.FileReader(filepath=file, encoding='utf-8')



0 comments on commit 27ebd69

Please sign in to comment.