-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for `-` and `*` bullet style in the metadata cell
Made the metadata extraction less strict: added support for the `-` and `*` bullet styles, the title no longer has to be the first line, and the title can also be a '##' (or higher) heading. Simplified error handling during extraction, but made the exception more helpful. Also added some unit tests for the metadata cell parsing.
- Loading branch information
Showing 4 changed files with 96 additions and 27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import textwrap | ||
|
||
from ..preprocess import Metadata | ||
|
||
import unittest | ||
|
||
|
||
class MetadataTest(unittest.TestCase):
    """Exercise ``Metadata.extract_cell_metadata`` on several cell layouts."""

    def _assert_extracted(self, cell_source, wanted):
        """Dedent *cell_source*, extract its metadata and compare to *wanted*."""
        extracted = Metadata.extract_cell_metadata(textwrap.dedent(cell_source))
        self.assertEqual(wanted, extracted)

    def test_extract_cell_metadata_basic(self):
        """A '#' title line followed by '+' bullet items."""
        self._assert_extracted(
            """\
            # Animal Farm
            + Author: George Orwell
            + Date: 1945-08-17
            """,
            {
                'title': 'Animal Farm',
                'author': 'George Orwell',
                'date': '1945-08-17',
            },
        )

    def test_extract_cell_metadata_other_bullets(self):
        """Items introduced by '-', by '*', or by no bullet marker at all."""
        self._assert_extracted(
            """\
            # Animal Farm
            - Author: George Orwell
            * Date: 1945-08-17
            Tags: books
            """,
            {
                'title': 'Animal Farm',
                'author': 'George Orwell',
                'date': '1945-08-17',
                'tags': 'books',
            },
        )

    def test_extract_cell_metadata_title_variation(self):
        """The title may be a '##' heading and need not be the first line."""
        self._assert_extracted(
            """\
            + Author: George Orwell
            ## Animal Farm
            + Date: 1945-08-17
            """,
            {
                'title': 'Animal Farm',
                'author': 'George Orwell',
                'date': '1945-08-17',
            },
        )

    def test_extract_cell_metadata_whitespace(self):
        """Whitespace before the ':' separator does not end up in the key."""
        self._assert_extracted(
            """\
            # Animal Farm
            + Author : George Orwell
            + Date : 1945-08-17
            """,
            {
                'title': 'Animal Farm',
                'author': 'George Orwell',
                'date': '1945-08-17',
            },
        )