Skip to content

Commit

Permalink
Add basic CLI support
Browse files Browse the repository at this point in the history
Fix error tests
Fix yml save
  • Loading branch information
kororo committed Jul 29, 2018
1 parent 923713f commit a70ae56
Show file tree
Hide file tree
Showing 10 changed files with 41 additions and 24 deletions.
14 changes: 14 additions & 0 deletions excelcy/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import sys

from excelcy import ExcelCy


def main(argv: list = None):
    """Run the ExcelCy command line interface.

    :param argv: Full argument vector with the program name first (same
        layout as ``sys.argv``). ``sys.argv`` is used when omitted.
    :return: Exit status: 0 after the ``execute`` command ran, 1 when the
        arguments do not form a supported command.
    """
    # Only fall back to the process arguments when nothing was passed at all;
    # an explicit empty list must not be silently replaced.
    args = sys.argv if argv is None else argv
    # Expected shape: <prog> execute <file_path>. Validate before indexing so
    # a bare invocation prints usage instead of raising IndexError.
    if len(args) < 3 or args[1] != 'execute':
        print('usage: excelcy execute <file_path>', file=sys.stderr)
        return 1
    ExcelCy.execute(file_path=args[2])
    return 0


if __name__ == '__main__':
    # Propagate the status so shells and scripts can detect a bad invocation.
    sys.exit(main())
5 changes: 3 additions & 2 deletions excelcy/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,8 @@ def load(self, file_path: str):

def _save_yml(self, file_path: str, kind: list):
    """Write the storage sections named in ``kind`` to a YAML file.

    :param file_path: Destination path handed to ``utils.yaml_save``.
    :param kind: Names of the top-level ``as_dict()`` entries to keep;
        every other entry is left out of the saved document.
    """
    # Build a filtered dict instead of deleting keys while looping over
    # ``data.items()``: mutating a dict during iteration raises RuntimeError.
    data = {name: value for name, value in self.as_dict().items() if name in kind}
    utils.yaml_save(file_path=file_path, data=data)
Expand Down Expand Up @@ -328,7 +329,7 @@ def convert(header: list, registry: Registry) -> list:
utils.excel_save(sheets=sheets, file_path=file_path)

def save(self, file_path: str, kind: list = None):
kind = kind or ['phase', 'prepare', 'train', 'config']
kind = kind or ['phase', 'source', 'prepare', 'train', 'config']
file_name, file_ext = os.path.splitext(file_path)
processor = getattr(self, '_save_%s' % file_ext[1:], None)
if processor:
Expand Down
6 changes: 5 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def get_requirements(r: str):
url='https://github.com/kororo/excelcy',
license='MIT',
keywords=[
'spacy', 'spacy-pipeline', 'spacy-nlp', 'nlp', 'python', 'python3', 'entity', 'training', 'excel', 'xlsx', 'spacy-extensions'
'spacy', 'spacy-pipeline', 'spacy-nlp', 'nlp', 'python', 'python3', 'entity', 'training', 'excel', 'xlsx',
'spacy-extensions'
],
classifiers=[
'Development Status :: 4 - Beta',
Expand All @@ -54,4 +55,7 @@ def get_requirements(r: str):
install_requires=REQUIRES,
tests_require=['coverage', 'pytest'],
packages=find_packages(),
entry_points={
'console_scripts': ['excelcy=excelcy.cli:main'],
}
)
Binary file added tests/data/source/source_03.xlsx
Binary file not shown.
Binary file modified tests/data/test_data_03.xlsx
Binary file not shown.
Binary file modified tests/data/test_data_04.xlsx
Binary file not shown.
10 changes: 5 additions & 5 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,14 @@ def assert_training(self, file_path: str, entity_tests: dict = None):
train_ents = set([(gold.subtext, gold.entity) for _, gold in train.items.items()])
doc = nlp(train.text)
ents = set([(ent.text, ent.label_) for ent in doc.ents])
# verify based on data
assert train_ents <= ents
# verify if test given
test = (entity_tests or {}).get(idx, set())
assert test <= ents
for ent in ents:
assert ent in train_ents

def extract_storage(self, storage: Storage):
data = storage.as_dict()
# remove phase
if data.get('phase'):
del data['phase']
# clean up the offset
for _, train in data['train']['items'].items():
for _, gold in train['items'].items():
Expand Down
9 changes: 1 addition & 8 deletions tests/test_excelcy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,4 @@ def test_save(self):
def test_execute_phases(self):
    """ Test: executing phases """

    # assert_training takes the workbook path and checks the entities the
    # pipeline recognises against the training data.
    data_path = self.get_test_data_path(fs_path='test_data_04.xlsx')
    self.assert_training(file_path=data_path)
9 changes: 7 additions & 2 deletions tests/test_readme.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from excelcy import ExcelCy
from excelcy import ExcelCy, cli
from excelcy.storage import Config
from tests.test_base import BaseTestCase

Expand Down Expand Up @@ -33,7 +33,7 @@ def test_readme_03(self):
assert excelcy.nlp('Robertus Johansyah is maintainer ExcelCy').ents[0].label_ == 'PERSON'

def test_readme_04(self):
""" Test: test real world scenario """
""" Test: code snippet found in README.rst """

# load first and confirm Himalayas is PRODUCT
excelcy = ExcelCy.execute(file_path=self.get_test_data_path(fs_path='test_data_05.xlsx'))
Expand All @@ -44,3 +44,8 @@ def test_readme_04(self):
excelcy = ExcelCy.execute(file_path=self.get_test_data_path(fs_path='test_data_05a.xlsx'))
gold = excelcy.storage.train.items.get('1').items.get('1.1')
assert gold.subtext == 'Himalayas' and gold.entity == 'FAC'

def test_readme_05(self):
    """ Test: code snippet found in README.rst """

    # argv layout: program name placeholder, command, workbook path
    workbook = self.get_test_data_path('test_data_01.xlsx')
    argv = ['', 'execute', workbook]
    cli.main(argv)
12 changes: 6 additions & 6 deletions tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@ class StorageTestCase(BaseTestCase):
def test_load_save_excel(self):
    """ Test: storage content survives a save/load round trip through Excel """
    storage = Storage()
    storage.load(file_path=self.get_test_data_path(fs_path='test_data_03.xlsx'))
    # Compare extract_storage() snapshots (which clean up the offsets) rather
    # than raw as_dict() output.
    expected = self.extract_storage(storage=storage)
    tmp_path = self.get_test_tmp_path(fs_path='test_data_03.xlsx')
    storage.save(file_path=tmp_path)
    storage.load(file_path=tmp_path)
    actual = self.extract_storage(storage=storage)
    assert expected == actual

def test_load_save_yml(self):
    """ Test: storage content survives a save/load round trip through YAML """
    storage = Storage()
    storage.load(file_path=self.get_test_data_path(fs_path='test_data_03.xlsx'))
    # Compare extract_storage() snapshots (which clean up the offsets) rather
    # than raw as_dict() output.
    expected = self.extract_storage(storage=storage)
    tmp_path = self.get_test_tmp_path(fs_path='test_data_03.yml')
    storage.save(file_path=tmp_path)
    storage.load(file_path=tmp_path)
    actual = self.extract_storage(storage=storage)
    assert expected == actual

0 comments on commit a70ae56

Please sign in to comment.