Skip to content

Commit

Permalink
Merge ab8f7ad into 1800a86
Browse files Browse the repository at this point in the history
  • Loading branch information
nielstron committed Sep 20, 2018
2 parents 1800a86 + ab8f7ad commit 035e5e0
Show file tree
Hide file tree
Showing 9 changed files with 147 additions and 75 deletions.
10 changes: 4 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,13 @@ matrix:
install:
- pip install coverage
- pip install coveralls
- pip install numpy
- pip install scipy
- pip install scikit-learn
- pip install wikipedia
- pip install stemming
- pip install .

script:
- coverage run --source=quantulum3 setup.py test
- coverage run -a --source=quantulum3 setup.py test -s quantulum3.tests.test_setup
- coverage run -a --source=quantulum3 setup.py test -s quantulum3.tests.test_no_classifier
- pip install -r requirements_classifier.txt
- coverage run -a --source=quantulum3 setup.py test -s quantulum3.tests.test_classifier
- coverage report

after_success:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ the packages `stemming` and `wikipedia`. Use the method `train_classifier` in `q
You could also [download requirements_classifier.txt](https://raw.githubusercontent.com/nielstron/quantulum3/dev/requirements_classifier.txt)
and run
```bash
$ pip install requirements_classifier.txt
$ pip install -r requirements_classifier.txt
```

Contributing
Expand Down
2 changes: 1 addition & 1 deletion quantulum3/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
:mod:`Quantulum` classes.
"""

# Dependences
# Dependencies
import inflect
import num2words

Expand Down
21 changes: 17 additions & 4 deletions quantulum3/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import string
import pkg_resources

# Dependences
# Semi-dependencies
try:
from sklearn.linear_model import SGDClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
Expand All @@ -21,6 +21,16 @@
SGDClassifier, TfidfVectorizer = None, None
USE_CLF = False

try:
import wikipedia
except ImportError:
wikipedia = None

try:
from stemming.porter2 import stem
except ImportError:
stem = None

# Quantulum
from . import load

Expand All @@ -31,7 +41,9 @@ def download_wiki(store=True): # pragma: no cover
Download WikiPedia pages of ambiguous units.
@:param store (bool) store wikipedia data in wiki.json file
"""
import wikipedia
if not wikipedia:
print("Cannot download wikipedia pages. Install package wikipedia first.")
return

ambiguous = [i for i in list(load.UNITS.items()) if len(i[1]) > 1]
ambiguous += [i for i in list(load.DERIVED_ENT.items()) if len(i[1]) > 1]
Expand Down Expand Up @@ -67,7 +79,8 @@ def clean_text(text):
"""
Clean text for TFIDF
"""
from stemming.porter2 import stem
if not stem:
raise ImportError("Module stemming is not installed.")

my_regex = re.compile(r'[%s]' % re.escape(string.punctuation))
new_text = my_regex.sub(' ', text)
Expand Down Expand Up @@ -151,7 +164,7 @@ def load_classifier():
obj = pickle.load(file, encoding='latin1')

cur_scipy_version = pkg_resources.get_distribution('scikit-learn').version
if cur_scipy_version != obj.get('scikit-learn_version'):
if cur_scipy_version != obj.get('scikit-learn_version'): # pragma: no cover
logging.warning(
"The classifier was built using a different scikit-learn version (={}, !={}). The disambiguation tool could behave unexpectedly. Consider running classifier.train_classfier()"
.format(obj.get('scikit-learn_version'), cur_scipy_version))
Expand Down
2 changes: 1 addition & 1 deletion quantulum3/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from collections import defaultdict
import re

# Dependences
# Dependencies
import inflect

# Quantulum
Expand Down
1 change: 1 addition & 0 deletions quantulum3/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from collections import defaultdict
from math import pow

# Quantulum
from . import load
from . import regex as reg
from . import classes as cls
Expand Down
75 changes: 75 additions & 0 deletions quantulum3/tests/test_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
:mod:`Quantulum` tests.
"""

# Standard library
import os
import json
import pickle
import urllib.request
import unittest

# Quantulum
from .. import load
from .. import parser as p
from .. import classifier as clf
from .test_setup import load_expand_tests, load_quantity_tests

# ANSI escape templates: wrap the %s argument in bright blue (94) or bright
# red (91) and reset all attributes (0) afterwards. Neither is referenced in
# the part of this module visible here.
COLOR1 = '\033[94m%s\033[0m'
COLOR2 = '\033[91m%s\033[0m'
# Directory containing this test file; falls back to "." when dirname() is
# empty.
TOPDIR = os.path.dirname(__file__) or "."


################################################################################
class ClassifierTest(unittest.TestCase):
    """Test suite for the classifier-dependent parts of quantulum3."""

    def test_parse_classifier(self):
        """ Test that parsing works with classifier usage """
        all_tests = load_quantity_tests(False) + load_quantity_tests(True)
        # Forcibly activate the classifier, but only for the duration of this
        # test: restore the previous flag afterwards so the module-level
        # state does not leak into tests that run later in the same process.
        self.addCleanup(setattr, clf, 'USE_CLF', clf.USE_CLF)
        clf.USE_CLF = True
        # Shortest requests first, so the simplest failing case is reported.
        for test in sorted(all_tests, key=lambda x: len(x['req'])):
            quants = p.parse(test['req'])
            self.assertEqual(
                quants, test['res'], "\nExpected: {1} \nGot: {0}".format(
                    [quant.__dict__ for quant in quants],
                    [quant.__dict__ for quant in test['res']]))

    def test_training(self):
        """ Test that classifier training works """
        # Test that no errors are thrown during training
        obj = clf.train_classifier(download=False, store=False)
        # Install the freshly trained model in the classifier module
        clf.TFIDF_MODEL, clf.CLF, clf.TARGET_NAMES = obj['tfidf_model'], obj[
            'clf'], obj['target_names']
        # Don't run tests with ambiguities because result is non-deterministic

    def test_expand(self):
        """ Test that inline_parse_and_expand returns the expected text """
        all_tests = load_expand_tests()
        for test in all_tests:
            result = p.inline_parse_and_expand(test['req'])
            self.assertEqual(result, test['res'])

    def test_classifier_up_to_date(self):
        """ Test that the classifier has been built with the latest version of scikit-learn """
        # The pickle read here is the clf.pickle file located in load.TOPDIR.
        path = os.path.join(load.TOPDIR, 'clf.pickle')
        with open(path, 'rb') as clf_file:
            obj = pickle.load(clf_file, encoding='latin1')
        clf_version = obj['scikit-learn_version']
        # NOTE: requires network access; the newest released version is read
        # from the PyPI JSON API.
        with urllib.request.urlopen(
                "https://pypi.org/pypi/scikit-learn/json") as response:
            cur_version = json.loads(
                response.read().decode('utf-8'))['info']['version']
        self.assertEqual(
            clf_version, cur_version,
            "Classifier has been built with scikit-learn version {}, while the newest version is {}. Please update scikit-learn."
            .format(clf_version, cur_version))


################################################################################
# Run this module's tests through unittest's command-line runner when the
# file is executed directly.
if __name__ == '__main__': # pragma: no cover

    unittest.main()
38 changes: 38 additions & 0 deletions quantulum3/tests/test_no_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
:mod:`Quantulum` tests.
"""

# Standard library
import os
import unittest

# Quantulum
from .. import parser as p
from .test_setup import load_quantity_tests

# ANSI escape templates: wrap the %s argument in bright blue (94) or bright
# red (91) and reset all attributes (0) afterwards. Neither is referenced in
# the part of this module visible here.
COLOR1 = '\033[94m%s\033[0m'
COLOR2 = '\033[91m%s\033[0m'
# Directory containing this test file; falls back to "." when dirname() is
# empty.
TOPDIR = os.path.dirname(__file__) or "."


################################################################################
class ParsingTest(unittest.TestCase):
    """Test suite for quantulum3 parsing without the classifier."""

    def test_parse_no_classifier(self):
        """ Test that parsing works without classifier usage """
        all_tests = load_quantity_tests(False)
        # Shortest requests first, so the simplest failing case is reported.
        for test in sorted(all_tests, key=lambda x: len(x['req'])):
            quants = p.parse(test['req'])
            # Failure message shows the expected quantities, then the parsed
            # ones, as attribute dictionaries.
            self.assertEqual(
                quants, test['res'], "\nExpected: {1} \nGot: {0}".format(
                    [quant.__dict__ for quant in quants],
                    [quant.__dict__ for quant in test['res']]))


################################################################################
# Run this module's tests through unittest's command-line runner when the
# file is executed directly.
if __name__ == '__main__': # pragma: no cover

    unittest.main()
71 changes: 9 additions & 62 deletions quantulum3/tests/tests.py → quantulum3/tests/test_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
import os
import re
import json
import pickle
import urllib.request
import unittest

# Dependences
import wikipedia
# Dependencies
try:
import wikipedia
except ImportError:
wikipedia = None

# Quantulum
from .. import load
from .. import parser as p
from .. import classes as cls
from .. import classifier as clf

COLOR1 = '\033[94m%s\033[0m'
COLOR2 = '\033[91m%s\033[0m'
Expand All @@ -36,6 +36,9 @@ def wiki_test(page='CERN'): # pragma: no cover
Hubble_Space_Telescope,
Herschel_Space_Observatory
"""
if not wikipedia:
print("Cannot activate wiki_test. Please install the package wikipedia first.")
return

content = wikipedia.page(page).content
parsed = p.parse(content)
Expand Down Expand Up @@ -141,7 +144,7 @@ def load_expand_tests():


################################################################################
class EndToEndTests(unittest.TestCase):
class SetupTest(unittest.TestCase):
"""Test suite for the quantulum3 project."""

def test_load_tests(self):
Expand All @@ -150,47 +153,6 @@ def test_load_tests(self):
self.assertFalse(load_quantity_tests(False) is None)
self.assertFalse(load_expand_tests() is None)

def test_parse_classifier(self):
""" Test that parsing works with classifier usage """
all_tests = load_quantity_tests(False) + load_quantity_tests(True)
# forcedly activate classifier
clf.USE_CLF = True
for test in sorted(all_tests, key=lambda x: len(x['req'])):
quants = p.parse(test['req'])
self.assertEqual(
quants, test['res'],
"{} \n {}".format([quant.__dict__ for quant in quants],
[quant.__dict__ for quant in test['res']]))

def test_parse_no_classifier(self):
""" Test that parsing works without classifier usage """
all_tests = load_quantity_tests(False)
# forcedly deactivate classifier
clf.USE_CLF = False
for test in sorted(all_tests, key=lambda x: len(x['req'])):
quants = p.parse(test['req'])
self.assertEqual(
quants, test['res'], "\nExcpected: {1} \nGot: {0}".format(
[quant.__dict__ for quant in quants],
[quant.__dict__ for quant in test['res']]))

def test_training(self):
""" Test that classifier training works """
# TODO - update test to not overwirte existing clf.pickle and wiki.json files.
# Test that no errors are thrown during training
obj = clf.train_classifier(download=False, store=False)
# Test that the classifier works with the currently downloaded data
clf.TFIDF_MODEL, clf.CLF, clf.TARGET_NAMES = obj['tfidf_model'], obj[
'clf'], obj['target_names']
# Don't run with ambiguities because result is non-detemernistic
self.test_parse_no_classifier()

def test_expand(self):
all_tests = load_expand_tests()
for test in all_tests:
result = p.inline_parse_and_expand(test['req'])
self.assertEqual(result, test['res'])

def test_build_script(self):
""" Test that the build script has run correctly """
# Read raw 4 letter file
Expand All @@ -200,21 +162,6 @@ def test_build_script(self):
load.COMMON_WORDS[length], word_set,
"Build script has not been run since change to critical files")

def test_classifier_up_to_date(self):
""" Test that the classifier has been built with the latest version of scikit-learn """
path = os.path.join(load.TOPDIR, 'clf.pickle')
with open(path, 'rb') as clf_file:
obj = pickle.load(clf_file, encoding='latin1')
clf_version = obj['scikit-learn_version']
with urllib.request.urlopen(
"https://pypi.org/pypi/scikit-learn/json") as response:
cur_version = json.loads(
response.read().decode('utf-8'))['info']['version']
self.assertEqual(
clf_version, cur_version,
"Classifier has been built with scikit-learn version {}, while the newest version is {}. Please update scikit-learn."
.format(clf_version, cur_version))


################################################################################
if __name__ == '__main__': # pragma: no cover
Expand Down

0 comments on commit 035e5e0

Please sign in to comment.