diff --git a/.travis.yml b/.travis.yml index c5cdb24..8e20722 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,15 +14,17 @@ matrix: install: - pip install coverage - pip install coveralls - - pip install numpy - - pip install scipy - - pip install scikit-learn - - pip install wikipedia - - pip install stemming - pip install . script: - - coverage run --source=quantulum3 setup.py test + - coverage run --source=quantulum3 setup.py test -s quantulum3.tests.test_setup + - coverage run --source=quantulum3 setup.py test -s quantulum3.tests.test_no_classifier + - pip install wikipedia + - pip install stemming + - pip install numpy + - pip install scipy + - pip install scikit-learn + - coverage run --source=quantulum3 setup.py test -s quantulum3.tests.test_classifier - coverage report after_success: diff --git a/quantulum3/classes.py b/quantulum3/classes.py index 11a8772..a9fc0b7 100644 --- a/quantulum3/classes.py +++ b/quantulum3/classes.py @@ -4,7 +4,7 @@ :mod:`Quantulum` classes. """ -# Dependences +# Dependencies import inflect import num2words diff --git a/quantulum3/classifier.py b/quantulum3/classifier.py index 3066595..b9f1fe7 100644 --- a/quantulum3/classifier.py +++ b/quantulum3/classifier.py @@ -12,7 +12,7 @@ import string import pkg_resources -# Dependences +# Semi-dependencies try: from sklearn.linear_model import SGDClassifier from sklearn.feature_extraction.text import TfidfVectorizer @@ -21,6 +21,16 @@ SGDClassifier, TfidfVectorizer = None, None USE_CLF = False +try: + import wikipedia +except ImportError: + wikipedia = None + +try: + from stemming.porter2 import stem +except ImportError: + stem = None + # Quantulum from . import load @@ -31,7 +41,9 @@ def download_wiki(store=True): # pragma: no cover Download WikiPedia pages of ambiguous units. @:param store (bool) store wikipedia data in wiki.json file """ - import wikipedia + if not wikipedia: + print("Cannot download wikipedia pages. 
Install package wikipedia first.") + return ambiguous = [i for i in list(load.UNITS.items()) if len(i[1]) > 1] ambiguous += [i for i in list(load.DERIVED_ENT.items()) if len(i[1]) > 1] @@ -67,7 +79,8 @@ def clean_text(text): """ Clean text for TFIDF """ - from stemming.porter2 import stem + if not stem: + raise ImportError("Module stemming is not installed.") my_regex = re.compile(r'[%s]' % re.escape(string.punctuation)) new_text = my_regex.sub(' ', text) @@ -151,7 +164,7 @@ def load_classifier(): obj = pickle.load(file, encoding='latin1') cur_scipy_version = pkg_resources.get_distribution('scikit-learn').version - if cur_scipy_version != obj.get('scikit-learn_version'): + if cur_scipy_version != obj.get('scikit-learn_version'): # pragma: no cover logging.warning( "The classifier was built using a different scikit-learn version (={}, !={}). The disambiguation tool could behave unexpectedly. Consider running classifier.train_classfier()" .format(obj.get('scikit-learn_version'), cur_scipy_version)) diff --git a/quantulum3/load.py b/quantulum3/load.py index 53aab70..b081e4d 100644 --- a/quantulum3/load.py +++ b/quantulum3/load.py @@ -12,7 +12,7 @@ from collections import defaultdict import re -# Dependences +# Dependencies import inflect # Quantulum diff --git a/quantulum3/parser.py b/quantulum3/parser.py index 44bf359..a1ecefa 100644 --- a/quantulum3/parser.py +++ b/quantulum3/parser.py @@ -11,6 +11,7 @@ from collections import defaultdict from math import pow +# Quantulum from . import load from . import regex as reg from . import classes as cls diff --git a/quantulum3/tests/test_classifier.py b/quantulum3/tests/test_classifier.py new file mode 100644 index 0000000..35aa8f8 --- /dev/null +++ b/quantulum3/tests/test_classifier.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +:mod:`Quantulum` tests. +""" + +# Standard library +import os +import json +import pickle +import urllib.request +import unittest + +# Quantulum +from .. 
import load +from .. import parser as p +from .. import classifier as clf +from .test_setup import load_expand_tests, load_quantity_tests + +COLOR1 = '\033[94m%s\033[0m' +COLOR2 = '\033[91m%s\033[0m' +TOPDIR = os.path.dirname(__file__) or "." + + +################################################################################ +class ClassifierTest(unittest.TestCase): + """Test suite for the quantulum3 project.""" + + def test_parse_classifier(self): + """ Test that parsing works with classifier usage """ + all_tests = load_quantity_tests(False) + load_quantity_tests(True) + # forcedly activate classifier + clf.USE_CLF = True + for test in sorted(all_tests, key=lambda x: len(x['req'])): + quants = p.parse(test['req']) + self.assertEqual( + quants, test['res'], + "{} \n {}".format([quant.__dict__ for quant in quants], + [quant.__dict__ for quant in test['res']])) + + def test_training(self): + """ Test that classifier training works """ + # Test that no errors are thrown during training + obj = clf.train_classifier(download=False, store=False) + # Test that the classifier works with the currently downloaded data + clf.TFIDF_MODEL, clf.CLF, clf.TARGET_NAMES = obj['tfidf_model'], obj[ + 'clf'], obj['target_names'] + # Don't run tests with ambiguities because result is non-deterministic + + def test_expand(self): + all_tests = load_expand_tests() + for test in all_tests: + result = p.inline_parse_and_expand(test['req']) + self.assertEqual(result, test['res']) + + def test_classifier_up_to_date(self): + """ Test that the classifier has been built with the latest version of scikit-learn """ + path = os.path.join(load.TOPDIR, 'clf.pickle') + with open(path, 'rb') as clf_file: + obj = pickle.load(clf_file, encoding='latin1') + clf_version = obj['scikit-learn_version'] + with urllib.request.urlopen( + "https://pypi.org/pypi/scikit-learn/json") as response: + cur_version = json.loads( + response.read().decode('utf-8'))['info']['version'] + self.assertEqual( + clf_version, 
cur_version, + "Classifier has been built with scikit-learn version {}, while the newest version is {}. Please update scikit-learn." + .format(clf_version, cur_version)) + + +################################################################################ +if __name__ == '__main__': # pragma: no cover + + unittest.main() diff --git a/quantulum3/tests/test_no_classifier.py b/quantulum3/tests/test_no_classifier.py new file mode 100644 index 0000000..25d5216 --- /dev/null +++ b/quantulum3/tests/test_no_classifier.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +:mod:`Quantulum` tests. +""" + +# Standard library +import os +import unittest + +# Quantulum +from .. import parser as p +from .test_setup import load_quantity_tests + +COLOR1 = '\033[94m%s\033[0m' +COLOR2 = '\033[91m%s\033[0m' +TOPDIR = os.path.dirname(__file__) or "." + + +################################################################################ +class ParsingTest(unittest.TestCase): + """Test suite for the quantulum3 project.""" + + def test_parse_no_classifier(self): + """ Test that parsing works without classifier usage """ + all_tests = load_quantity_tests(False) + for test in sorted(all_tests, key=lambda x: len(x['req'])): + quants = p.parse(test['req']) + self.assertEqual( + quants, test['res'], "\nExcpected: {1} \nGot: {0}".format( + [quant.__dict__ for quant in quants], + [quant.__dict__ for quant in test['res']])) + + +################################################################################ +if __name__ == '__main__': # pragma: no cover + + unittest.main() diff --git a/quantulum3/tests/tests.py b/quantulum3/tests/test_setup.py similarity index 64% rename from quantulum3/tests/tests.py rename to quantulum3/tests/test_setup.py index bf08578..7b066e7 100644 --- a/quantulum3/tests/tests.py +++ b/quantulum3/tests/test_setup.py @@ -8,18 +8,18 @@ import os import re import json -import pickle -import urllib.request import unittest -# Dependences -import wikipedia +# 
Dependencies +try: + import wikipedia +except ImportError: + wikipedia = None # Quantulum from .. import load from .. import parser as p from .. import classes as cls -from .. import classifier as clf COLOR1 = '\033[94m%s\033[0m' COLOR2 = '\033[91m%s\033[0m' @@ -36,6 +36,9 @@ def wiki_test(page='CERN'): # pragma: no cover Hubble_Space_Telescope, Herschel_Space_Observatory """ + if not wikipedia: + print("Cannot activate wiki_test. Please install the package wikipedia first.") + return content = wikipedia.page(page).content parsed = p.parse(content) @@ -141,7 +144,7 @@ def load_expand_tests(): ################################################################################ -class EndToEndTests(unittest.TestCase): +class SetupTest(unittest.TestCase): """Test suite for the quantulum3 project.""" def test_load_tests(self): @@ -150,47 +153,6 @@ def test_load_tests(self): self.assertFalse(load_quantity_tests(False) is None) self.assertFalse(load_expand_tests() is None) - def test_parse_classifier(self): - """ Test that parsing works with classifier usage """ - all_tests = load_quantity_tests(False) + load_quantity_tests(True) - # forcedly activate classifier - clf.USE_CLF = True - for test in sorted(all_tests, key=lambda x: len(x['req'])): - quants = p.parse(test['req']) - self.assertEqual( - quants, test['res'], - "{} \n {}".format([quant.__dict__ for quant in quants], - [quant.__dict__ for quant in test['res']])) - - def test_parse_no_classifier(self): - """ Test that parsing works without classifier usage """ - all_tests = load_quantity_tests(False) - # forcedly deactivate classifier - clf.USE_CLF = False - for test in sorted(all_tests, key=lambda x: len(x['req'])): - quants = p.parse(test['req']) - self.assertEqual( - quants, test['res'], "\nExcpected: {1} \nGot: {0}".format( - [quant.__dict__ for quant in quants], - [quant.__dict__ for quant in test['res']])) - - def test_training(self): - """ Test that classifier training works """ - # TODO - update test to not 
overwirte existing clf.pickle and wiki.json files. - # Test that no errors are thrown during training - obj = clf.train_classifier(download=False, store=False) - # Test that the classifier works with the currently downloaded data - clf.TFIDF_MODEL, clf.CLF, clf.TARGET_NAMES = obj['tfidf_model'], obj[ - 'clf'], obj['target_names'] - # Don't run with ambiguities because result is non-detemernistic - self.test_parse_no_classifier() - - def test_expand(self): - all_tests = load_expand_tests() - for test in all_tests: - result = p.inline_parse_and_expand(test['req']) - self.assertEqual(result, test['res']) - def test_build_script(self): """ Test that the build script has run correctly """ # Read raw 4 letter file @@ -200,21 +162,6 @@ def test_build_script(self): load.COMMON_WORDS[length], word_set, "Build script has not been run since change to critical files") - def test_classifier_up_to_date(self): - """ Test that the classifier has been built with the latest version of scikit-learn """ - path = os.path.join(load.TOPDIR, 'clf.pickle') - with open(path, 'rb') as clf_file: - obj = pickle.load(clf_file, encoding='latin1') - clf_version = obj['scikit-learn_version'] - with urllib.request.urlopen( - "https://pypi.org/pypi/scikit-learn/json") as response: - cur_version = json.loads( - response.read().decode('utf-8'))['info']['version'] - self.assertEqual( - clf_version, cur_version, - "Classifier has been built with scikit-learn version {}, while the newest version is {}. Please update scikit-learn." - .format(clf_version, cur_version)) - ################################################################################ if __name__ == '__main__': # pragma: no cover