Merge ab8f7ad into 1800a86

nielstron · Sep 20, 2018 · 035e5e0 · 035e5e0
2 parents 1800a86 + ab8f7ad
commit 035e5e0
Show file tree

Hide file tree

Showing 9 changed files with 147 additions and 75 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -14,15 +14,13 @@ matrix:
 install:
   - pip install coverage
   - pip install coveralls
-  - pip install numpy
-  - pip install scipy
-  - pip install scikit-learn
-  - pip install wikipedia
-  - pip install stemming
   - pip install .
 
 script:
-  - coverage run --source=quantulum3 setup.py test
+  - coverage run -a --source=quantulum3 setup.py test -s quantulum3.tests.test_setup
+  - coverage run -a --source=quantulum3 setup.py test -s quantulum3.tests.test_no_classifier
+  - pip install -r requirements_classifier.txt
+  - coverage run -a --source=quantulum3 setup.py test -s quantulum3.tests.test_classifier
   - coverage report
 
 after_success:

diff --git a/README.md b/README.md
@@ -29,7 +29,7 @@ the packages `stemming` and `wikipedia`. Use the method `train_classifier` in `q
 You could also [download requirements_classifier.txt](https://raw.githubusercontent.com/nielstron/quantulum3/dev/requirements_classifier.txt)
 and run 
 ```bash
-$ pip install requirements_classifier.txt
+$ pip install -r requirements_classifier.txt
 ```
 
 Contributing

diff --git a/quantulum3/classes.py b/quantulum3/classes.py
@@ -4,7 +4,7 @@
 :mod:`Quantulum` classes.
 """
 
-# Dependences
+# Dependencies
 import inflect
 import num2words
 

diff --git a/quantulum3/classifier.py b/quantulum3/classifier.py
@@ -12,7 +12,7 @@
 import string
 import pkg_resources
 
-# Dependences
+# Semi-dependencies
 try:
     from sklearn.linear_model import SGDClassifier
     from sklearn.feature_extraction.text import TfidfVectorizer
@@ -21,6 +21,16 @@
     SGDClassifier, TfidfVectorizer = None, None
     USE_CLF = False
 
+try:
+    import wikipedia
+except ImportError:
+    wikipedia = None
+
+try:
+    from stemming.porter2 import stem
+except ImportError:
+    stem = None
+
 # Quantulum
 from . import load
 
@@ -31,7 +41,9 @@ def download_wiki(store=True):  # pragma: no cover
     Download WikiPedia pages of ambiguous units.
     @:param store (bool) store wikipedia data in wiki.json file
     """
-    import wikipedia
+    if not wikipedia:
+        print("Cannot download wikipedia pages. Install package wikipedia first.")
+        return
 
     ambiguous = [i for i in list(load.UNITS.items()) if len(i[1]) > 1]
     ambiguous += [i for i in list(load.DERIVED_ENT.items()) if len(i[1]) > 1]
@@ -67,7 +79,8 @@ def clean_text(text):
     """
     Clean text for TFIDF
     """
-    from stemming.porter2 import stem
+    if not stem:
+        raise ImportError("Module stemming is not installed.")
 
     my_regex = re.compile(r'[%s]' % re.escape(string.punctuation))
     new_text = my_regex.sub(' ', text)
@@ -151,7 +164,7 @@ def load_classifier():
         obj = pickle.load(file, encoding='latin1')
 
     cur_scipy_version = pkg_resources.get_distribution('scikit-learn').version
-    if cur_scipy_version != obj.get('scikit-learn_version'):
+    if cur_scipy_version != obj.get('scikit-learn_version'):  # pragma: no cover
         logging.warning(
             "The classifier was built using a different scikit-learn version (={}, !={}). The disambiguation tool could behave unexpectedly. Consider running classifier.train_classfier()"
             .format(obj.get('scikit-learn_version'), cur_scipy_version))

diff --git a/quantulum3/load.py b/quantulum3/load.py
@@ -12,7 +12,7 @@
 from collections import defaultdict
 import re
 
-# Dependences
+# Dependencies
 import inflect
 
 # Quantulum

diff --git a/quantulum3/parser.py b/quantulum3/parser.py
@@ -11,6 +11,7 @@
 from collections import defaultdict
 from math import pow
 
+# Quantulum
 from . import load
 from . import regex as reg
 from . import classes as cls

diff --git a/quantulum3/tests/test_classifier.py b/quantulum3/tests/test_classifier.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+:mod:`Quantulum` tests.
+"""
+
+# Standard library
+import os
+import json
+import pickle
+import urllib.request
+import unittest
+
+# Quantulum
+from .. import load
+from .. import parser as p
+from .. import classifier as clf
+from .test_setup import load_expand_tests, load_quantity_tests
+
+COLOR1 = '\033[94m%s\033[0m'
+COLOR2 = '\033[91m%s\033[0m'
+TOPDIR = os.path.dirname(__file__) or "."
+
+
+################################################################################
+class ClassifierTest(unittest.TestCase):
+    """Test suite for the quantulum3 project."""
+
+    def test_parse_classifier(self):
+        """ Test that parsing works with classifier usage """
+        all_tests = load_quantity_tests(False) + load_quantity_tests(True)
+        # forcedly activate classifier
+        clf.USE_CLF = True
+        for test in sorted(all_tests, key=lambda x: len(x['req'])):
+            quants = p.parse(test['req'])
+            self.assertEqual(
+                quants, test['res'],
+                "{} \n {}".format([quant.__dict__ for quant in quants],
+                                  [quant.__dict__ for quant in test['res']]))
+
+    def test_training(self):
+        """ Test that classifier training works """
+        # Test that no errors are thrown during training
+        obj = clf.train_classifier(download=False, store=False)
+        # Test that the classifier works with the currently downloaded data
+        clf.TFIDF_MODEL, clf.CLF, clf.TARGET_NAMES = obj['tfidf_model'], obj[
+            'clf'], obj['target_names']
+        # Don't run tests with ambiguities because the result is non-deterministic
+
+    def test_expand(self):
+        all_tests = load_expand_tests()
+        for test in all_tests:
+            result = p.inline_parse_and_expand(test['req'])
+            self.assertEqual(result, test['res'])
+
+    def test_classifier_up_to_date(self):
+        """ Test that the classifier has been built with the latest version of scikit-learn """
+        path = os.path.join(load.TOPDIR, 'clf.pickle')
+        with open(path, 'rb') as clf_file:
+            obj = pickle.load(clf_file, encoding='latin1')
+        clf_version = obj['scikit-learn_version']
+        with urllib.request.urlopen(
+                "https://pypi.org/pypi/scikit-learn/json") as response:
+            cur_version = json.loads(
+                response.read().decode('utf-8'))['info']['version']
+        self.assertEqual(
+            clf_version, cur_version,
+            "Classifier has been built with scikit-learn version {}, while the newest version is {}. Please update scikit-learn."
+            .format(clf_version, cur_version))
+
+
+################################################################################
+if __name__ == '__main__':  # pragma: no cover
+
+    unittest.main()
diff --git a/quantulum3/tests/test_no_classifier.py b/quantulum3/tests/test_no_classifier.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+:mod:`Quantulum` tests.
+"""
+
+# Standard library
+import os
+import unittest
+
+# Quantulum
+from .. import parser as p
+from .test_setup import load_quantity_tests
+
+COLOR1 = '\033[94m%s\033[0m'
+COLOR2 = '\033[91m%s\033[0m'
+TOPDIR = os.path.dirname(__file__) or "."
+
+
+################################################################################
+class ParsingTest(unittest.TestCase):
+    """Test suite for the quantulum3 project."""
+
+    def test_parse_no_classifier(self):
+        """ Test that parsing works without classifier usage """
+        all_tests = load_quantity_tests(False)
+        for test in sorted(all_tests, key=lambda x: len(x['req'])):
+            quants = p.parse(test['req'])
+            self.assertEqual(
+                quants, test['res'], "\nExcpected: {1} \nGot: {0}".format(
+                    [quant.__dict__ for quant in quants],
+                    [quant.__dict__ for quant in test['res']]))
+
+
+################################################################################
+if __name__ == '__main__':  # pragma: no cover
+
+    unittest.main()
diff --git a/quantulum3/tests/tests.py b/quantulum3/tests/test_setup.py
@@ -8,18 +8,18 @@
 import os
 import re
 import json
-import pickle
-import urllib.request
 import unittest
 
-# Dependences
-import wikipedia
+# Dependencies
+try:
+    import wikipedia
+except ImportError:
+    wikipedia = None
 
 # Quantulum
 from .. import load
 from .. import parser as p
 from .. import classes as cls
-from .. import classifier as clf
 
 COLOR1 = '\033[94m%s\033[0m'
 COLOR2 = '\033[91m%s\033[0m'
@@ -36,6 +36,9 @@ def wiki_test(page='CERN'):  # pragma: no cover
         Hubble_Space_Telescope,
         Herschel_Space_Observatory
     """
+    if not wikipedia:
+        print("Cannot activate wiki_test. Please install the package wikipedia first.")
+        return
 
     content = wikipedia.page(page).content
     parsed = p.parse(content)
@@ -141,7 +144,7 @@ def load_expand_tests():
 
 
 ################################################################################
-class EndToEndTests(unittest.TestCase):
+class SetupTest(unittest.TestCase):
     """Test suite for the quantulum3 project."""
 
     def test_load_tests(self):
@@ -150,47 +153,6 @@ def test_load_tests(self):
         self.assertFalse(load_quantity_tests(False) is None)
         self.assertFalse(load_expand_tests() is None)
 
-    def test_parse_classifier(self):
-        """ Test that parsing works with classifier usage """
-        all_tests = load_quantity_tests(False) + load_quantity_tests(True)
-        # forcedly activate classifier
-        clf.USE_CLF = True
-        for test in sorted(all_tests, key=lambda x: len(x['req'])):
-            quants = p.parse(test['req'])
-            self.assertEqual(
-                quants, test['res'],
-                "{} \n {}".format([quant.__dict__ for quant in quants],
-                                  [quant.__dict__ for quant in test['res']]))
-
-    def test_parse_no_classifier(self):
-        """ Test that parsing works without classifier usage """
-        all_tests = load_quantity_tests(False)
-        # forcedly deactivate classifier
-        clf.USE_CLF = False
-        for test in sorted(all_tests, key=lambda x: len(x['req'])):
-            quants = p.parse(test['req'])
-            self.assertEqual(
-                quants, test['res'], "\nExcpected: {1} \nGot: {0}".format(
-                    [quant.__dict__ for quant in quants],
-                    [quant.__dict__ for quant in test['res']]))
-
-    def test_training(self):
-        """ Test that classifier training works """
-        # TODO - update test to not overwirte existing clf.pickle and wiki.json files.
-        # Test that no errors are thrown during training
-        obj = clf.train_classifier(download=False, store=False)
-        # Test that the classifier works with the currently downloaded data
-        clf.TFIDF_MODEL, clf.CLF, clf.TARGET_NAMES = obj['tfidf_model'], obj[
-            'clf'], obj['target_names']
-        # Don't run with ambiguities because result is non-detemernistic
-        self.test_parse_no_classifier()
-
-    def test_expand(self):
-        all_tests = load_expand_tests()
-        for test in all_tests:
-            result = p.inline_parse_and_expand(test['req'])
-            self.assertEqual(result, test['res'])
-
     def test_build_script(self):
         """ Test that the build script has run correctly """
         # Read raw 4 letter file
@@ -200,21 +162,6 @@ def test_build_script(self):
                 load.COMMON_WORDS[length], word_set,
                 "Build script has not been run since change to critical files")
 
-    def test_classifier_up_to_date(self):
-        """ Test that the classifier has been built with the latest version of scikit-learn """
-        path = os.path.join(load.TOPDIR, 'clf.pickle')
-        with open(path, 'rb') as clf_file:
-            obj = pickle.load(clf_file, encoding='latin1')
-        clf_version = obj['scikit-learn_version']
-        with urllib.request.urlopen(
-                "https://pypi.org/pypi/scikit-learn/json") as response:
-            cur_version = json.loads(
-                response.read().decode('utf-8'))['info']['version']
-        self.assertEqual(
-            clf_version, cur_version,
-            "Classifier has been built with scikit-learn version {}, while the newest version is {}. Please update scikit-learn."
-            .format(clf_version, cur_version))
-
 
 ################################################################################
 if __name__ == '__main__':  # pragma: no cover