Commit

Merge 0b1a06b into 18e4083
nielstron committed Sep 19, 2018
2 parents 18e4083 + 0b1a06b commit 1b74bcf
Showing 7 changed files with 188 additions and 196 deletions.
33 changes: 16 additions & 17 deletions quantulum3/classes.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
"""
:mod:`Quantulum` classes.
'''
"""

# Dependences
import inflect
@@ -14,9 +14,9 @@


class Quantity(object):
'''
"""
Class for a quantity (e.g. "4.2 gallons").
'''
"""

def __init__(self,
value=None,
@@ -59,10 +59,10 @@ def as_string(self): # pragma: no cover
return '{} {}'.format(self.value, self.unit.name)

def to_spoken(self):
'''
"""
Express quantity as a speakable string
:return: Speakable version of this quantity
'''
"""
count = self.value
if self.unit.entity.name == "currency" and self.unit.currency_code:
try:
@@ -83,9 +83,9 @@ def to_spoken(self):

################################################################################
class Unit(object):
'''
"""
Class for a unit (e.g. "gallon").
'''
"""

def __init__(self,
name=None,
@@ -106,11 +106,11 @@ def __init__(self,

@staticmethod
def name_from_dimensions(dimensions):
'''
"""
Build the name of the unit from its dimensions.
Param:
dimensions: List of dimensions
'''
"""

name = ''

@@ -133,21 +133,20 @@ def name_from_dimensions(dimensions):
return name

def infer_name(self):
'''
"""
Set own name based on dimensions
:return: new name of this unit
'''
"""
self.name = self.name_from_dimensions(
self.dimensions) if self.dimensions else None
return self.name

def to_spoken(self, count=1):
'''
"""
Convert a given unit to the unit in words, correctly inflected.
:param unit: The unit as class or string (only quantulum class supported so far)
:param count: The value of the quantity (i.e. 1 for one watt, 2 for two seconds)
:return: A string with the correctly inflected spoken version of the unit
'''
"""
if self.name == "dimensionless":
unit_string = ""
elif self.surfaces:
@@ -192,9 +191,9 @@ def __ne__(self, other):

################################################################################
class Entity(object):
'''
"""
Class for an entity (e.g. "volume").
'''
"""

def __init__(self, name=None, dimensions=None, uri=None):

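The changes to classes.py are purely stylistic: module and method docstrings switch from '''-quoted to """-quoted strings, the form PEP 257 recommends. For orientation, a minimal usage sketch of the classes touched in this file — the parse call comes from quantulum3's public API and the printed values are illustrative assumptions, not part of this diff:

# Minimal sketch, assuming the package is installed; values are illustrative.
from quantulum3 import parser

quants = parser.parse('I bought 4.2 gallons of milk')
quantity = quants[0]          # a classes.Quantity
print(quantity.value)         # e.g. 4.2
print(quantity.unit.name)     # e.g. 'gallon' (a classes.Unit)
print(quantity.to_spoken())   # spoken form, built via Unit.to_spoken(count)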
61 changes: 31 additions & 30 deletions quantulum3/classifier.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
'''
"""
:mod:`Quantulum` classifier functions.
'''
"""

# Standard library
import os
@@ -18,22 +18,23 @@
from sklearn.feature_extraction.text import TfidfVectorizer
USE_CLF = True
except ImportError:
SGDClassifier, TfidfVectorizer = None, None
USE_CLF = False

# Quantulum
from . import load as l
from . import load


################################################################################
def download_wiki(store=True): # pragma: no cover
'''
"""
Download WikiPedia pages of ambiguous units.
@:param store (bool) store wikipedia data in wiki.json file
'''
"""
import wikipedia

ambiguous = [i for i in list(l.UNITS.items()) if len(i[1]) > 1]
ambiguous += [i for i in list(l.DERIVED_ENT.items()) if len(i[1]) > 1]
ambiguous = [i for i in list(load.UNITS.items()) if len(i[1]) > 1]
ambiguous += [i for i in list(load.DERIVED_ENT.items()) if len(i[1]) > 1]
pages = set([(j.name, j.uri) for i in ambiguous for j in i[1]])

print()
@@ -51,7 +52,7 @@ def download_wiki(store=True): # pragma: no cover
obj['unit'] = page[0]
objs.append(obj)

path = os.path.join(l.TOPDIR, 'wiki.json')
path = os.path.join(load.TOPDIR, 'wiki.json')
os.remove(path)
if store:
with open(path, 'w') as wiki_file:
@@ -63,9 +64,9 @@ def download_wiki(store=True): # pragma: no cover

################################################################################
def clean_text(text):
'''
"""
Clean text for TFIDF
'''
"""
from stemming.porter2 import stem

my_regex = re.compile(r'[%s]' % re.escape(string.punctuation))
@@ -86,18 +87,18 @@ def train_classifier(download=True,
parameters=None,
ngram_range=(1, 1),
store=True):
'''
"""
Train the intent classifier
TODO auto invoke if sklearn version is new or first install or sth
@:param store (bool) store classifier in clf.pickle
'''
path = os.path.join(l.TOPDIR, 'train.json')
"""
path = os.path.join(load.TOPDIR, 'train.json')
with open(path, 'r', encoding='utf-8') as train_file:
training_set = json.load(train_file)

wiki_set = download_wiki(store) if download else None
if not wiki_set:
path = os.path.join(l.TOPDIR, 'wiki.json')
path = os.path.join(load.TOPDIR, 'wiki.json')
with open(path, 'r', encoding='utf-8') as wiki_file:
wiki_set = json.load(wiki_file)

@@ -133,19 +134,19 @@
target_names
}
if store: # pragma: no cover
path = os.path.join(l.TOPDIR, 'clf.pickle')
path = os.path.join(load.TOPDIR, 'clf.pickle')
with open(path, 'wb') as file:
pickle.dump(obj, file)
return obj


################################################################################
def load_classifier():
'''
"""
Load the intent classifier
'''
"""

path = os.path.join(l.TOPDIR, 'clf.pickle')
path = os.path.join(load.TOPDIR, 'clf.pickle')
with open(path, 'rb') as file:
obj = pickle.load(file, encoding='latin1')

@@ -168,25 +169,25 @@

################################################################################
def disambiguate_entity(key, text):
'''
"""
Resolve ambiguity between entities with same dimensionality.
'''
"""

new_ent = l.DERIVED_ENT[key][0]
new_ent = load.DERIVED_ENT[key][0]

if len(l.DERIVED_ENT[key]) > 1:
if len(load.DERIVED_ENT[key]) > 1:
transformed = TFIDF_MODEL.transform([text])
scores = CLF.predict_proba(transformed).tolist()[0]
scores = zip(scores, TARGET_NAMES)

# Filter for possible names
names = [i.name for i in l.DERIVED_ENT[key]]
names = [i.name for i in load.DERIVED_ENT[key]]
scores = [i for i in scores if i[1] in names]

# Sort by rank
scores = sorted(scores, key=lambda x: x[0], reverse=True)
try:
new_ent = l.ENTITIES[scores[0][1]]
new_ent = load.ENTITIES[scores[0][1]]
except IndexError:
logging.debug('\tAmbiguity not resolved for "%s"', str(key))

@@ -195,14 +196,14 @@ def disambiguate_entity(key, text):

################################################################################
def disambiguate_unit(unit, text):
'''
"""
Resolve ambiguity between units with same names, symbols or abbreviations.
'''
"""

new_unit = l.UNIT_SYMBOLS.get(unit) or l.UNITS.get(unit)
new_unit = load.UNIT_SYMBOLS.get(unit) or load.UNITS.get(unit)
if not new_unit:
new_unit = l.LOWER_UNITS.get(unit.lower()) or l.UNIT_SYMBOLS_LOWER.get(
unit.lower())
new_unit = load.LOWER_UNITS.get(
unit.lower()) or load.UNIT_SYMBOLS_LOWER.get(unit.lower())
if not new_unit:
raise KeyError('Could not find unit "%s" from "%s"' % (unit, text))

Expand All @@ -218,7 +219,7 @@ def disambiguate_unit(unit, text):
# Sort by rank
scores = sorted(scores, key=lambda x: x[0], reverse=True)
try:
final = l.UNITS[scores[0][1]][0]
final = load.UNITS[scores[0][1]][0]
logging.debug('\tAmbiguity resolved for "%s" (%s)', unit, scores)
except IndexError:
logging.debug('\tAmbiguity not resolved for "%s"', unit)
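The substantive change in classifier.py is the rename of the module alias l to the full name load (a single-letter l is easy to misread and is commonly flagged as ambiguous by linters), plus binding SGDClassifier and TfidfVectorizer to None when scikit-learn is missing so that later references fail gracefully rather than raising NameError. A condensed sketch of that optional-dependency pattern as it stands after this commit:

# Optional scikit-learn import; USE_CLF gates the classifier code path.
try:
    from sklearn.linear_model import SGDClassifier
    from sklearn.feature_extraction.text import TfidfVectorizer
    USE_CLF = True
except ImportError:
    # Bind the names anyway so the module still imports without sklearn.
    SGDClassifier, TfidfVectorizer = None, None
    USE_CLF = False

from . import load  # was: from . import load as l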
31 changes: 15 additions & 16 deletions quantulum3/load.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
"""
:mod:`Quantulum` unit and entity loading functions.
'''
"""

from builtins import open

@@ -63,34 +63,33 @@ def get_string_json(raw_json_text):

################################################################################
def get_key_from_dimensions(derived):
'''
"""
Translate dimensionality into key for DERIVED_UNI and DERIVED_ENT dicts.
'''
"""

return tuple((i['base'], i['power']) for i in derived)


################################################################################
def get_dimension_permutations(entities, derived):
'''
"""
Get all possible dimensional definitions for an entity.
'''
"""

new_derived = defaultdict(int)
for item in derived:
new = entities[item['base']].dimensions
if new:
for new_item in new:
new_derived[new_item['base']] += new_item['power'] * \
item['power']
new_derived[new_item['base']] += (
new_item['power'] * item['power'])
else:
new_derived[item['base']] += item['power']

final = [[{
'base': i[0],
'power': i[1]
} for i in list(new_derived.items())]]
final.append(derived)
} for i in list(new_derived.items())], derived]
final = [sorted(i, key=lambda x: x['base']) for i in final]

candidates = []
@@ -103,9 +102,9 @@ def get_dimension_permutations(entities, derived):

################################################################################
def load_entities():
'''
"""
Load entities from JSON file.
'''
"""

path = os.path.join(TOPDIR, 'entities.json')
string_json = ''.join(open(path, encoding='utf-8').readlines())
@@ -141,9 +140,9 @@ def load_entities():

################################################################################
def get_derived_units(names):
'''
"""
Create dictionary of unit dimensions.
'''
"""

derived_uni = {}

@@ -165,9 +164,9 @@

################################################################################
def load_units():
'''
"""
Load units from JSON file.
'''
"""

names = {}
unit_symbols, unit_symbols_lower, = defaultdict(list), defaultdict(list)
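The load.py changes are likewise quote-style and line-wrapping cleanups; the behavior that stays fixed is get_key_from_dimensions, which turns a list of dimension dicts into a hashable tuple key for the DERIVED_UNI and DERIVED_ENT lookups. A small sketch of that transformation — the 'metre'/'second' base names are made-up illustrations, not taken from this diff:

# Dimensionality of a speed-like unit: metre ** 1 * second ** -1
derived = [{'base': 'metre', 'power': 1}, {'base': 'second', 'power': -1}]
key = tuple((i['base'], i['power']) for i in derived)
# key == (('metre', 1), ('second', -1)) -- hashable, so it can index
# the DERIVED_UNI and DERIVED_ENT dictionaries.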
