Commit

Merge 0b1a06b into 18e4083
nielstron committed Sep 19, 2018
2 parents 18e4083 + 0b1a06b commit 1b74bcf
Showing 7 changed files with 188 additions and 196 deletions.
33 changes: 16 additions & 17 deletions quantulum3/classes.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
"""
:mod:`Quantulum` classes.
'''
"""

# Dependences
import inflect
@@ -14,9 +14,9 @@


class Quantity(object):
'''
"""
Class for a quantity (e.g. "4.2 gallons").
'''
"""

def __init__(self,
value=None,
@@ -59,10 +59,10 @@ def as_string(self): # pragma: no cover
return '{} {}'.format(self.value, self.unit.name)

def to_spoken(self):
'''
"""
Express quantity as a speakable string
:return: Speakable version of this quantity
'''
"""
count = self.value
if self.unit.entity.name == "currency" and self.unit.currency_code:
try:
@@ -83,9 +83,9 @@ def to_spoken(self):

################################################################################
class Unit(object):
'''
"""
Class for a unit (e.g. "gallon").
'''
"""

def __init__(self,
name=None,
@@ -106,11 +106,11 @@ def __init__(self,

@staticmethod
def name_from_dimensions(dimensions):
'''
"""
Build the name of the unit from its dimensions.
Param:
dimensions: List of dimensions
'''
"""

name = ''

@@ -133,21 +133,20 @@ def name_from_dimensions(dimensions):
return name

def infer_name(self):
'''
"""
Set own name based on dimensions
:return: new name of this unit
'''
"""
self.name = self.name_from_dimensions(
self.dimensions) if self.dimensions else None
return self.name

def to_spoken(self, count=1):
'''
"""
Convert a given unit to the unit in words, correctly inflected.
:param unit: The unit as class or string (only quantulum class supported so far)
:param count: The value of the quantity (i.e. 1 for one watt, 2 for two seconds)
:return: A string with the correctly inflected spoken version of the unit
'''
"""
if self.name == "dimensionless":
unit_string = ""
elif self.surfaces:
@@ -192,9 +191,9 @@ def __ne__(self, other):

################################################################################
class Entity(object):
'''
"""
Class for an entity (e.g. "volume").
'''
"""

def __init__(self, name=None, dimensions=None, uri=None):

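The changes to classes.py are purely stylistic: module and method docstrings switch from '''-quoted to """-quoted strings, the form PEP 257 recommends. For orientation, a minimal usage sketch of the classes touched in this file — the parse call comes from quantulum3's public API and the printed values are illustrative assumptions, not part of this diff:

# Minimal sketch, assuming the package is installed; values are illustrative.
from quantulum3 import parser

quants = parser.parse('I bought 4.2 gallons of milk')
quantity = quants[0]          # a classes.Quantity
print(quantity.value)         # e.g. 4.2
print(quantity.unit.name)     # e.g. 'gallon' (a classes.Unit)
print(quantity.to_spoken())   # spoken form, built via Unit.to_spoken(count)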
61 changes: 31 additions & 30 deletions quantulum3/classifier.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
'''
"""
:mod:`Quantulum` classifier functions.
'''
"""

# Standard library
import os
@@ -18,22 +18,23 @@
from sklearn.feature_extraction.text import TfidfVectorizer
USE_CLF = True
except ImportError:
SGDClassifier, TfidfVectorizer = None, None
USE_CLF = False

# Quantulum
from . import load as l
from . import load


################################################################################
def download_wiki(store=True): # pragma: no cover
'''
"""
Download WikiPedia pages of ambiguous units.
@:param store (bool) store wikipedia data in wiki.json file
'''
"""
import wikipedia

ambiguous = [i for i in list(l.UNITS.items()) if len(i[1]) > 1]
ambiguous += [i for i in list(l.DERIVED_ENT.items()) if len(i[1]) > 1]
ambiguous = [i for i in list(load.UNITS.items()) if len(i[1]) > 1]
ambiguous += [i for i in list(load.DERIVED_ENT.items()) if len(i[1]) > 1]
pages = set([(j.name, j.uri) for i in ambiguous for j in i[1]])

print()
@@ -51,7 +52,7 @@ def download_wiki(store=True): # pragma: no cover
obj['unit'] = page[0]
objs.append(obj)

path = os.path.join(l.TOPDIR, 'wiki.json')
path = os.path.join(load.TOPDIR, 'wiki.json')
os.remove(path)
if store:
with open(path, 'w') as wiki_file:
@@ -63,9 +64,9 @@ def download_wiki(store=True): # pragma: no cover

################################################################################
def clean_text(text):
'''
"""
Clean text for TFIDF
'''
"""
from stemming.porter2 import stem

my_regex = re.compile(r'[%s]' % re.escape(string.punctuation))
@@ -86,18 +87,18 @@ def train_classifier(download=True,
parameters=None,
ngram_range=(1, 1),
store=True):
'''
"""
Train the intent classifier
TODO auto invoke if sklearn version is new or first install or sth
@:param store (bool) store classifier in clf.pickle
'''
path = os.path.join(l.TOPDIR, 'train.json')
"""
path = os.path.join(load.TOPDIR, 'train.json')
with open(path, 'r', encoding='utf-8') as train_file:
training_set = json.load(train_file)

wiki_set = download_wiki(store) if download else None
if not wiki_set:
path = os.path.join(l.TOPDIR, 'wiki.json')
path = os.path.join(load.TOPDIR, 'wiki.json')
with open(path, 'r', encoding='utf-8') as wiki_file:
wiki_set = json.load(wiki_file)

@@ -133,19 +134,19 @@
target_names
}
if store: # pragma: no cover
path = os.path.join(l.TOPDIR, 'clf.pickle')
path = os.path.join(load.TOPDIR, 'clf.pickle')
with open(path, 'wb') as file:
pickle.dump(obj, file)
return obj


################################################################################
def load_classifier():
'''
"""
Load the intent classifier
'''
"""

path = os.path.join(l.TOPDIR, 'clf.pickle')
path = os.path.join(load.TOPDIR, 'clf.pickle')
with open(path, 'rb') as file:
obj = pickle.load(file, encoding='latin1')

@@ -168,25 +169,25 @@

################################################################################
def disambiguate_entity(key, text):
'''
"""
Resolve ambiguity between entities with same dimensionality.
'''
"""

new_ent = l.DERIVED_ENT[key][0]
new_ent = load.DERIVED_ENT[key][0]

if len(l.DERIVED_ENT[key]) > 1:
if len(load.DERIVED_ENT[key]) > 1:
transformed = TFIDF_MODEL.transform([text])
scores = CLF.predict_proba(transformed).tolist()[0]
scores = zip(scores, TARGET_NAMES)

# Filter for possible names
names = [i.name for i in l.DERIVED_ENT[key]]
names = [i.name for i in load.DERIVED_ENT[key]]
scores = [i for i in scores if i[1] in names]

# Sort by rank
scores = sorted(scores, key=lambda x: x[0], reverse=True)
try:
new_ent = l.ENTITIES[scores[0][1]]
new_ent = load.ENTITIES[scores[0][1]]
except IndexError:
logging.debug('\tAmbiguity not resolved for "%s"', str(key))

@@ -195,14 +196,14 @@ def disambiguate_entity(key, text):

################################################################################
def disambiguate_unit(unit, text):
'''
"""
Resolve ambiguity between units with same names, symbols or abbreviations.
'''
"""

new_unit = l.UNIT_SYMBOLS.get(unit) or l.UNITS.get(unit)
new_unit = load.UNIT_SYMBOLS.get(unit) or load.UNITS.get(unit)
if not new_unit:
new_unit = l.LOWER_UNITS.get(unit.lower()) or l.UNIT_SYMBOLS_LOWER.get(
unit.lower())
new_unit = load.LOWER_UNITS.get(
unit.lower()) or load.UNIT_SYMBOLS_LOWER.get(unit.lower())
if not new_unit:
raise KeyError('Could not find unit "%s" from "%s"' % (unit, text))

Expand All @@ -218,7 +219,7 @@ def disambiguate_unit(unit, text):
# Sort by rank
scores = sorted(scores, key=lambda x: x[0], reverse=True)
try:
final = l.UNITS[scores[0][1]][0]
final = load.UNITS[scores[0][1]][0]
logging.debug('\tAmbiguity resolved for "%s" (%s)', unit, scores)
except IndexError:
logging.debug('\tAmbiguity not resolved for "%s"', unit)
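The substantive change in classifier.py is the rename of the module alias l to the full name load (a single-letter l is easy to misread and is commonly flagged as ambiguous by linters), plus binding SGDClassifier and TfidfVectorizer to None when scikit-learn is missing so that later references fail gracefully rather than raising NameError. A condensed sketch of that optional-dependency pattern as it stands after this commit:

# Optional scikit-learn import; USE_CLF gates the classifier code path.
try:
    from sklearn.linear_model import SGDClassifier
    from sklearn.feature_extraction.text import TfidfVectorizer
    USE_CLF = True
except ImportError:
    # Bind the names anyway so the module still imports without sklearn.
    SGDClassifier, TfidfVectorizer = None, None
    USE_CLF = False

from . import load  # was: from . import load as l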
31 changes: 15 additions & 16 deletions quantulum3/load.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
"""
:mod:`Quantulum` unit and entity loading functions.
'''
"""

from builtins import open

@@ -63,34 +63,33 @@ def get_string_json(raw_json_text):

################################################################################
def get_key_from_dimensions(derived):
'''
"""
Translate dimensionality into key for DERIVED_UNI and DERIVED_ENT dicts.
'''
"""

return tuple((i['base'], i['power']) for i in derived)


################################################################################
def get_dimension_permutations(entities, derived):
'''
"""
Get all possible dimensional definitions for an entity.
'''
"""

new_derived = defaultdict(int)
for item in derived:
new = entities[item['base']].dimensions
if new:
for new_item in new:
new_derived[new_item['base']] += new_item['power'] * \
item['power']
new_derived[new_item['base']] += (
new_item['power'] * item['power'])
else:
new_derived[item['base']] += item['power']

final = [[{
'base': i[0],
'power': i[1]
} for i in list(new_derived.items())]]
final.append(derived)
} for i in list(new_derived.items())], derived]
final = [sorted(i, key=lambda x: x['base']) for i in final]

candidates = []
@@ -103,9 +102,9 @@ def get_dimension_permutations(entities, derived):

################################################################################
def load_entities():
'''
"""
Load entities from JSON file.
'''
"""

path = os.path.join(TOPDIR, 'entities.json')
string_json = ''.join(open(path, encoding='utf-8').readlines())
@@ -141,9 +140,9 @@ def load_entities():

################################################################################
def get_derived_units(names):
'''
"""
Create dictionary of unit dimensions.
'''
"""

derived_uni = {}

@@ -165,9 +164,9 @@

################################################################################
def load_units():
'''
"""
Load units from JSON file.
'''
"""

names = {}
unit_symbols, unit_symbols_lower, = defaultdict(list), defaultdict(list)
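The load.py changes are likewise quote-style and line-wrapping cleanups; the behavior that stays fixed is get_key_from_dimensions, which turns a list of dimension dicts into a hashable tuple key for the DERIVED_UNI and DERIVED_ENT lookups. A small sketch of that transformation — the 'metre'/'second' base names are made-up illustrations, not taken from this diff:

# Dimensionality of a speed-like unit: metre ** 1 * second ** -1
derived = [{'base': 'metre', 'power': 1}, {'base': 'second', 'power': -1}]
key = tuple((i['base'], i['power']) for i in derived)
# key == (('metre', 1), ('second', -1)) -- hashable, so it can index
# the DERIVED_UNI and DERIVED_ENT dictionaries.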
