
Commit d04c6d2: hfst-dev
Mika committed Aug 5, 2021
1 parent de189e8
Showing 3 changed files with 12 additions and 10 deletions.
setup.py: 6 changes (3 additions & 3 deletions)
@@ -23,7 +23,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
-version='1.3.0',
+version='1.3.1',

description='An NLP library for Uralic languages such as Finnish and Sami. Also supports Arabic, Russian etc.',
long_description=long_description,
@@ -34,7 +34,7 @@
zip_safe=False,
# Author details
author='Mika Hämäläinen',
-author_email='mika.hamalainen@helsinki.fi',
+author_email='mika@rootroo.com',

# Choose your license
license='Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License',
@@ -69,7 +69,7 @@
# your project is installed. For an analysis of "install_requires" vs pip's
# requirements files see:
# https://packaging.python.org/en/latest/requirements.html
install_requires=["requests", "hfst", "mikatools>=0.0.6", "argparse", "future>=0.18.2", "tinydb"],
install_requires=["requests", "hfst-dev", "mikatools>=0.0.6", "argparse", "future>=0.18.2", "tinydb"],

# List additional groups of dependencies here (e.g. development
# dependencies). You can install these using the following syntax,
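The dependency swap above replaces the hfst wheel with hfst-dev. A minimal sketch (not part of the commit) of a post-install sanity check; the module name hfst_dev is taken from the import change in uralicNLP/uralicApi.py below:

# Sketch only: confirm the backend shipped via the new 'hfst-dev' requirement is importable.
import importlib.util

print(importlib.util.find_spec("hfst_dev") is not None)  # expected True after installing this release
print(importlib.util.find_spec("hfst") is not None)       # may be False unless the old package is installed separately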
test_uralicnlp.py: 7 changes (4 additions & 3 deletions)
@@ -8,7 +8,7 @@
import re
from mikatools import *

-#uralicApi.get_all_forms("kissa", "N", "fin")
+print(uralicApi.get_all_forms("kissa", "N", "fin"))

#uralicApi.get_transducer("spa", analyzer=True).lookup_optimize()
#print(uralicApi.analyze("hola", "spa"))
@@ -123,9 +123,10 @@
print(word[0].get_attribute("form"))
"""

"""
print(uralicApi.analyze("hörpähdin", "fin", neural_fallback=True))
print(uralicApi.lemmatize("nirhautan", "fin", neural_fallback=True))
print(uralicApi.generate("hömpötti+N+Sg+Gen", "fin", neural_fallback=True))
print(uralicApi.generate("koirailla+V+Act+Ind+Prs+Sg1", "fin", neural_fallback=True))
print(uralicApi.analyze("juoksen", "fin", neural_fallback=True))
print(uralicApi.analyze("juoksen", "fin", neural_fallback=True))
"""
uralicNLP/uralicApi.py: 9 changes (5 additions & 4 deletions)
@@ -21,7 +21,8 @@
# Fall back to Python 2's urllib2
from urllib2 import urlopen
new_python = False
-import hfst
+
+import hfst_dev as hfst

api_url = "https://akusanat.com/smsxml/"
download_server_url = "https://models.uralicnlp.com/nightly/"
@@ -224,9 +225,9 @@ def get_all_forms(word, pos, language, descriptive=True, limit_forms=-1, filter_
analyzer2 = analyzer.copy()
analyzer2.compose(reg)
output = analyzer2.extract_paths(max_cycles=1, max_number=limit_forms,output='text').replace("@_EPSILON_SYMBOL_@","").split("\n")
-output = filter(lambda x: x, output)
-output = list(map(lambda x: x.split('\t'), output))
-return list(map(lambda x: (x[0], float(x[1]),), output))
+output = [_o.split('\t') for _o in output if _o]
+output = [(":".join(_o[:-1]), float(_o[-1]), ) for _o in output]
+return output

def generate(query, language, force_local=True, descriptive=False, dictionary_forms=False, remove_symbols=True, filename=None, neural_fallback=False):
if force_local or __where_models(language, safe=True):
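The rewritten tail of get_all_forms swaps the filter/map chain for list comprehensions and stops assuming exactly two tab-separated fields per line: everything before the final field is rejoined with ':' and the last field is parsed as the weight. A minimal sketch of the new parsing on a made-up extract_paths line (the exact text format depends on the transducer):

# Sketch only: the sample line is hypothetical; the parsing mirrors the new comprehensions above.
sample = "kissa:kissa+N+Sg+Nom\t0.000000"   # pretend output of extract_paths(..., output='text')
fields = sample.split('\t')                  # ['kissa:kissa+N+Sg+Nom', '0.000000']
form = ":".join(fields[:-1])                 # all fields except the last, rejoined with ':'
weight = float(fields[-1])                   # the last field is the weight
print((form, weight))                        # ('kissa:kissa+N+Sg+Nom', 0.0)
# The old (x[0], float(x[1])) version assumed exactly two fields; the new code
# only assumes the weight comes last.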
