Skip to content

Commit

Permalink
test fixture and doc updated
Browse files Browse the repository at this point in the history
  • Loading branch information
thePortus committed Mar 27, 2018
1 parent 9e71e64 commit 11235f8
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 32 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ The first time you use a language-specific text object, you need to run its .set
```python
from dhelp import EnglishText

EnglishText('').setup()
EnglishText.setup()

```

Expand Down Expand Up @@ -315,7 +315,7 @@ Before you use this object for any of the methods below you need to download tra

```python
from dhelp import EnglishText
EnglishText('').setup()
EnglishText.setup()
```

**Examples**
Expand Down
24 changes: 15 additions & 9 deletions dhelp/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,21 @@

NLTK_PACKAGES = {
'english': [
('verbnet', 'corpora/verbnet'),
('wordnet', 'corpora/verbnet'),
('words', 'corpora/verbnet'),
('large_grammars', 'corpora/verbnet'),
('averaged_perceptron_tagger', 'corpora/verbnet'),
('maxent_treebank_pos_tagger', 'corpora/verbnet'),
('universal_tagset', 'corpora/verbnet'),
('punkt', 'corpora/verbnet'),
('maxent_ne_chunker', 'corpora/verbnet'),
('verbnet', ['corpora', 'verbnet.zip']),
('wordnet', ['corpora', 'wordnet.zip']),
('words', ['corpora', 'words.zip']),
('large_grammars', ['grammars', 'large_grammars.zip']),
(
'averaged_perceptron_tagger',
['taggers', 'averaged_perceptron_tagger.zip']
),
(
'maxent_treebank_pos_tagger',
['taggers', 'maxent_treebank_pos_tagger.zip']
),
('universal_tagset', ['taggers', 'universal_tagset.zip']),
('punkt', ['tokenizers', 'punkt.zip']),
('maxent_ne_chunker', ['chunkers', 'maxent_ne_chunker.zip']),
]
}

Expand Down
23 changes: 12 additions & 11 deletions dhelp/text/nltk.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class NLTKMixin:
>>> class EnglishText(NLTKTextMixin, EnglishText):
"""

@classmethod
def setup(self):
"""Download NLTK packages and trainer corpora.
Expand All @@ -34,19 +35,19 @@ def setup(self):
sets.
Example:
>>> EnglishText('').setup()
>>> EnglishText.setup()
"""
# get root directory of nltk data
nltk_root_dir = os.path.join(
os.path.expanduser('~'),
'nltk_data',
)
for nltk_package, nltk_package_dir in settings.NLTK_PACKAGES[
self.options['language']
for package, package_path_segments in settings.NLTK_PACKAGES[
'english'
]:
# check for package locally, if not extant, download
if os.path.exists(os.path.join(nltk_root_dir, nltk_package_dir)):
nltk.download(nltk_package)
package_path = os.sep.join(package_path_segments)
# nltk.data.find raises an error if the file is missing; if it is found, do nothing
try:
nltk.data.find(package_path)
pass
# if no file was found, download the respective package
except:
nltk.download(package)
return True

def rm_stopwords(self, stoplist=[]):
Expand Down
12 changes: 2 additions & 10 deletions dhelp/text/tests/test_english.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,14 @@

import unittest

import os

from ..nltk import EnglishText


class EnglishSetupLayer:

@classmethod
def setUp(cls):
if not os.path.exists(
os.path.join(
os.path.expanduser('~'),
'nltk_data'
)
):
EnglishText('').setup()
def testSetUp(cls):
EnglishText.setup()


class TestEnglishText(unittest.TestCase):
Expand Down

0 comments on commit 11235f8

Please sign in to comment.