Skip to content

Commit

Permalink
Allow an iterable for from_wordlist()
Browse files Browse the repository at this point in the history
Allow an iterable of strings (most likely a list or a set)
as the argument of from_wordlist().
  • Loading branch information
jacksonllee committed Sep 22, 2016
1 parent f947000 commit 82b94ea
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 18 deletions.
Binary file modified docs/.doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/.doctrees/lexicon.doctree
Binary file not shown.
Binary file modified docs/.doctrees/read.doctree
Binary file not shown.
4 changes: 2 additions & 2 deletions docs/read.html
Original file line number Diff line number Diff line change
Expand Up @@ -305,8 +305,8 @@
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>wordlist_object</strong> &#8211; either a list of word types as strings
or a dict of word types mapped to their token counts</li>
<li><strong>wordlist_object</strong> &#8211; either a dict of word types (as strings) mapped to
their token counts or an iterable of word types (as strings).</li>
<li><strong>kwargs</strong> &#8211; keyword arguments for parameters and their values.</li>
</ul>
</td>
Expand Down
2 changes: 1 addition & 1 deletion docs/searchindex.js

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions linguistica/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ def from_wordlist(wordlist_object, **kwargs):
"""
Create a Linguistica object with a wordlist object.
:param wordlist_object: either a list of word types as strings
or a dict of word types mapped to their token counts
:param wordlist_object: either a dict of word types (as strings) mapped to
their token counts or an iterable of word types (as strings).
:param kwargs: keyword arguments for parameters and their values.
"""
return Lexicon(wordlist_object=wordlist_object, wordlist_file=False,
Expand Down
25 changes: 12 additions & 13 deletions linguistica/lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,17 +221,8 @@ def _initialize(self):
self._wordlist = None
if self.wordlist_object is not None:
# self.wordlist_object is
# either a list of str or a dict of word-count pairs
if type(self.wordlist_object) is list:
if self.parameters_['keep_case']:
wordlist = sorted(set(self.wordlist_object))
else:
wordlist = sorted(
set(w.lower() for w in self.wordlist_object))
self._wordlist = wordlist
self._word_unigram_counter = {word: 1 for word in wordlist}

elif type(self.wordlist_object) is dict:
# either an iterable or a dict of word-count pairs
if type(self.wordlist_object) is dict:
word_count_dict = dict()
if self.parameters_['keep_case']:
word_count_dict = self.wordlist_object
Expand All @@ -248,9 +239,17 @@ def _initialize(self):
reverse=True)]
self._word_unigram_counter = word_count_dict

elif hasattr(self.wordlist_object, '__iter__'):
if self.parameters_['keep_case']:
self._wordlist = sorted(set(self.wordlist_object))
else:
self._wordlist = sorted(
set(w.lower() for w in self.wordlist_object))
self._word_unigram_counter = {w: 1 for w in self._wordlist}

else:
raise TypeError('wordlist object must be '
'either a list or a dict')
raise TypeError('wordlist object must be a dict of word-count '
'pairs or an iterable of words')

# signature-related objects
self._stems_to_words = None
Expand Down

0 comments on commit 82b94ea

Please sign in to comment.