From 2430cca7f2e55fb8921099b0ae627e5f19373b36 Mon Sep 17 00:00:00 2001 From: Joseph Albert Date: Thu, 1 Sep 2016 05:12:58 -0400 Subject: [PATCH 01/19] Attempting to fix slow NaiveBayes Three changes: 1) basic_extractor can accept a list of strings as well as a list of ('word','label') tuples. 2) BaseClassifier now has an instance variable _word_set which is a set of tokens seen by the classifier. 1+2) BaseClassifier.extract_features passes _word_set to extractor rather than the training set. 3) NLTKClassifier.update adds new words to the _word_set. --- textblob/classifiers.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/textblob/classifiers.py b/textblob/classifiers.py index 782bbebc..c3b81ce1 100644 --- a/textblob/classifiers.py +++ b/textblob/classifiers.py @@ -76,9 +76,15 @@ def basic_extractor(document, train_set): :param document: The text to extract features from. Can be a string or an iterable. :param list train_set: Training data set, a list of tuples of the form - ``(words, label)``. + ``(words, label)`` OR an iterable of strings. """ - word_features = _get_words_from_dataset(train_set) + el_zero = iter(train_set).next() #Infer input from first element. + if isinstance(el_zero, tuple): + word_features = _get_words_from_dataset(train_set) + elif isinstance(el_zero, str): + word_features = train_set + else: + raise ValueError('train_set is proabably malformed.') tokens = _get_document_tokens(document) features = dict(((u'contains({0})'.format(word), (word in tokens)) for word in word_features)) @@ -123,6 +129,7 @@ def __init__(self, train_set, feature_extractor=basic_extractor, format=None, ** self.train_set = self._read_data(train_set, format) else: # train_set is a list of tuples self.train_set = train_set + self._word_set = _get_words_from_dataset(train_set) #Keep a hidden set of unique words. self.train_features = None def _read_data(self, dataset, format=None): @@ -166,7 +173,7 @@ def extract_features(self, text): ''' # Feature extractor may take one or two arguments try: - return self.feature_extractor(text, self.train_set) + return self.feature_extractor(text, self._word_set) except (TypeError, AttributeError): return self.feature_extractor(text) @@ -260,6 +267,7 @@ def update(self, new_data, *args, **kwargs): ``(text, label)``. """ self.train_set += new_data + self._word_set.update(_get_words_from_dataset(new_data)) self.train_features = [(self.extract_features(d), c) for d, c in self.train_set] try: From 7505da49800d907ac211f08e4477e35284a2332c Mon Sep 17 00:00:00 2001 From: Joseph Albert Date: Thu, 1 Sep 2016 06:36:22 -0400 Subject: [PATCH 02/19] Special-cased when train_set is the null set Now returns an empty dict if passed an empty training set. Also, cover some bases if train_set is consumed by .next() --- textblob/classifiers.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/textblob/classifiers.py b/textblob/classifiers.py index c3b81ce1..faf7c193 100644 --- a/textblob/classifiers.py +++ b/textblob/classifiers.py @@ -78,13 +78,20 @@ def basic_extractor(document, train_set): :param list train_set: Training data set, a list of tuples of the form ``(words, label)`` OR an iterable of strings. """ - el_zero = iter(train_set).next() #Infer input from first element. - if isinstance(el_zero, tuple): - word_features = _get_words_from_dataset(train_set) - elif isinstance(el_zero, str): - word_features = train_set + + try: + el_zero = iter(train_set).next() #Infer input from first element. + except StopIteration: + return {} + if isinstance(el_zero, str): + word_features = [w for w in chain([el_zero],train_set)] else: - raise ValueError('train_set is proabably malformed.') + try: + assert(isinstance(el_zero[0], str)) + word_features = _get_words_from_dataset(chain([el_zero],train_set)) + except: + raise ValueError('train_set is proabably malformed.') + tokens = _get_document_tokens(document) features = dict(((u'contains({0})'.format(word), (word in tokens)) for word in word_features)) From 61c7e4768114ef05c93e0d1a69bd60fcf4256d06 Mon Sep 17 00:00:00 2001 From: Joseph Albert Date: Sat, 6 May 2017 19:04:49 -0400 Subject: [PATCH 03/19] Base_Classifier wasn't unicode-ready. Fixed bug where _word_set was based on train_set, even if train_set is filelike instead of iterable. --- textblob/classifiers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/textblob/classifiers.py b/textblob/classifiers.py index faf7c193..0f1afe18 100644 --- a/textblob/classifiers.py +++ b/textblob/classifiers.py @@ -83,11 +83,11 @@ def basic_extractor(document, train_set): el_zero = iter(train_set).next() #Infer input from first element. except StopIteration: return {} - if isinstance(el_zero, str): + if isinstance(el_zero, basestring): word_features = [w for w in chain([el_zero],train_set)] else: try: - assert(isinstance(el_zero[0], str)) + assert(isinstance(el_zero[0], basestring)) word_features = _get_words_from_dataset(chain([el_zero],train_set)) except: raise ValueError('train_set is proabably malformed.') @@ -136,7 +136,7 @@ def __init__(self, train_set, feature_extractor=basic_extractor, format=None, ** self.train_set = self._read_data(train_set, format) else: # train_set is a list of tuples self.train_set = train_set - self._word_set = _get_words_from_dataset(train_set) #Keep a hidden set of unique words. + self._word_set = _get_words_from_dataset(self.train_set) #Keep a hidden set of unique words. self.train_features = None def _read_data(self, dataset, format=None): From 721b4aa1f09f581af3b4e75b0fd24368c48477bc Mon Sep 17 00:00:00 2001 From: jcalbert Date: Sat, 6 May 2017 22:25:45 -0400 Subject: [PATCH 04/19] Updated translation test to reflect online translator's new translation --- tests/test_translate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_translate.py b/tests/test_translate.py index e81d5b52..6287ed75 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -99,7 +99,7 @@ def test_translate_text(self): def test_translate_non_ascii(self): text = "ذات سيادة كاملة" translated = self.translator.translate(text, from_lang='ar', to_lang='en') - assert_equal(translated, "With full sovereignty") + assert_equal(translated, "Fully sovereign") text2 = "美丽比丑陋更好" translated = self.translator.translate(text2, from_lang="zh-CN", to_lang='en') From 57b8969a9d71eb9ad28c652aa73e22f5be8000ca Mon Sep 17 00:00:00 2001 From: jcalbert Date: Thu, 11 May 2017 02:28:36 -0400 Subject: [PATCH 05/19] Fixed a .next() call that broke py3 compatibility. --- textblob/classifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/textblob/classifiers.py b/textblob/classifiers.py index 0f1afe18..742e837c 100644 --- a/textblob/classifiers.py +++ b/textblob/classifiers.py @@ -80,7 +80,7 @@ def basic_extractor(document, train_set): """ try: - el_zero = iter(train_set).next() #Infer input from first element. + el_zero = next(iter(train_set)) #Infer input from first element. except StopIteration: return {} if isinstance(el_zero, basestring): From 871145b36ece2aca45d233b7a29bd547bade006e Mon Sep 17 00:00:00 2001 From: Jeff Schnurr Date: Sun, 4 Jun 2017 13:18:49 -0400 Subject: [PATCH 06/19] fix #166 to use specified tokenizer when tagging. --- tests/test_blob.py | 14 ++++++++++++++ textblob/base.py | 2 +- textblob/blob.py | 9 ++++++--- textblob/en/taggers.py | 19 +++++++++++-------- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/tests/test_blob.py b/tests/test_blob.py index d63e5080..bb864034 100644 --- a/tests/test_blob.py +++ b/tests/test_blob.py @@ -752,6 +752,20 @@ def test_tokenize_method(self): # Pass in the TabTokenizer assert_equal(blob.tokenize(tokenizer), tb.WordList(["This is", "text."])) + def test_tags_uses_custom_tokenizer(self): + tokenizer = nltk.tokenize.regexp.WordPunctTokenizer() + blob = tb.TextBlob("Good muffins cost $3.88\nin New York.", tokenizer=tokenizer) + assert_equal(blob.tags, [(u'Good', u'JJ'), (u'muffins', u'NNS'), (u'cost', u'VBP'), ( + u'3', u'CD'), (u'88', u'CD'), (u'in', u'IN'), (u'New', u'NNP'), (u'York', u'NNP')]) + + def test_tags_with_custom_tokenizer_and_tagger(self): + tokenizer = nltk.tokenize.regexp.WordPunctTokenizer() + tagger = tb.taggers.PatternTagger() + blob = tb.TextBlob("Good muffins cost $3.88\nin New York.", tokenizer=tokenizer, pos_tagger=tagger) + # PatterTagger takes raw text (not tokens), and handles tokenization itself. + assert_equal(blob.tags, [(u'Good', u'JJ'), (u'muffins', u'NNS'), (u'cost', u'NN'), + (u'3.88', u'CD'), (u'in', u'IN'), (u'New', u'NNP'), (u'York', u'NNP')]) + @mock.patch('textblob.translate.Translator.translate') def test_translate(self, mock_translate): mock_translate.return_value = 'Esta es una frase.' diff --git a/textblob/base.py b/textblob/base.py index 79322f9b..e4ac6e3f 100644 --- a/textblob/base.py +++ b/textblob/base.py @@ -22,7 +22,7 @@ class BaseTagger(with_metaclass(ABCMeta)): @abstractmethod def tag(self, text, tokenize=True): """Return a list of tuples of the form (word, tag) - for a given set of text. + for a given set of text or BaseBlob instance. """ return diff --git a/textblob/blob.py b/textblob/blob.py index f8182f20..ab25034a 100644 --- a/textblob/blob.py +++ b/textblob/blob.py @@ -458,9 +458,12 @@ def pos_tags(self): :rtype: list of tuples """ - return [(Word(word, pos_tag=t), unicode(t)) - for word, t in self.pos_tagger.tag(self.raw) - if not PUNCTUATION_REGEX.match(unicode(t))] + if isinstance(self, TextBlob): + return [val for sublist in [s.pos_tags for s in self.sentences] for val in sublist] + else: + return [(Word(word, pos_tag=t), unicode(t)) + for word, t in self.pos_tagger.tag(self) + if not PUNCTUATION_REGEX.match(unicode(t))] tags = pos_tags diff --git a/textblob/en/taggers.py b/textblob/en/taggers.py index a35fed60..ef9d29d2 100644 --- a/textblob/en/taggers.py +++ b/textblob/en/taggers.py @@ -3,10 +3,11 @@ from __future__ import absolute_import import nltk +import six +import textblob as tb from textblob.en import tag as pattern_tag from textblob.decorators import requires_nltk_corpus -from textblob.tokenizers import word_tokenize from textblob.base import BaseTagger @@ -17,7 +18,9 @@ class PatternTagger(BaseTagger): """ def tag(self, text, tokenize=True): - """Tag a string `text`.""" + """Tag a string or BaseBlob.""" + if not isinstance(text, six.text_type): + text = text.raw return pattern_tag(text, tokenize) @@ -27,9 +30,9 @@ class NLTKTagger(BaseTagger): """ @requires_nltk_corpus - def tag(self, text, tokenize=True): - """Tag a string `text`.""" - if tokenize: - text = list(word_tokenize(text)) - tagged = nltk.tag.pos_tag(text) - return tagged + def tag(self, text): + """Tag a string or BaseBlob.""" + if isinstance(text, six.text_type): + text = tb.TextBlob(text) + + return nltk.tag.pos_tag(text.tokens) From e1aa4f0bb32fe8f5740097823bb2892a43564cb6 Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Tue, 15 Aug 2017 23:35:45 -0400 Subject: [PATCH 07/19] Minor style fixes --- textblob/classifiers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/textblob/classifiers.py b/textblob/classifiers.py index 742e837c..f58701f1 100644 --- a/textblob/classifiers.py +++ b/textblob/classifiers.py @@ -80,15 +80,15 @@ def basic_extractor(document, train_set): """ try: - el_zero = next(iter(train_set)) #Infer input from first element. + el_zero = next(iter(train_set)) # Infer input from first element. except StopIteration: return {} if isinstance(el_zero, basestring): - word_features = [w for w in chain([el_zero],train_set)] + word_features = [w for w in chain([el_zero], train_set)] else: try: assert(isinstance(el_zero[0], basestring)) - word_features = _get_words_from_dataset(chain([el_zero],train_set)) + word_features = _get_words_from_dataset(chain([el_zero], train_set)) except: raise ValueError('train_set is proabably malformed.') From 27a8321623099e7e96303905d8651557f579d7de Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Tue, 15 Aug 2017 23:38:10 -0400 Subject: [PATCH 08/19] Add travis autodeployment to PyPI --- .travis.yml | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/.travis.yml b/.travis.yml index 90890061..719c97d5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,20 +1,26 @@ language: python sudo: false python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" + - "2.7" + - "3.4" + - "3.5" + - "3.6" before_install: - - "wget https://s3.amazonaws.com/textblob/nltk_data-0.11.0.tar.gz" - - "tar -xzvf nltk_data-0.11.0.tar.gz -C ~" -# Install dependencies + - wget https://s3.amazonaws.com/textblob/nltk_data-0.11.0.tar.gz + - tar -xzvf nltk_data-0.11.0.tar.gz -C ~ install: - - "pip install numpy" - - "pip install -U ." - - if [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then pip install -r docs/requirements.txt; fi -# Run tests + - pip install numpy + - pip install -U . + - if [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then pip install -r docs/requirements.txt; + fi script: - - "python run_tests.py" - # Run doctests against py34 - - if [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then cd docs && make doctest; fi + - python run_tests.py + - if [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then cd docs && make doctest; fi +deploy: + provider: pypi + user: sloria + password: + secure: aPoSh6zkeB6PnS77fmoeT/PzB/oeE7aM0g9ZrPd19ZwC5aORtF7/ifDfzYwYWhdyua4fLAzaEu3Z+pk5z644r1Zq8Jxryv18LeFzkzO/Sk/O9LxpJQ+ypbTIIK9Oc5LdQ0qCd5L3RtMV3zIvocvnpryVmkAm/vYBm77rCBFcMxg= + on: + tags: true + distributions: sdist bdist_wheel From 29aa333721776daf03ca8917a68eba0433e5ef2e Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Tue, 15 Aug 2017 23:38:31 -0400 Subject: [PATCH 09/19] Remove unnecessary publish task --- tasks.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tasks.py b/tasks.py index 0b0b5c6f..cac9064d 100644 --- a/tasks.py +++ b/tasks.py @@ -49,14 +49,3 @@ def readme(ctx, browse=False): def doctest(ctx): os.chdir(docs_dir) ctx.run("make doctest") - -@task -def publish(ctx, test=False): - """Publish to the cheeseshop.""" - clean(ctx) - if test: - ctx.run('python setup.py register -r test sdist bdist_wheel', echo=True) - ctx.run('twine upload dist/* -r test', echo=True) - else: - ctx.run('python setup.py register sdist bdist_wheel', echo=True) - ctx.run('twine upload dist/*', echo=True) From 3174af30d8501558114c6fe9ad1cbdb4673fe1b8 Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Tue, 15 Aug 2017 23:43:40 -0400 Subject: [PATCH 10/19] Bump version; update changelog; update AUTHORS --- AUTHORS.rst | 1 + CHANGELOG.rst | 8 ++++++++ textblob/__init__.py | 5 ++--- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 8c263ae3..4ad81be4 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -23,3 +23,4 @@ Contributors (chronological) - Adrián López Calvo `@AdrianLC `_ - Nitish Kulshrestha `@nitkul `_ - Jhon Eslava `@EpicJhon `_ +- `@jcalbert `_ diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 8537c68b..4ce9ef34 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,14 @@ Changelog ========= +0.13.0 (2017-08-15) +------------------- + +Features: + +- Performance improvements to `NaiveBayesClassifier` (:issue:`63`, :issue:`77`, + :issue:`123`). Thanks :user:`jcalbert` for the PR. + 0.12.0 (2017-02-27) ------------------- diff --git a/textblob/__init__.py b/textblob/__init__.py index 585862f5..41d02fa5 100644 --- a/textblob/__init__.py +++ b/textblob/__init__.py @@ -1,13 +1,12 @@ import os +from .blob import TextBlob, Word, Sentence, Blobber, WordList -__version__ = '0.12.0' +__version__ = '0.13.0' __license__ = 'MIT' __author__ = 'Steven Loria' PACKAGE_DIR = os.path.dirname(os.path.abspath(__file__)) -from .blob import TextBlob, Word, Sentence, Blobber, WordList - __all__ = [ 'TextBlob', 'Word', From 13d63fae1b6eb90e22b32fa0d7789250d403eb6d Mon Sep 17 00:00:00 2001 From: "PAVEL\\Pavel" Date: Sun, 29 Oct 2017 10:15:29 +0200 Subject: [PATCH 11/19] the format argument was not passed --- textblob/classifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/textblob/classifiers.py b/textblob/classifiers.py index f58701f1..03e3eb76 100644 --- a/textblob/classifiers.py +++ b/textblob/classifiers.py @@ -260,7 +260,7 @@ def accuracy(self, test_set, format=None): file format. """ if is_filelike(test_set): - test_data = self._read_data(test_set) + test_data = self._read_data(test_set, format) else: # test_set is a list of tuples test_data = test_set test_features = [(self.extract_features(d), c) for d, c in test_data] From bb25d7a4f5c2325cd22cc8143808e7c9b70be611 Mon Sep 17 00:00:00 2001 From: Tyler James Harden Date: Sun, 5 Nov 2017 23:31:23 -0500 Subject: [PATCH 12/19] Check for synonyms attribute on basestring An arbitrary string that matches the RE_SYNSET regex is not necessarily a Synset if it does not have the synonyms attribute. --- textblob/_text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/textblob/_text.py b/textblob/_text.py index 18e4d14a..1f9f8e28 100644 --- a/textblob/_text.py +++ b/textblob/_text.py @@ -815,7 +815,7 @@ def avg(assessments, weighted=lambda w: 1): # A synset id. # Sentiment("a-00193480") => horrible => (-0.6, 1.0) (English WordNet) # Sentiment("c_267") => verschrikkelijk => (-0.9, 1.0) (Dutch Cornetto) - elif isinstance(s, basestring) and RE_SYNSET.match(s): + elif isinstance(s, basestring) and RE_SYNSET.match(s) and hasattr(s, "synonyms"): a = [(s.synonyms[0],) + self.synset(s.id, pos=s.pos) + (None,)] # A string of words. # Sentiment("a horrible movie") => (-0.6, 1.0) From 0b0273355d2cb662cea45bb0503c05bc506151f1 Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Sat, 11 Nov 2017 11:22:02 -0500 Subject: [PATCH 13/19] Fix incorrect translation test --- tests/test_translate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_translate.py b/tests/test_translate.py index 6287ed75..d6d9186c 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -81,7 +81,7 @@ def test_detect_non_ascii(self): def test_translate_spaces(self): es_text = "Hola, me llamo Adrián! Cómo estás? Yo bien" to_en = self.translator.translate(es_text, from_lang="es", to_lang="en") - assert_equal(to_en, "Hello, my name is Adrian! How are you? I am good") + assert_equal(to_en, "Hi, my name is Adrián! How are you? I am good") def test_translate_missing_from_language_auto_detects(self): text = "Ich hole das Bier" From c27a4d4972164ba972f06fe71d523714d05e7a9a Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Sat, 11 Nov 2017 11:27:35 -0500 Subject: [PATCH 14/19] Update changelog and add @pavelmalai and @tylerjharden to AUTHORS --- AUTHORS.rst | 2 ++ CHANGELOG.rst | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 4ad81be4..b0d52a99 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -24,3 +24,5 @@ Contributors (chronological) - Nitish Kulshrestha `@nitkul `_ - Jhon Eslava `@EpicJhon `_ - `@jcalbert `_ +- Tyler James Harden `@tylerjharden `_ +- `@pavelmalai `_ diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4ce9ef34..913c090a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,16 @@ Changelog ========= +0.13.1 (unreleased) +------------------- + +Bug fixes: + +- Avoid AttributeError when using pattern's sentiment analyzer + (:issue:`178`). Thanks :user:`tylerjharden` for the catch and patch. +- Correctly pass ``format`` argument to ``NLTKClassifier.accuracy`` + (:issue:`177`). Thanks :user:`pavelmalai` for the catch and patch. + 0.13.0 (2017-08-15) ------------------- From 2a13b33ca897ae1b836138a2c965e9264b0fdf43 Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Sat, 11 Nov 2017 11:31:31 -0500 Subject: [PATCH 15/19] Use svg badges --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 422d664b..ec76ed55 100644 --- a/README.rst +++ b/README.rst @@ -2,11 +2,11 @@ TextBlob: Simplified Text Processing ==================================== -.. image:: https://badge.fury.io/py/textblob.png +.. image:: https://badge.fury.io/py/textblob.svg :target: http://badge.fury.io/py/textblob :alt: Latest version -.. image:: https://travis-ci.org/sloria/TextBlob.png?branch=master +.. image:: https://travis-ci.org/sloria/TextBlob.svg?branch=master :target: https://travis-ci.org/sloria/TextBlob :alt: Travis-CI From ca9101bcd019cc3dac4ac5e6801caffe66bd31c6 Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Sat, 11 Nov 2017 18:48:19 -0500 Subject: [PATCH 16/19] Fix travis installation error on python 3.4 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 719c97d5..a63eabd2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,7 @@ before_install: - tar -xzvf nltk_data-0.11.0.tar.gz -C ~ install: - pip install numpy + - pip install -U six - pip install -U . - if [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then pip install -r docs/requirements.txt; fi From 73687770cd860786f107ac28d175d0a66f8ae9ad Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Sat, 11 Nov 2017 18:52:08 -0500 Subject: [PATCH 17/19] Bump version and update changelog --- CHANGELOG.rst | 2 +- textblob/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 913c090a..2225e17b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,7 @@ Changelog ========= -0.13.1 (unreleased) +0.13.1 (2017-11-11) ------------------- Bug fixes: diff --git a/textblob/__init__.py b/textblob/__init__.py index 41d02fa5..a9fdf0e1 100644 --- a/textblob/__init__.py +++ b/textblob/__init__.py @@ -1,7 +1,7 @@ import os from .blob import TextBlob, Word, Sentence, Blobber, WordList -__version__ = '0.13.0' +__version__ = '0.13.1' __license__ = 'MIT' __author__ = 'Steven Loria' From e659a57464521c3f4211e8bbaafc6b6f217dec2d Mon Sep 17 00:00:00 2001 From: Jeff Schnurr Date: Mon, 20 Nov 2017 19:36:43 -0500 Subject: [PATCH 18/19] Use textblob.compat instead of six. --- textblob/en/taggers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/textblob/en/taggers.py b/textblob/en/taggers.py index ef9d29d2..65e30629 100644 --- a/textblob/en/taggers.py +++ b/textblob/en/taggers.py @@ -3,7 +3,7 @@ from __future__ import absolute_import import nltk -import six +import textblob.compat import textblob as tb from textblob.en import tag as pattern_tag @@ -19,7 +19,7 @@ class PatternTagger(BaseTagger): def tag(self, text, tokenize=True): """Tag a string or BaseBlob.""" - if not isinstance(text, six.text_type): + if not isinstance(text, textblob.compat.text_type): text = text.raw return pattern_tag(text, tokenize) @@ -32,7 +32,7 @@ class NLTKTagger(BaseTagger): @requires_nltk_corpus def tag(self, text): """Tag a string or BaseBlob.""" - if isinstance(text, six.text_type): + if isinstance(text, textblob.compat.text_type): text = tb.TextBlob(text) return nltk.tag.pos_tag(text.tokens) From 7763b312da1e8d8e106db0b1a73de5d2b4e71e6a Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Mon, 20 Nov 2017 20:37:38 -0500 Subject: [PATCH 19/19] Bump version and update changelog --- CHANGELOG.rst | 8 ++++++++ textblob/__init__.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2225e17b..844989ff 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,14 @@ Changelog ========= +0.14.0 (2017-11-20) +------------------- + +Features: + +- Use specified tokenizer when tagging (:issue:`167`). Thanks + :user:`jschnurr` for the PR. + 0.13.1 (2017-11-11) ------------------- diff --git a/textblob/__init__.py b/textblob/__init__.py index a9fdf0e1..9fe31c97 100644 --- a/textblob/__init__.py +++ b/textblob/__init__.py @@ -1,7 +1,7 @@ import os from .blob import TextBlob, Word, Sentence, Blobber, WordList -__version__ = '0.13.1' +__version__ = '0.14.0' __license__ = 'MIT' __author__ = 'Steven Loria'