## Blobber
- A factory for TextBlobs that all share the same tagger, tokenizer, parser, classifier, and np_extractor.

In [1]:
from textblob import Blobber
from textblob.taggers import NLTKTagger
from textblob.tokenizers import SentenceTokenizer
from textblob.parsers import PatternParser
from textblob.np_extractors import ConllExtractor
from textblob.sentiments import NaiveBayesAnalyzer

In [2]:
# Factory configured once: blobs created through `tb` are built with this
# POS tagger and this sentiment analyzer.
tb = Blobber(
    pos_tagger=NLTKTagger(),
    analyzer=NaiveBayesAnalyzer(),
)

In [3]:
# Two sample texts (with irregular punctuation such as "!.", "^^" and "may'be")
# built through the shared factory `tb`.
# Adjacent string literals are concatenated by the parser, so the sentences are
# separated by exactly one space and no source indentation ends up in the text.
zen_1 = tb(
    'Beautiful is better than ugly!. '
    'Explicit, is better than implicit. '
    'Simple is better than complex^^.'
)

zen_2 = tb(
    'Although, never is often better than right now. '
    "If the implementation is hard! to explain, it's a bad idea. "
    "If the implementation is easy! to explain, it may'be a good idea."
)

In [4]:
# Part-of-speech tags: one (token, tag) pair per word, for each blob in turn.
print('pos tags')
for label, tagged in (('zen_1: ', zen_1), ('zen_2: ', zen_2)):
    print(label, tagged.pos_tags)

pos tags
zen_1:  [('Beautiful', 'NNP'), ('is', 'VBZ'), ('better', 'JJR'), ('than', 'IN'), ('ugly', 'RB'), ('Explicit', 'NNP'), ('is', 'VBZ'), ('better', 'JJR'), ('than', 'IN'), ('implicit', 'NN'), ('Simple', 'NN'), ('is', 'VBZ'), ('better', 'JJR'), ('than', 'IN'), ('complex^^', 'NN')]
zen_2:  [('Although', 'IN'), ('never', 'RB'), ('is', 'VBZ'), ('often', 'RB'), ('better', 'JJR'), ('than', 'IN'), ('right', 'RB'), ('now', 'RB'), ('If', 'IN'), ('the', 'DT'), ('implementation', 'NN'), ('is', 'VBZ'), ('hard', 'JJ'), ('to', 'TO'), ('explain', 'VB'), ('it', 'PRP'), ("'s", 'VBZ'), ('a', 'DT'), ('bad', 'JJ'), ('idea', 'NN'), ('If', 'IN'), ('the', 'DT'), ('implementation', 'NN'), ('is', 'VBZ'), ('easy', 'JJ'), ('to', 'TO'), ('explain', 'VB'), ('it', 'PRP'), ("may'be", 'VBZ'), ('a', 'DT'), ('good', 'JJ'), ('idea', 'NN')]


In [5]:
# Sentence-level sentiment for zen_1: print each sentence next to its
# Sentiment result (classification plus positive/negative probabilities).
for sentence in zen_1.sentences:
    verdict = sentence.sentiment
    print(f"{sentence} -- {verdict}")

Beautiful is better than ugly!. -- Sentiment(classification='neg', p_pos=0.4740131622439792, p_neg=0.5259868377560211)
Explicit, is better than implicit. -- Sentiment(classification='pos', p_pos=0.5362362732905498, p_neg=0.46376372670944993)
Simple is better than complex^^. -- Sentiment(classification='pos', p_pos=0.763510047426406, p_neg=0.23648995257359412)


In [6]:
# Identity check: both blobs came from `tb`, so they should hold the very same
# tagger object rather than two separate instances.
zen_2.pos_tagger is zen_1.pos_tagger

True

In [7]:
# Second factory, this time fixing the parser, the noun-phrase extractor and
# the tokenizer used by the blobs it creates.
blob = Blobber(
    parser=PatternParser(),
    np_extractor=ConllExtractor(),
    tokenizer=SentenceTokenizer(),
)

In [8]:
# Sample text built through the `blob` factory.
# Adjacent string literals are concatenated by the parser, so the sentences are
# separated by exactly one space and no source indentation ends up in the text.
zen = blob(
    'Beautiful is better than ugly!. '
    'Explicit, is better than implicit. '
    'Simple is better than complex^^.'
)

In [9]:
# Parse the text and break the resulting string on single spaces so each
# annotated token is shown as its own list element.
parsed_pieces = zen.parse().split(' ')
print('Parsed: ', parsed_pieces)

Parsed:  ['Beautiful/JJ/B-ADJP/O', 'is/VBZ/B-VP/O', 'better/JJR/B-ADJP/O', 'than/IN/B-PP/O', 'ugly/JJ/B-ADJP/O', '!/./O/O', '././O/O\nExplicit/JJ/B-ADJP/O', ',/,/O/O', 'is/VBZ/B-VP/O', 'better/JJR/B-ADJP/O', 'than/IN/B-PP/O', 'implicit/JJ/B-ADJP/O', '././O/O\nSimple/JJ/B-ADJP/O', 'is/VBZ/B-VP/O', 'better/JJR/B-ADJP/O', 'than/IN/B-PP/B-PNP', 'complex/JJ/B-NP/I-PNP', '^/NN/I-NP/I-PNP', '^/NN/I-NP/I-PNP', '././O/O']


In [10]:
# Noun phrases found in `zen` (the factory was given np_extractor=ConllExtractor()).
phrases = zen.noun_phrases
print('Extracted phrases: ', phrases)

Extracted phrases:  ['complex ^ ^']


In [11]:
# Mapping of each extracted noun phrase to the number of times it occurs.
phrase_counts = zen.np_counts
print('Extracted count: ', phrase_counts)

Extracted count:  defaultdict(<class 'int'>, {'complex ^ ^': 1})


In [12]:
# Tokens of `zen`; the factory was given tokenizer=SentenceTokenizer(), so
# each token here is a whole sentence.
sentence_tokens = zen.tokens
print('Tokens: ', sentence_tokens)

Tokens:  ['Beautiful is better than ugly!.', 'Explicit, is better than implicit.', 'Simple is better than complex^^.']
