<a href="https://colab.research.google.com/github/rjc89/ML_exercises/blob/master/TextBlob_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np

In [0]:
from textblob import TextBlob

In [0]:
# Sample passage that the tagging, noun-phrase, sentiment and translation
# cells below all operate on. The text is kept verbatim, including the
# double quote before "assimilating" that is never closed.
text = """
The titular threat of The Blob has always struck me as the ultimate movie
monster: an insatiably hungry, amoeba-like mass able to penetrate
virtually any safeguard, capable of--as a doomed doctor chillingly
describes it--"assimilating flesh on contact.
Snide comparisons to gelatin be damned, it's a concept with the most
devastating of potential consequences, not unlike the grey goo scenario
proposed by technological theorists fearful of
artificial intelligence run rampant.
"""

# Wrap the passage in a TextBlob so it can be queried in the following cells.
blob = TextBlob(text)


In [0]:
# Part-of-speech tags for the passage: a list of (word, tag) pairs.
blob.tags           # [('The', 'DT'), ('titular', 'JJ'),
                    #  ('threat', 'NN'), ('of', 'IN'), ...]



[('The', 'DT'),
 ('titular', 'JJ'),
 ('threat', 'NN'),
 ('of', 'IN'),
 ('The', 'DT'),
 ('Blob', 'NNP'),
 ('has', 'VBZ'),
 ('always', 'RB'),
 ('struck', 'VBN'),
 ('me', 'PRP'),
 ('as', 'IN'),
 ('the', 'DT'),
 ('ultimate', 'JJ'),
 ('movie', 'NN'),
 ('monster', 'NN'),
 ('an', 'DT'),
 ('insatiably', 'RB'),
 ('hungry', 'JJ'),
 ('amoeba-like', 'JJ'),
 ('mass', 'NN'),
 ('able', 'JJ'),
 ('to', 'TO'),
 ('penetrate', 'VB'),
 ('virtually', 'RB'),
 ('any', 'DT'),
 ('safeguard', 'NN'),
 ('capable', 'JJ'),
 ('of', 'IN'),
 ('as', 'IN'),
 ('a', 'DT'),
 ('doomed', 'JJ'),
 ('doctor', 'NN'),
 ('chillingly', 'RB'),
 ('describes', 'VBZ'),
 ('it', 'PRP'),
 ('assimilating', 'VBG'),
 ('flesh', 'NN'),
 ('on', 'IN'),
 ('contact', 'NN'),
 ('Snide', 'JJ'),
 ('comparisons', 'NNS'),
 ('to', 'TO'),
 ('gelatin', 'VB'),
 ('be', 'VB'),
 ('damned', 'VBN'),
 ('it', 'PRP'),
 ("'s", 'VBZ'),
 ('a', 'DT'),
 ('concept', 'NN'),
 ('with', 'IN'),
 ('the', 'DT'),
 ('most', 'RBS'),
 ('devastating', 'JJ'),
 ('of', 'IN'),
 ('potenti

In [0]:
# Noun phrases found in the passage, returned as a WordList of lower-cased strings.
blob.noun_phrases   # WordList(['titular threat', 'blob',
                    #            'ultimate movie monster',
                    #            'amoeba-like mass', ...])



WordList(['titular threat', 'blob', 'ultimate movie monster', 'amoeba-like mass', 'snide', 'potential consequences', 'grey goo scenario', 'technological theorists fearful', 'artificial intelligence run rampant'])

In [0]:
# Print the sentiment polarity of each sentence in the passage, one per line.
for sentence in blob.sentences:
    print(sentence.sentiment.polarity)
# Expected output (rounded to three decimals):
# 0.060
# -0.341

0.06000000000000001
-0.34166666666666673


In [0]:
# Translate the passage to Spanish ("es").
# NOTE(review): TextBlob.translate() was deprecated in TextBlob 0.16 and removed
# in 0.18 -- confirm the installed version still provides it, or pin textblob.
blob.translate(to="es")  # 'La amenaza titular de The Blob...'

TextBlob("La amenaza titular de The Blob siempre me ha parecido la mejor película.
monstruo: una masa insaciablemente hambrienta, similar a una ameba, capaz de penetrar
prácticamente cualquier salvaguarda, capaz de, como un médico condenado, escalofriante
lo describe - "asimilando la carne al contacto.
Malditas comparaciones con la gelatina, maldita sea, es un concepto con la mayoría
devastador de posibles consecuencias, no muy diferente del escenario de la sustancia gris
propuesto por teóricos tecnológicos temerosos de
la inteligencia artificial corre desenfrenada.")

# Text Classifier

In [0]:
# Labelled training examples as (sentence, label) pairs:
# five 'pos' examples followed by five 'neg' examples.
train = [
    ("I love this sandwich.", "pos"),
    ("this is an amazing place!", "pos"),
    ("I feel very good about these beers.", "pos"),
    ("this is my best work.", "pos"),
    ("what an awesome view", "pos"),
    ("I do not like this restaurant", "neg"),
    ("I am tired of this stuff.", "neg"),
    ("I can't deal with this", "neg"),
    ("he is my sworn enemy!", "neg"),
    ("my boss is horrible.", "neg"),
]

# Evaluation examples in the same (sentence, label) form.
test = [
    ("the beer was good.", "pos"),
    ("I do not enjoy my job", "neg"),
    ("I ain't feeling dandy today.", "neg"),
    ("I feel amazing!", "pos"),
    ("Gary is a friend of mine.", "pos"),
    ("I can't believe I'm doing this.", "neg"),
]

In [0]:
# Build a Naive Bayes classifier from the labelled `train` examples.
from textblob.classifiers import NaiveBayesClassifier
cl = NaiveBayesClassifier(train)

In [0]:
# Predict the label ('pos' or 'neg') for a sentence that is not in the training data.
cl.classify("This is an amazing library!")

'pos'

In [0]:
# Get the label probability distribution for a sentence; the next cells read
# individual label probabilities from `prob_dist`.
prob_dist = cl.prob_classify("This one's a doozy.")
# Label with the highest probability.
prob_dist.max()


'pos'

In [0]:

# Probability assigned to the 'pos' label, rounded to two decimals.
round(prob_dist.prob("pos"), 2)



0.63

In [0]:
# Probability assigned to the 'neg' label, rounded to two decimals.
round(prob_dist.prob("neg"), 2)

0.37

## Classifying sentences within TextBlobs

In [0]:
# Attach the trained classifier `cl` to a two-sentence blob and classify the
# blob as a whole. The doctest ">>>" prompts were dropped so the cell is plain
# Python rather than relying on IPython stripping them.
# Note: this rebinds `blob`, replacing the blob built from the passage above.
from textblob import TextBlob
blob = TextBlob("The beer is good. But the hangover is horrible.", classifier=cl)
blob.classify()

'pos'

In [0]:
# Classify each sentence of the blob separately: print the sentence, then its label.
for s in blob.sentences:
    print(s)
    print(s.classify())

The beer is good.
pos
But the hangover is horrible.
neg


In [0]:
# Score the classifier on the labelled `test` examples (before adding more data).
cl.accuracy(test)

0.8333333333333334

In [0]:
# Additional labelled (sentence, label) examples used to update the classifier.
new_data = [
    ("She is my best friend.", "pos"),
    ("I'm happy to have a new friend.", "pos"),
    ("Stay thirsty, my friend.", "pos"),
    ("He ain't from around here.", "neg"),
]


In [0]:
# Feed the extra examples to the existing classifier; the call returns True.
# NOTE(review): this changes `cl` in place, so re-running the cell adds the
# same examples again -- confirm that is acceptable.
cl.update(new_data)



True

In [0]:
# Re-score on the same `test` examples now that the classifier has been updated.
cl.accuracy(test)

1.0

# Feature Extractors

In [0]:
def end_word_extractor(document):
    """Build a feature dict from the first and last tokens of ``document``.

    Tokens are obtained by splitting on whitespace.

    Args:
        document: The text to extract features from (a string).

    Returns:
        A dict with two entries, ``"first(<token>)": True`` and
        ``"last(<token>)": False``. An empty dict is returned when
        ``document`` has no tokens (empty or whitespace-only), instead of
        raising ``IndexError``.
    """
    tokens = document.split()
    if not tokens:
        # No tokens to index: tokens[0] / tokens[-1] would raise IndexError.
        return {}
    first_word, last_word = tokens[0], tokens[-1]
    return {
        "first({0})".format(first_word): True,
        # NOTE(review): the last-word feature is stored as False, which the
        # assertion in the following cell relies on -- confirm this value is
        # intended rather than True.
        "last({0})".format(last_word): False,
    }


In [0]:
# Sanity-check the extractor on a three-word sentence: it should report the
# first word as True and the last word as False.
features = end_word_extractor("I feel happy")
assert features == {'last(happy)': False, 'first(I)': True}

In [0]:
# Train a second classifier that uses end_word_extractor as its feature
# extractor, then classify a new sentence with it.
# NOTE(review): cl2 is trained on `test`, the set used above for scoring `cl`,
# not on `train` -- confirm this is intentional.
# Note: `blob` is rebound again here.
cl2 = NaiveBayesClassifier(test, feature_extractor=end_word_extractor)
blob = TextBlob("I'm excited to try my new classifier.", classifier=cl2)
blob.classify()

'pos'