# Building a Text Classification System using TextBlob

## Loading Data and Creating a Classifier

In [10]:
# Labelled training sentences as (text, label) pairs: five 'pos' and five 'neg'.
train = [
    ('I love this sandwich.', 'pos'),
    ('this is an amazing place!', 'pos'),
    ('I feel very good about these beers.', 'pos'),
    ('this is my best work.', 'pos'),
    ("what an awesome view", 'pos'),
    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('he is my sworn enemy!', 'neg'),
    ('my boss is horrible.', 'neg'),
]

In [11]:
 test = [
...     ('the beer was good.', 'pos'),
...     ('I do not enjoy my job', 'neg'),
...     ("I ain't feeling dandy today.", 'neg'),
...     ("I feel amazing!", 'pos'),
...     ('Gary is a friend of mine.', 'pos'),
...     ("I can't believe I'm doing this.", 'neg')
... ]

In [12]:
from textblob.classifiers import NaiveBayesClassifier
# Train a Naive Bayes classifier on the labelled (text, label) pairs in `train`.
cl = NaiveBayesClassifier(train)

## Classifying Text

In [13]:
# Predict the label of a single sentence that is not in the training data.
cl.classify("This is an amazing library!")

'pos'

In [14]:
# Get the label probability distribution for the text; max() gives the most likely label.
prob_dist = cl.prob_classify("This one's a doozy.")
prob_dist.max()

'pos'

In [15]:
# Probability assigned to the "pos" label, rounded to two decimals.
round(prob_dist.prob("pos"), 2)

0.63

In [16]:
# Probability assigned to the "neg" label, rounded to two decimals.
round(prob_dist.prob("neg"), 2)

0.37

## Classifying TextBlobs

In [17]:
from textblob import TextBlob
# Attach the trained classifier to a TextBlob so the blob can be classified directly.
blob = TextBlob("The beer is good. But the hangover is horrible.", classifier=cl)
# Label for the blob as a whole (both sentences together).
blob.classify()

'pos'

In [18]:
 for s in blob.sentences:
...     print(s)
...     print(s.classify())


The beer is good.
pos
But the hangover is horrible.
neg


## Evaluating Classifiers

In [19]:
# Accuracy of `cl` on the labelled `test` pairs.
cl.accuracy(test)

0.8333333333333334

In [20]:
# Print the 5 most informative features together with their label likelihood ratios.
cl.show_informative_features(5)

Most Informative Features
            contains(my) = True              neg : pos    =      1.7 : 1.0
            contains(an) = False             neg : pos    =      1.6 : 1.0
             contains(I) = False             pos : neg    =      1.4 : 1.0
             contains(I) = True              neg : pos    =      1.4 : 1.0
            contains(my) = False             pos : neg    =      1.3 : 1.0


## Updating Classifiers with New Data

In [21]:
# Extra labelled (text, label) pairs used to update the existing classifier.
new_data = [
    ('She is my best friend.', 'pos'),
    ("I'm happy to have a new friend.", 'pos'),
    ("Stay thirsty, my friend.", 'pos'),
    ("He ain't from around here.", 'neg'),
]

In [22]:
# Feed the extra labelled examples to the existing classifier `cl`.
# NOTE(review): this modifies `cl` itself, so re-running the cell presumably adds the
# same examples again — confirm before relying on repeated runs.
cl.update(new_data)

True

In [23]:
# Re-evaluate on the same `test` pairs after the update.
cl.accuracy(test)

1.0

## Feature Extractors

#### Create a feature extractor that just uses the first and last words of a document as its features

In [24]:
def end_word_extractor(document):
    """Build a feature dict from the first and last tokens of a document.

    Args:
        document: The text to extract features from. It is split on
            whitespace with ``str.split()``.

    Returns:
        dict: ``{"first(<first token>)": True, "last(<last token>)": False}``,
        or an empty dict when ``document`` contains no tokens.
    """
    tokens = document.split()
    if not tokens:
        # An empty or whitespace-only document has no first/last word;
        # return no features instead of raising IndexError on tokens[0].
        return {}
    first_word, last_word = tokens[0], tokens[-1]
    # NOTE(review): the last-word feature is recorded as False while the
    # first-word feature is True. Kept as-is because existing callers and
    # checks expect this value — confirm whether it is intentional.
    return {
        "first({0})".format(first_word): True,
        "last({0})".format(last_word): False,
    }
# Sanity-check the extractor on a short example sentence.
features = end_word_extractor("I feel happy")
assert features == {'last(happy)': False, 'first(I)': True}

#### Use the feature extractor in a classifier by passing it as the second argument of the constructor

In [25]:
# Build a second classifier that uses the custom first/last-word extractor.
# NOTE(review): this trains on `test` rather than `train` — confirm that is intended.
cl2 = NaiveBayesClassifier(test, feature_extractor=end_word_extractor)
# `blob` is rebound here to a new TextBlob that uses `cl2`.
blob = TextBlob("I'm excited to try my new classifier.", classifier=cl2)
blob.classify()

'pos'