1. pip install -U textblob 
2. python -m textblob.download_corpora

## Basic

In [2]:
from textblob import TextBlob

In [3]:
## creating a textblob object
blob = TextBlob("Test Leaf is a great platform to learn technology.")

In [4]:
## textblobs are like python strings

blob[1:5]

TextBlob("est ")

In [5]:
blob.upper()

TextBlob("TEST LEAF IS A GREAT PLATFORM TO LEARN TECHNOLOGY.")

In [6]:
blob2 = TextBlob("It also helps community through Class, discussions,etc.")

In [7]:
## concat

blob + " And " + blob2

TextBlob("Test Leaf is a great platform to learn technology. And It also helps community through Class, discussions,etc.")

## Tokenization

In [8]:
blob = TextBlob("Test Leaf is a great platform to learn technology.\n It helps community through Class, discussions,etc.")


In [9]:
blob.sentences

[Sentence("Test Leaf is a great platform to learn technology."),
 Sentence("It helps community through Class, discussions,etc.")]

In [10]:
blob.sentences[0]

Sentence("Test Leaf is a great platform to learn technology.")

In [11]:
blob.sentences[0].words

WordList(['Test', 'Leaf', 'is', 'a', 'great', 'platform', 'to', 'learn', 'technology'])

In [12]:
for words in blob.sentences[0].words:
    print (words)

Test
Leaf
is
a
great
platform
to
learn
technology


## Noun phrase extraction

In [13]:
blob = TextBlob("Test Leaf is a great platform to learn technology.")
for np in blob.tags:
    print (np)

('Test', 'NNP')
('Leaf', 'NNP')
('is', 'VBZ')
('a', 'DT')
('great', 'JJ')
('platform', 'NN')
('to', 'TO')
('learn', 'VB')
('technology', 'NN')


As we can see that isn't correct but we were working with machines.

## POS tagging

In [14]:
for words, tag in blob.tags:
    print (words, tag)

Test NNP
Leaf NNP
is VBZ
a DT
great JJ
platform NN
to TO
learn VB
technology NN


## Sentiment Analysis

In [15]:
print (blob)
blob.sentiment

Test Leaf is a great platform to learn technology.


Sentiment(polarity=0.8, subjectivity=0.75)

## Word Inflection and Lemmatization

In [16]:
blob = TextBlob("Test Leaf is a great platform to learn technology. \n It helps community through Class, discussions,etc.")
print (blob.sentences[1].words[1])
print (blob.sentences[1].words[1].singularize())

helps
help


In [17]:
from textblob import Word
w = Word('Platform')
w.pluralize()

'Platforms'

In [18]:
## using tags
for word,pos in blob.tags:
    if pos == 'NN':
        print (word.pluralize())

platforms
technologies
communities


In [19]:
## lemmatization

w = Word('jokes')
w.lemmatize()  ## v here represents verb

'joke'

## Ngrams

In [20]:
blob = TextBlob("I went to sri lanka")
for ngram in blob.ngrams(2):
    print (ngram)

['I', 'went']
['went', 'to']
['to', 'sri']
['sri', 'lanka']


## Spelling correction

In [22]:
blob = TextBlob('TestLeaf is a gret platfrm to learn technology')
blob.correct()

TextBlob("TestLeaf is a great platform to ran technology")

In [33]:
blob.words[3].spellcheck()

[('great', 0.5351351351351351),
 ('get', 0.3162162162162162),
 ('grew', 0.11216216216216217),
 ('grey', 0.026351351351351353),
 ('greet', 0.006081081081081081),
 ('fret', 0.002702702702702703),
 ('grit', 0.0006756756756756757),
 ('cret', 0.0006756756756756757)]

## Creating a short summary from a text

In [34]:
import random

blob = TextBlob('TestLeaf is a thriving community for data driven industry. This platform allows \
    people to know more about technology from its classes and learning paths. Also, we help \
    professionals & amateurs to sharpen their skillsets by providing a platform.')

In [35]:
nouns = list()
for word, tag in blob.tags:
    if tag == 'NN':
        nouns.append(word.lemmatize())

print ("This text is about...")
for item in random.sample(nouns, 5):
    word = Word(item)
    print (word.pluralize())

This text is about...
platforms
industries
technologies
communities
platforms


## Language Translation

In [36]:
blob = TextBlob('Hi Hope you are doing good?')

In [37]:
blob.translate(to ='es')

TextBlob("Hola espero que estés bien")

In [38]:
blob1 = TextBlob('هذا رائع')

In [39]:
blob1.detect_language()

'ar'

In [40]:
blob = TextBlob("¿Hola como estás?")
blob.detect_language()


'es'

In [41]:
blob1.translate(from_lang='ar', to ='en')

TextBlob("that's cool")

In [42]:
blob.translate(to= 'en')

TextBlob("Hi how are you?")

## Text Classification using textblob

In [43]:
training = [
            ('Tom Holland is a terrible spiderman.','neg'),
            ('a terrible Javert (Russell Crowe) ruined Les Miserables for me...','neg'),
            ('The Dark Knight Rises is the greatest superhero movie ever!','pos'),
            ('Fantastic Four should have never been made.','neg'),
            ('Wes Anderson is my favorite director!','pos'),
            ('Captain America 2 is pretty awesome.','pos'),
            ('Let\s pretend "Batman and Robin" never happened..','neg'),
            ]
testing = [
           ('Superman was never an interesting character.','neg'),
           ('Fantastic Mr Fox is an awesome film!','pos'),
           ('Dragonball Evolution is simply terrible!!','neg')
           ]

In [44]:
from textblob import classifiers

classifier = classifiers.NaiveBayesClassifier(training)

In [45]:
print (classifier.accuracy(testing))
classifier.show_informative_features(3)

1.0
Most Informative Features
            contains(is) = True              pos : neg    =      2.9 : 1.0
         contains(never) = False             pos : neg    =      1.8 : 1.0
      contains(terrible) = False             pos : neg    =      1.8 : 1.0


In [110]:
blob = TextBlob('I like spiderman ', classifier=classifier)
print (blob.classify())

neg
