**Create a TextBlob**

In [26]:
import nltk

# Download the NLTK data packages punkt, averaged_perceptron_tagger, brown
# and wordnet, in that order. The first three are fetched in a loop; the
# wordnet call is left as the cell's final bare expression so its return
# value is still what the cell displays.
for resource in ('punkt', 'averaged_perceptron_tagger', 'brown'):
    nltk.download(resource)
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/pranavanand/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/pranavanand/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package brown to
[nltk_data]     /home/pranavanand/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/pranavanand/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [27]:
from textblob import TextBlob

**Part-of-speech Tagging**

In [28]:
# Wrap one sentence in a TextBlob; `wiki` is reused by the noun-phrase cell.
wiki = TextBlob("Python is a high-level, general-purpose programming language.")
# `tags` gives the part-of-speech tagging as a list of (word, tag) tuples.
wiki.tags

[('Python', 'NNP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('high-level', 'JJ'),
 ('general-purpose', 'JJ'),
 ('programming', 'NN'),
 ('language', 'NN')]

**Noun Phrase Extraction**

In [29]:
# Noun phrases extracted from the `wiki` blob built in the POS-tagging cell;
# the result is displayed as a WordList.
wiki.noun_phrases

WordList(['python'])

**Sentiment Analysis**

In [30]:
# A two-sentence blob used for the sentiment examples.
testimonial = TextBlob("Textblob is amazingly simple to use. What great fun!")
# `sentiment` is displayed as a Sentiment record with `polarity` and
# `subjectivity` fields.
testimonial.sentiment

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [31]:
# Read only the polarity field of the sentiment result (a float).
testimonial.sentiment.polarity

0.39166666666666666

**Tokenization**


In [32]:
# Three sentences joined by implicit string concatenation into a single
# blob. `zen` is reused by the tokenization cells and by the sentence-index
# loop, whose offsets depend on this exact text (including the single spaces
# between sentences).
zen = TextBlob(
    "Beautiful is better than ugly. "
    "Explicit is better than implicit. "
    "Simple is better than complex. "
)

In [33]:
# Word-level tokenization of `zen`: a WordList of the words across all
# sentences; the sentence-ending periods are not included (see output).
zen.words

WordList(['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [34]:
# Sentence-level tokenization of `zen`: a list of Sentence objects, one per
# sentence in the original text.
zen.sentences

[Sentence("Beautiful is better than ugly."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

**Word Inflection and Lemmatization**

In [35]:
# Blob whose individual words are inflected in the next two cells.
sentence = TextBlob('Use 4 Spaces per indentation level')
# Show the tokens so the indices used below (2 and -1) can be read off.
sentence.words

WordList(['Use', '4', 'Spaces', 'per', 'indentation', 'level'])

In [36]:
# words[2] is 'Spaces'; singularize() gives its singular form.
sentence.words[2].singularize()

'Space'

In [37]:
# words[-1] is the last token, 'level'; pluralize() gives its plural form.
sentence.words[-1].pluralize()

'levels'

In [38]:
from textblob import Word
# Word wraps a single token so it can be inflected or lemmatized on its own.
w = Word("octopi")
# lemmatize() called with no arguments; here it maps the plural 'octopi'
# to 'octopus'.
w.lemmatize()

'octopus'

**WordNet Integration**

In [39]:
# Word was already imported in the lemmatization cell; repeating the import
# is harmless but redundant.
from textblob import Word
# NOTE(review): VERB is not used by any cell visible in this notebook --
# either drop this import or add an example that filters synsets by it.
from textblob.wordnet import VERB
word = Word("octopus")
# `synsets` lists the WordNet synsets found for the word.
word.synsets

[Synset('octopus.n.01'), Synset('octopus.n.02')]

In [40]:
# `definitions` returns a list of definition strings for the word; the
# capitalised spelling "Octopus" still resolves (see output).
Word("Octopus").definitions

['tentacles of octopus prepared as food',
 'bottom-living cephalopod having a soft oval body with eight long tentacles']

In [41]:
from textblob.wordnet import Synset
# Build two synsets directly from their WordNet identifiers.
octopus = Synset('octopus.n.02')
shrimp = Synset('shrimp.n.03')
# path_similarity() returns a float score relating the two synsets.
octopus.path_similarity(shrimp)

0.1111111111111111

**WordList**

In [42]:
# Small blob whose word list is pluralized as a whole in the next cell.
animals = TextBlob("cat dog octopus")
# `words` is a WordList of the three tokens.
animals.words

WordList(['cat', 'dog', 'octopus'])

In [44]:
# Calling pluralize() on the WordList itself pluralizes every word and
# yields a WordList of the plural forms (see output).
animals.words.pluralize()

WordList(['cats', 'dogs', 'octopodes'])

**Spelling Correction**

In [45]:
# Word was already imported in an earlier cell; the repeat is redundant.
from textblob import Word
# Rebinds `w`, which the lemmatization cell set to Word("octopi").
w = Word('falibility')
# spellcheck() returns a list of (suggested spelling, score) tuples.
w.spellcheck()

[('fallibility', 1.0)]

**Get Word and Noun Phrase Frequencies**

In [48]:
# Two sentences joined by implicit string concatenation. `monty` is reused
# by the case-sensitive count in the next cell.
monty = TextBlob(
    "We are no longer the Knights who say Ni. "
    "We are now the Knights who say Ekki ekki ekki PTANG."
)
# Lookup of the lowercase key 'ekki' in `word_counts`; the displayed count
# includes the capitalised 'Ekki' as well as the two lowercase occurrences.
monty.word_counts['ekki']

3

In [49]:
# Case-sensitive count over the word list: only the exact lowercase 'ekki'
# tokens match, so the capitalised 'Ekki' in the text is excluded.
monty.words.count('ekki', case_sensitive=True)

2

**Parsing**

In [52]:
# Blob used for the parsing example.
b = TextBlob("And now for something completely different.")
# parse() produces one slash-separated annotation group per token (see
# output); print() is used so the result is shown as plain text.
print(b.parse())

And/CC/O/O now/RB/B-ADVP/O for/IN/B-PP/B-PNP something/NN/B-NP/I-PNP completely/RB/B-ADJP/O different/JJ/I-ADJP/O ././O/O


**n-grams**

In [53]:
# Blob used for the n-gram example.
blob = TextBlob("Now is better than never.")
# ngrams(n=3) returns a list of WordLists, each holding three consecutive
# words from the text.
blob.ngrams(n=3)

[WordList(['Now', 'is', 'better']),
 WordList(['is', 'better', 'than']),
 WordList(['better', 'than', 'never'])]

**Get Start and End Indices of Sentences**

In [54]:
# For every sentence of `zen`, print the sentence followed by a line that
# reports its `start` and `end` attributes.
for s in zen.sentences:
    print(s)
    bounds = (s.start, s.end)
    print("---- Starts at index {}, Ends at index {}".format(*bounds))

Beautiful is better than ugly.
---- Starts at index 0, Ends at index 30
Explicit is better than implicit.
---- Starts at index 31, Ends at index 64
Simple is better than complex.
---- Starts at index 65, Ends at index 95
