#### Text Blob library for text processing

In [138]:
# importing library

from textblob import TextBlob

##### Part of speech tagging using spacy

In [139]:
# Code

text = TextBlob("I love to read about data science, not about movies")

In [140]:
text.tags

[('I', 'PRP'),
 ('love', 'VBP'),
 ('to', 'TO'),
 ('read', 'VB'),
 ('about', 'IN'),
 ('data', 'NNS'),
 ('science', 'NN'),
 ('not', 'RB'),
 ('about', 'IN'),
 ('movies', 'NNS')]

We can see the output that I as Preposition , Love as Verb etc.

##### Noun phrase Extraction
In noun phrase extraction it will return noun out of sentence

In [141]:
text = TextBlob("My name is Nikhil and i work for data")

In [None]:
text.noun_phrases

#### Sentiment Analysis using textblob

In [142]:
# example for positive sentiments

text = TextBlob("You are too good")

In [143]:
text.sentiment

Sentiment(polarity=0.7, subjectivity=0.6000000000000001)

In the above out it is basically representing the sentiments and whether my sentiments negative or positive is stated by polarity and how subjective am i while showing my sentiments.So the above output shows positive sentiments.

In [144]:
# example for negative sentiments.

text = TextBlob("This is bad")

In [145]:
text.sentiment

Sentiment(polarity=-0.6999999999999998, subjectivity=0.6666666666666666)

The negative polarity states the negative sentiments.

In [146]:
# example of sentence without any sentiments.

text = TextBlob("How're you doing")
text.sentiment

Sentiment(polarity=0.0, subjectivity=0.0)

The output above shows polarity and subjective as 0 stating that there is no sentiments behind the sentence.

In [147]:
text = TextBlob("What a taste , a must visit destination for kfc lover i mean IFC lover .The taste of the Burger is full"
                 "of heaven. I ordered large bucket worth of 530 but believe me the awesomeness was beyond imagination."
                "The taste was much better than the Brand KFC")

In [148]:
text.sentiment

Sentiment(polarity=0.17544642857142856, subjectivity=0.4290178571428572)

#### Tokenization
We can perform character,words,sentence tokenization using spacy library.Tokenization basically used to assign token to a single sentence passed inside function if it is word tokenization then it'll split the individual word into token etc.

In [149]:
# word tokenization example

text = TextBlob("This is something new")
text.words

WordList(['This', 'is', 'something', 'new'])

In [150]:
# sentence tokenization

text = TextBlob("This is my 1st sentence. This is my last sentence. ")
text.sentences

[Sentence("This is my 1st sentence."), Sentence("This is my last sentence.")]

#### Word Inflation and Lemmitization


In [151]:
text = TextBlob("This is spaces where i'll be using many box")
text.words

WordList(['This', 'is', 'spaces', 'where', 'i', "'ll", 'be', 'using', 'many', 'box'])

In [152]:
# singularization

text.words[2].singularize() # converting spaces to space

'space'

In [153]:
# pluralization

text.words[-1].pluralize()

'boxes'

#### lemmitization

In [154]:
from textblob import Word

text = Word("lions")
text.lemmatize()

'lion'

In [155]:
# 

text = Word("went")
text.lemmatize('v') # try to convert to base word as we have provided v as verb into lemmatization function.

'go'

#### Synset
It is used basically to produce similar kind of set say for example synonyms but based on dictionary

In [156]:
from textblob import Word
from textblob.wordnet import VERB
word = Word("machine")
word.synsets

[Synset('machine.n.01'),
 Synset('machine.n.02'),
 Synset('machine.n.03'),
 Synset('machine.n.04'),
 Synset('machine.n.05'),
 Synset('car.n.01'),
 Synset('machine.v.01'),
 Synset('machine.v.02')]

In [157]:
word = Word("books")
word.synsets

[Synset('book.n.01'),
 Synset('book.n.02'),
 Synset('record.n.05'),
 Synset('script.n.01'),
 Synset('ledger.n.01'),
 Synset('book.n.06'),
 Synset('book.n.07'),
 Synset('koran.n.01'),
 Synset('bible.n.01'),
 Synset('book.n.10'),
 Synset('book.n.11'),
 Synset('book.v.01'),
 Synset('reserve.v.04'),
 Synset('book.v.03'),
 Synset('book.v.04')]

In [158]:
# to get definition of any words we can use definitions

text = Word("Computer")
print(text.definitions)

text = Word("Cricket")
print(text.definitions)

['a machine for performing calculations automatically', 'an expert at calculation (or at operating calculating machines)']
['leaping insect; male makes chirping noises by rubbing the forewings together', 'a game played with a ball and bat by two teams of 11 players; teams take turns trying to score runs', 'play cricket']


#### Spelling Correction

In [159]:
# performing spelling correction

text = TextBlob("reade")
print(text.correct())

text = TextBlob("computor")
print(text.correct())

ready
computer


spellcheck function gives the output with probability of correct and incorrect

In [160]:
# spellcheck function

text = Word("computoor")
print(text.spellcheck())

text = Word("pronounceee")
print(text.spellcheck())

[('computer', 1.0)]
[('pronounced', 0.7), ('pronounce', 0.3)]


#### Word Count 

In [161]:
text = TextBlob("This is for word word count")
text.word_counts

defaultdict(int, {'this': 1, 'is': 1, 'for': 1, 'word': 2, 'count': 1})

#### Language Translation

In [162]:
# English to Hindi

text = TextBlob("Action speaks louder than words")
text.translate(to = 'hi')

TextBlob("काम बोलता है बातें नहीं")

In [163]:
# Hindi to English

text = TextBlob("काम बोलता है बातें नहीं")
text.translate(to = 'en')

TextBlob("action speaks louder than words")

In [164]:
# English to Chinese

text = TextBlob("Action speaks louder than words")
text.translate(to = 'zh-CN')

TextBlob("行动胜于雄辩")

#### Language Detection

In [165]:
text = TextBlob("行动胜于雄辩")
text.detect_language()

'zh-CN'

#### n-gram
n-gram is used to generate all the combinations of words with the given parameter set and it is basically used before training NLP model to send every combination of words to model while training.

In [166]:
text = TextBlob("This is for testing purpose")
text.ngrams(2)

[WordList(['This', 'is']),
 WordList(['is', 'for']),
 WordList(['for', 'testing']),
 WordList(['testing', 'purpose'])]

### Text Classification Model

In [167]:
train = [
    ('I love to eat apple','pos'),
    ('Your habit is bad','neg'),
    ('It was an amazing movie','pos'),
    ('this is my best work','pos'),
    ('i am not feeling good','neg'),
    ('my friends are good','pos'),
    ('i do not wanna talk anyone','neg'),
    ('he is my enemy','neg'),
    ('your attitude was horrible','neg')
]

test = [
    ('The juice was good','pos'),
    ('you are fooling me','pos'),
    ('I feel amazing','pos'),
    ('i cannot believe this')
]

In [168]:
# traing our model

from textblob.classifiers import NaiveBayesClassifier
cl = NaiveBayesClassifier(train)

In [169]:
# prediction

cl.classify('i cannot believe this')

'neg'

#### Classifying text

In [170]:
# we can also check the probability of pos and negative class

text_prob = cl.prob_classify("I am suffering from fever")
text_prob.max()

'neg'

In [171]:
round(text_prob.prob("neg"),2)

0.57

#### Updating Classifier with new data
If we want to update model with new data then we can use update function to achieve the task

In [172]:
new_data = [('She is my best friend','pos'),
           ('I an very happy','pos'),
           ('this is not good','neg')]

cl.update(new_data)

True