### TextBlob Library Understanding

In [1]:
#Install TextBlob
!pip install textblob

Collecting textblob
  Downloading textblob-0.15.3-py2.py3-none-any.whl (636 kB)
Installing collected packages: textblob
Successfully installed textblob-0.15.3


In [2]:
#Download models
!python -m textblob.download_corpora #Downloads all the required models at once

Finished.


[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\saite\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\brown.zip.
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\saite\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\saite\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\saite\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package conll2000 to
[nltk_data]     C:\Users\saite\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\conll2000.zip.
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\saite\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\movie_reviews.zip.


In [3]:
#Importing
from textblob import TextBlob

In [36]:
#Sample two-sentence text used by the tagging and tokenization cells below
data = TextBlob('I love Natural language Processing, not you! I am a messi fan')

In [37]:
#POS tagging: returns (word, tag) pairs; punctuation tokens are not included
data.tags

[('I', 'PRP'),
 ('love', 'VBP'),
 ('Natural', 'JJ'),
 ('language', 'NN'),
 ('Processing', 'NNP'),
 ('not', 'RB'),
 ('you', 'PRP'),
 ('I', 'PRP'),
 ('am', 'VBP'),
 ('a', 'DT'),
 ('messi', 'NN'),
 ('fan', 'NN')]

In [38]:
#Noun phrase extraction: returns a WordList of lowercased phrases
data.noun_phrases

WordList(['natural language', 'processing', 'messi fan'])

### Tokenization

In [39]:
#Word-level tokens; punctuation is stripped from the result
data.words

WordList(['I', 'love', 'Natural', 'language', 'Processing', 'not', 'you', 'I', 'am', 'a', 'messi', 'fan'])

In [40]:
#Sentence-level tokens: a list of Sentence objects
data.sentences

[Sentence("I love Natural language Processing, not you!"),
 Sentence("I am a messi fan")]

##### Word Inflection

In [118]:
#Sample sentence for the inflection demo; note that 'spaces' (index 2) is already plural
sent = TextBlob('Use 4 spaces per indentation level')

In [119]:
#Tokenize so that individual words can be picked out by index below
sent.words

WordList(['Use', '4', 'spaces', 'per', 'indentation', 'level'])

In [34]:
#pluralize() does not check whether the word is already plural:
#'spaces' (index 2) comes back as 'spacess'. Call it on singular forms only.
sent.words[2].pluralize()

'spacess'

In [35]:
#Converts plural to singular ('spaces' -> 'space' here);
#results are not guaranteed to be accurate for every word
sent.words[2].singularize()

'space'

##### Stemming

In [43]:
from textblob import Word
#Stemming cuts the word down to a root that need not be a real word ('history' -> 'histori')
q = Word('history')
q.stem()

'histori'

##### Lemmatization

In [48]:
#Lemmatization maps an inflected form back to its dictionary form ('histories' -> 'history')
q = Word('histories')
q.lemmatize()

'history'

In [55]:
#Meaning of the word, works like a dictionary:
#returns a list of definition strings covering the word's different senses

Word('barricade').definitions

['a barrier set up by police to stop traffic on a street or road in order to catch a fugitive or inspect traffic etc.',
 'a barrier (usually thrown up hastily) to impede the advance of an enemy',
 'render unsuitable for passage',
 'prevent access to by barricading',
 'block off with barricades']

In [65]:
#Spelling correction: correct() returns a TextBlob with misspelled words replaced

g = TextBlob('can you pronounce czechusovakia?')
g.correct()

TextBlob("can you pronounce czechoslovakia?")

In [73]:
#Caveat: a valid word can get "corrected" too ('psycho' -> 'psychic'), apparently
#because the corrector does not know it, so review the output before trusting it
g = TextBlob('can you pronounce psycho?')
g.correct()

TextBlob("can you pronounce psychic?")

In [69]:
#spellcheck() lists (candidate, score) pairs for a single word
k = Word('psycho')
k.spellcheck()

[('psychic', 1.0)]

In [79]:
#Word count

sent = TextBlob('she is not from here. No one knows it.')

#word_counts lowercases every word before counting, so the lookup key must be
#lowercase as well; a key such as 'No' would silently return 0.
#Printed explicitly because only the last expression of a cell is displayed.
print(sent.word_counts['no']) #case-insensitive count -> 1

#WordList.count with case_sensitive=True only counts exact-case matches
sent.words.count('No', case_sensitive = True) #Which gives exact match

1

In [86]:
#Translation and language detection
#translate(to='kn') requests a Kannada translation of the English sentence
#NOTE(review): presumably calls an online translation service, so this cell needs
#network access; translate()/detect_language() also look to be deprecated in TextBlob
#releases newer than the 0.15.3 installed above - confirm before upgrading
blob = TextBlob('Something is better than nothing')
blob.translate(to = 'kn')

TextBlob("ಯಾವುದಕ್ಕಿಂತ ಯಾವುದೋ ಉತ್ತಮವಾಗಿದೆ")

In [89]:
#Language detection: returns a language code ('kn' for this Kannada text)
#NOTE(review): presumably relies on an online service as well - confirm it works offline
blob = TextBlob('ಯಾವುದಕ್ಕಿಂತ ಯಾವುದೋ ಉತ್ತಮವಾಗಿದೆ')
blob.detect_language()

'kn'

In [112]:
#Keep only the noun lemmas of a short news paragraph
text3 = """The Election Commission on Friday debarred Assam Minister and BJP leader Himanta Biswa Sarma from campaigning for ongoing Assam elections for 48 hours with effect from today. This came after Sarma allegedly made threatening remarks against opposition leader Hagrama Mohilary of the Bodoland People's Front. EC had on Thursday asked Sarma for an explanation by today over his remarks."""
word = TextBlob(text3.lower())
#Lemmatize each token, then keep the lemmas whose standalone POS tag contains 'NN'
final = [
    lemma
    for lemma in (Word(token).lemmatize() for token in word.words)
    if 'NN' in TextBlob(lemma).tags[0][1]
]
print(' '.join(final))

election commission friday assam minister bjp leader himanta biswa sarma assam election hour effect today sarma remark opposition leader hagrama mohilary bodoland people front ec thursday sarma explanation today remark


In [95]:
#Classification

#Load data: small hand-labelled (sentence, label) sets for sentiment classification

#Training pairs: the five positive sentences first, then the five negative ones
train = [
    (sentence, 'pos')
    for sentence in [
        'I love this sandwich.',
        'this is an amazing place!',
        'I feel very good about these beers.',
        'this is my best work.',
        "what an awesome view",
    ]
] + [
    (sentence, 'neg')
    for sentence in [
        'I do not like this restaurant',
        'I am tired of this stuff.',
        "I can't deal with this",
        'he is my sworn enemy!',
        'my boss is horrible.',
    ]
]

#Held-out pairs: sentences and their labels are zipped position by position
test = list(zip(
    [
        'the beer was good.',
        'I do not enjoy my job',
        "I ain't feeling dandy today.",
        "I feel amazing!",
        'Gary is a friend of mine.',
        "I can't believe I'm doing this.",
    ],
    ['pos', 'neg', 'neg', 'pos', 'pos', 'neg'],
))




In [96]:
from textblob.classifiers import NaiveBayesClassifier
#Fit a Naive Bayes classifier on the labelled (sentence, label) pairs in `train`
cl = NaiveBayesClassifier(train)

In [97]:
#Accuracy on the six held-out `test` pairs
cl.accuracy(test)

0.8333333333333334

In [101]:
#Predict the label for a single sentence that is not in the training data
cl.classify('do not like library')

'neg'

In [113]:
#Ask for the probability of each label instead of only the winning label
prob_dist = cl.prob_classify('this is the worst library')

In [114]:
#Probability assigned to the 'neg' label (about 0.37 for this sentence)
prob_dist.prob('neg')

0.3688524590163936

In [115]:
#Probability assigned to the 'pos' label (about 0.63): the clearly negative sentence is
#scored as positive. 'worst' and 'library' never occur in `train`, which is probably why.
prob_dist.prob('pos')

0.6311475409836058

In [None]:
#To train our above model with new data
#`new_data` uses the same (sentence, label) format as `train`; it is defined here so
#the cell runs on a fresh kernel instead of failing with a NameError.
new_data = [
    ('She is my best friend.', 'pos'),
    ("I'm happy to have a new friend.", 'pos'),
    ('Stay thirsty, my friend.', 'pos'),
    ("He ain't from around here.", 'neg'),
]
#update() adds the examples and retrains. More data does not guarantee a better
#model, so re-run cl.accuracy(test) afterwards to measure the effect.
cl.update(new_data)