In [1]:
# Importing the textBlob
from textblob import TextBlob

In [2]:
blob = TextBlob("Sambalpuri is an Indo-Aryan language variety spoken in western Odisha, India. \n It is alternatively known as Western Odia, and as Kosali, a recently popularised but controversial term, which draws on an association with the ancient Kosala, whose vast territories also included the present-day Sambalpur region.")

print(blob.sentences)

## printing words of first sentence
for words in blob.sentences[0].words:  
  print (words)

[Sentence("Sambalpuri is an Indo-Aryan language variety spoken in western Odisha, India."), Sentence("It is alternatively known as Western Odia, and as Kosali, a recently popularised but controversial term, which draws on an association with the ancient Kosala, whose vast territories also included the present-day Sambalpur region.")]
Sambalpuri
is
an
Indo-Aryan
language
variety
spoken
in
western
Odisha
India


In [3]:
# Determining the noun phrases
blob = TextBlob("Sambalpuri is an Indo-Aryan language variety spoken in western Odisha, India.")
for np in blob.noun_phrases:
 print (np)

sambalpuri
indo-aryan
language variety
odisha
india


In [4]:
# Performing Parts of speech tagging
for words, tag in blob.tags:
    print (words, tag)

Sambalpuri NNP
is VBZ
an DT
Indo-Aryan JJ
language NN
variety NN
spoken VBN
in IN
western JJ
Odisha NNP
India NNP


In [5]:
# Singularizing the word
blob = TextBlob("Sambalpuri is an Indo-Aryan language variety spoken in western Odisha, India. \n It is alternatively known as Western Odia, and as Kosali, a recently popularised but controversial term, which draws on an association with the ancient Kosala, whose vast territories also included the present-day Sambalpur region.")
print (blob.sentences[1].words[17])
print (blob.sentences[1].words[17].singularize())

draws
draw


In [6]:
#Pluralizing the word
from textblob import Word
w = Word('region')
w.pluralize()

'regions'

In [7]:
## using tags
for word,pos in blob.tags:
    if pos == 'NN':
        print (word.pluralize())

languages
varieties
terms
associations
ancients
regions


In [8]:
## lemmatization
w = Word('swimming')
w.lemmatize("v") ## v here represents verb

'swim'

In [9]:
# Performing N-Grams Operation
for ngram in blob.ngrams(2):
    print (ngram)

['Sambalpuri', 'is']
['is', 'an']
['an', 'Indo-Aryan']
['Indo-Aryan', 'language']
['language', 'variety']
['variety', 'spoken']
['spoken', 'in']
['in', 'western']
['western', 'Odisha']
['Odisha', 'India']
['India', 'It']
['It', 'is']
['is', 'alternatively']
['alternatively', 'known']
['known', 'as']
['as', 'Western']
['Western', 'Odia']
['Odia', 'and']
['and', 'as']
['as', 'Kosali']
['Kosali', 'a']
['a', 'recently']
['recently', 'popularised']
['popularised', 'but']
['but', 'controversial']
['controversial', 'term']
['term', 'which']
['which', 'draws']
['draws', 'on']
['on', 'an']
['an', 'association']
['association', 'with']
['with', 'the']
['the', 'ancient']
['ancient', 'Kosala']
['Kosala', 'whose']
['whose', 'vast']
['vast', 'territories']
['territories', 'also']
['also', 'included']
['included', 'the']
['the', 'present-day']
['present-day', 'Sambalpur']
['Sambalpur', 'region']


In [10]:
# Printing the blob 
print (blob)
blob.sentiment

Sambalpuri is an Indo-Aryan language variety spoken in western Odisha, India. 
 It is alternatively known as Western Odia, and as Kosali, a recently popularised but controversial term, which draws on an association with the ancient Kosala, whose vast territories also included the present-day Sambalpur region.


Sentiment(polarity=0.11000000000000001, subjectivity=0.44000000000000006)

In [11]:
# Performing the correction of the Bolb sentence
blob = TextBlob('Sambalpuri is an Indo-Aryan language variety spoken in western Odisha, India.')
blob.correct()

TextBlob("Sambalpuri is an Undo-Bryan language variety spoken in western Dish, India.")

In [12]:
# Performing the spell check
blob.words[4].spellcheck()

[('language', 1.0)]

In [13]:
# Importing the random
import random

blob = TextBlob('Sambalpuri is an Indo-Aryan language variety spoken in western Odisha, India. It is \
alternatively known as Western Odia, and as Kosali, a recently popularised but controversial term, which draws on an association with the ancient Kosala, whose vast territories also included the present-day Sambalpur region.There has \
been a language movement campaigning for the recognition of the language. Its main objective has \
been the inclusion of the language into the 8th schedule of the Indian constitution.')

In [14]:
# Performing the pluralization of every word
nouns = list()
for word, tag in blob.tags:
    if tag == 'NN':
        nouns.append(word.lemmatize())
print ("This text is about...")
for item in random.sample(nouns, 5):
    word = Word(item)
    print (word.pluralize())

This text is about...
languages
recognitions
varieties
terms
inclusions


In [15]:
# Detecting the language
blob.detect_language()

'en'

In [16]:
# Translating the language from english to french
blob.translate(from_lang='en', to ='fr')

TextBlob("Le sambalpuri est une variété de langue indo-aryenne parlée dans l'ouest d'Odisha, en Inde. Il est également connu sous le nom d'Odia occidental et de Kosali, un terme récemment popularisé mais controversé, qui s'appuie sur une association avec l'ancienne Kosala, dont les vastes territoires comprenaient également la région actuelle de Sambalpur. Un mouvement linguistique a fait campagne pour reconnaissance de la langue. Son objectif principal a été l'inclusion de la langue dans le 8ème calendrier de la constitution indienne.")

In [17]:
# Taking the training and testing data
training = [
('Tom Holland is a terrible spiderman.','pos'),
('a terrible Javert (Russell Crowe) ruined Les Miserables for me...','pos'),
('The Dark Knight Rises is the greatest superhero movie ever!','neg'),
('Fantastic Four should have never been made.','pos'),
('Wes Anderson is my favorite director!','neg'),
('Captain America 2 is pretty awesome.','neg'),
('Let\s pretend "Batman and Robin" never happened..','pos'),
]
testing = [
('Superman was never an interesting character.','pos'),
('Fantastic Mr Fox is an awesome film!','neg'),
('Dragonball Evolution is simply terrible!!','pos')
]

In [18]:
# Classifier
from textblob import classifiers
classifier = classifiers.NaiveBayesClassifier(training)

In [19]:
## decision tree classifier
dt_classifier = classifiers.DecisionTreeClassifier(training)

In [20]:
# Printing the accuracy level of the classifier and displaying the show_informative_features
print (classifier.accuracy(testing))
classifier.show_informative_features(3)

1.0
Most Informative Features
            contains(is) = True              neg : pos    =      2.9 : 1.0
             contains(a) = False             neg : pos    =      1.8 : 1.0
      contains(terrible) = False             neg : pos    =      1.8 : 1.0


In [21]:
# Predicting the class of test tuple
blob = TextBlob('the weather is terrible!', classifier=classifier)
print (blob.classify())

neg


In [22]:
blob = TextBlob('this movie is interesting !', classifier=classifier)
print (blob.classify())

neg
