In [1]:
# Chapter 12 (Natural Language Processing)

In [2]:
import textblob
import nltk
from textblob import TextBlob

In [3]:
text = 'Today is a good day. Then tomorrow is likely to be bad day.'
blob = TextBlob(text)

In [4]:
blob.sentences

[Sentence("Today is a good day."),
 Sentence("Then tomorrow is likely to be bad day.")]

In [5]:
blob.words

WordList(['Today', 'is', 'a', 'good', 'day', 'Then', 'tomorrow', 'is', 'likely', 'to', 'be', 'bad', 'day'])

In [6]:
blob.tokens

WordList(['Today', 'is', 'a', 'good', 'day', '.', 'Then', 'tomorrow', 'is', 'likely', 'to', 'be', 'bad', 'day', '.'])

In [7]:
blob.tags

[('Today', 'NN'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('good', 'JJ'),
 ('day', 'NN'),
 ('Then', 'RB'),
 ('tomorrow', 'NN'),
 ('is', 'VBZ'),
 ('likely', 'JJ'),
 ('to', 'TO'),
 ('be', 'VB'),
 ('bad', 'JJ'),
 ('day', 'NN')]

In [8]:
blob.noun_phrases

WordList(['good day', 'bad day'])

In [9]:
blob.sentiment

Sentiment(polarity=3.700743415417188e-17, subjectivity=0.7555555555555555)

In [11]:
for sentence in blob.sentences:
    print(sentence.sentiment)

Sentiment(polarity=0.7, subjectivity=0.6000000000000001)
Sentiment(polarity=-0.3499999999999999, subjectivity=0.8333333333333333)


In [12]:
from textblob.sentiments import NaiveBayesAnalyzer

In [13]:
blob2 = TextBlob(text, analyzer = NaiveBayesAnalyzer())
blob2.sentiment

Sentiment(classification='pos', p_pos=0.6386587215262682, p_neg=0.3613412784737319)

In [14]:
for sentence in blob2.sentences:
    print(sentence.sentiment)

Sentiment(classification='pos', p_pos=0.7265237431528468, p_neg=0.2734762568471531)
Sentiment(classification='neg', p_pos=0.4518237741969971, p_neg=0.5481762258030025)


In [16]:
from textblob import Word

In [17]:
w = Word('similarities')
w.singularize()

'similarity'

In [18]:
w = Word('cacti')
w.singularize()

'cactus'

In [19]:
w = Word('cactiK')
w.spellcheck()

[('tactic', 0.3333333333333333),
 ('lactic', 0.3333333333333333),
 ('cactus', 0.3333333333333333)]

In [20]:
w = Word('yew')
w.spellcheck()

[('new', 0.42077831827658096),
 ('yes', 0.23905489923558026),
 ('yet', 0.1695621959694232),
 ('few', 0.15913829047949965),
 ('dew', 0.0034746351633078527),
 ('ye', 0.002779708130646282),
 ('pew', 0.002432244614315497),
 ('jew', 0.0010423905489923557),
 ('yer', 0.00034746351633078526),
 ('yep', 0.00034746351633078526),
 ('yea', 0.00034746351633078526),
 ('sew', 0.00034746351633078526),
 ('hew', 0.00034746351633078526)]

In [21]:
w.correct()

'new'

In [25]:
w = Word('varieties')
w.stem()

'varieti'

In [26]:
w.lemmatize()

'variety'

In [27]:
from pathlib import Path

In [28]:
blob = TextBlob(Path('./RomeoAndJuliet.txt').read_text(encoding='utf-8'))

In [32]:
blob.word_counts['romeo']

315

In [33]:
happy = Word('happy')
happy.definitions

['enjoying or showing or marked by joy or pleasure',
 'marked by good fortune',
 'eagerly disposed to act or to be of service',
 'well expressed and to the point']

In [34]:
happy.synsets

[Synset('happy.a.01'),
 Synset('felicitous.s.02'),
 Synset('glad.s.02'),
 Synset('happy.s.04')]

In [36]:
synonyms = set()
for synset in happy.synsets:
    for lemma in synset.lemmas():
        synonyms.add(lemma.name())

synonyms

{'felicitous', 'glad', 'happy', 'well-chosen'}

In [37]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/student/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [38]:
from nltk.corpus import stopwords

In [1]:
stops = stopwords.words('english')
stops

NameError: name 'stopwords' is not defined