In [1]:
# Chapter 12 (Natural Language Processing)

In [2]:
import textblob
import nltk
from textblob import TextBlob

In [3]:
# Two-sentence sample text; `text` and `blob` are reused by the cells below.
text = 'Today is a good day. Then tomorrow is likely to be bad day.'
blob = TextBlob(text)

In [4]:
# Split the sample text into Sentence objects (two of them, per the output below).
blob.sentences

[Sentence("Today is a good day."),
 Sentence("Then tomorrow is likely to be bad day.")]

In [5]:
# Words of the text as a WordList; the output below contains no punctuation entries.
blob.words

WordList(['Today', 'is', 'a', 'good', 'day', 'Then', 'tomorrow', 'is', 'likely', 'to', 'be', 'bad', 'day'])

In [6]:
# Tokens of the text; unlike blob.words, the output below keeps the '.' entries.
blob.tokens

WordList(['Today', 'is', 'a', 'good', 'day', '.', 'Then', 'tomorrow', 'is', 'likely', 'to', 'be', 'bad', 'day', '.'])

In [7]:
# (word, tag) pairs for each word; the output below shows tags such as 'NN', 'VBZ', 'JJ'.
blob.tags

[('Today', 'NN'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('good', 'JJ'),
 ('day', 'NN'),
 ('Then', 'RB'),
 ('tomorrow', 'NN'),
 ('is', 'VBZ'),
 ('likely', 'JJ'),
 ('to', 'TO'),
 ('be', 'VB'),
 ('bad', 'JJ'),
 ('day', 'NN')]

In [8]:
# Noun phrases extracted from the text ('good day' and 'bad day' in the output below).
blob.noun_phrases

WordList(['good day', 'bad day'])

In [9]:
# Sentiment of the text as a whole: the output reports polarity and subjectivity.
# The polarity shown (about 3.7e-17) is effectively zero.
blob.sentiment

Sentiment(polarity=3.700743415417188e-17, subjectivity=0.7555555555555555)

In [10]:
# Score each sentence separately instead of the text as a whole; the output
# below shows one Sentiment line per sentence.
for sentence in blob.sentences:
    print(sentence.sentiment)

Sentiment(polarity=0.7, subjectivity=0.6000000000000001)
Sentiment(polarity=-0.3499999999999999, subjectivity=0.8333333333333333)


In [11]:
from textblob.sentiments import NaiveBayesAnalyzer

In [12]:
# Re-score the same sample text, this time passing a NaiveBayesAnalyzer
# as the analyzer.
blob2 = TextBlob(
    text,
    analyzer=NaiveBayesAnalyzer(),
)
# The output below is a classification label together with p_pos and p_neg.
blob2.sentiment

Sentiment(classification='pos', p_pos=0.6386587215262682, p_neg=0.3613412784737319)

In [13]:
# Per-sentence sentiment from blob2 (the NaiveBayesAnalyzer-backed blob);
# the output below shows one classification line per sentence.
for sentence in blob2.sentences:
    print(sentence.sentiment)

Sentiment(classification='pos', p_pos=0.7265237431528468, p_neg=0.2734762568471531)
Sentiment(classification='neg', p_pos=0.4518237741969971, p_neg=0.5481762258030025)


In [14]:
from textblob import Word

In [15]:
# Singular form of a regular plural ('similarity' in the output below).
w = Word('similarities')
w.singularize()

'similarity'

In [16]:
# Singular form of an irregular plural ('cactus' in the output below).
w = Word('cacti')
w.singularize()

'cactus'

In [33]:
# 'cactiK' is presumably a deliberate misspelling; spellcheck() returns
# (candidate, confidence) pairs — three equally weighted candidates in the output below.
w = Word('cactiK')
w.spellcheck()

[('tactic', 0.3333333333333333),
 ('lactic', 0.3333333333333333),
 ('cactus', 0.3333333333333333)]

In [35]:
# For this word the output below is the word itself with confidence 1.0.
w = Word('new')
w.spellcheck()

[('new', 1.0)]

In [19]:
# NOTE(review): relies on `w` bound in the preceding cell (Word('new')); the
# execution counts around here are out of order (33, 35, then 19), so confirm
# the result with Restart Kernel -> Run All.
w.correct()

'new'

In [20]:
# Stem of 'varieties'; the result in the output below ('varieti') is not itself a word.
w = Word('varieties')
w.stem()

'varieti'

In [21]:
# Lemma of the same Word (`w` from the preceding cell): 'variety' in the output below.
w.lemmatize()

'variety'

In [22]:
from pathlib import Path

In [23]:
# Read RomeoAndJuliet.txt (path relative to the working directory) as UTF-8
# and wrap it in a TextBlob.
# NOTE(review): this rebinds `blob`, replacing the two-sentence sample used by
# the earlier cells; re-running those cells afterwards would see the new text.
blob = TextBlob(Path('./RomeoAndJuliet.txt').read_text(encoding='utf-8'))

In [24]:
# Look up the count stored under the lowercase key 'romeo' (315 in the output below).
blob.word_counts['romeo']

315

In [25]:
# `happy` is reused by the following cells; the output below lists four definitions.
happy = Word('happy')
happy.definitions

['enjoying or showing or marked by joy or pleasure',
 'marked by good fortune',
 'eagerly disposed to act or to be of service',
 'well expressed and to the point']

In [26]:
# Synsets for 'happy'; the output below shows four Synset objects.
happy.synsets

[Synset('happy.a.01'),
 Synset('felicitous.s.02'),
 Synset('glad.s.02'),
 Synset('happy.s.04')]

In [27]:
# Gather the distinct lemma names found across every synset of `happy`.
synonyms = {
    lemma.name()
    for synset in happy.synsets
    for lemma in synset.lemmas()
}

synonyms

{'felicitous', 'glad', 'happy', 'well-chosen'}

In [28]:
# Download the NLTK 'stopwords' package; in the recorded run it was already
# up to date and the call returned True.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/student/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [29]:
from nltk.corpus import stopwords

In [30]:
# English stop-word list from the NLTK stopwords corpus.
stops = stopwords.words('english')
# NOTE(review): displaying `stops` prints the entire list (see the long output
# below); a slice such as stops[:10] would keep the notebook readable.
stops

['a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 "he'd",
 "he'll",
 'her',
 'here',
 'hers',
 'herself',
 "he's",
 'him',
 'himself',
 'his',
 'how',
 'i',
 "i'd",
 'if',
 "i'll",
 "i'm",
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it'd",
 "it'll",
 "it's",
 'its',
 'itself',
 "i've",
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'on