# Load the Book

In [3]:
# Read the whole book into memory as a single string.
# The encoding is stated explicitly so the text is decoded the same way on every
# platform instead of depending on the locale default.
# NOTE(review): assumes the file is UTF-8 encoded — confirm.
with open("miracle_in_the_andes.txt", "r", encoding="utf-8") as file:
    book = file.read()

# Find the most used words in the book (excluding stopwords)

In [6]:
import re

# Lowercase the text so counting is case-insensitive, then collect every run of
# ASCII letters. Anything else (digits, punctuation, apostrophes, whitespace)
# acts as a separator between words.
pattern = re.compile("[a-zA-Z]+")
findings = pattern.findall(book.lower())

# Preview the first few tokens.
findings[:5]

['chapter', 'before', 'it', 'was', 'friday']

In [8]:
from collections import Counter

# Count how many times each word occurs in `findings`.
# Counter does the tallying in one pass; converting back to a plain dict keeps
# `dictionary` a regular word -> occurrence-count mapping, with keys in order of
# first appearance.
dictionary = dict(Counter(findings))

In [13]:
# Turn the word -> count mapping into (count, word) pairs so that sorting
# compares the count first.
dictionary_list = [(dictionary[word], word) for word in dictionary]

# Sort a copy in descending order; pairs with equal counts are ordered by the
# word in reverse alphabetical order.
d_list = list(dictionary_list)
d_list.sort(reverse=True)

# Preview the ten most frequent words.
d_list[:10]

[(5346, 'the'),
 (2795, 'and'),
 (2729, 'i'),
 (2400, 'to'),
 (2060, 'of'),
 (1566, 'a'),
 (1430, 'was'),
 (1419, 'in'),
 (1226, 'we'),
 (1169, 'my')]

In [18]:
!pip3.13 install nltk



In [21]:
import nltk
from nltk.corpus import stopwords

# Fetch the stopword corpus; NLTK prints a status message and reports when the
# package is already up to date.
nltk.download("stopwords")

# English stopwords, used below to filter the word-frequency ranking.
english_stopwords = stopwords.words("english")

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/mahmudurmahid/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [26]:
# Drop stopwords from the frequency ranking and flip each pair to (word, count).
# `d_list` is already sorted by descending count, so the filtered list keeps
# that order.
# A set gives constant-time membership tests; otherwise the stopword collection
# is scanned once for every distinct word.
stopword_set = set(english_stopwords)
filtered_words = [
    (word, count)
    for count, word in d_list
    if word not in stopword_set
]

In [28]:
# Preview the ten most frequent (word, count) pairs that survived the stopword filter.
filtered_words[:10]

[('would', 575),
 ('us', 519),
 ('said', 292),
 ('roberto', 284),
 ('could', 252),
 ('one', 249),
 ('snow', 227),
 ('mountain', 183),
 ('time', 182),
 ('like', 165)]

# Sentiment analysis: which chapters are the most positive and the most negative?

### Example of SentimentIntensityAnalyzer

In [36]:
from nltk.sentiment import SentimentIntensityAnalyzer

# Download the 'vader_lexicon' NLTK resource (presumably the lexicon that
# SentimentIntensityAnalyzer scores against); NLTK reports when it is already
# up to date.
nltk.download("vader_lexicon")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/mahmudurmahid/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [38]:
# Create one analyzer instance; the cells below reuse it for every text they score.
analyzer = SentimentIntensityAnalyzer()

In [59]:
# Score a single example sentence; the next cell reads the "pos" and "neg" entries of the result.
scores = analyzer.polarity_scores("That yacht is huge")

In [60]:
# Label the example by comparing its positive and negative proportions.
# A tie (for instance both values are 0 because no sentiment-bearing words were
# found) is reported as neutral rather than falling through to "negative".
if scores["pos"] > scores["neg"]:
    print("This is a positive text")
elif scores["pos"] < scores["neg"]:
    print("This is a negative text")
else:
    print("This is a neutral text")

This is a positive text


In [62]:
# Score the entire book in a single call.
# NOTE(review): the compound value comes out at 1.0 here and at almost exactly ±1
# for every chapter further down, so on texts this long it looks saturated —
# the neg/pos proportions may be the more informative numbers to compare.
analyzer.polarity_scores(book)

{'neg': 0.116, 'neu': 0.76, 'pos': 0.125, 'compound': 1.0}

### Chapter sentiment analysis

In [64]:
import re

# Chapter headings are matched as "Chapter <digits>"; cutting the text at every
# heading leaves the pieces between headings as separate list items.
# NOTE: this rebinds `pattern`, which an earlier cell uses for the word regex.
pattern = re.compile("Chapter [0-9]+")
chapters = pattern.split(book)

In [69]:
# Discard the first piece of the split, i.e. whatever precedes the first chapter heading.
# The list is rebuilt from `book` (using the chapter-heading `pattern` compiled in
# the previous cell) instead of slicing `chapters` in place, so running this cell
# more than once no longer removes an additional chapter each time.
chapters = pattern.split(book)[1:]

In [72]:
# Run the analyzer over every chapter in order and print each raw score dictionary.
for scores in map(analyzer.polarity_scores, chapters):
    print(scores)

{'neg': 0.141, 'neu': 0.721, 'pos': 0.138, 'compound': -0.9963}
{'neg': 0.118, 'neu': 0.742, 'pos': 0.141, 'compound': 0.9997}
{'neg': 0.124, 'neu': 0.761, 'pos': 0.115, 'compound': -0.9979}
{'neg': 0.136, 'neu': 0.761, 'pos': 0.103, 'compound': -0.9999}
{'neg': 0.12, 'neu': 0.786, 'pos': 0.094, 'compound': -0.9998}
{'neg': 0.097, 'neu': 0.824, 'pos': 0.079, 'compound': -0.9996}
{'neg': 0.086, 'neu': 0.733, 'pos': 0.181, 'compound': 1.0}


In [76]:
# Same per-chapter scoring, with each result labelled by its position in `chapters`.
# NOTE: `number` is a 0-based list index, so it is presumably one less than the
# chapter number printed in the book — verify before quoting chapter numbers.
for number, scores in enumerate(map(analyzer.polarity_scores, chapters)):
    print(number, scores)

0 {'neg': 0.141, 'neu': 0.721, 'pos': 0.138, 'compound': -0.9963}
1 {'neg': 0.118, 'neu': 0.742, 'pos': 0.141, 'compound': 0.9997}
2 {'neg': 0.124, 'neu': 0.761, 'pos': 0.115, 'compound': -0.9979}
3 {'neg': 0.136, 'neu': 0.761, 'pos': 0.103, 'compound': -0.9999}
4 {'neg': 0.12, 'neu': 0.786, 'pos': 0.094, 'compound': -0.9998}
5 {'neg': 0.097, 'neu': 0.824, 'pos': 0.079, 'compound': -0.9996}
6 {'neg': 0.086, 'neu': 0.733, 'pos': 0.181, 'compound': 1.0}
