# Loading the book

In [1]:
# Read the whole book into memory as a single string.
# The encoding is passed explicitly so decoding does not depend on the
# platform's default locale (assumes the file is UTF-8 -- TODO confirm).
with open("miracle_in_the_andes.txt", "r", encoding="utf-8") as file:
    book = file.read()

# The most used words (excluding stop words)

In [2]:
import re

# Lower-case the text first so "The" and "the" count as the same word,
# then collect every run of ASCII letters as one token.
pattern = re.compile("[a-zA-Z]+")
findings = pattern.findall(book.lower())


In [3]:
# Tally how many times each token occurs.
dict1 = {}
for word in findings:
    dict1[word] = dict1.get(word, 0) + 1

# Build (count, word) pairs so a plain descending sort ranks by frequency
# first and by word second.
dict1_list = sorted(
    ((occurrences, token) for token, occurrences in dict1.items()),
    reverse=True,
)
dict1_list[:5]

[(5346, 'the'), (2795, 'and'), (2729, 'i'), (2400, 'to'), (2060, 'of')]

### Check the Python version

In [4]:
# Show the version of the interpreter this kernel is running on.
from platform import python_version
python_version()

'3.12.3'

### Install the NLTK library

In [5]:
!pip3.12 install nltk



In [6]:
import nltk
from nltk.corpus import stopwords

# Fetch the stop-word corpus, then load the English word list from it.
nltk.download("stopwords")
english_stopwords = stopwords.words("english")

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/doit_kgh/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [11]:
# Drop English stop words from the frequency ranking.
# dict1_list is already sorted by count (descending) and that order is kept;
# each entry is flipped from (count, word) to (word, count).
# The stop words are put in a set so every membership test is a hash lookup
# instead of a scan through the whole stop-word list.
stopword_set = set(english_stopwords)
filtered_list = [
    (word, count)
    for count, word in dict1_list
    if word not in stopword_set
]

In [12]:
# Show the ten most frequent words that are not stop words.
filtered_list[:10]

[('would', 575),
 ('us', 519),
 ('said', 292),
 ('roberto', 284),
 ('could', 252),
 ('one', 249),
 ('snow', 227),
 ('mountain', 183),
 ('time', 182),
 ('like', 165)]

### Sentiment Analysis: What is the most positive and most negative chapter?

#### Analyzing the complete book

In [18]:
# Download the "vader_lexicon" resource for the sentiment analyzer.
# Relies on `nltk` having been imported in an earlier cell.
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download("vader_lexicon")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/doit_kgh/nltk_data...


True

In [19]:
# One analyzer instance, reused for the whole-book and the per-chapter scoring.
analyzer = SentimentIntensityAnalyzer()
analyzer

<nltk.sentiment.vader.SentimentIntensityAnalyzer at 0x7d285866b8c0>

In [34]:
# Score the full text in a single call; the result is read by its "pos" and
# "neg" keys in the next cell.
scores = analyzer.polarity_scores(book)

In [35]:
# Label the whole book by whichever proportion is larger; a tie is reported
# as negative because the comparison is strict.
print(
    "It is positive text"
    if scores["pos"] > scores["neg"]
    else "It is negative text"
)

It is positive text


#### Analyzing a chapter of the book

In [44]:
import re

# Split the text on "Chapter <number>" headings and discard the first piece,
# i.e. whatever precedes the first heading.
pattern = re.compile("Chapter [0-9]+")
chapters = pattern.split(book)[1:]

In [53]:
# Score every chapter and keep the results keyed by chapter number (1-based).
# The per-chapter result is held in its own name so the whole-book `scores`
# from the earlier cell is not overwritten by this loop.
scores_dict = {}     # chapter number -> score dict returned by polarity_scores
chapter_labels = {}  # chapter number -> "Positive text" / "Negative text"
for index, chapter in enumerate(chapters, start=1):
    chapter_scores = analyzer.polarity_scores(chapter)
    scores_dict[index] = chapter_scores

    # Same rule as for the whole book: strict comparison, so a tie is negative.
    if chapter_scores["pos"] > chapter_scores["neg"]:
        chapter_labels[index] = "Positive text"
    else:
        chapter_labels[index] = "Negative text"

In [52]:
# Display the per-chapter results.
# NOTE(review): this cell's execution count (52) is lower than that of the loop
# cell above (53), so the saved output was produced before the loop cell was
# last run -- re-run both cells in order to refresh it.
scores_dict

{1: {'neg': 0.061, 'neu': 0.779, 'pos': 0.16, 'compound': 1.0},
 2: {'neg': 0.12, 'neu': 0.726, 'pos': 0.154, 'compound': 0.9991},
 3: {'neg': 0.145, 'neu': 0.751, 'pos': 0.105, 'compound': -0.9999},
 4: {'neg': 0.141, 'neu': 0.721, 'pos': 0.138, 'compound': -0.9963},
 5: {'neg': 0.118, 'neu': 0.742, 'pos': 0.141, 'compound': 0.9997},
 6: {'neg': 0.124, 'neu': 0.761, 'pos': 0.115, 'compound': -0.9979},
 7: {'neg': 0.136, 'neu': 0.761, 'pos': 0.103, 'compound': -0.9999},
 8: {'neg': 0.12, 'neu': 0.786, 'pos': 0.094, 'compound': -0.9998},
 9: {'neg': 0.097, 'neu': 0.824, 'pos': 0.079, 'compound': -0.9996},
 10: {'neg': 0.086, 'neu': 0.733, 'pos': 0.181, 'compound': 1.0}}