# Loading the book

In [1]:
with open("miracle-in-the-andes.txt", "r", encoding="UTF-8") as file:
    book = file.read()

# The most used words (non-article)

In [3]:
import re
pattern = re.compile("[a-zA-Z]+")
findings = re.findall(pattern, book.lower())
findings[:5]

['chapter', 'before', 'it', 'was', 'friday']

In [4]:
d = {}
for word in findings:
    if word in d.keys():
        d[word] = d[word] + 1
    else:
        d[word] = 1

In [6]:
d_list = [(value, key) for (key, value) in d.items()]
d_list = sorted(d_list,reverse=True)
d_list[:5]

[(5346, 'the'), (2795, 'and'), (2729, 'i'), (2400, 'to'), (2060, 'of')]

In [13]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
english_stopwords = stopwords.words("english")
english_stopwords[:5]

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hassa\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


['i', 'me', 'my', 'myself', 'we']

In [14]:
filtered_words = []
for count, word in d_list:
    if word not in english_stopwords:
        filtered_words.append((word, count))

In [15]:
filtered_words[:5]

[('would', 575), ('us', 519), ('said', 292), ('roberto', 284), ('could', 252)]

# Sentiment Analysis: What is the most positive and negative chapters?

In [16]:
from nltk.sentiment import SentimentIntensityAnalyzer

### An example

In [19]:
nltk.download('vader_lexicon')
analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\hassa\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [28]:
scores = analyzer.polarity_scores("i gift them")

In [29]:
if scores["pos"] > scores["neg"]:
    print("it is a positive text")
else:
    print("It is a negative text")

it is a positive text


In [31]:
analyzer.polarity_scores(book)

{'neg': 0.116, 'neu': 0.76, 'pos': 0.125, 'compound': 1.0}

### Chapters sentiment analysis

In [32]:
import re
pattern = re.compile("Chapter [0-9]+")
chapters = re.split(pattern, book)

In [35]:
for chapter in chapters:
    scores = analyzer.polarity_scores(chapter)
    print(scores)

{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.061, 'neu': 0.779, 'pos': 0.16, 'compound': 1.0}
{'neg': 0.12, 'neu': 0.726, 'pos': 0.154, 'compound': 0.9991}
{'neg': 0.145, 'neu': 0.751, 'pos': 0.105, 'compound': -0.9999}
{'neg': 0.141, 'neu': 0.721, 'pos': 0.138, 'compound': -0.9963}
{'neg': 0.118, 'neu': 0.742, 'pos': 0.141, 'compound': 0.9997}
{'neg': 0.124, 'neu': 0.761, 'pos': 0.115, 'compound': -0.9979}
{'neg': 0.136, 'neu': 0.761, 'pos': 0.103, 'compound': -0.9999}
{'neg': 0.12, 'neu': 0.786, 'pos': 0.094, 'compound': -0.9998}
{'neg': 0.097, 'neu': 0.824, 'pos': 0.079, 'compound': -0.9996}
{'neg': 0.086, 'neu': 0.733, 'pos': 0.181, 'compound': 1.0}
