In [95]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from collections import Counter
import re

In [96]:
# Fetch the NLTK data packages this notebook relies on. Per the captured log,
# packages that are already present are reported as up to date.
nltk.download('punkt')  # tokenizer models
nltk.download('stopwords')  # stop-word lists
nltk.download('wordnet')  # WordNet data for the lemmatizer
nltk.download('averaged_perceptron_tagger')  # POS tagger model

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [97]:
# Sample short story: the raw input text that is tokenized and analysed by the
# cells that follow.
story = """
In the heart of a bustling city, amidst the cacophony of urban life, there stood an old, dilapidated building.
Its walls were adorned with remnants of grandeur, once a symbol of prestige and culture. However, time had been unkind,
leaving behind only echoes of its former glory. Yet, hidden within its decaying walls lay a secret that few knew of—a forgotten
symphony waiting to be heard.

Deep within the building, tucked away from the prying eyes of the world, lived an old man named Gregory. He was the caretaker of
this forgotten relic, the last guardian of its lost beauty. With each passing day, he tended to the building's needs, preserving
what little remained of its grand past.

Gregory was a man of few words, his life a silent melody woven with the threads of solitude. His only companion was an ancient
piano, its keys worn with age but still whispering tales of forgotten melodies. It was here, in the quiet embrace of this forgotten
sanctuary, that Gregory found solace.

One evening, as the sun dipped below the horizon, casting an amber glow upon the city, Gregory sat by the piano lost in thought.
His fingers traced the keys, evoking a haunting melody that reverberated through the empty halls. It was a melody of longing, of
dreams left unfulfilled, and of memories etched in the walls.

Unbeknownst to Gregory, his music had not gone unheard. Across the street, in a tiny apartment nestled amidst the chaos of the
city, lived a young girl named Lily. She was drawn to the melancholic strains that drifted through her window, captivating her
restless soul.

Night after night, Lily would listen to Gregory's melodies, each note stirring something deep within her. They spoke of a world
beyond the mundane, of passions unspoken, and of a longing for connection. Intrigued by the music's allure, Lily made it her
mission to uncover its source.

With determination in her heart, Lily ventured into the abandoned building, guided by the ethereal strains of Gregory's piano.
The once-grand halls now echoed with the whispers of time, their faded elegance a testament to the passage of years. Yet, amidst
the decay, Lily found beauty in the forgotten, a beauty that spoke to her soul.

At last, she reached the room where Gregory sat, his fingers dancing across the keys with a fervor born of years of solitude.
He looked up, startled by her presence, yet there was a glimmer of recognition in his weary eyes.

"Who are you?" Gregory asked, his voice a mere whisper in the silence.

"I'm Lily," she replied, her voice tinged with excitement. "I've been listening to your music. It's beautiful."

A flicker of emotion passed across Gregory's face, a long-forgotten spark reignited by Lily's words. For the first time in
years, he felt a connection, a bond forged through the language of music.

Together, Gregory and Lily breathed life into the forgotten symphony that lay dormant within the old building's walls. Their
melodies intertwined, weaving a tapestry of sound that transcended time and space. With each note, they breathed new life into
the forgotten, transforming decay into beauty, and solitude into companionship.

And so, in the heart of the bustling city, amidst the chaos of urban life, a forgotten symphony found its voice once more,
echoing through the night as a testament to the power of music to unite souls across the divides of time and space.
"""

In [98]:
# Tokenization: split the story text into tokens with NLTK's word_tokenize.
# Punctuation marks come back as separate tokens (see the ',' in the preview).
tokens = word_tokenize(story)

# Preview only the first ten tokens to keep the cell output short.
print("Tokenization:", tokens[:10])

Tokenization: ['In', 'the', 'heart', 'of', 'a', 'bustling', 'city', ',', 'amidst', 'the']


In [99]:
# Removing stopwords
# Keep the stop words in a set so each membership test in the filter below is
# a constant-time lookup rather than a scan of the whole list. The contents are
# the same as before: NLTK's English stop words plus the comma and full stop.
stop_words = set(stopwords.words('english')) | {',', '.'}

# Compare case-insensitively (the stop-word list is lower-case) but keep each
# surviving token in its original casing.
filtered_tokens = [word for word in tokens if word.lower() not in stop_words]

print("Filtered Tokens (Stopwords Removed):", filtered_tokens[:10])

Filtered Tokens (Stopwords Removed): ['heart', 'bustling', 'city', 'amidst', 'cacophony', 'urban', 'life', 'stood', 'old', 'dilapidated']


In [100]:
# Stemming: reduce every filtered token to its Porter stem.
porter = PorterStemmer()
stemmed_tokens = list(map(porter.stem, filtered_tokens))

print("Stemmed Tokens:", stemmed_tokens[:10])

Stemmed Tokens: ['heart', 'bustl', 'citi', 'amidst', 'cacophoni', 'urban', 'life', 'stood', 'old', 'dilapid']


In [101]:
# Lemmatization: map every filtered token to its WordNet lemma.
# lemmatize() is called with the word only, so its default part of speech applies.
lemmatizer = WordNetLemmatizer()
lemmatized_tokens = list(map(lemmatizer.lemmatize, filtered_tokens))

print("Lemmatized Tokens:", lemmatized_tokens[:10])

Lemmatized Tokens: ['heart', 'bustling', 'city', 'amidst', 'cacophony', 'urban', 'life', 'stood', 'old', 'dilapidated']


In [102]:
# Frequency distribution: tally how often each lemmatized token occurs.
frequency_distribution = Counter()
frequency_distribution.update(lemmatized_tokens)

# Show the ten most frequent tokens together with their counts.
print("Frequency Distribution:", frequency_distribution.most_common(10))

Frequency Distribution: [('Gregory', 11), ('forgotten', 8), ("'s", 8), ('Lily', 8), ('melody', 6), ('life', 5), ('building', 5), ('time', 5), ('music', 5), ('city', 4)]


In [103]:
# POS tagging
# Tag the complete token sequence so the tagger sees every word in its original
# context, function words included. Tagging the stop-word-stripped list removes
# the neighbouring words the tagger uses to decide a tag. The same stop-word
# filter is applied afterwards, so tagged_tokens still lines up one-to-one with
# filtered_tokens.
tagged_tokens = [
    (word, tag)
    for word, tag in nltk.pos_tag(tokens)
    if word.lower() not in stop_words
]

In [106]:
# Function to extract main character using POS tags
def extract_main_character(tagged_tokens, character_names=("Gregory", "Lily")):
    """Return the candidate name that occurs most often as a proper noun.

    Args:
        tagged_tokens: Iterable of ``(word, tag)`` pairs, such as the output
            of ``nltk.pos_tag``.
        character_names: Candidate names to count. Defaults to the two
            characters of the story, so existing one-argument calls behave
            exactly as before.

    Returns:
        The candidate counted most often among tokens whose tag starts with
        ``'NNP'``, or ``None`` when no candidate is found. On a tie the name
        encountered first wins.
    """
    # Set membership keeps the per-token check cheap for long candidate lists.
    candidates = set(character_names)
    character_counts = Counter(
        word
        for word, tag in tagged_tokens
        if tag.startswith('NNP') and word in candidates
    )
    if not character_counts:
        return None
    return character_counts.most_common(1)[0][0]

# Run the POS-based extraction on the tagged story tokens and report the result.
main_character = extract_main_character(tagged_tokens)

print("Main Character:", main_character)

Main Character: Gregory


In [105]:
# Get main character: count plain mentions of each known name among the
# lemmatized tokens (no POS information is used here).
character_names = ["Gregory", "Lily"]
character_counts = Counter(
    token for token in lemmatized_tokens if token in character_names
)

# Fall back to None when neither name appears at all.
if character_counts:
    main_character = character_counts.most_common(1)[0][0]
else:
    main_character = None

print("Main Character:", main_character)

Main Character: Gregory
