In [None]:
import pandas as pd
import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

#### Initial settings: pandas display options and one-time NLTK data download

In [None]:
# Lift pandas' display limits so printed frames are shown in full
# (None means "no limit" for both rows and columns).
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# --- One-time setup: fetch the NLTK data used by the preprocessing cell ---
print("Paths for NLTK:")
print(nltk.data.path)

# Resources needed by word_tokenize, stopwords and WordNetLemmatizer.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(nltk_resource)

#### Reading the CSV and working with the Sentiment column

In [None]:
# Read the raw dataset and keep only the free text and its sentiment label;
# every other column in the file is dropped right away.
kept_columns = ['Text', 'Sentiment']
commentaries = pd.read_csv('sentimentdataset.csv')[kept_columns]
print(commentaries.head())


In [None]:
# Frequency of every distinct raw sentiment label, most frequent first.
# reset_index(name=...) names the count column directly, so no separate
# column-renaming step is needed.
sentiment_counts = (
    commentaries[['Sentiment']]
    .value_counts()
    .reset_index(name='Count')
)

print(sentiment_counts)

In [None]:
# Collapses the dataset's fine-grained sentiment labels into three classes:
# 'Positive', 'Negative' and 'Neutral'.
# Keys are written in the dataset's original casing; the lookup is made
# case-insensitive later by lowercasing both sides.
# Every key must appear exactly once: a repeated key in a dict literal is not
# an error, the later entry silently overwrites the earlier one.
mapping = {
    'Positive': 'Positive',
    'Joy': 'Positive',
    'Excitement': 'Positive',
    'Neutral': 'Neutral',
    'Happy': 'Positive',
    'Contentment': 'Positive',
    'Sad': 'Negative',
    'Hopeful': 'Positive',
    'Gratitude': 'Positive',
    'Curiosity': 'Neutral',
    'Embarrassed': 'Negative',
    'Loneliness': 'Negative',
    'Hate': 'Negative',
    'Bad': 'Negative',
    'Playful': 'Positive',
    'Despair': 'Negative',
    'Elation': 'Positive',
    'Confusion': 'Neutral',
    'Acceptance': 'Neutral',
    'Inspired': 'Positive',
    'Frustrated': 'Negative',
    'Bitterness': 'Negative',
    'Indifference': 'Neutral',
    'Nostalgia': 'Neutral',
    'Serenity': 'Positive',
    'Ambivalence': 'Neutral',
    'Numbness': 'Neutral',
    'Determination': 'Positive',
    'Enthusiasm': 'Positive',
    'Empowerment': 'Positive',
    'Melancholy': 'Negative',
    'Proud': 'Positive',
    'Betrayal': 'Negative',
    'Arousal': 'Positive',
    'Grateful': 'Positive',
    'Negative': 'Negative',
    'Euphoria': 'Positive',
    'Hope': 'Positive',
    'Tenderness': 'Positive',
    'Desolation': 'Negative',
    'Compassionate': 'Positive',
    'Inspiration': 'Positive',
    'Frustration': 'Negative',
    'Grief': 'Negative',
    'Awe': 'Positive',
    'Empathetic': 'Positive',
    'Accomplishment': 'Positive',
    'Pride': 'Positive',
    'Free-spirited': 'Positive',
    'Envious': 'Negative',
    'Boredom': 'Negative',
    'Overwhelmed': 'Negative',
    'Dismissive': 'Negative',
    'Devastated': 'Negative',
    'Calmness': 'Positive',
    'Surprise': 'Neutral',
    'Fearful': 'Negative',
    'Adventure': 'Positive',
    'Resentment': 'Negative',
    'Regret': 'Negative',
    'Bitter': 'Negative',
    'Confident': 'Positive',
    'Kind': 'Positive',
    'Jealous': 'Negative',
    'Zest': 'Positive',
    'Fear': 'Negative',
    'Love': 'Positive',
    'Envy': 'Negative',
    'Enjoyment': 'Positive',
    'Enchantment': 'Positive',
    'Mischievous': 'Positive',
    'Yearning': 'Neutral',
    'Whimsy': 'Positive',
    'Tranquility': 'Positive',
    'Thrill': 'Positive',
    'Adoration': 'Positive',
    'Affection': 'Positive',
    'Disgust': 'Negative',
    'Disappointed': 'Negative',
    'Disappointment': 'Negative',
    'Admiration': 'Positive',
    'Exploration': 'Positive',
    'Heartbreak': 'Negative',
    'Happiness': 'Positive',
    'Fulfillment': 'Positive',
    'Isolation': 'Negative',
    'Creativity': 'Positive',
    'Contemplation': 'Neutral',
    'Coziness': 'Positive',
    'Reflection': 'Neutral',
    'Shame': 'Negative',
    'Captivation': 'Positive',
    'Emotion': 'Neutral',
    'Amusement': 'Positive',
    'Rejuvenation': 'Positive',
    'Reverence': 'Positive',
    'Satisfaction': 'Positive',
    'Sadness': 'Negative',
    'Anger': 'Negative',
    'Anticipation': 'Positive',
    'Apprehensive': 'Negative',
    'Anxiety': 'Negative',
    'Appreciation': 'Positive',
    'Resilience': 'Positive',
    'Romance': 'Positive',
    'Ruins': 'Negative',
    'Runway Creativity': 'Positive',
    'Relief': 'Positive',
    'Renewed Effort': 'Positive',
    'Solace': 'Positive',
    'Solitude': 'Neutral',
    'Sorrow': 'Negative',
    'Spark': 'Positive',
    'Success': 'Positive',
    'Confidence': 'Positive',
    'Connection': 'Positive',
    'Celebration': 'Positive',
    'Celestial Wonder': 'Positive',
    'Challenge': 'Neutral',
    'Charm': 'Positive',
    'Colorful': 'Positive',
    'Bittersweet': 'Neutral',
    'Blessed': 'Positive',
    'Breakthrough': 'Positive',
    'Ecstasy': 'Positive',
    'Elegance': 'Positive',
    'EmotionalStorm': 'Negative',
    'Heartache': 'Negative',
    'Heartwarming': 'Positive',
    'Helplessness': 'Negative',
    'Harmony': 'Positive',
    'Freedom': 'Positive',
    'Friendship': 'Positive',
    'Grandeur': 'Positive',
    'JoyfulReunion': 'Positive',
    'Kindness': 'Positive',
    'LostLove': 'Negative',
    'Journey': 'Neutral',
    'Joy in Baking': 'Positive',
    'InnerJourney': 'Neutral',
    'Intimidation': 'Negative',
    'Intrigue': 'Positive',
    'Creative Inspiration': 'Positive',
    'Culinary Adventure': 'Positive',
    'Exhaustion': 'Negative',
    'FestiveJoy': 'Positive',
    'Envisioning History': 'Neutral',
    'Energy': 'Positive',
    'Engagement': 'Positive',
    'Hypnotic': 'Positive',
    'Iconic': 'Positive',
    'Imagination': 'Positive',
    'Immersion': 'Positive',
    'Overjoyed': 'Positive',
    'Pensive': 'Neutral',
    'PlayfulJoy': 'Positive',
    'Obstacle': 'Negative',
    "Ocean's Freedom": 'Positive',
    'Optimism': 'Positive',
    'Miscalculation': 'Negative',
    'Motivation': 'Positive',
    "Nature's Beauty": 'Positive',
    'Positivity': 'Positive',
    'Pressure': 'Negative',
    'Radiance': 'Positive',
    'DreamChaser': 'Positive',
    'Desperation': 'Negative',
    'CulinaryOdyssey': 'Positive',
    'Darkness': 'Negative',
    'Dazzle': 'Positive',
    'Marvel': 'Positive',
    'Melodic': 'Positive',
    'Mesmerizing': 'Positive',
    'Mindfulness': 'Positive',
    'Whispers of the Past': 'Neutral',
    'Winter Magic': 'Positive',
    'Wonder': 'Positive',
    'Wonderment': 'Positive',
    'Thrilling Journey': 'Positive',
    'Touched': 'Positive',
    'Triumph': 'Positive',
    'Vibrancy': 'Positive',
    'Suffering': 'Negative',
    'Suspense': 'Neutral',
    'Sympathy': 'Positive',
    'ArtisticBurst': 'Positive',
    'Amazement': 'Positive',
    'Adrenaline': 'Positive',
    'Jealousy': 'Negative',
    'Loss': 'Negative',
    'Compassion': 'Positive'
}
# Normalize both sides (trim whitespace, lowercase) so the lookup does not
# depend on the casing or padding used in the CSV.
mapping_lower = {k.lower().strip(): v for k, v in mapping.items()}
normalized_sentiment = commentaries['Sentiment'].str.strip().str.lower()

# .replace leaves values without a mapping entry untouched, so an unmapped
# label would silently survive as its own lowercase class. Report such labels
# instead of letting them slip into the training target unnoticed.
unmapped_labels = sorted(set(normalized_sentiment.dropna()) - set(mapping_lower))
if unmapped_labels:
    print("Warning: sentiment labels without a mapping (left unchanged):", unmapped_labels)

commentaries['Sentiment'] = normalized_sentiment.replace(mapping_lower)

# Persist the relabelled data so the mapping result can be inspected offline.
commentaries.to_csv('./sentimentdataset_sentimentsChanged.csv')
print("Unique values after replacement:")
print(commentaries['Sentiment'].unique())



#### Text Preprocessing

In [None]:
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Compiled once because the function is applied to every row of the dataset.
# The \b anchors and the literal dot after "www" keep the URL pattern from
# firing inside ordinary words (e.g. "awww" used to be cut down to "a").
_URL_PATTERN = re.compile(r'\bhttp\S+|\bwww\.\S+')
_APOSTROPHE_PATTERN = re.compile(r"['’]")
_NON_ALPHA_PATTERN = re.compile(r'[^a-z\s]')


def preprocess_with_nltk(text: str) -> str:
    """
    Cleans and normalizes a string:
      - lowercases
      - strips URLs
      - removes non-alpha chars (apostrophes are deleted, everything else
        becomes a space so neighbouring words are not glued together)
      - tokenizes
      - removes stopwords & short tokens (length <= 2)
      - lemmatizes
      - rejoins tokens to a cleaned string

    Args:
        text: Raw input text.

    Returns:
        The cleaned tokens joined by single spaces; an empty string when no
        token survives the filtering.
    """
    text = text.lower()
    text = _URL_PATTERN.sub(' ', text)
    # Contractions stay a single token ("don't" -> "dont"), as before.
    text = _APOSTROPHE_PATTERN.sub('', text)
    # Substituting a space instead of nothing keeps words that were separated
    # only by punctuation or digits apart ("wait...what" -> "wait what").
    text = _NON_ALPHA_PATTERN.sub(' ', text)

    tokens = word_tokenize(text)
    # Removing stopwords and short tokens -> then lemmatize
    clean_tokens = [
        lemmatizer.lemmatize(tok)
        for tok in tokens
        if tok not in stop_words and len(tok) > 2
    ]

    return ' '.join(clean_tokens)


#### Training the MultinomialNB model

In [None]:
# Missing texts are turned into empty strings first; a plain astype(str)
# would convert NaN into the literal word "nan", which would then survive
# preprocessing as a real token.
commentaries['clean_text'] = (
    commentaries['Text']
    .fillna('')
    .astype(str)
    .apply(preprocess_with_nltk)
)

# Keep a copy of the preprocessed data for inspection.
commentaries.to_csv('./preprocessed_text_and_sentiments.csv')

# Features (cleaned text) and target (three-class sentiment label).
X = commentaries['clean_text']
y = commentaries['Sentiment']

In [None]:
# Hold out 20% of the rows for evaluation. Stratifying on the label and fixing
# the seed are both requested explicitly so the split is repeatable.
split_settings = {'test_size': 0.20, 'random_state': 42, 'stratify': y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_settings)

In [None]:
# TF-IDF settings gathered in one place so they are easy to tune.
tfidf_settings = dict(
    lowercase=True,
    stop_words='english',
    max_df=0.80,
    min_df=4,
)
vect = TfidfVectorizer(**tfidf_settings)

# The vocabulary is learned from the training split only; the test split is
# merely projected onto it.
X_train_vec = vect.fit_transform(X_train)
X_test_vec = vect.transform(X_test)

In [None]:
# Multinomial Naive Bayes with default settings, fitted on the TF-IDF
# features of the training split.
clf = MultinomialNB()
clf.fit(X=X_train_vec, y=y_train)

In [None]:
# Predict labels for the held-out split, then report overall accuracy
# followed by the per-class precision/recall/F1 table.
y_pred = clf.predict(X_test_vec)

test_accuracy = accuracy_score(y_test, y_pred)
print('Test Accuracy:', test_accuracy)

per_class_report = classification_report(y_test, y_pred)
print(per_class_report)

In [None]:
def predictNextTextSentiment(text: str) -> str:
    """Predict the sentiment label of a single raw text.

    The text is run through preprocess_with_nltk before vectorizing because
    `vect` was fitted on the preprocessed 'clean_text' column; feeding raw
    text would produce features that do not match the training data.

    Args:
        text: Raw, unprocessed input text.

    Returns:
        The label predicted by `clf` for the text.
    """
    cleaned_text = preprocess_with_nltk(text)
    features = vect.transform([cleaned_text])
    return clf.predict(features)[0]

# Hand-written smoke-test sentences for the trained classifier.
examples = [
    "I absolutely loved this product!",
    "Worst experience ever. Do not buy.",
    "It was okay, not great but not bad.",
    "If you would like to die, you have really big problems...",
    "Meow, i am cat girl!",
    "Bad product, dislike. I wouldn't buy this shit again, guys : (",
    "Cool product. I love it. Thanks for developers",
]

# Print one predicted label per line, in the order of the sentences above.
for sample_text in examples:
    predicted_label = predictNextTextSentiment(sample_text)
    print(predicted_label)