In [14]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import emoji

In [15]:
# Toy labelled corpus: each entry pairs a short message with its emotion label.
labelled_messages = [
    ("I am so happy today!", "happy"),
    ("I am feeling sad", "sad"),
    ("This is amazing!", "happy"),
    ("I hate everything!", "angry"),
    ("I am excited for the trip!", "happy"),
    ("I am crying", "sad"),
    ("I am so angry right now!", "angry"),
    ("I love this!", "happy"),
    ("This is frustrating!", "angry"),
    ("I am feeling joyful!", "happy"),
]

# Column-oriented view of the same pairs, in the shape pandas expects.
data = {
    'text': [message for message, _ in labelled_messages],
    'emotion': [label for _, label in labelled_messages],
}

df = pd.DataFrame(data)
print(df.head())

                         text emotion
0        I am so happy today!   happy
1            I am feeling sad     sad
2            This is amazing!   happy
3          I hate everything!   angry
4  I am excited for the trip!   happy


In [16]:
# Lookup table from emotion label to the emoji that represents it.
emoji_dict = dict(
    happy="😊",
    sad="😢",
    angry="😡",
)

# Translate every row's emotion label into its emoji and store it as a new column.
emotion_labels = df['emotion']
df['emoji'] = emotion_labels.map(emoji_dict)
print(df[['text', 'emoji']])

                         text emoji
0        I am so happy today!     😊
1            I am feeling sad     😢
2            This is amazing!     😊
3          I hate everything!     😡
4  I am excited for the trip!     😊
5                 I am crying     😢
6    I am so angry right now!     😡
7                I love this!     😊
8        This is frustrating!     😡
9        I am feeling joyful!     😊


In [17]:
# Make sure the NLTK stop-word corpus is available, then keep the English
# list as a set so membership checks during cleaning are cheap.
nltk.download('stopwords')
english_stopword_list = stopwords.words("english")
stop_words = set(english_stopword_list)

def clean_text(text, stopword_set=None):
    """Normalise a raw message into a space-separated string of content words.

    Steps, in order: remove URLs, remove @mentions and '#' signs, drop any
    remaining punctuation, lower-case, then filter out stop words.

    Args:
        text: The raw message string.
        stopword_set: Optional collection of lower-case words to drop. When
            omitted, the notebook-level ``stop_words`` set is used.

    Returns:
        The cleaned text. May be an empty string if nothing survives.
    """
    if stopword_set is None:
        stopword_set = stop_words

    # "http\S+" already covers "https...", so no separate alternative is needed.
    text = re.sub(r"http\S+|www\S+", '', text)
    # Remove whole @mentions (the '@' plus the handle) and bare '#' signs;
    # the word after a '#' is kept.
    text = re.sub(r'@\w+|#', '', text)
    # Drop every character that is neither a word character nor whitespace.
    text = re.sub(r'[^\w\s]', '', text)
    text = text.lower()
    # split() with no argument also collapses the gaps left by the removals.
    kept_words = [word for word in text.split() if word not in stopword_set]
    return ' '.join(kept_words)

# Clean every message and keep the result next to the raw text for comparison.
raw_messages = df['text']
df['clean_text'] = raw_messages.map(clean_text)
print(df[['text', 'clean_text']])

                         text       clean_text
0        I am so happy today!      happy today
1            I am feeling sad      feeling sad
2            This is amazing!          amazing
3          I hate everything!  hate everything
4  I am excited for the trip!     excited trip
5                 I am crying           crying
6    I am so angry right now!      angry right
7                I love this!             love
8        This is frustrating!      frustrating
9        I am feeling joyful!   feeling joyful


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [18]:
# Turn the cleaned messages into TF-IDF features, keeping at most 500 terms.
vectorizer = TfidfVectorizer(max_features=500)
tfidf_matrix = vectorizer.fit_transform(df['clean_text'])

# Dense feature array for the classifier, and the emotion labels as targets.
X = tfidf_matrix.toarray()
y = df['emotion']

In [19]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Fit a multinomial Naive Bayes classifier on the training rows
# (fit() returns the estimator itself), then label the held-out rows.
nb_model = MultinomialNB().fit(X_train, y_train)
nb_predictions = nb_model.predict(X_test)

In [21]:
# Fraction of held-out rows whose predicted label matches the true label.
nb_accuracy = accuracy_score(y_test, nb_predictions)
print("Naïve Bayes Accuracy:", nb_accuracy)

# Precision/recall are undefined for a class that has no predicted (or no true)
# samples in the held-out rows. zero_division=0 reports those entries as 0.0
# without emitting an UndefinedMetricWarning for each of them.
print(classification_report(y_test, nb_predictions, zero_division=0))

Naïve Bayes Accuracy: 0.0
              precision    recall  f1-score   support

       angry       0.00      0.00      0.00       1.0
       happy       0.00      0.00      0.00       0.0
         sad       0.00      0.00      0.00       1.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [22]:
def predict_emoji(text):
    """Return the emoji for the emotion the trained classifier assigns to ``text``.

    The message goes through ``clean_text``, is vectorised with the fitted
    ``vectorizer``, and is classified by ``nb_model``; the predicted label is
    then looked up in ``emoji_dict``.

    Args:
        text: The raw message to classify.

    Returns:
        The emoji string mapped to the predicted emotion label.

    Raises:
        KeyError: If the predicted label has no entry in ``emoji_dict``.
    """
    cleaned_message = clean_text(text)
    # transform() expects an iterable of documents, hence the one-element list.
    features = vectorizer.transform([cleaned_message]).toarray()
    predicted_labels = nb_model.predict(features)
    predicted_emotion = predicted_labels[0]
    return emoji_dict[predicted_emotion]

# Ask for a message interactively and show the emoji predicted for it.
user_text = input("Enter a message: ")
predicted_emoji = predict_emoji(user_text)
print("Predicted Emoji:", predicted_emoji)

Enter a message: I am happy today
Predicted Emoji: 😊
