In [1]:
# Step 1: Import libraries
import pandas as pd

# Step 2: Load the dataset
# Make sure train.txt is in your notebook folder
train_data = pd.read_csv("train.txt", sep=";", names=["text", "emotion"])

# Step 3: Quick look at the data
print("First 5 rows of the dataset:")
print(train_data.head())

print("\nCount of each emotion:")
print(train_data['emotion'].value_counts())

# Step 4: Basic cleaning (optional for small dataset)
train_data = train_data.dropna()          # Remove missing values
train_data['text'] = train_data['text'].str.lower()   # Lowercase all text

print("\nDataset after cleaning:")
print(train_data.head())

First 5 rows of the dataset:
                                                text  emotion
0                            i didnt feel humiliated  sadness
1  i can go from feeling so hopeless to so damned...  sadness
2   im grabbing a minute to post i feel greedy wrong    anger
3  i am ever feeling nostalgic about the fireplac...     love
4                               i am feeling grouchy    anger

Count of each emotion:
emotion
joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: count, dtype: int64

Dataset after cleaning:
                                                text  emotion
0                            i didnt feel humiliated  sadness
1  i can go from feeling so hopeless to so damned...  sadness
2   im grabbing a minute to post i feel greedy wrong    anger
3  i am ever feeling nostalgic about the fireplac...     love
4                               i am feeling grouchy    anger


In [2]:
# Step 2: Import required libraries
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

# Step 2a: Split data into features (X) and labels (y)
X = train_data['text']       # Text column
y = train_data['emotion']    # Emotion labels

# Step 2b: Split into training and validation set (optional small validation)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

# Step 2c: Convert text into numbers using CountVectorizer
vectorizer = CountVectorizer()       # Bag-of-words representation
X_train_vect = vectorizer.fit_transform(X_train)
X_val_vect = vectorizer.transform(X_val)

# Step 2d: Check shapes
print("Shape of training data:", X_train_vect.shape)
print("Shape of validation data:", X_val_vect.shape)

Shape of training data: (14400, 14319)
Shape of validation data: (1600, 14319)


In [6]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

model = MultinomialNB()
model.fit(X_train_vect, y_train)
y_pred = model.predict(X_val_vect)

print("Accuracy:", accuracy_score(y_val, y_pred))
print(classification_report(y_val, y_pred))

Accuracy: 0.75
              precision    recall  f1-score   support

       anger       0.91      0.59      0.72       217
        fear       0.83      0.53      0.65       174
         joy       0.69      0.96      0.80       503
        love       0.89      0.21      0.35       159
     sadness       0.76      0.94      0.84       494
    surprise       1.00      0.02      0.04        53

    accuracy                           0.75      1600
   macro avg       0.85      0.54      0.56      1600
weighted avg       0.79      0.75      0.71      1600



In [7]:
# Step 4: Input Box + Auto Save Predictions
# ===============================

import ipywidgets as widgets
from IPython.display import display
import pandas as pd
import warnings
from fpdf import FPDF
import os

warnings.filterwarnings('ignore')

# Output area
output = widgets.Output()

# Storage file path
csv_filename = "emotion_predictions.csv"
pdf_filename = "emotion_predictions.pdf"

# Initialize results list
results = []

# Load previous data if exists
if os.path.exists(csv_filename):
    old_data = pd.read_csv(csv_filename)
    results = old_data.to_dict(orient='records')

# ===============================
# Prediction Function (uses model + vectorizer)
# ===============================
def predict_emotion(text):
    text = [text.lower()]
    text_vect = vectorizer.transform(text)
    prediction = model.predict(text_vect)
    return prediction[0]

# ===============================
# On Predict Button Click
# ===============================
def on_button_click(b):
    sentence = text_area.value.strip()
    if sentence == "":
        return

    emotion = predict_emotion(sentence)
    results.append({"Sentence": sentence, "Predicted Emotion": emotion})

    # Convert to DataFrame
    df_results = pd.DataFrame(results)

    # Auto-save to CSV
    df_results.to_csv(csv_filename, index=False)

    # Also auto-save to PDF (overwrite for simplicity)
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.cell(200, 10, txt="Emotion Predictions", ln=True, align='C')
    pdf.ln(10)

    for idx, row in df_results.iterrows():
        text_line = f"Sentence: {row['Sentence']}\nPredicted Emotion: {row['Predicted Emotion']}\n"
        pdf.multi_cell(0, 10, txt=text_line)
        pdf.ln(2)

    pdf.output(pdf_filename)

    # Display DataFrame output
    with output:
        output.clear_output(wait=True)
        display(df_results)
        print(f"✅ Prediction saved automatically to {csv_filename} and {pdf_filename}")

# ===============================
# Widgets
# ===============================
text_area = widgets.Textarea(
    value='',
    placeholder='Type your sentence here',
    description='Input:',
    layout=widgets.Layout(width='70%', height='100px')
)

predict_button = widgets.Button(
    description="Predict Emotion",
    button_style='success',
    tooltip="Click to predict and auto-save result"
)
predict_button.on_click(on_button_click)

# ===============================
# Display
# ===============================
display(text_area, predict_button, output)

Textarea(value='', description='Input:', layout=Layout(height='100px', width='70%'), placeholder='Type your se…

Button(button_style='success', description='Predict Emotion', style=ButtonStyle(), tooltip='Click to predict a…

Output()