In [1]:
# Step 1: Import libraries
import pandas as pd

# Step 2: Load the dataset
# train.txt must sit in the notebook's working directory. It is parsed as
# ";"-separated with no header row; the two columns are named here.
train_data = pd.read_csv("train.txt", names=["text", "emotion"], sep=";")

# Step 3: Quick look at the data (taken before any cleaning)
print("First 5 rows of the dataset:")
print(train_data.head())

print("\nCount of each emotion:")
print(train_data["emotion"].value_counts())

# Step 4: Basic cleaning (optional for small dataset)
# One chained expression: drop rows with missing values, then lowercase the
# text column. assign() returns a new frame instead of writing into a column.
train_data = train_data.dropna().assign(
    text=lambda frame: frame["text"].str.lower()
)

print("\nDataset after cleaning:")
print(train_data.head())

First 5 rows of the dataset:
                                                text  emotion
0                            i didnt feel humiliated  sadness
1  i can go from feeling so hopeless to so damned...  sadness
2   im grabbing a minute to post i feel greedy wrong    anger
3  i am ever feeling nostalgic about the fireplac...     love
4                               i am feeling grouchy    anger

Count of each emotion:
emotion
joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: count, dtype: int64

Dataset after cleaning:
                                                text  emotion
0                            i didnt feel humiliated  sadness
1  i can go from feeling so hopeless to so damned...  sadness
2   im grabbing a minute to post i feel greedy wrong    anger
3  i am ever feeling nostalgic about the fireplac...     love
4                               i am feeling grouchy    anger


In [2]:
# Step 2: Import required libraries
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

# Step 2a: Separate the model input (raw text) from the target (emotion label)
X, y = train_data['text'], train_data['emotion']

# Step 2b: Hold out 10% of the rows for validation; the fixed random_state
# makes the split repeatable between runs.
# NOTE(review): the split is not stratified, and the emotion counts printed
# when the data was loaded are imbalanced - consider stratify=y.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Step 2c: Bag-of-words counts. The vocabulary is learned from the training
# text only; the validation text is encoded with that same vocabulary.
vectorizer = CountVectorizer()
X_train_vect = vectorizer.fit_transform(X_train)
X_val_vect = vectorizer.transform(X_val)

# Step 2d: Show the dimensions of both feature matrices
print("Shape of training data:", X_train_vect.shape)
print("Shape of validation data:", X_val_vect.shape)

Shape of training data: (14400, 14319)
Shape of validation data: (1600, 14319)


In [3]:
# Step 3: Import ML model and metrics
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Step 3a: Create the classifier (solver capped at 200 iterations)
model = LogisticRegression(max_iter=200)

# Step 3b: Fit it on the vectorized training text and its labels
model.fit(X_train_vect, y_train)

# Step 3c: Label every sample in the validation set
y_pred = model.predict(X_val_vect)

# Step 3d: Compute the metrics first, then print them -
# overall accuracy followed by the per-class report
val_accuracy = accuracy_score(y_val, y_pred)
val_report = classification_report(y_val, y_pred)

print("Accuracy on validation set:", val_accuracy)
print("\nClassification Report:\n", val_report)

Accuracy on validation set: 0.895

Classification Report:
               precision    recall  f1-score   support

       anger       0.91      0.89      0.90       217
        fear       0.84      0.83      0.84       174
         joy       0.90      0.94      0.92       503
        love       0.84      0.84      0.84       159
     sadness       0.93      0.92      0.92       494
    surprise       0.82      0.70      0.76        53

    accuracy                           0.90      1600
   macro avg       0.87      0.85      0.86      1600
weighted avg       0.89      0.90      0.89      1600



In [4]:
# Step 4: Multi-line text input + store and save predictions to PDF
import ipywidgets as widgets
from IPython.display import display
import pandas as pd
import warnings
!pip install fpdf
from fpdf import FPDF  # Make sure fpdf is installed: pip install fpdf

warnings.filterwarnings('ignore')

# Create output widget
output = widgets.Output()

# List to store results
results = []

# Function to predict emotion
def predict_emotion(text):
    """Return the predicted emotion label for a single piece of text.

    The text is lowercased, wrapped in a one-element list, encoded with the
    notebook-level ``vectorizer`` and classified by the notebook-level
    ``model``; the first (only) prediction is returned.
    """
    return model.predict(vectorizer.transform([text.lower()]))[0]

# Function to handle prediction button click
def on_button_click(b):
    """Click handler for the "Predict Emotion" button.

    Reads the text area, ignores blank input, records the prediction in the
    notebook-level ``results`` list and redraws the full results table in the
    ``output`` widget. ``b`` is the argument supplied by ``on_click`` and is
    not used.
    """
    typed = text_area.value.strip()
    if not typed:
        # Nothing but whitespace was entered - leave results and display untouched
        return

    # Remember this prediction alongside the earlier ones
    results.append({"Sentence": typed, "Predicted Emotion": predict_emotion(typed)})

    # Swap the previously shown table for one that includes the new row
    with output:
        output.clear_output(wait=True)
        display(pd.DataFrame(results))

# Function to save results to PDF
def save_to_pdf(b):
    """Click handler for the "Save to PDF" button.

    Writes every entry of the notebook-level ``results`` list to
    ``emotion_predictions.pdf`` in the current working directory and reports
    the outcome inside the ``output`` widget. ``b`` is the argument supplied
    by ``on_click`` and is not used.

    Characters that cannot be encoded as Latin-1 (emoji, curly quotes, ...)
    are written as ``?``: FPDF's built-in fonts only cover Latin-1, and
    passing such characters through would abort the export.
    """
    if not results:
        with output:
            print("No results to save!")
        return

    def latin1_safe(value):
        # Swap anything outside Latin-1 for "?" so the core font can render it
        return str(value).encode("latin-1", "replace").decode("latin-1")

    # Create PDF
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    # Width 0 stretches the cell to the right margin, so align='C' centres the
    # title between the margins (a fixed width of 200 pushed it off-centre).
    pdf.cell(0, 10, txt="Emotion Predictions", ln=True, align='C')
    pdf.ln(10)

    # Add one block per stored prediction
    for entry in results:
        text = (
            f"Sentence: {latin1_safe(entry['Sentence'])}\n"
            f"Predicted Emotion: {latin1_safe(entry['Predicted Emotion'])}\n"
        )
        pdf.multi_cell(0, 10, txt=text)
        pdf.ln(2)

    # Save PDF; an exception raised inside a widget callback is easy to miss,
    # so a failed write is reported in the output area instead.
    try:
        pdf.output("emotion_predictions.pdf")
    except OSError as exc:
        with output:
            print(f"Could not save emotion_predictions.pdf: {exc}")
        return

    with output:
        print("Saved results to emotion_predictions.pdf successfully!")

# Multi-line input box
text_area = widgets.Textarea(
    layout=widgets.Layout(height='100px', width='70%'),
    description='Input:',
    placeholder='Type your sentence(s) here',
    value='',
)

# Create both action buttons first, then attach each one to its click handler
submit_button = widgets.Button(description="Predict Emotion")
save_button = widgets.Button(description="Save to PDF")

submit_button.on_click(on_button_click)
save_button.on_click(save_to_pdf)

# Render the controls in order: input box, the two buttons, then the output area
display(text_area, submit_button, save_button, output)



Textarea(value='', description='Input:', layout=Layout(height='100px', width='70%'), placeholder='Type your se…

Button(description='Predict Emotion', style=ButtonStyle())

Button(description='Save to PDF', style=ButtonStyle())

Output()