In [1]:
# Cell 1: Core Imports, Global Configuration, and Initializations

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import MultiLabelBinarizer
import joblib
import os
import gc
import warnings
import subprocess

# Hugging Face specific imports
from transformers import AutoTokenizer, AutoModel
from datasets import load_dataset, Dataset, concatenate_datasets
import torch

# Silence all warnings so the cell outputs stay readable.
warnings.filterwarnings('ignore')

# --- Device and Model Configuration ---
# Use the GPU when PyTorch reports one as available, otherwise the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(f"Using device: {device}")

model_checkpoint = "distilbert-base-uncased"

# --- CRITICAL: Initialize Tokenizer and Model (Done ONCE) ---
# Both objects are module-level and are reused by the utility and prediction cells.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# eval() switches to inference mode and to() moves the weights to `device`;
# both return the module itself, so the calls can be chained.
model = AutoModel.from_pretrained(model_checkpoint).eval().to(device)
print(f"Initialized tokenizer and model: {model_checkpoint} on {device}")

# --- Label and Path Definitions ---
# Fine-grained emotion names; a name's position in this list is its label index.
goemotions_labels = [
    "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion",
    "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment",
    "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism",
    "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral"
]

# Broad classes; the integer id of each class is its position in this list.
BROAD_EMOTION_CATEGORIES = ['negative', 'neutral', 'positive']
NUM_BROAD_LABELS = len(BROAD_EMOTION_CATEGORIES)
BROAD_EMOTION_CATEGORIES_MAPPING = {
    category: class_id for class_id, category in enumerate(BROAD_EMOTION_CATEGORIES)
}


def _label_indices(*names):
    """Return the set of positions in `goemotions_labels` whose label is one of `names`."""
    wanted = set(names)
    return {position for position, label in enumerate(goemotions_labels) if label in wanted}


# Index sets that assign each fine-grained label to one broad class.
positive_indices = _label_indices(
    "admiration", "amusement", "approval", "caring", "curiosity", "desire", "excitement",
    "gratitude", "joy", "love", "optimism", "pride", "relief", "surprise",
)
negative_indices = _label_indices(
    "anger", "annoyance", "disappointment", "disapproval", "disgust", "embarrassment",
    "fear", "grief", "nervousness", "remorse", "sadness",
)
neutral_indices = _label_indices("neutral", "confusion", "realization")

# Locations of the embedding checkpoints and of the directory that receives trained models.
EMBEDDINGS_TRAIN_POOL_PATH = './data/embeddings/MentalTrain'
EMBEDDINGS_TEST_PATH = './data/embeddings/MentalTest'
MODELS_DIR = './trained_models'
os.makedirs(MODELS_DIR, exist_ok=True)

Using device: cpu
Initialized tokenizer and model: distilbert-base-uncased on cpu


In [1]:
# Cell 2: Core Utility Functions (Optimized)

def tokenize_function(examples):
    """Apply the module-level tokenizer to the batch's 'text' entries.

    The tokenizer is asked to truncate and to pad with the 'max_length'
    strategy, using max_length=512. Its result is returned unchanged.
    """
    tokenizer_options = {
        'truncation': True,
        'padding': 'max_length',
        'max_length': 512,
    }
    return tokenizer(examples["text"], **tokenizer_options)

def extract_cls_embeddings(batch):
    """Compute the first-position ([CLS]) hidden state for every row of a tokenized batch.

    Reads batch['input_ids'] and batch['attention_mask'], runs the module-level
    `model` on `device` without gradient tracking, and returns
    {'cls_embedding': <NumPy array>} holding position 0 of `last_hidden_state`.
    """
    ids, mask = (
        torch.tensor(batch[field]).to(device)
        for field in ('input_ids', 'attention_mask')
    )
    with torch.no_grad():
        hidden_states = model(input_ids=ids, attention_mask=mask).last_hidden_state
        # Keep only sequence position 0, then move to host memory for NumPy.
        first_token = hidden_states[:, 0, :]
        as_numpy = first_token.cpu().numpy()
    return {'cls_embedding': as_numpy}

def transform_to_broad_category_single(original_labels):
    """
    Collapse a 2-D array of per-label flags into one broad class id per row.

    A row is 'negative' if any column in `negative_indices` is set; otherwise
    'positive' if any column in `positive_indices` is set; otherwise 'neutral'.
    PRECEDENCE RULE: Negative (0) > Positive (2) > Neutral (1).

    Returns a 1-D integer array with one entry per input row.
    """
    class_ids = BROAD_EMOTION_CATEGORIES_MAPPING
    any_positive = original_labels[:, list(positive_indices)].any(axis=1)
    any_negative = original_labels[:, list(negative_indices)].any(axis=1)
    # The outer condition wins, which encodes "negative beats positive beats neutral".
    positive_or_neutral = np.where(any_positive, class_ids['positive'], class_ids['neutral'])
    return np.where(any_negative, class_ids['negative'], positive_or_neutral).astype(int)

print("Optimized utility functions are defined.")

Optimized utility functions are defined.


In [1]:
# Cell 3: Data Ingestion and Feature Extraction Pipeline (NEW LOGIC)

# This cell obtains the pre-computed embedding datasets.
# It loads them from disk when both checkpoint directories exist; otherwise (or when the
# local load fails) it downloads them with the Kaggle CLI and then loads them.

# --- Configuration ---
DATASET_SLUG = "kianhutchinson/mentalheathdatabase"
LOCAL_DATA_PATH = "./data/embeddings/"

train_pool_embeddings_dataset = None
test_embeddings_dataset = None

# --- Phase 1: Check if embeddings already exist locally ---
if os.path.exists(EMBEDDINGS_TRAIN_POOL_PATH) and os.path.exists(EMBEDDINGS_TEST_PATH):
    print("Phase 1: Found pre-computed embeddings locally. Loading from disk.")
    try:
        train_pool_embeddings_dataset = Dataset.load_from_disk(EMBEDDINGS_TRAIN_POOL_PATH)
        test_embeddings_dataset = Dataset.load_from_disk(EMBEDDINGS_TEST_PATH)
    except Exception as e:
        print(f"Error loading local embeddings: {e}. Attempting to re-download.")
        # Reset BOTH handles so a half-loaded pair can never leak into later cells.
        train_pool_embeddings_dataset = None
        test_embeddings_dataset = None

# --- Phase 2: If embeddings not found, download from Kaggle ---
# Either handle being unset means the pair is unusable, so check both.
if train_pool_embeddings_dataset is None or test_embeddings_dataset is None:
    print(f"Phase 2: Local embeddings not found. Attempting to download from Kaggle: {DATASET_SLUG}")
    os.makedirs(LOCAL_DATA_PATH, exist_ok=True)

    try:
        # Construct and execute the Kaggle CLI command (argument list, no shell involved).
        command = [
            "kaggle", "datasets", "download",
            "-d", DATASET_SLUG,
            "-p", LOCAL_DATA_PATH,
            "--unzip"
        ]
        print(f"\nExecuting Kaggle command: {' '.join(command)}")
        result = subprocess.run(command, check=True, capture_output=True, text=True)
        print("\n--- Kaggle CLI Output ---")
        print(result.stdout)
        print(f"Successfully downloaded and unzipped dataset to: {LOCAL_DATA_PATH}")

        # Now that it's downloaded, load it from the disk
        print("\nLoading the newly downloaded embeddings...")
        train_pool_embeddings_dataset = Dataset.load_from_disk(EMBEDDINGS_TRAIN_POOL_PATH)
        test_embeddings_dataset = Dataset.load_from_disk(EMBEDDINGS_TEST_PATH)

    except FileNotFoundError:
        print("\n--- DOWNLOAD FAILED ---")
        print("Error: The 'kaggle' command was not found. Please ensure the Kaggle library is installed ('pip install kaggle').")
    except subprocess.CalledProcessError as e:
        print("\n--- DOWNLOAD FAILED ---")
        print("The Kaggle command failed to execute.")
        print(f"Error details: {e.stderr}")
        print("Please ensure your Kaggle API token ('kaggle.json') is correctly configured.")
    except Exception as e:
        print("\n--- AN UNEXPECTED ERROR OCCURRED ---")
        print(f"Error details: {e}")


print("\n--- Data Ingestion Complete ---")
# Compare against None explicitly: truthiness of a dataset object goes through len(),
# so a successfully loaded but empty dataset would otherwise be reported as a failure.
if train_pool_embeddings_dataset is not None and test_embeddings_dataset is not None:
    print("Final training pool embeddings dataset:", train_pool_embeddings_dataset)
    print("Final test embeddings dataset:", test_embeddings_dataset)
else:
    print("FATAL: Could not load or download the required embedding datasets. Please check the errors above.")

# --- Final Memory Cleanup ---
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()


Phase 1: Found pre-computed embeddings locally. Loading from disk.



--- Data Ingestion Complete ---
Final training pool embeddings dataset: Dataset({
    features: ['labels', '__index_level_0__', 'input_ids', 'attention_mask', 'cls_embedding'],
    num_rows: 168980
})
Final test embeddings dataset: Dataset({
    features: ['labels', '__index_level_0__', 'input_ids', 'attention_mask', 'cls_embedding'],
    num_rows: 42245
})


In [1]:

# Cell 4: Data Preparation for Scikit-learn Models (MEMORY OPTIMIZED)

# Converts each dataset split to NumPy arrays one after another and deletes the
# dataset handles before the oversampling step.


def _features_and_broad_labels(split):
    """Return (float32 'cls_embedding' array, broad-category label array) for one split."""
    features = np.array(split['cls_embedding'], dtype='float32')
    raw_labels = np.array(split['labels'], dtype=int)
    return features, transform_to_broad_category_single(raw_labels)


print("\nPreparing data for scikit-learn models...")

# --- Step 1: Train/Validation Split ---
print("Splitting the training pool into final train and validation sets...")
train_val_split = train_pool_embeddings_dataset.train_test_split(test_size=0.10, seed=42)
train_dataset, validation_dataset = train_val_split['train'], train_val_split['test']

print(f"  - Final Training Set size: {len(train_dataset)}")
print(f"  - Final Validation Set size: {len(validation_dataset)}")

# --- Step 2: Process and Convert Data Sequentially for Memory Efficiency ---
# The raw label arrays live only inside the helper, so they are released on return.

print("Processing training data...")
X_train_raw, Y_train_broad = _features_and_broad_labels(train_dataset)

print("Processing validation data...")
X_val, Y_val = _features_and_broad_labels(validation_dataset)

print("Processing test data...")
X_test, Y_test = _features_and_broad_labels(test_embeddings_dataset)

# --- Step 3: Aggressive Memory Cleanup ---
# Drop the dataset handles before oversampling. Note that `train_val_split` is kept
# and still references the train/validation splits.
print("Cleaning up intermediate data objects to free memory...")
del train_pool_embeddings_dataset, test_embeddings_dataset, train_dataset, validation_dataset
gc.collect()

# --- Step 4: Oversampling (Applied ONLY to the training set) ---
print("\nClass distribution of the training set before oversampling:")
print(pd.Series(Y_train_broad).value_counts())

# Validation and test arrays are left untouched; only the training arrays are resampled.
ros = RandomOverSampler(random_state=42)
X_train, Y_train = ros.fit_resample(X_train_raw, Y_train_broad)

print("\nClass distribution of the training set after oversampling:")
print(pd.Series(Y_train).value_counts())

# --- Final Data Shapes ---
print(f"\nFinal shape of X_train (oversampled): {X_train.shape}")
print(f"Final shape of Y_train (oversampled): {Y_train.shape}")
print(f"Final shape of X_val: {X_val.shape}")
print(f"Final shape of Y_val: {Y_val.shape}")
print(f"Final shape of X_test: {X_test.shape}")
print(f"Final shape of Y_test: {Y_test.shape}")

def train_save_evaluate_model(name, model_obj, X_train_data, Y_train_data, X_val_data, Y_val_data):
    """Fit a classifier, score it on the validation data, and persist it to MODELS_DIR.

    Args:
        name: Display name; spaces become underscores in the saved file name.
        model_obj: Estimator exposing fit(X, y) and predict(X).
        X_train_data, Y_train_data: Training features and targets.
        X_val_data, Y_val_data: Validation features and targets.

    Returns:
        (model_obj, validation predictions). The fitted model is also written to
        '<MODELS_DIR>/<name>_MicroF1_<score>.pkl', where <score> is the validation
        micro-F1 formatted with four decimals.
    """
    print(f"\n--- Training {name} ---")
    model_obj.fit(X_train_data, Y_train_data)

    val_predictions = model_obj.predict(X_val_data)
    val_micro_f1 = f1_score(Y_val_data, val_predictions, average='micro')

    # The validation score is embedded in the file name so saved artifacts are self-describing.
    safe_name = name.replace(' ', '_')
    artifact_path = os.path.join(MODELS_DIR, f"{safe_name}_MicroF1_{val_micro_f1:.4f}.pkl")
    joblib.dump(model_obj, artifact_path)

    print(f"{name} Validation Metrics:")
    report = classification_report(Y_val_data, val_predictions, target_names=BROAD_EMOTION_CATEGORIES)
    print(report)
    print(f"Model saved to: {artifact_path}")

    gc.collect()
    return model_obj, val_predictions

# Registries filled by the training cells: fitted models and their validation predictions,
# both keyed by display name.
models, val_preds = {}, {}


Preparing data for scikit-learn models...
Splitting the training pool into final train and validation sets...
  - Final Training Set size: 152082
  - Final Validation Set size: 16898
Processing training data...


Processing validation data...
Processing test data...
Cleaning up intermediate data objects to free memory...

Class distribution of the training set before oversampling:
2    63295
1    49190
0    39597
Name: count, dtype: int64

Class distribution of the training set after oversampling:
2    63295
0    63295
1    63295
Name: count, dtype: int64

Final shape of X_train (oversampled): (189885, 768)
Final shape of Y_train (oversampled): (189885,)
Final shape of X_val: (16898, 768)
Final shape of Y_val: (16898,)
Final shape of X_test: (42245, 768)
Final shape of Y_test: (42245,)


In [1]:
# Cell 5.1: Model Training - Logistic Regression

# Build the estimator separately so its hyperparameters are easy to scan, then pass it
# to the shared train / evaluate / save helper.
logistic_regression_estimator = LogisticRegression(
    max_iter=2000,
    solver='saga',
    C=0.2,
    n_jobs=-1,
    random_state=42,
)
models['Logistic Regression'], val_preds['Logistic Regression'] = train_save_evaluate_model(
    'Logistic Regression', logistic_regression_estimator, X_train, Y_train, X_val, Y_val
)


--- Training Logistic Regression ---


Logistic Regression Validation Metrics:
              precision    recall  f1-score   support

    negative       0.52      0.66      0.59      4272
     neutral       0.54      0.54      0.54      5515
    positive       0.73      0.62      0.67      7111

    accuracy                           0.60     16898
   macro avg       0.60      0.61      0.60     16898
weighted avg       0.62      0.60      0.61     16898

Model saved to: ./trained_models/Logistic_Regression_MicroF1_0.6026.pkl


In [1]:
# Cell 5.2: Model Training - Linear SVM

# Build the estimator separately, then pass it to the shared train / evaluate / save helper.
linear_svm_estimator = LinearSVC(
    dual=False,
    max_iter=2000,
    C=0.1,
    random_state=42,
)
models['Linear SVM'], val_preds['Linear SVM'] = train_save_evaluate_model(
    'Linear SVM', linear_svm_estimator, X_train, Y_train, X_val, Y_val
)


--- Training Linear SVM ---
Linear SVM Validation Metrics:
              precision    recall  f1-score   support

    negative       0.52      0.68      0.59      4272
     neutral       0.54      0.52      0.53      5515
    positive       0.73      0.62      0.67      7111

    accuracy                           0.60     16898
   macro avg       0.60      0.60      0.60     16898
weighted avg       0.61      0.60      0.60     16898

Model saved to: ./trained_models/Linear_SVM_MicroF1_0.6005.pkl


In [1]:
# Cell 5.3: Model Training - XGBoost

# Build the estimator separately, then pass it to the shared train / evaluate / save helper.
xgboost_estimator = XGBClassifier(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=5,
    objective='multi:softmax',
    eval_metric='mlogloss',
    use_label_encoder=False,
    tree_method='hist',
    n_jobs=-1,
    random_state=42,
)
models['XGBoost'], val_preds['XGBoost'] = train_save_evaluate_model(
    'XGBoost', xgboost_estimator, X_train, Y_train, X_val, Y_val
)


--- Training XGBoost ---
XGBoost Validation Metrics:
              precision    recall  f1-score   support

    negative       0.55      0.67      0.61      4272
     neutral       0.55      0.55      0.55      5515
    positive       0.74      0.65      0.69      7111

    accuracy                           0.62     16898
   macro avg       0.61      0.62      0.62     16898
weighted avg       0.63      0.62      0.62     16898

Model saved to: ./trained_models/XGBoost_MicroF1_0.6205.pkl


In [1]:
# Cell 5.4: Model Training - LightGBM

# Build the estimator separately, then pass it to the shared train / evaluate / save helper.
lightgbm_estimator = LGBMClassifier(
    n_estimators=1000,
    learning_rate=0.05,
    num_leaves=31,
    objective='softmax',
    n_jobs=-1,
    random_state=42,
)
models['LightGBM'], val_preds['LightGBM'] = train_save_evaluate_model(
    'LightGBM', lightgbm_estimator, X_train, Y_train, X_val, Y_val
)


--- Training LightGBM ---
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.871324 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 195840
[LightGBM] [Info] Number of data points in the train set: 189885, number of used features: 768
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
LightGBM Validation Metrics:
              precision    recall  f1-score   support

    negative       0.56      0.67      0.61      4272
     neutral       0.55      0.55      0.55      5515
    positive       0.74      0.65      0.69      7111

    accuracy                           0.62     16898
   macro avg       0.62      0.62      0.62     16898
weighted avg       0.63      0.62      0.63     16898

Model saved to: ./trained_models/LightGBM_MicroF1_0.6240.pkl


In [1]:
# Cell 5.5: Model Training - Random Forest

# Build the estimator separately, then pass it to the shared train / evaluate / save helper.
random_forest_estimator = RandomForestClassifier(
    n_estimators=1500,
    max_depth=15,
    min_samples_leaf=3,
    n_jobs=-1,
    random_state=42,
)
models['Random Forest'], val_preds['Random Forest'] = train_save_evaluate_model(
    'Random Forest', random_forest_estimator, X_train, Y_train, X_val, Y_val
)


--- Training Random Forest ---


Random Forest Validation Metrics:
              precision    recall  f1-score   support

    negative       0.56      0.65      0.60      4272
     neutral       0.55      0.54      0.55      5515
    positive       0.72      0.66      0.69      7111

    accuracy                           0.62     16898
   macro avg       0.61      0.62      0.61     16898
weighted avg       0.63      0.62      0.62     16898

Model saved to: ./trained_models/Random_Forest_MicroF1_0.6192.pkl


In [1]:
# Cell 5.6: Model Training - CatBoost

# Build the estimator separately, then pass it to the shared train / evaluate / save helper.
catboost_estimator = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.05,
    depth=8,
    loss_function='MultiClass',
    verbose=0,
    random_seed=42,
    thread_count=-1,
)
models['CatBoost'], val_preds['CatBoost'] = train_save_evaluate_model(
    'CatBoost', catboost_estimator, X_train, Y_train, X_val, Y_val
)


--- Training CatBoost ---


CatBoost Validation Metrics:
              precision    recall  f1-score   support

    negative       0.54      0.67      0.60      4272
     neutral       0.55      0.55      0.55      5515
    positive       0.74      0.64      0.68      7111

    accuracy                           0.62     16898
   macro avg       0.61      0.62      0.61     16898
weighted avg       0.63      0.62      0.62     16898

Model saved to: ./trained_models/CatBoost_MicroF1_0.6175.pkl


In [1]:
# Cell 6: Definitive Evaluation and Champion Selection (UPDATED)

# This cell is a standalone evaluation script. It ignores any models in memory
# and instead loads ALL .pkl files from the 'trained_models' directory for a comprehensive test.
#
# SECURITY: joblib.load unpickles the files, which can execute arbitrary code. Only keep
# files you produced yourself (or otherwise trust) in MODELS_DIR.
# NOTE(review): the champion is chosen by its score on the test set, so the winner's reported
# test score is optimistically biased. Consider selecting on (X_val, Y_val) instead.

print("\n--- Loading All Saved Models for Final Evaluation ---")

# Use a new dictionary to hold the models loaded from disk
loaded_models = {}

if os.path.exists(MODELS_DIR):
    # os.listdir returns names in arbitrary order. Sorting makes the evaluation order,
    # and therefore tie-breaking in the leaderboard, reproducible between runs.
    for model_filename in sorted(os.listdir(MODELS_DIR)):
        if not model_filename.endswith(".pkl"):
            continue
        model_path = os.path.join(MODELS_DIR, model_filename)
        try:
            # The key is the full filename for explicit identification
            loaded_models[model_filename] = joblib.load(model_path)
            print(f"  Successfully loaded model: '{model_filename}'")
        except Exception as e:
            # Best effort: one unreadable file must not block evaluation of the others.
            print(f"  ERROR: Failed to load {model_filename} due to: {e}")
else:
    print(f"Warning: Models directory '{MODELS_DIR}' not found. Cannot load any models.")

# Proceed only if models were successfully loaded.
if not loaded_models:
    print("\nFATAL: No models available to evaluate. Please run the training cells first.")
else:
    print("\n--- Evaluating All Models on the Final Test Set ---")

    test_results = {}

    for name, model_instance in loaded_models.items():
        print(f"\n--- Final Evaluation for: {name} ---")
        # Flatten so estimators that return an (n, 1) column of class ids
        # (e.g. CatBoost multiclass) are scored exactly like those returning (n,).
        Y_test_pred = np.ravel(model_instance.predict(X_test))
        accuracy = accuracy_score(Y_test, Y_test_pred)
        micro_f1 = f1_score(Y_test, Y_test_pred, average='micro')
        test_results[name] = {'accuracy': accuracy, 'micro_f1': micro_f1}

        print(classification_report(Y_test, Y_test_pred, target_names=BROAD_EMOTION_CATEGORIES, zero_division=0))

    # Determine the best model and print the final summary list.
    if test_results:
        # sorted() is stable, so models with equal micro-F1 keep their (alphabetical) load order.
        final_summary = sorted(test_results.items(), key=lambda item: item[1]['micro_f1'], reverse=True)

        print("\n" + "="*75)
        print("FINAL MODEL PERFORMANCE LEADERBOARD (ON TEST SET)".center(75))
        print("="*75)

        for model_name, scores in final_summary:
            print(f"  - Model: {model_name:<50} | F1 Score: {scores['micro_f1']:.4f} | Accuracy: {scores['accuracy']:.4f}")

        # Announce the best performing model
        best_model_name, best_model_scores = final_summary[0]

        print("\n" + "*"*75)
        print(f"OVERALL BEST MODEL: \"{best_model_name}\"".center(75))
        print(f"(With F1 Score: {best_model_scores['micro_f1']:.4f} and Accuracy: {best_model_scores['accuracy']:.4f})".center(75))
        print("*"*75)

        # Publish the winner for the inference cell (Cell 7 reads `final_model` and `best_model_name`).
        final_model = loaded_models[best_model_name]
        print(f"\nSuccessfully loaded champion model '{best_model_name}' for prediction.")
    else:
        print("\nEvaluation could not be completed as no models were successfully tested.")



--- Loading All Saved Models for Final Evaluation ---
  Successfully loaded model: 'XGBoost_MicroF1_0.6205.pkl'
  Successfully loaded model: 'CatBoost_MicroF1_0.6175.pkl'
  Successfully loaded model: 'Random_Forest_MicroF1_0.6192.pkl'
  Successfully loaded model: 'Linear_SVM_MicroF1_0.6005.pkl'
  Successfully loaded model: 'LightGBM_MicroF1_0.6240.pkl'
  Successfully loaded model: 'Logistic_Regression_MicroF1_0.6026.pkl'

--- Evaluating All Models on the Final Test Set ---

--- Final Evaluation for: XGBoost_MicroF1_0.6205.pkl ---
              precision    recall  f1-score   support

    negative       0.55      0.67      0.60     10929
     neutral       0.55      0.55      0.55     13794
    positive       0.73      0.64      0.68     17522

    accuracy                           0.62     42245
   macro avg       0.61      0.62      0.61     42245
weighted avg       0.63      0.62      0.62     42245


--- Final Evaluation for: CatBoost_MicroF1_0.6175.pkl ---
              precision

In [1]:
# Cell 7: Prediction Function for New Text Data (Optimized)

# This function now uses the 'final_model' object that was loaded into memory in the previous cell.
# This is far more efficient than loading the model from disk on every single call.

def predict_emotion(text: str):
    """
    Predicts a single broad emotion category for a given text using the best trained model.

    The text is tokenized (truncated to at most 512 tokens) and passed through the
    module-level Transformer `model`. The hidden state at sequence position 0 is then
    classified by the module-level `final_model`.

    Args:
        text: Raw input text to classify.

    Returns:
        A tuple (predicted_emotion_name, predicted_class_index). The name is taken from
        BROAD_EMOTION_CATEGORIES and the index is a plain Python int.

    Raises:
        RuntimeError: If the global 'final_model' has not been set (Cell 6 was not run).
    """
    # Ensure the global 'final_model' has been loaded
    if 'final_model' not in globals():
        raise RuntimeError("The 'final_model' is not loaded. Please run the evaluation cell (Cell 6) first.")

    # The main Transformer model needs to be in evaluation mode
    model.eval()

    # Tokenize the input text and move to the correct device
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Get the embedding from the Transformer model
    with torch.no_grad():
        outputs = model(**inputs)
        text_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()

    # Use the pre-loaded champion classifier to predict.
    # Classifiers differ in output shape: most return (n,), but some (e.g. CatBoost with a
    # multiclass loss) return an (n, 1) column. Flatten first and convert to a built-in int
    # so the value can index a Python list whichever model won.
    raw_prediction = final_model.predict(text_embedding)
    predicted_class_index = int(np.asarray(raw_prediction).reshape(-1)[0])
    predicted_emotion_name = BROAD_EMOTION_CATEGORIES[predicted_class_index]

    return predicted_emotion_name, predicted_class_index

print("Optimized prediction function is ready.")

# --- Demonstration ---
# Run the examples only when Cell 6 has published both the champion classifier and its name.
# An explicit check replaces the former `except NameError`, so a genuine NameError raised
# inside predict_emotion is no longer swallowed and misreported as "model not set".
if 'final_model' in globals() and 'best_model_name' in globals():
    print(f"\n--- Demonstrating Predictions with the Best Model: {best_model_name} ---")

    texts_to_predict = [
        "I am absolutely thrilled with the results, feeling pure joy and excitement!",
        "This is quite disappointing, I had hoped for a better outcome, feeling a bit sad.",
        "The instructions for this assembly are confusing, I'm just utterly lost.",
        "I feel nothing. Completely devoid of any emotion."
    ]

    for text in texts_to_predict:
        name, index = predict_emotion(text)
        print(f'\nText: "{text}"')
        print(f'--> Predicted Emotion: {name} (index: {index})')
else:
    print("\nCould not run prediction examples because the 'final_model' was not set. Please run Cell 6 first.")

Optimized prediction function is ready.

--- Demonstrating Predictions with the Best Model: LightGBM_MicroF1_0.6240.pkl ---

Text: "I am absolutely thrilled with the results, feeling pure joy and excitement!"
--> Predicted Emotion: positive (index: 2)

Text: "This is quite disappointing, I had hoped for a better outcome, feeling a bit sad."
--> Predicted Emotion: negative (index: 0)

Text: "The instructions for this assembly are confusing, I'm just utterly lost."
--> Predicted Emotion: negative (index: 0)

Text: "I feel nothing. Completely devoid of any emotion."
--> Predicted Emotion: neutral (index: 1)
