In [2]:
import pandas as pd

df = pd.read_csv("dev_sent_emo.csv")
import pandas as pd

# Assuming your dataframe is called df
df = df[["Utterance", "Emotion", "Sentiment"]]

df


Unnamed: 0,Utterance,Emotion,Sentiment
0,"Oh my God, hes lost it. Hes totally lost it.",sadness,negative
1,What?,surprise,negative
2,"Or! Or, we could go to the bank, close our acc...",neutral,neutral
3,Youre a genius!,joy,positive
4,"Aww, man, now we wont be bank buddies!",sadness,negative
...,...,...,...
1104,No.,sadness,negative
1105,What? Oh my God! Im gonna miss you so much!,sadness,negative
1106,Im gonna miss you!,sadness,negative
1107,I mean its the end of an era!,sadness,negative


## Training for Dataset (MELD) - Bert Model (text classifier)

In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from sklearn.metrics import accuracy_score, classification_report

# 1. Load MELD data
df = pd.read_csv("dev_sent_emo.csv")
df = df[["Utterance", "Emotion"]]

# 2. Load a proper classification model
model_name = "bhadresh-savani/bert-base-uncased-emotion"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# 3. Create pipeline
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, device=-1)

# 4. Define label mapping from model → MELD
label_map = {
    "happy": "joy",
    "sad": "sadness",
    "angry": "anger",
    "fear": "fear",
    "disgust": "disgust",
    "surprise": "surprise",
    "neutral": "neutral"  # some models won’t predict neutral, so we’ll handle later
}

# 5. Run predictions
preds = []
for utt in df["Utterance"].tolist():
    result = classifier(utt)[0]
    pred = result["label"].lower()
    mapped_pred = label_map.get(pred, "neutral")  # default to neutral if not in mapping
    preds.append(mapped_pred)

# 6. Compare with true labels
true = df["Emotion"].str.lower().tolist()
accuracy = accuracy_score(true, preds)

print("Accuracy:", accuracy)
print("\nDetailed Report:\n", classification_report(true, preds))

# # Encode emotion labels (e.g., joy -> 0, sadness -> 1, etc.)
# label_encoder = LabelEncoder()
# y_true = label_encoder.fit_transform(df["Emotion"])


Device set to use cpu


Accuracy: 0.38954012623985573

Detailed Report:
               precision    recall  f1-score   support

       anger       0.00      0.00      0.00       153
     disgust       0.00      0.00      0.00        22
        fear       0.06      0.15      0.08        40
         joy       0.00      0.00      0.00       163
     neutral       0.42      0.90      0.58       470
     sadness       0.00      0.00      0.00       111
    surprise       0.50      0.02      0.04       150

    accuracy                           0.39      1109
   macro avg       0.14      0.15      0.10      1109
weighted avg       0.25      0.39      0.25      1109



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Training for Dataset (MELD) - RNNDialogue

In [113]:
import sys
import pandas as pd
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# --- Make sure the directory that contains model.py is on sys.path ---
# If your repo structure is: <project_root>/DialogueRNN/model.py
# then add <project_root>/DialogueRNN to the path (adjust as needed)
sys.path.append("DialogueRNN")

# ✅ Import the class itself (not "model")
try:
    from model import DialogueRNN   # when model.py is inside "DialogueRNN/" folder
except ImportError:
    from DialogueRNN.model import DialogueRNN  # alt path if package-installed

# --- Data ---
df = pd.read_csv("dev_sent_emo.csv")[["Utterance", "Emotion"]]

# --- BERT embeddings (CLS) ---
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
bert_model = AutoModel.from_pretrained("bert-base-uncased")

def get_embeddings(sentences):
    inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = bert_model(**inputs)
    return outputs.last_hidden_state[:, 0, :]   # (batch, 768)

embeddings = get_embeddings(df["Utterance"].tolist())  # (N, 768)

# --- Labels ---
label_encoder = LabelEncoder()
y_true = torch.tensor(label_encoder.fit_transform(df["Emotion"]), dtype=torch.long)

# --- Instantiate DialogueRNN (constructor args go here) ---
dialogue_rnn_model = DialogueRNN(
    D_m=768,          # input dim = BERT
    D_g=150,
    D_p=150,
    D_e=100,          # emotion state dim
    listener_state=False,
    context_attention="simple",
    D_a=100,
    dropout=0.5
)
dialogue_rnn_model.eval()

# --- Prepare inputs for forward(U, qmask) ---
U = embeddings.unsqueeze(1)             # (seq_len=N, batch=1, D_m=768)
qmask = torch.ones(U.size(0), 1, 1)     # (seq_len, batch=1, num_speakers=1)

# --- Forward: DialogueRNN returns hidden emotion states, not class logits ---
with torch.no_grad():
    e, _ = dialogue_rnn_model(U, qmask)   # e: (seq_len, 1, D_e)

# --- Classifier head on top of e (D_e -> n_classes) ---
n_classes = len(label_encoder.classes_)
classifier = nn.Linear(e.size(-1), n_classes)

# (Optional) quick training of the head so predictions aren’t random
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=1e-3)

epochs = 10
for epoch in range(epochs):
    optimizer.zero_grad()
    logits = classifier(e.squeeze(1))          # (seq_len, n_classes)
    loss = criterion(logits, y_true)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/{epochs} - loss: {loss.item():.4f}")

# --- Evaluate ---
with torch.no_grad():
    logits = classifier(e.squeeze(1))
    y_pred = torch.argmax(logits, dim=1).cpu().numpy()

print("Accuracy:", accuracy_score(y_true.numpy(), y_pred))
print(classification_report(y_true.numpy(), y_pred, target_names=label_encoder.classes_))


Epoch 1/10 - loss: 1.9470
Epoch 2/10 - loss: 1.9276
Epoch 3/10 - loss: 1.9089
Epoch 4/10 - loss: 1.8911
Epoch 5/10 - loss: 1.8741
Epoch 6/10 - loss: 1.8579
Epoch 7/10 - loss: 1.8425
Epoch 8/10 - loss: 1.8280
Epoch 9/10 - loss: 1.8142
Epoch 10/10 - loss: 1.8012
Accuracy: 0.4238052299368801
              precision    recall  f1-score   support

       anger       0.00      0.00      0.00       153
     disgust       0.00      0.00      0.00        22
        fear       0.00      0.00      0.00        40
         joy       0.00      0.00      0.00       163
     neutral       0.42      1.00      0.60       470
     sadness       0.00      0.00      0.00       111
    surprise       0.00      0.00      0.00       150

    accuracy                           0.42      1109
   macro avg       0.06      0.14      0.09      1109
weighted avg       0.18      0.42      0.25      1109



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [120]:
import sys
from pathlib import Path

sys.path.append("DialogueGCN-mianzhang/dgcn")  # add the package directory itself
from model.DialogueGCN import DialogueGCN

print(DialogueGCN)


ModuleNotFoundError: No module named 'torch_geometric'