In [None]:
import os
import pandas as pd

# Show what is currently in the Colab workspace so the filenames below can be checked
print(os.listdir('/content'))

# Locations of the three splits; edit them if the listing above shows different names
train_path, dev_path, test_path = (
    '/content/train_sent_emo_cleaned_processed.csv',
    '/content/dev_sent_emo_cleaned_processed.csv',
    '/content/test_sent_emo_cleaned_processed.csv',
)

# Read every split into its own DataFrame (train first, then dev, then test)
train_df, dev_df, test_df = map(pd.read_csv, (train_path, dev_path, test_path))

# Report (rows, columns) per split, then preview the first training rows
print("Train shape:", train_df.shape)
print("Dev   shape:", dev_df.shape)
print("Test  shape:", test_df.shape)
train_df.head()


['.config', 'train_sent_emo_cleaned_processed.csv', 'drive', 'dev_with_context.csv', 'test_with_context.csv', 'train_with_context.csv', 'test_sent_emo_cleaned_processed.csv', 'dev_sent_emo_cleaned_processed.csv', 'sample_data']
Train shape: (9988, 11)
Dev   shape: (1108, 11)
Test  shape: (2610, 11)


Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime
0,1,also I was the point person on my company’s tr...,Chandler,neutral,neutral,0,0,8,21,"00:16:16,059","00:16:21,731"
1,2,You must’ve had your hands full.,The Interviewer,neutral,neutral,0,1,8,21,"00:16:21,940","00:16:23,442"
2,3,That I did. That I did.,Chandler,neutral,neutral,0,2,8,21,"00:16:23,442","00:16:26,389"
3,4,So let’s talk a little bit about your duties.,The Interviewer,neutral,neutral,0,3,8,21,"00:16:26,820","00:16:29,572"
4,5,My duties? All right.,Chandler,surprise,positive,0,4,8,21,"00:16:34,452","00:16:40,917"


In [None]:
import pandas as pd

# MELD CSV locations, given relative to the current working directory
# (the files must be present there for the reads below to succeed)
train_path, dev_path, test_path = (
    'train_sent_emo_cleaned_processed.csv',
    'dev_sent_emo_cleaned_processed.csv',
    'test_sent_emo_cleaned_processed.csv',
)

# Read the three splits in train / dev / test order
train_df, dev_df, test_df = [
    pd.read_csv(csv_path) for csv_path in (train_path, dev_path, test_path)
]

def construct_input_speaker_window(dialogue_df: pd.DataFrame, sep: str = "") -> list:
    """
    For a single dialogue, construct BERT-ERC input strings using a speaker-specific context window.

    Each utterance is rendered as ``<s> {speaker} <mask> says: {utterance} </s>``. If the same
    speaker has an earlier turn in the dialogue, the closest one is placed in front as
    ``{speaker} says: {utterance}``; if the same speaker has a later turn, the closest one is
    placed behind in the same form. Turns by other speakers in between are ignored.

    Parameters
    ----------
    dialogue_df : pd.DataFrame
        Rows of one dialogue. Must contain the columns 'Utterance_ID', 'Speaker' and
        'Utterance'. The frame is not modified.
    sep : str, default ""
        Text inserted between the previous-turn, target and next-turn parts. The default
        concatenates them with nothing in between; pass e.g. ``" [SEP] "`` to delimit
        the parts explicitly.

    Returns
    -------
    list of str
        One string per row, ordered by ascending 'Utterance_ID' (not by the row order
        of ``dialogue_df``).

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # Sort utterances by their order
    ordered = dialogue_df.sort_values('Utterance_ID').reset_index(drop=True)
    speakers = ordered['Speaker'].tolist()
    utterances = ordered['Utterance'].tolist()

    # Link every turn to the nearest earlier/later turn of the same speaker in a single
    # pass, remembering only the most recent position seen for each speaker.
    total = len(speakers)
    prev_same = [None] * total
    next_same = [None] * total
    last_seen = {}
    for pos, spk in enumerate(speakers):
        earlier = last_seen.get(spk)
        if earlier is not None:
            prev_same[pos] = earlier
            next_same[earlier] = pos
        last_seen[spk] = pos

    bert_inputs = []
    for pos, (spk, utt) in enumerate(zip(speakers, utterances)):
        parts = []
        # Previous same-speaker turn
        if prev_same[pos] is not None:
            parts.append(f"{spk} says: {utterances[prev_same[pos]]}")
        # Target (query) utterance
        parts.append(f"<s> {spk} <mask> says: {utt} </s>")
        # Next same-speaker turn
        if next_same[pos] is not None:
            parts.append(f"{spk} says: {utterances[next_same[pos]]}")

        # Join the parts with `sep` (empty by default, i.e. plain concatenation)
        bert_inputs.append(sep.join(parts))

    return bert_inputs

def add_bert_input_column(df: pd.DataFrame) -> pd.DataFrame:
    """
    Applies construct_input_speaker_window per dialogue and returns a new DataFrame
    with an added 'bert_input' column.

    The generated strings are written back to the rows they were built from, so the
    result is correct whatever order the rows of ``df`` are in. (Collecting the strings
    dialogue by dialogue and assigning the list positionally is only correct when ``df``
    is already sorted by 'Dialogue_ID' and then 'Utterance_ID', because groupby visits
    groups in sorted key order and the per-dialogue strings come back in
    'Utterance_ID' order.)

    Parameters
    ----------
    df : pd.DataFrame
        Utterance-level rows containing 'Dialogue_ID', 'Utterance_ID', 'Speaker' and
        'Utterance'. It is not modified.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` (same rows, same order, same index) plus 'bert_input'.

    Raises
    ------
    ValueError
        If some rows received no input string, e.g. rows whose 'Dialogue_ID' is missing
        and which groupby therefore leaves out.
    KeyError
        If a required column is missing.
    """
    # Use a 0..n-1 index so row labels double as list positions, independent of
    # whatever (possibly non-unique) index `df` carries.
    positional = df.reset_index(drop=True)
    bert_column = [None] * len(positional)

    # Process each dialogue separately
    for _, dialogue in positional.groupby('Dialogue_ID'):
        inputs = construct_input_speaker_window(dialogue)
        # The strings come back in 'Utterance_ID' order; apply the same sort to the
        # same rows to learn which original row each string belongs to.
        row_positions = dialogue.sort_values('Utterance_ID').index
        for row_pos, text in zip(row_positions, inputs):
            bert_column[row_pos] = text

    unfilled = sum(text is None for text in bert_column)
    if unfilled:
        raise ValueError(
            f"{unfilled} row(s) received no bert_input; check for missing 'Dialogue_ID' values"
        )

    df_with_input = df.copy()
    df_with_input['bert_input'] = bert_column
    return df_with_input

# Build the context-augmented version of every split (train, then dev, then test)
train_with_context, dev_with_context, test_with_context = map(
    add_bert_input_column, (train_df, dev_df, test_df)
)

# Write each augmented split to a CSV in the working directory, leaving out the index
train_with_context.to_csv('train_with_context.csv', index=False)
dev_with_context.to_csv('dev_with_context.csv', index=False)
test_with_context.to_csv('test_with_context.csv', index=False)

# Preview the first rows of the augmented training split
train_with_context.head()

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,bert_input
0,1,also I was the point person on my company’s tr...,Chandler,neutral,neutral,0,0,8,21,"00:16:16,059","00:16:21,731",<s> Chandler <mask> says: also I was the point...
1,2,You must’ve had your hands full.,The Interviewer,neutral,neutral,0,1,8,21,"00:16:21,940","00:16:23,442",<s> The Interviewer <mask> says: You must’ve h...
2,3,That I did. That I did.,Chandler,neutral,neutral,0,2,8,21,"00:16:23,442","00:16:26,389",Chandler says: also I was the point person on ...
3,4,So let’s talk a little bit about your duties.,The Interviewer,neutral,neutral,0,3,8,21,"00:16:26,820","00:16:29,572",The Interviewer says: You must’ve had your han...
4,5,My duties? All right.,Chandler,surprise,positive,0,4,8,21,"00:16:34,452","00:16:40,917",Chandler says: That I did. That I did.<s> Chan...


In [None]:
from google.colab import drive
import os
# Attach Google Drive to the Colab filesystem
drive.mount('/content/drive')

# Destination folder on Drive; created when absent, reused when it already exists
output_dir = '/content/drive/MyDrive/MELD/'
os.makedirs(output_dir, exist_ok=True)

# Write each context-augmented split into the Drive folder, leaving out the index
for split_frame, csv_name in (
    (train_with_context, 'train_with_context.csv'),
    (dev_with_context, 'dev_with_context.csv'),
    (test_with_context, 'test_with_context.csv'),
):
    split_frame.to_csv(os.path.join(output_dir, csv_name), index=False)

print(f"Saved processed MELD CSVs to {output_dir}")

# Read the training split back from Drive and preview it
train_df1 = pd.read_csv(os.path.join(output_dir, 'train_with_context.csv'))
train_df1.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Saved processed MELD CSVs to /content/drive/MyDrive/MELD/


Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,bert_input
0,1,also I was the point person on my company’s tr...,Chandler,neutral,neutral,0,0,8,21,"00:16:16,059","00:16:21,731",<s> Chandler <mask> says: also I was the point...
1,2,You must’ve had your hands full.,The Interviewer,neutral,neutral,0,1,8,21,"00:16:21,940","00:16:23,442",<s> The Interviewer <mask> says: You must’ve h...
2,3,That I did. That I did.,Chandler,neutral,neutral,0,2,8,21,"00:16:23,442","00:16:26,389",Chandler says: also I was the point person on ...
3,4,So let’s talk a little bit about your duties.,The Interviewer,neutral,neutral,0,3,8,21,"00:16:26,820","00:16:29,572",The Interviewer says: You must’ve had your han...
4,5,My duties? All right.,Chandler,surprise,positive,0,4,8,21,"00:16:34,452","00:16:40,917",Chandler says: That I did. That I did.<s> Chan...
