***Note: Codes in this notebook are based faces from `largest_face_extraction` and `clustered_faces`.***

Re-run from Step 5 if you would like to apply new models to the dataset.

1. Mount Google Drive environment

In [1]:
# Mount Google Drive
# Re-run this cell every time you open this notebook
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


2. Look at text embeddings

In [2]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import ast  # to safely evaluate the string representation

pd.set_option('display.max_colwidth', None)   # Don't truncate the data when printed

In [None]:
path = '/content/drive/MyDrive/CS5344 Project/Data and Codes/'
train_df = pd.read_csv(os.path.join(path, "train_embedding.csv"))
dev_df = pd.read_csv(os.path.join(path, "dev_embedding.csv"))
test_df = pd.read_csv(os.path.join(path, "test_embedding.csv"))

In [None]:
display(train_df.info())
display(dev_df.info())
display(test_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9989 entries, 0 to 9988
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  9989 non-null   int64 
 1   Utterance               9989 non-null   object
 2   Speaker                 9989 non-null   object
 3   Emotion                 9989 non-null   object
 4   Sentiment               9989 non-null   object
 5   Dialogue_ID             9989 non-null   int64 
 6   Utterance_ID            9989 non-null   int64 
 7   Season                  9989 non-null   int64 
 8   Episode                 9989 non-null   int64 
 9   StartTime               9989 non-null   object
 10  EndTime                 9989 non-null   object
 11  cleaned_text            9989 non-null   object
 12  cleaned_text_new        9989 non-null   object
 13  word2vec_embedding      9989 non-null   object
 14  word2vec_embedding_new  9989 non-null   object
 15  bert

None

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1109 entries, 0 to 1108
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  1109 non-null   int64 
 1   Utterance               1109 non-null   object
 2   Speaker                 1109 non-null   object
 3   Emotion                 1109 non-null   object
 4   Sentiment               1109 non-null   object
 5   Dialogue_ID             1109 non-null   int64 
 6   Utterance_ID            1109 non-null   int64 
 7   Season                  1109 non-null   int64 
 8   Episode                 1109 non-null   int64 
 9   StartTime               1109 non-null   object
 10  EndTime                 1109 non-null   object
 11  cleaned_text            1109 non-null   object
 12  cleaned_text_new        1109 non-null   object
 13  word2vec_embedding      1109 non-null   object
 14  word2vec_embedding_new  1109 non-null   object
 15  bert

None

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2610 entries, 0 to 2609
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  2610 non-null   int64 
 1   Utterance               2610 non-null   object
 2   Speaker                 2610 non-null   object
 3   Emotion                 2610 non-null   object
 4   Sentiment               2610 non-null   object
 5   Dialogue_ID             2610 non-null   int64 
 6   Utterance_ID            2610 non-null   int64 
 7   Season                  2610 non-null   int64 
 8   Episode                 2610 non-null   int64 
 9   StartTime               2610 non-null   object
 10  EndTime                 2610 non-null   object
 11  cleaned_text            2610 non-null   object
 12  cleaned_text_new        2610 non-null   object
 13  word2vec_embedding      2610 non-null   object
 14  word2vec_embedding_new  2610 non-null   object
 15  bert

None

In [None]:
# Preview 1 record
train_df.iloc[0]

Unnamed: 0,0
Sr No.,1
Utterance,also I was the point person on my companys transition from the KL-5 to GR-6 system.
Speaker,Chandler
Emotion,neutral
Sentiment,neutral
Dialogue_ID,0
Utterance_ID,0
Season,8
Episode,21
StartTime,"00:16:16,059"


In [None]:
# Print out one record from 'bert_embedding_new'
print(train_df['bert_embedding_new'][0])

[-4.43676353e-01 -6.17331713e-02  2.79465318e-01  1.58680454e-01
 -2.55131200e-02 -7.10380897e-02 -9.19703171e-02  4.01559651e-01
 -2.40905985e-01 -2.97873408e-01 -7.18223751e-02 -3.29356879e-01
  2.09269240e-01  4.73381013e-01 -5.52107729e-02  1.19903892e-01
  1.03990287e-01 -2.21819624e-01  3.01783592e-01  1.55977502e-01
  9.38857570e-02 -8.22402388e-02  1.17085353e-01  7.51721621e-01
  1.62539005e-01  1.85574040e-01 -2.37767264e-01  4.70825046e-01
  9.57242027e-03  6.28606901e-02  3.72407079e-01 -6.54949248e-02
 -3.95148098e-01  8.39643739e-03  2.00562671e-01 -1.69472173e-01
 -2.89948910e-01 -6.89031035e-02 -2.73447067e-01 -1.02734588e-01
 -3.84277046e-01 -2.23241925e-01 -8.38232785e-03  3.86075961e-04
 -2.52199411e-01  1.11613445e-01 -8.48886892e-02 -1.20177552e-01
  2.74581343e-01 -2.52737015e-01 -2.65635222e-01  6.57061040e-02
 -1.85704693e-01 -3.78064871e-01 -2.09713310e-01  5.99490106e-01
  2.62284726e-01 -5.40824294e-01 -2.84006834e-01 -4.49552992e-03
  1.67964056e-01 -2.56022

2a. Extract text embeddings

In [None]:
# Function to extract embeddings and return them as a NumPy array
def extract_embeddings(df):
    embeddings = []
    for embedding_str in df['bert_embedding_new']:
        # Clean the string by removing newlines, extra spaces, and brackets
        embedding_str_clean = embedding_str.replace('\n', ' ').strip()  # Replace newline with space
        embedding_str_clean = embedding_str_clean.strip('[]')  # Remove leading/trailing brackets

        # Convert the cleaned string into a NumPy array
        embedding_array = np.array([float(x) for x in embedding_str_clean.split()])  # Convert string to list of floats

        # Append the embedding to the list
        embeddings.append(embedding_array)

    # Convert the list of embeddings into a NumPy array (2D matrix)
    return np.stack(embeddings)

# Extract embeddings for train, dev, and test sets
train_text_embedding_matrix = extract_embeddings(train_df)
dev_text_embedding_matrix = extract_embeddings(dev_df)
test_text_embedding_matrix = extract_embeddings(test_df)

# Check the shapes of the embedding matrices
print("Shape of train_text_embedding_matrix:", train_text_embedding_matrix.shape)
print("Shape of dev_text_embedding_matrix:", dev_text_embedding_matrix.shape)
print("Shape of test_text_embedding_matrix:", test_text_embedding_matrix.shape)

Shape of train_text_embedding_matrix: (9989, 768)
Shape of dev_text_embedding_matrix: (1109, 768)
Shape of test_text_embedding_matrix: (2610, 768)


In [None]:
# Preview a record in the 'train_text_embedding_matrix'
print("Shape of a record in the train_text_embedding_matrix:", train_text_embedding_matrix[0].shape)
train_text_embedding_matrix[0]

Shape of a record in the train_text_embedding_matrix: (768,)


array([-4.43676353e-01, -6.17331713e-02,  2.79465318e-01,  1.58680454e-01,
       -2.55131200e-02, -7.10380897e-02, -9.19703171e-02,  4.01559651e-01,
       -2.40905985e-01, -2.97873408e-01, -7.18223751e-02, -3.29356879e-01,
        2.09269240e-01,  4.73381013e-01, -5.52107729e-02,  1.19903892e-01,
        1.03990287e-01, -2.21819624e-01,  3.01783592e-01,  1.55977502e-01,
        9.38857570e-02, -8.22402388e-02,  1.17085353e-01,  7.51721621e-01,
        1.62539005e-01,  1.85574040e-01, -2.37767264e-01,  4.70825046e-01,
        9.57242027e-03,  6.28606901e-02,  3.72407079e-01, -6.54949248e-02,
       -3.95148098e-01,  8.39643739e-03,  2.00562671e-01, -1.69472173e-01,
       -2.89948910e-01, -6.89031035e-02, -2.73447067e-01, -1.02734588e-01,
       -3.84277046e-01, -2.23241925e-01, -8.38232785e-03,  3.86075961e-04,
       -2.52199411e-01,  1.11613445e-01, -8.48886892e-02, -1.20177552e-01,
        2.74581343e-01, -2.52737015e-01, -2.65635222e-01,  6.57061040e-02,
       -1.85704693e-01, -

3. Look at face image embeddings

In [None]:
# Check no. of embedding files
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_frames_speaker_embeddings" -type f | wc -l
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/dev_frames_speaker_embeddings" -type f | wc -l
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/test_frames_speaker_embeddings" -type f | wc -l

8871
962
2357


In [None]:
# Paths to embedding directories
train_dir = "/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_frames_speaker_embeddings"
dev_dir   = "/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/dev_frames_speaker_embeddings"
test_dir  = "/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/test_frames_speaker_embeddings"

def load_embeddings(embedding_dir):
    files = sorted([f for f in os.listdir(embedding_dir) if f.endswith(".npy")])
    embeddings = []
    filenames = []

    # Use tqdm to show progress for the files being processed
    for file in tqdm(files, desc=f"Loading embeddings from {embedding_dir}", unit="file"):
        path = os.path.join(embedding_dir, file)
        if os.path.getsize(path) > 0:
            try:
                embedding = np.load(path)
                embeddings.append(embedding)
                filenames.append(file.replace(".npy", ""))
            except Exception as e:
                print(f"Error loading {file}: {e}")
        else:
            print(f"Skipping empty file: {file}")

    return np.stack(embeddings), filenames

# Load all three sets with progress bars
train_embedding_matrix, train_filenames = load_embeddings(train_dir)
dev_embedding_matrix, dev_filenames     = load_embeddings(dev_dir)
test_embedding_matrix, test_filenames   = load_embeddings(test_dir)

# Check shapes
print("Train embedding shape:", train_embedding_matrix.shape)
print("Dev embedding shape:", dev_embedding_matrix.shape)
print("Test embedding shape:", test_embedding_matrix.shape)

Loading embeddings from /content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_frames_speaker_embeddings: 100%|██████████| 8871/8871 [03:30<00:00, 42.12file/s] 
Loading embeddings from /content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/dev_frames_speaker_embeddings: 100%|██████████| 962/962 [00:16<00:00, 56.97file/s] 
Loading embeddings from /content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/test_frames_speaker_embeddings: 100%|██████████| 2357/2357 [01:16<00:00, 30.63file/s] 

Train embedding shape: (8871, 512)
Dev embedding shape: (962, 512)
Test embedding shape: (2357, 512)





In [None]:
# Preview a face embedding
print("Shape of a face embedding:", train_embedding_matrix[0].shape)
train_embedding_matrix[0]

Shape of a face embedding: (512,)


array([ 3.52571011e-02, -1.16681366e-03, -8.03913698e-02, -2.60037836e-02,
        2.93896198e-02, -5.56406230e-02, -2.63683754e-03,  8.99192989e-02,
        4.61612940e-02,  7.12634325e-02,  1.44226914e-02, -6.62601367e-03,
        2.76612248e-02, -3.67297307e-02,  7.67826382e-03,  4.43019792e-02,
       -3.01454999e-02, -9.92868841e-02, -1.41271316e-02,  8.00280832e-03,
       -5.20792417e-02,  2.09805742e-02, -3.65079306e-02, -1.07695488e-02,
        1.97831262e-02,  8.53650458e-03,  1.15803368e-02, -6.81540966e-02,
        1.59860663e-02,  9.81888850e-04, -1.63855739e-02, -1.84671581e-02,
        1.56686474e-02,  4.15405221e-02, -4.81996015e-02,  2.85862829e-04,
        5.73319905e-02,  2.61600856e-02,  2.16038749e-02, -6.54685423e-02,
       -3.97270769e-02, -2.47791759e-03,  6.41441941e-02,  3.32436636e-02,
       -8.33166316e-02,  4.72144932e-02,  5.61041478e-03, -3.61079611e-02,
        7.81312771e-03,  7.83310086e-03,  8.65176693e-02, -9.15803853e-03,
       -2.27511656e-02, -

4. Append face embeddings into the corresponding record in the .csv file

In [None]:
path = '/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/'
train_df = pd.read_csv(os.path.join(path, "train_fused_embeddings.csv"))
dev_df = pd.read_csv(os.path.join(path, "dev_fused_embeddings.csv"))
test_df = pd.read_csv(os.path.join(path, "test_fused_embeddings.csv"))

In [None]:
# Function to load face embeddings
def load_face_embedding(dialogue_id, utterance_id, embedding_dir):
    # Create the filename from Dialogue_ID and Utterance_ID
    filename = f"dia{dialogue_id}_utt{utterance_id}_speaker_face.npy"
    filepath = os.path.join(embedding_dir, filename)

    # Check if the file exists and load it
    if os.path.exists(filepath):
        return np.load(filepath)
    else:
        print(f"File not found: {filepath}")
        return None  # Return None if the file doesn't exist

# Function to append face embeddings to the DataFrame
def append_face_embeddings(df, embedding_dir):
    face_embeddings = []

    for index, row in df.iterrows():
        dialogue_id = row['Dialogue_ID']
        utterance_id = row['Utterance_ID']

        # Load the face embedding for this record
        face_embedding = load_face_embedding(dialogue_id, utterance_id, embedding_dir)

        if face_embedding is not None:
            face_embeddings.append(face_embedding)
        else:
            # If no embedding is found, append None (this will be left blank in the DataFrame)
            face_embeddings.append(None)

    # Add the face embeddings as a new column in the DataFrame
    df['face_embedding_cluster'] = face_embeddings
    return df

# Append face embeddings for train, dev, and test DataFrames
train_df = append_face_embeddings(train_df, train_dir)
dev_df = append_face_embeddings(dev_df, dev_dir)
test_df = append_face_embeddings(test_df, test_dir)

# Check the result
print("Shape of train_df with face embeddings:", train_df.shape)
print("Shape of dev_df with face embeddings:", dev_df.shape)
print("Shape of test_df with face embeddings:", test_df.shape)

# Save the DataFrames with added face embeddings to new CSV files (overwrite if they already exist)
train_df.to_csv("/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_final_embeddings.csv", index=False)
dev_df.to_csv("/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/dev_final_embeddings.csv", index=False)
test_df.to_csv("/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/test_final_embeddings.csv", index=False)

File not found: /content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_frames_speaker_embeddings/dia1_utt2_speaker_face.npy
File not found: /content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_frames_speaker_embeddings/dia4_utt3_speaker_face.npy
File not found: /content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_frames_speaker_embeddings/dia4_utt10_speaker_face.npy
File not found: /content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_frames_speaker_embeddings/dia6_utt5_speaker_face.npy
File not found: /content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_frames_speaker_embeddings/dia6_utt15_speaker_face.npy
File not found: /content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_frames_speaker_embeddings/dia6_utt17_speaker_face.npy
File not found: /content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_frames_speaker_embeddings/dia10_utt0_speaker_face

5. Extract out text + face embeddings for records with both non-empty

In [3]:
# Read the saved train CSV file
train_df = pd.read_csv("/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/train_final_embeddings.csv")
dev_df = pd.read_csv("/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/dev_final_embeddings.csv")
test_df = pd.read_csv("/content/drive/MyDrive/CS5344 Project/Data and Codes/frames_speaker/test_final_embeddings.csv")

In [None]:
display(train_df.info())
display(dev_df.info())
display(test_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9989 entries, 0 to 9988
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  9989 non-null   int64 
 1   Utterance               9989 non-null   object
 2   Speaker                 9989 non-null   object
 3   Emotion                 9989 non-null   object
 4   Sentiment               9989 non-null   object
 5   Dialogue_ID             9989 non-null   int64 
 6   Utterance_ID            9989 non-null   int64 
 7   Season                  9989 non-null   int64 
 8   Episode                 9989 non-null   int64 
 9   StartTime               9989 non-null   object
 10  EndTime                 9989 non-null   object
 11  cleaned_text            9989 non-null   object
 12  cleaned_text_new        9989 non-null   object
 13  word2vec_embedding      9989 non-null   object
 14  word2vec_embedding_new  9989 non-null   object
 15  bert

None

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1109 entries, 0 to 1108
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  1109 non-null   int64 
 1   Utterance               1109 non-null   object
 2   Speaker                 1109 non-null   object
 3   Emotion                 1109 non-null   object
 4   Sentiment               1109 non-null   object
 5   Dialogue_ID             1109 non-null   int64 
 6   Utterance_ID            1109 non-null   int64 
 7   Season                  1109 non-null   int64 
 8   Episode                 1109 non-null   int64 
 9   StartTime               1109 non-null   object
 10  EndTime                 1109 non-null   object
 11  cleaned_text            1109 non-null   object
 12  cleaned_text_new        1109 non-null   object
 13  word2vec_embedding      1109 non-null   object
 14  word2vec_embedding_new  1109 non-null   object
 15  bert

None

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2610 entries, 0 to 2609
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  2610 non-null   int64 
 1   Utterance               2610 non-null   object
 2   Speaker                 2610 non-null   object
 3   Emotion                 2610 non-null   object
 4   Sentiment               2610 non-null   object
 5   Dialogue_ID             2610 non-null   int64 
 6   Utterance_ID            2610 non-null   int64 
 7   Season                  2610 non-null   int64 
 8   Episode                 2610 non-null   int64 
 9   StartTime               2610 non-null   object
 10  EndTime                 2610 non-null   object
 11  cleaned_text            2610 non-null   object
 12  cleaned_text_new        2610 non-null   object
 13  word2vec_embedding      2610 non-null   object
 14  word2vec_embedding_new  2610 non-null   object
 15  bert

None

In [None]:
# Output the first record to preview
print("First record of the train set:")
train_df.iloc[0]

First record of the train set:


Unnamed: 0,0
Sr No.,1
Utterance,also I was the point person on my companys transition from the KL-5 to GR-6 system.
Speaker,Chandler
Emotion,neutral
Sentiment,neutral
Dialogue_ID,0
Utterance_ID,0
Season,8
Episode,21
StartTime,"00:16:16,059"


In [None]:
# Filter the DataFrame to find rows where 'face_embedding' is empty (NaN or None)
empty_face_embeddings = train_df[train_df['face_embedding_cluster'].isna()]

# Display the first records where the 'face_embedding' is empty
print("One record with empty 'face_embedding_cluster':")
empty_face_embeddings.iloc[0]  # Display the first record

One record with empty 'face_embedding_cluster':


Unnamed: 0,16
Sr No.,17
Utterance,"No-no-no-no, no! Who, who were you talking about?"
Speaker,Joey
Emotion,surprise
Sentiment,negative
Dialogue_ID,1
Utterance_ID,2
Season,9
Episode,23
StartTime,"00:36:44,368"


*Note: Most of the records with blank `face_embedding` is because no face was detected/extracted from the video frame.*

In [4]:
# Create "final_face_embedding", which will take the value of "face_embedding_cluster" if it exists.
# Otherwise, it will take the value of "face_embedding".
# Otherwise, it will remain blank.

train_df["final_face_embedding"] = train_df.apply(
    lambda row: row["face_embedding_cluster"]
    if pd.notna(row["face_embedding_cluster"])
    else row["face_embedding"]
    if pd.notna(row["face_embedding"])
    else None,
    axis=1
)

dev_df["final_face_embedding"] = dev_df.apply(
    lambda row: row["face_embedding_cluster"]
    if pd.notna(row["face_embedding_cluster"])
    else row["face_embedding"]
    if pd.notna(row["face_embedding"])
    else None,
    axis=1
)

test_df["final_face_embedding"] = test_df.apply(
    lambda row: row["face_embedding_cluster"]
    if pd.notna(row["face_embedding_cluster"])
    else row["face_embedding"]
    if pd.notna(row["face_embedding"])
    else None,
    axis=1
)

In [None]:
display(train_df.info())
display(dev_df.info())
display(test_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9989 entries, 0 to 9988
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  9989 non-null   int64 
 1   Utterance               9989 non-null   object
 2   Speaker                 9989 non-null   object
 3   Emotion                 9989 non-null   object
 4   Sentiment               9989 non-null   object
 5   Dialogue_ID             9989 non-null   int64 
 6   Utterance_ID            9989 non-null   int64 
 7   Season                  9989 non-null   int64 
 8   Episode                 9989 non-null   int64 
 9   StartTime               9989 non-null   object
 10  EndTime                 9989 non-null   object
 11  cleaned_text            9989 non-null   object
 12  cleaned_text_new        9989 non-null   object
 13  word2vec_embedding      9989 non-null   object
 14  word2vec_embedding_new  9989 non-null   object
 15  bert

None

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1109 entries, 0 to 1108
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  1109 non-null   int64 
 1   Utterance               1109 non-null   object
 2   Speaker                 1109 non-null   object
 3   Emotion                 1109 non-null   object
 4   Sentiment               1109 non-null   object
 5   Dialogue_ID             1109 non-null   int64 
 6   Utterance_ID            1109 non-null   int64 
 7   Season                  1109 non-null   int64 
 8   Episode                 1109 non-null   int64 
 9   StartTime               1109 non-null   object
 10  EndTime                 1109 non-null   object
 11  cleaned_text            1109 non-null   object
 12  cleaned_text_new        1109 non-null   object
 13  word2vec_embedding      1109 non-null   object
 14  word2vec_embedding_new  1109 non-null   object
 15  bert

None

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2610 entries, 0 to 2609
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  2610 non-null   int64 
 1   Utterance               2610 non-null   object
 2   Speaker                 2610 non-null   object
 3   Emotion                 2610 non-null   object
 4   Sentiment               2610 non-null   object
 5   Dialogue_ID             2610 non-null   int64 
 6   Utterance_ID            2610 non-null   int64 
 7   Season                  2610 non-null   int64 
 8   Episode                 2610 non-null   int64 
 9   StartTime               2610 non-null   object
 10  EndTime                 2610 non-null   object
 11  cleaned_text            2610 non-null   object
 12  cleaned_text_new        2610 non-null   object
 13  word2vec_embedding      2610 non-null   object
 14  word2vec_embedding_new  2610 non-null   object
 15  bert

None

In [12]:
# Filter records where both bert_embedding_new and face_embedding are non-empty
train_valid_records = train_df[train_df['final_face_embedding'].notna() & train_df['bert_embedding_new'].notna()]
dev_valid_records = dev_df[dev_df['final_face_embedding'].notna() & dev_df['bert_embedding_new'].notna()]
test_valid_records = test_df[test_df['final_face_embedding'].notna() & test_df['bert_embedding_new'].notna()]

In [None]:
train_valid_records.info()
dev_valid_records.info()
test_valid_records.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9833 entries, 0 to 9988
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  9833 non-null   int64 
 1   Utterance               9833 non-null   object
 2   Speaker                 9833 non-null   object
 3   Emotion                 9833 non-null   object
 4   Sentiment               9833 non-null   object
 5   Dialogue_ID             9833 non-null   int64 
 6   Utterance_ID            9833 non-null   int64 
 7   Season                  9833 non-null   int64 
 8   Episode                 9833 non-null   int64 
 9   StartTime               9833 non-null   object
 10  EndTime                 9833 non-null   object
 11  cleaned_text            9833 non-null   object
 12  cleaned_text_new        9833 non-null   object
 13  word2vec_embedding      9833 non-null   object
 14  word2vec_embedding_new  9833 non-null   object
 15  bert_embe

In [13]:
# Function to convert string to NumPy array
def convert_str_to_array(s):
    return np.fromstring(s.strip('[]'), sep=' ')

# Function to preprocess a DataFrame
def preprocess_embeddings(df):
    df = df.copy()
    df['bert_embedding_new'] = df['bert_embedding_new'].apply(convert_str_to_array)
    df['final_face_embedding'] = df['final_face_embedding'].apply(convert_str_to_array)
    df['fused_embedding'] = df.apply(
        lambda row: np.concatenate((row['bert_embedding_new'], row['final_face_embedding'])),
        axis=1
    )
    return df

train_valid_records = preprocess_embeddings(train_valid_records)
dev_valid_records = preprocess_embeddings(dev_valid_records)
test_valid_records = preprocess_embeddings(test_valid_records)

In [14]:
# Check shapes after fuse
print(train_valid_records['bert_embedding_new'].iloc[0].shape)
print(train_valid_records['final_face_embedding'].iloc[0].shape)
print(train_valid_records['fused_embedding'].iloc[0].shape)

print(dev_valid_records['bert_embedding_new'].iloc[0].shape)
print(dev_valid_records['final_face_embedding'].iloc[0].shape)
print(dev_valid_records['fused_embedding'].iloc[0].shape)

print(test_valid_records['bert_embedding_new'].iloc[0].shape)
print(test_valid_records['final_face_embedding'].iloc[0].shape)
print(test_valid_records['fused_embedding'].iloc[0].shape)

(768,)
(512,)
(1280,)
(768,)
(512,)
(1280,)
(768,)
(512,)
(1280,)


6. Run the fused embeddings on some standard models

In [15]:
train_df = train_valid_records.copy()
dev_df = dev_valid_records.copy()
test_df = test_valid_records.copy()

In [16]:
# Feature matrices (X) and label vectors (y)
X_train = np.stack(train_df['fused_embedding'].values)
y_train = train_df['Sentiment'].values

X_dev = np.stack(dev_df['fused_embedding'].values)
y_dev = dev_df['Sentiment'].values

X_test = np.stack(test_df['fused_embedding'].values)
y_test = test_df['Sentiment'].values

# Encode sentiments into integers
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_dev = label_encoder.transform(y_dev)
y_test = label_encoder.transform(y_test)

6a. Logistic Regression

In [None]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_dev)
print(classification_report(y_dev, y_pred, target_names=label_encoder.classes_))

              precision    recall  f1-score   support

    negative       0.66      0.55      0.60       401
     neutral       0.69      0.82      0.75       466
    positive       0.53      0.49      0.51       225

    accuracy                           0.65      1092
   macro avg       0.63      0.62      0.62      1092
weighted avg       0.65      0.65      0.64      1092



In [None]:
y_test_pred = clf.predict(X_test)
print(classification_report(y_test, y_test_pred, target_names=label_encoder.classes_))

              precision    recall  f1-score   support

    negative       0.64      0.54      0.59       815
     neutral       0.73      0.81      0.77      1226
    positive       0.59      0.57      0.58       514

    accuracy                           0.68      2555
   macro avg       0.65      0.64      0.64      2555
weighted avg       0.67      0.68      0.67      2555



6b. Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_dev)
print(classification_report(y_dev, y_pred_rf, target_names=label_encoder.classes_))

              precision    recall  f1-score   support

    negative       0.66      0.47      0.55       401
     neutral       0.61      0.91      0.73       466
    positive       0.71      0.34      0.46       225

    accuracy                           0.63      1092
   macro avg       0.66      0.57      0.58      1092
weighted avg       0.65      0.63      0.61      1092



In [None]:
y_test_pred = clf.predict(X_test)
print(classification_report(y_test, y_test_pred, target_names=label_encoder.classes_))

              precision    recall  f1-score   support

    negative       0.64      0.54      0.59       815
     neutral       0.73      0.81      0.77      1226
    positive       0.59      0.57      0.58       514

    accuracy                           0.68      2555
   macro avg       0.65      0.64      0.64      2555
weighted avg       0.67      0.68      0.67      2555



6c. Fully-connected neural networks with dropouts

In [18]:
from tensorflow.keras.utils import to_categorical

# One-hot encode for Keras
y_train_cat = to_categorical(y_train)
y_dev_cat = to_categorical(y_dev)
y_test_cat = to_categorical(y_test)

num_classes = y_train_cat.shape[1]
input_dim = X_train.shape[1]

In [19]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, Activation, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
model = Sequential([
    Input(shape=(input_dim,)),

    Dense(256),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.1),

    Dense(128),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.1),

    Dense(num_classes, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# Early stopping to avoid overfitting
early_stop = EarlyStopping(patience=5, restore_best_weights=True)

history = model.fit(X_train, y_train_cat,
                    validation_data=(X_dev, y_dev_cat),
                    epochs=50,
                    batch_size=32,
                    callbacks=[early_stop],
                    verbose=1)

test_loss, test_acc = model.evaluate(X_test, y_test_cat, verbose=0)
print(f"Test accuracy: {test_acc:.4f}")

Epoch 1/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.5851 - loss: 0.9393 - val_accuracy: 0.6603 - val_loss: 0.7728
Epoch 2/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.7008 - loss: 0.7104 - val_accuracy: 0.6502 - val_loss: 0.8386
Epoch 3/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.7258 - loss: 0.6540 - val_accuracy: 0.6264 - val_loss: 0.8736
Epoch 4/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.7436 - loss: 0.6213 - val_accuracy: 0.6346 - val_loss: 0.8592
Epoch 5/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.7692 - loss: 0.5609 - val_accuracy: 0.6392 - val_loss: 0.8633
Epoch 6/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.7995 - loss: 0.5095 - val_accuracy: 0.6310 - val_loss: 0.8895
Test accuracy: 0.6853


In [None]:
X_train_cnn = X_train.reshape(-1, 1280, 1)
X_dev_cnn   = X_dev.reshape(-1, 1280, 1)
X_test_cnn  = X_test.reshape(-1, 1280, 1)

model = Sequential([
    Conv1D(64, 5, activation='relu', input_shape=(1280, 1)),
    MaxPooling1D(2),
    Conv1D(128, 5, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

early_stop = EarlyStopping(patience=5, restore_best_weights=True)

history = model.fit(X_train_cnn, y_train_cat,
                    validation_data=(X_dev_cnn, y_dev_cat),
                    epochs=50,
                    batch_size=32,
                    callbacks=[early_stop],
                    verbose=1)

test_loss, test_acc = model.evaluate(X_test_cnn, y_test_cat, verbose=0)
print(f"Test accuracy: {test_acc:.4f}")

Epoch 1/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 259ms/step - accuracy: 0.5412 - loss: 0.9707 - val_accuracy: 0.6639 - val_loss: 0.7944
Epoch 2/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 270ms/step - accuracy: 0.6672 - loss: 0.7929 - val_accuracy: 0.6548 - val_loss: 0.7803
Epoch 3/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 256ms/step - accuracy: 0.6886 - loss: 0.7517 - val_accuracy: 0.6740 - val_loss: 0.7679
Epoch 4/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 258ms/step - accuracy: 0.6919 - loss: 0.7248 - val_accuracy: 0.6456 - val_loss: 0.7839
Epoch 5/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 259ms/step - accuracy: 0.7077 - loss: 0.6934 - val_accuracy: 0.6630 - val_loss: 0.7741
Epoch 6/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 254ms/step - accuracy: 0.7244 - loss: 0.6586 - val_accuracy: 0.6557 - val_loss: 0.7820
Epoch 7/5

In [None]:
# Prepare input data by splitting them
X_train_text = X_train[:, :512]
X_train_face = X_train[:, 512:]

X_dev_text = X_dev[:, :512]
X_dev_face = X_dev[:, 512:]

X_test_text = X_test[:, :512]
X_test_face = X_test[:, 512:]

# Text input branch (BERT)
input_text = Input(shape=(512,), name='bert_input')
x_text = Dense(256, activation='relu')(input_text)
x_text = Dropout(0.3)(x_text)
x_text = Dense(128, activation='relu')(x_text)

# Face input branch (FaceNet)
input_face = Input(shape=(768,), name='facenet_input')
x_face = Dense(384, activation='relu')(input_face)
x_face = Dropout(0.3)(x_face)
x_face = Dense(128, activation='relu')(x_face)

# Fusion layer
x = Concatenate()([x_text, x_face])  # Output shape: (256,)
x = Dropout(0.3)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)

# Final output layer
output = Dense(num_classes, activation='softmax')(x)

# Build model
model = Model(inputs=[input_text, input_face], outputs=output)

# Compile model
model.compile(optimizer=Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# Fit the model
early_stop = EarlyStopping(patience=5, restore_best_weights=True)

history = model.fit(
    {'bert_input': X_train_text, 'facenet_input': X_train_face},
    y_train_cat,
    validation_data=(
        {'bert_input': X_dev_text, 'facenet_input': X_dev_face},
        y_dev_cat
    ),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

# Evaluation
test_loss, test_acc = model.evaluate(
    {'bert_input': X_test_text, 'facenet_input': X_test_face},
    y_test_cat,
    verbose=0
)
print(f"Test accuracy: {test_acc:.4f}")

Epoch 1/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.5681 - loss: 0.9211 - val_accuracy: 0.6529 - val_loss: 0.7856
Epoch 2/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.6707 - loss: 0.7722 - val_accuracy: 0.6676 - val_loss: 0.7664
Epoch 3/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.6790 - loss: 0.7641 - val_accuracy: 0.6603 - val_loss: 0.7729
Epoch 4/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.6923 - loss: 0.7367 - val_accuracy: 0.6703 - val_loss: 0.7626
Epoch 5/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - accuracy: 0.6971 - loss: 0.7248 - val_accuracy: 0.6676 - val_loss: 0.7570
Epoch 6/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.7075 - loss: 0.6971 - val_accuracy: 0.6493 - val_loss: 0.7838
Epoch 7/50
[1m308/308

7. Compare results with baseline model (text-only)