1. Mount Google Drive environment

In [2]:
# Mount Google Drive
# Re-run this cell every time you open this notebook
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


2. Look at text data

In [3]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array

pd.set_option('display.max_colwidth', None)   # Don't truncate the data when printed

In [3]:
path = '/content/drive/MyDrive/CS5344 Project/Data and Codes/'
train_df = pd.read_csv(os.path.join(path, "train_embedding.csv"))
dev_df = pd.read_csv(os.path.join(path, "dev_embedding.csv"))
test_df = pd.read_csv(os.path.join(path, "test_embedding.csv"))

In [4]:
display(train_df.info())
display(dev_df.info())
display(test_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9989 entries, 0 to 9988
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  9989 non-null   int64 
 1   Utterance               9989 non-null   object
 2   Speaker                 9989 non-null   object
 3   Emotion                 9989 non-null   object
 4   Sentiment               9989 non-null   object
 5   Dialogue_ID             9989 non-null   int64 
 6   Utterance_ID            9989 non-null   int64 
 7   Season                  9989 non-null   int64 
 8   Episode                 9989 non-null   int64 
 9   StartTime               9989 non-null   object
 10  EndTime                 9989 non-null   object
 11  cleaned_text            9989 non-null   object
 12  cleaned_text_new        9989 non-null   object
 13  word2vec_embedding      9989 non-null   object
 14  word2vec_embedding_new  9989 non-null   object
 15  bert

None

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1109 entries, 0 to 1108
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  1109 non-null   int64 
 1   Utterance               1109 non-null   object
 2   Speaker                 1109 non-null   object
 3   Emotion                 1109 non-null   object
 4   Sentiment               1109 non-null   object
 5   Dialogue_ID             1109 non-null   int64 
 6   Utterance_ID            1109 non-null   int64 
 7   Season                  1109 non-null   int64 
 8   Episode                 1109 non-null   int64 
 9   StartTime               1109 non-null   object
 10  EndTime                 1109 non-null   object
 11  cleaned_text            1109 non-null   object
 12  cleaned_text_new        1109 non-null   object
 13  word2vec_embedding      1109 non-null   object
 14  word2vec_embedding_new  1109 non-null   object
 15  bert

None

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2610 entries, 0 to 2609
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Sr No.                  2610 non-null   int64 
 1   Utterance               2610 non-null   object
 2   Speaker                 2610 non-null   object
 3   Emotion                 2610 non-null   object
 4   Sentiment               2610 non-null   object
 5   Dialogue_ID             2610 non-null   int64 
 6   Utterance_ID            2610 non-null   int64 
 7   Season                  2610 non-null   int64 
 8   Episode                 2610 non-null   int64 
 9   StartTime               2610 non-null   object
 10  EndTime                 2610 non-null   object
 11  cleaned_text            2610 non-null   object
 12  cleaned_text_new        2610 non-null   object
 13  word2vec_embedding      2610 non-null   object
 14  word2vec_embedding_new  2610 non-null   object
 15  bert

None

In [5]:
# Preview 1 record
train_df.iloc[0]

Unnamed: 0,0
Sr No.,1
Utterance,also I was the point person on my companys transition from the KL-5 to GR-6 system.
Speaker,Chandler
Emotion,neutral
Sentiment,neutral
Dialogue_ID,0
Utterance_ID,0
Season,8
Episode,21
StartTime,"00:16:16,059"


3. Look at extracted face images

In [13]:
# Check no. of extracted frames (file counts)
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/train_frames" -type f | wc -l
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/dev_frames" -type f | wc -l
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/test_frames" -type f | wc -l

9987
1112
2747


In [14]:
# Check no. of extracted facial images (file counts)
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/ExtractTestframe0331/OutputTestFrames/1LeadSpeaking/" -type f | wc -l
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/ExtractTestframe0331/OutputTestFrames/2NoLeadUpdate/" -type f | wc -l

2047
676


In [16]:
# Check no. of extracted facial images for largest_face_extraction (file counts)
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/train_face/" -type f | wc -l
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/dev_face/" -type f | wc -l
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/test_face/" -type f | wc -l

9780
1088
2545


**Follow-up required: To extract 1 face from each frame.**

**[below is currently done using only `largest_face_extraction`. to replace with the clustered face]**


4a. To resize extracted faces, and extract features (train set).


In [4]:
!pip install facenet-pytorch

Collecting facenet-pytorch
  Downloading facenet_pytorch-2.6.0-py3-none-any.whl.metadata (12 kB)
Collecting numpy<2.0.0,>=1.24.0 (from facenet-pytorch)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting Pillow<10.3.0,>=10.2.0 (from facenet-pytorch)
  Downloading pillow-10.2.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting torch<2.3.0,>=2.2.0 (from facenet-pytorch)
  Downloading torch-2.2.2-cp311-cp311-manylinux1_x86_64.whl.metadata (25 kB)
Collecting torchvision<0.18.0,>=0.17.0 (from facenet-pytorch)
  Downloading torchvision-0.17.2-cp311-cp311-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia

In [1]:
from facenet_pytorch import InceptionResnetV1
import torch

# Load FaceNet (InceptionResnetV1) pretrained on VGGFace2
facenet = InceptionResnetV1(pretrained='vggface2').eval().to('cuda' if torch.cuda.is_available() else 'cpu')

  0%|          | 0.00/107M [00:00<?, ?B/s]

In [4]:
from torchvision import transforms

# Define FaceNet preprocessing
preprocess = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)  # Normalize to [-1, 1] range
])

def get_facenet_embedding(image_path):
    img = Image.open(image_path).convert('RGB')
    img_tensor = preprocess(img).unsqueeze(0)  # Shape: (1, 3, 160, 160)
    img_tensor = img_tensor.to('cuda' if torch.cuda.is_available() else 'cpu')

    with torch.no_grad():
        embedding = facenet(img_tensor)  # Shape: (1, 512)

    return embedding.cpu().numpy()[0]  # Return as 1D numpy array

In [10]:
input_dir = "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/train_face/"
output_dir = "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/train_face_embeddings/"
os.makedirs(output_dir, exist_ok=True)

image_files = sorted([f for f in os.listdir(input_dir) if f.endswith('.jpg')])

for filename in tqdm(image_files, desc="Extracting FaceNet embeddings"):
    input_path = os.path.join(input_dir, filename)
    output_path = os.path.join(output_dir, filename.replace(".jpg", ".npy"))

    if os.path.exists(output_path):
        continue  # Skip if already done

    try:
        embedding = get_facenet_embedding(input_path)
        np.save(output_path, embedding)
    except Exception as e:
        print(f"\nError processing {filename}: {e}")

Extracting FaceNet embeddings: 100%|██████████| 9780/9780 [24:25<00:00,  6.68it/s]



4b. To resize extracted faces, and extract features (dev set).


In [14]:
input_dir = "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/dev_face/"
output_dir = "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/dev_face_embeddings/"
os.makedirs(output_dir, exist_ok=True)

image_files = sorted([f for f in os.listdir(input_dir) if f.endswith('.jpg')])

for filename in tqdm(image_files, desc="Extracting FaceNet embeddings"):
    input_path = os.path.join(input_dir, filename)
    output_path = os.path.join(output_dir, filename.replace(".jpg", ".npy"))

    if os.path.exists(output_path):
        continue  # Skip if already done

    try:
        embedding = get_facenet_embedding(input_path)
        np.save(output_path, embedding)
    except Exception as e:
        print(f"\nError processing {filename}: {e}")

Extracting FaceNet embeddings: 100%|██████████| 1088/1088 [03:08<00:00,  5.77it/s]



4c. To resize extracted faces, and extract features (test set).


In [15]:
input_dir = "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/test_face/"
output_dir = "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/test_face_embeddings/"
os.makedirs(output_dir, exist_ok=True)

image_files = sorted([f for f in os.listdir(input_dir) if f.endswith('.jpg')])

for filename in tqdm(image_files, desc="Extracting FaceNet embeddings"):
    input_path = os.path.join(input_dir, filename)
    output_path = os.path.join(output_dir, filename.replace(".jpg", ".npy"))

    if os.path.exists(output_path):
        continue  # Skip if already done

    try:
        embedding = get_facenet_embedding(input_path)
        np.save(output_path, embedding)
    except Exception as e:
        print(f"\nError processing {filename}: {e}")

Extracting FaceNet embeddings: 100%|██████████| 2545/2545 [06:37<00:00,  6.40it/s]


In [17]:
# Check no. of embedding files
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/train_face_embeddings/" -type f | wc -l
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/dev_face_embeddings/" -type f | wc -l
!find "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/test_face_embeddings/" -type f | wc -l

9780
1088
2545


5. Load all face embeddings

In [32]:
# Paths to embedding directories
train_dir = "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/train_face_embeddings/"
dev_dir   = "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/dev_face_embeddings/"
test_dir  = "/content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/test_face_embeddings/"

def load_embeddings(embedding_dir):
    files = sorted([f for f in os.listdir(embedding_dir) if f.endswith(".npy")])
    embeddings = []
    filenames = []

    # Use tqdm to show progress for the files being processed
    for file in tqdm(files, desc=f"Loading embeddings from {embedding_dir}", unit="file"):
        path = os.path.join(embedding_dir, file)
        if os.path.getsize(path) > 0:
            try:
                embedding = np.load(path)
                embeddings.append(embedding)
                filenames.append(file.replace(".npy", ""))
            except Exception as e:
                print(f"Error loading {file}: {e}")
        else:
            print(f"Skipping empty file: {file}")

    return np.stack(embeddings), filenames

# Load all three sets with progress bars
train_embedding_matrix, train_filenames = load_embeddings(train_dir)
dev_embedding_matrix, dev_filenames     = load_embeddings(dev_dir)
test_embedding_matrix, test_filenames   = load_embeddings(test_dir)

# Check shapes
print("Train embedding shape:", train_embedding_matrix.shape)
print("Dev embedding shape:", dev_embedding_matrix.shape)
print("Test embedding shape:", test_embedding_matrix.shape)

Loading embeddings from /content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/train_face_embeddings/: 100%|██████████| 9780/9780 [03:31<00:00, 46.13file/s] 
Loading embeddings from /content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/dev_face_embeddings/: 100%|██████████| 1088/1088 [00:12<00:00, 88.17file/s] 
Loading embeddings from /content/drive/MyDrive/CS5344 Project/Data and Codes/largest_face_extraction/test_face_embeddings/: 100%|██████████| 2545/2545 [00:48<00:00, 52.79file/s] 

Train embedding shape: (9780, 512)
Dev embedding shape: (1088, 512)
Test embedding shape: (2545, 512)





In [34]:
# Preview a face embedding
print(train_embedding_matrix[0].shape)
train_embedding_matrix[0]

(512,)


array([ 1.01675615e-02, -5.20633347e-03, -8.79966542e-02, -2.95745954e-02,
        3.95217054e-02, -7.78048038e-02,  4.11245134e-03,  7.76482746e-02,
        5.76562732e-02,  7.28948638e-02, -4.62316908e-03,  2.37834663e-03,
       -8.38192087e-03, -3.15152258e-02,  2.47046687e-02,  3.28805745e-02,
       -3.32478471e-02, -1.00150801e-01, -4.44595749e-03,  4.38431650e-03,
       -4.70193215e-02,  3.09520401e-02, -4.85073328e-02,  1.06837926e-02,
        5.65144187e-03, -9.31808166e-03,  2.85156141e-03, -6.86211735e-02,
        5.26617951e-05,  1.37021542e-02, -2.00342461e-02, -3.31150666e-02,
        2.53708735e-02,  5.16924635e-02, -1.86482025e-03, -6.32485515e-03,
        3.92663665e-02,  2.03678757e-02,  2.47562900e-02, -2.81787626e-02,
       -6.04893900e-02,  4.22001909e-03,  5.64891025e-02,  1.16157793e-02,
       -9.36346352e-02,  4.67403755e-02,  1.94409229e-02, -4.94056232e-02,
        4.10586335e-02,  3.54830846e-02,  7.28997663e-02,  2.51177512e-02,
       -1.03426902e-02, -