In [1]:
import os
import re
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Run number used by the single-run walkthrough cells that follow.
# The batch loops further down iterate over the runs themselves and rebind `run`.
run = 1 # example run

In [3]:
# Root of the study's derivatives tree; every input and output path below hangs off it.
deriv_dir = "/home/exp-psy/Desktop/study_face_tracks/derivatives"

# Emotion-annotation events file for the run selected in `run`.
annotation_name = f"emotions_av_1s_events_run-{run}_events.tsv"
df_path = os.path.join(deriv_dir, "reference_face-emotions", annotation_name)

## First: Use the Annotation to create Character Events

In [4]:
# Load the tab-separated annotation table. `index_col=0` turns the file's first
# column into the index; in the preview below that index is named "onset",
# which is why the following cell moves it back into a regular column.
in_df = pd.read_csv(df_path, sep="\t", index_col=0)
# Preview the first ten rows.
in_df.head(10)

Unnamed: 0_level_0,duration,character,arousal,valence_positive,valence_negative,c_audio,c_context,c_face,c_gesture,c_narrator,...,e_hope,e_love,e_pity/compassion,e_pride,e_relief,e_remorse,e_resent,e_sadness,e_satisfaction,e_shame
onset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
192.0,12.0,FORREST,-0.666667,0.666667,0.0,0.111111,0.0,0.444444,0.111111,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
204.0,2.0,FORREST,-0.777778,0.777778,0.0,0.111111,0.0,0.444444,0.111111,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
206.0,3.0,FORREST,-0.666667,0.888889,0.0,0.222222,0.0,0.444444,0.111111,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
209.0,16.0,FORREST,-0.333333,0.555556,0.0,0.111111,0.0,0.333333,0.111111,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
225.0,15.0,FORREST,-0.555556,0.555556,0.0,0.111111,0.0,0.222222,0.222222,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
238.0,1.0,WOMAN,-0.555556,0.0,0.555556,0.0,0.0,0.222222,0.222222,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
239.0,1.0,WOMAN,-0.555556,0.0,0.555556,0.0,0.0,0.222222,0.222222,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
240.0,1.0,WOMAN,-0.666667,0.0,0.666667,0.0,0.0,0.222222,0.222222,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
241.0,1.0,WOMAN,-0.555556,0.0,0.555556,0.0,0.0,0.222222,0.222222,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
245.0,15.0,FORREST,-0.555556,0.555556,0.0,0.111111,0.0,0.333333,0.333333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Make sure "onset" is an ordinary column: reading with index_col=0 can leave
# the onset values in the index instead. reset_index() moves the index back
# into the columns and installs a fresh 0..n-1 index.
onset_is_index = in_df.index.name == "onset"
onset_column_missing = "onset" not in in_df.columns
if onset_is_index or onset_column_missing:
    in_df = in_df.reset_index()

In [6]:
# Discard events whose duration is <= 2 (in the units of the `duration` column).
# A boolean row mask is used instead of an in-place, label-based drop: a label
# drop would also remove any other row that happens to share an index label
# with a short event, and in-place mutation silently changes a frame that an
# earlier cell already displayed.
too_short = in_df["duration"] <= 2
in_df = in_df[~too_short]
in_df.head(10)

Unnamed: 0,onset,duration,character,arousal,valence_positive,valence_negative,c_audio,c_context,c_face,c_gesture,...,e_hope,e_love,e_pity/compassion,e_pride,e_relief,e_remorse,e_resent,e_sadness,e_satisfaction,e_shame
0,192.0,12.0,FORREST,-0.666667,0.666667,0.0,0.111111,0.0,0.444444,0.111111,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,206.0,3.0,FORREST,-0.666667,0.888889,0.0,0.222222,0.0,0.444444,0.111111,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,209.0,16.0,FORREST,-0.333333,0.555556,0.0,0.111111,0.0,0.333333,0.111111,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,225.0,15.0,FORREST,-0.555556,0.555556,0.0,0.111111,0.0,0.222222,0.222222,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,245.0,15.0,FORREST,-0.555556,0.555556,0.0,0.111111,0.0,0.333333,0.333333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,253.0,7.0,WOMAN,-0.555556,0.444444,0.111111,0.0,0.0,0.444444,0.111111,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,275.0,3.0,FORREST,0.111111,0.111111,0.444444,0.0,0.111111,0.555556,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,278.0,7.0,FORREST,0.111111,0.222222,0.555556,0.0,0.222222,0.777778,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13,285.0,3.0,FORREST,0.111111,0.333333,0.444444,0.0,0.111111,0.777778,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,308.0,3.0,MRSGUMP,-0.111111,0.333333,0.222222,0.0,0.0,0.555556,0.111111,...,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Accumulator for the per-character event tables appended by the groupby loop below.
event_dfs = []

In [8]:
# Keep only the three columns needed to build the event table. `.copy()` yields
# an independent frame, so later changes to it do not touch `in_df`.
df_events = in_df[["onset", "duration", "character"]].copy()

In [9]:
# Split the events by character and label each character's events
# consecutively in their current row order (e.g. FORREST1, FORREST2, ...).
# The list is (re)created here so that re-running this cell starts from a
# clean slate instead of appending a second copy of every table.
event_dfs = []
for character, group in df_events.groupby("character"):
    group = group.reset_index(drop=True)
    group["trial_type"] = [f"{character}{n}" for n in range(1, len(group) + 1)]
    event_dfs.append(group[["onset", "duration", "trial_type"]])

In [10]:
# Stitch the per-character tables back into one timeline ordered by onset.
combined_events = pd.concat(event_dfs)
individual_faces_df = combined_events.sort_values("onset").reset_index(drop=True)

# Write the table next to the source annotations (tab-separated, no index column).
single_characters_path = os.path.join(
    deriv_dir, "reference_face-emotions", f"run-0{run}_adjusted-single-characters.tsv"
)
individual_faces_df.to_csv(single_characters_path, sep="\t", index=False)

In [11]:
# Preview the first ten rows of the combined per-character events table.
individual_faces_df.head(10)

Unnamed: 0,onset,duration,trial_type
0,192.0,12.0,FORREST1
1,206.0,3.0,FORREST2
2,209.0,16.0,FORREST3
3,225.0,15.0,FORREST4
4,245.0,15.0,FORREST5
5,253.0,7.0,WOMAN1
6,275.0,3.0,FORREST6
7,278.0,7.0,FORREST7
8,285.0,3.0,FORREST8
9,308.0,3.0,MRSGUMP1


In [12]:
# loop over all to get character events
# For each of the 8 runs: read the emotion annotation, label every event with
# "<CHARACTER><k>" (k counts that character's events in file order), drop the
# short events and write the result as a tab-separated events file.
for run in range(1, 9):
    df_path = os.path.join(deriv_dir, "reference_face-emotions", f"emotions_av_1s_events_run-{run}_events.tsv")
    in_df = pd.read_csv(df_path, sep="\t", index_col=0)

    # correct faulty onset column (index_col=0 can leave "onset" in the index)
    if in_df.index.name == "onset" or "onset" not in in_df.columns:
        in_df = in_df.reset_index()

    # A fresh 0..n-1 index records each event's row position in the annotation
    # file; it is used below to restore file order after the per-character split.
    df_events = in_df[["onset", "duration", "character"]].reset_index(drop=True)

    event_dfs = []
    for character, group in df_events.groupby("character"):
        group = group.copy()  # keep the file-position index; avoid writing into a slice
        group["trial_type"] = [f"{character}{n}" for n in range(1, len(group) + 1)]
        event_dfs.append(group[["onset", "duration", "trial_type"]])

    # Order by onset. The table is first put back into file order and then
    # sorted with a stable algorithm, so events with identical onsets keep the
    # order they have in the annotation file. The emotion RDMs are built from
    # the annotation rows in file order, so this keeps the rows of both kinds
    # of matrix in the same order whenever the file itself is ordered by onset.
    individual_faces_df = (
        pd.concat(event_dfs)
        .sort_index()
        .sort_values("onset", kind="mergesort")
        .reset_index(drop=True)
    )

    # drop rows with duration smaller than or equal to 2
    # NOTE(review): labels are numbered before this filter, so the numbers of
    # removed events are skipped; the single-run walkthrough above filters
    # first and therefore numbers consecutively. Confirm which is intended.
    too_short = individual_faces_df["duration"] <= 2
    individual_faces_df = individual_faces_df[~too_short]

    individual_faces_df.to_csv(
        os.path.join(deriv_dir, "reference_face-emotions", f"run-0{run}_adjusted-single-characters.tsv"),
        sep="\t",
        index=False
    )

## Calculate pairwise distances for Emotion Annotations

In [13]:
# NOTE(review): `in_df` here is whatever the preceding loop left behind (the
# last run it loaded, before any duration filtering), and `df_dropped` is not
# referenced again in the cells visible here — confirm whether it is still needed.
df_dropped = in_df.drop(columns=["character"])
# Annotation columns for which a distance matrix is computed in the loop below.
columns = ["arousal", "valence_positive", "valence_negative", "e_sadness", "e_happiness"]

In [14]:
# Folder that receives the model RDMs (.npy) and their heatmaps (.png).
# It is derived from `deriv_dir` rather than repeating the absolute path, so
# the inputs and outputs cannot drift apart if the derivatives root moves.
output_dir = os.path.join(deriv_dir, "model_rdms", "emotion_rdms")
os.makedirs(output_dir, exist_ok=True)

In [15]:
# loop over all runs and all target columns
# For every run and every column in `columns`: compute the pairwise Euclidean
# distance between the retained events' values, store the square matrix as
# .npy and save a heatmap of it as .png.
for run in range(1, 9):
    df_path = os.path.join(deriv_dir, "reference_face-emotions", f"emotions_av_1s_events_run-{run}_events.tsv")
    in_df = pd.read_csv(df_path, sep="\t", index_col=0)

    # correct faulty onset column
    needs_reset = in_df.index.name == "onset" or "onset" not in in_df.columns
    if needs_reset:
        in_df = in_df.reset_index()

    # drop rows with duration smaller than or equal to 2
    short_labels = in_df.index[in_df["duration"] <= 2]
    in_df = in_df.drop(index=short_labels)

    for col in columns:
        # One feature column -> (n_events, 1) table -> condensed -> square distances.
        data = in_df[[col]]
        dist_array = pdist(data, metric="euclidean")
        dist_matrix = squareform(dist_array)

        npy_path = os.path.join(output_dir, f"run-{run}_{col}_distance-matrix.npy")
        np.save(npy_path, dist_matrix)
        print(f"saved at {npy_path}")

        # plot each heatmap on its own figure and close it afterwards so
        # figures do not pile up in memory across the loop
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.heatmap(
            dist_matrix,
            cmap="RdBu_r",
            square=True,
            cbar_kws={"label": "Euclidean distance"},
            ax=ax,
        )
        ax.set_title(f"run-{run}: {col}")
        ax.set_xlabel("")
        ax.set_ylabel("")

        plot_path = os.path.join(output_dir, f"run-{run}_{col}_distance-matrix.png")
        fig.savefig(plot_path)
        plt.close(fig)

saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-1_arousal_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-1_valence_positive_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-1_valence_negative_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-1_e_sadness_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-1_e_happiness_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-2_arousal_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-2_valence_positive_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-2_valence_negative_distance-matrix.npy
saved 

## Create single-character Hypothesis RDMs

In [16]:
def create_character_rdm(df, save_path="character_rdm.png"):
    """
    Build a binary character-identity RDM from trial labels.

    Each trial's character is the leading run of uppercase letters A-Z in its
    ``trial_type`` label (e.g. ``'FORREST25'`` -> ``'FORREST'``). Two trials
    get dissimilarity 0 when they share a character and 1 otherwise, so the
    matrix is symmetric with a zero diagonal.

    Parameters
    ----------
    df : DataFrame
        Must contain a 'trial_type' column with string labels like 'FORREST25'.
    save_path : str
        Currently unused: this function neither plots nor writes a file. The
        parameter is kept so existing calls that pass it keep working.

    Returns
    -------
    rdm : ndarray
        Float array of shape (n_trials, n_trials) with 0 for same character,
        1 for different. Shape (0, 0) when `df` has no rows.
    trial_types : list
        The labels from ``df['trial_type']`` in row order (usable as tick labels).

    Raises
    ------
    ValueError
        If a label does not start with an uppercase letter A-Z.
    """

    trial_types = df['trial_type'].tolist()

    characters = []
    for label in trial_types:
        match = re.match(r"[A-Z]+", str(label))
        if match is None:
            # Fail with the offending label instead of an AttributeError on None.
            raise ValueError(
                f"trial_type {label!r} does not start with an uppercase character name"
            )
        characters.append(match.group(0))

    # Give every distinct character an integer code, then compare all pairs of
    # codes at once via broadcasting instead of an explicit n x n Python loop.
    _, codes = np.unique(characters, return_inverse=True)
    rdm = (codes[:, None] != codes[None, :]).astype(float)
    return rdm, trial_types

In [17]:
# For every run: build the character-identity RDM from that run's
# single-character events file, save it as .npy and save a labelled heatmap.
for run in range(1, 9):
    # get face identity matrix
    npy_path = os.path.join(output_dir, f"run-{run}_face-identity_distance-matrix.npy")
    events_path = os.path.join(
        deriv_dir, "reference_face-emotions", f"run-0{run}_adjusted-single-characters.tsv"
    )
    single_char_rdm, trial_types = create_character_rdm(pd.read_csv(events_path, sep="\t"))

    # Write the identity RDM computed above. `dist_matrix`, left over from the
    # emotion-RDM loop, is a different matrix and must not be saved under this name.
    np.save(npy_path, single_char_rdm)
    print(f"saved at {npy_path}")

    # save corresponding plot
    plot_path = os.path.join(output_dir, f"run-{run}_face-identity_distance-matrix.png")
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(single_char_rdm,
                square=True,
                cmap="RdBu_r",
                xticklabels=trial_types,
                yticklabels=trial_types,
                cbar_kws={"label": "Dissimilarity"},
                ax=ax)
    ax.set_title("Character Identity RDM (0 = same, 1 = different)")
    fig.tight_layout()
    fig.savefig(plot_path, dpi=300)
    plt.close(fig)  # release the figure before the next run

saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-1_face-identity_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-2_face-identity_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-3_face-identity_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-4_face-identity_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-5_face-identity_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-6_face-identity_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-7_face-identity_distance-matrix.npy
saved at /home/exp-psy/Desktop/study_face_tracks/derivatives/model_rdms/emotion_rdms/run-8_face-identity_distance-matrix.npy
