In [14]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from scipy import signal
from scipy import interpolate
from matplotlib import pyplot as plt
import os

##### AU BS lagMeasure
This notebook creates the lag-measure CSV (`<AU>-<blendshape>-lagMeasure.csv`, e.g. `AU26_r-JawOpen-lagMeasure.csv`).
For each script of the Miyawaki dataset, it compares the blendshape CSV with the Action Unit CSV obtained by running OpenFace on the videos in Miyawaki's folders.
The lag is computed with cross-correlation and stored, for each script of each participant, in the lag-measure CSV.


In [38]:
def compute_framerate(data) -> float:
    '''
    average framerate (frames per second) of a timecode array that starts from 0

    The elapsed time is the sum of the gaps between consecutive timecodes and
    the number of frames is the number of such gaps (one less than the number
    of timecodes).
    '''
    frame_gaps = np.diff(data)
    return (data.shape[0] - 1) / np.sum(frame_gaps)

def low_pass_filter(csv_file: str, window_ms: int=100, framerate: float=60) -> pd.DataFrame:
    '''
    low pass filter the data to filter noise using a sliding window of 100ms by default

    csv_file: path to a csv whose fields are separated by ", " and that has a "frame" column
    window_ms: length of the sliding window in milliseconds
    framerate: frames per second of the recording, used to convert window_ms
        into a number of rows (60 by default)

    returns a new DataFrame (no file is written) without the "frame" column,
    where every value is the mean of the last window_size rows; the first
    window_size-1 rows are NaN because their window is incomplete
    '''
    #keep at least one row in the window so that very short windows still carry a sample
    window_size = max(1, int(window_ms*framerate/1000))
    #a multi-character separator is only supported by the python engine; asking for it
    #explicitly avoids the ParserWarning emitted when pandas falls back from the C engine
    bs_df = pd.read_csv(csv_file, sep=', ', engine='python')
    #drop the frame column, as it is not useful anymore
    rolling = bs_df.drop("frame", axis=1).rolling(window_size).mean()
    return rolling

def cross_corr_with_savgol_filter(signal1, signal2, window_size: int) -> list[np.ndarray]:
    '''
    returns the correlation and the lags in a list

    signal1, signal2: 1-D signals (pandas Series, numpy array or list); they
        may have different lengths
    window_size: window length, in samples, of the savgol filter applied to
        both signals before the correlation

    returns [correlation, lags]: two arrays of the same length, lags[i] being
    the offset in samples between the two signals for which correlation[i]
    was computed
    '''
    #np.asarray accepts Series as well as plain arrays/lists, so both inputs are handled the same way
    samples1 = np.asarray(signal1).flatten()
    samples2 = np.asarray(signal2).flatten()
    #Before cross correlation, a savgol filter (polynomial order 1) is applied to smooth high frequencies
    y_au_filtered = signal.savgol_filter(samples1, window_size, 1)
    y_bs_filtered = signal.savgol_filter(samples2, window_size, 1)
    #subtracting the mean keeps a constant offset of the signals from dominating the correlation
    correlation = signal.correlate(
        y_au_filtered-np.mean(y_au_filtered), y_bs_filtered-np.mean(y_bs_filtered), mode="full"
    )
    #The lag refers to how far the series are offset
    lags = signal.correlation_lags(len(y_au_filtered), len(y_bs_filtered), mode="full")
    return [correlation, lags]

In [39]:
#Columns of the lag measure .csv [
#     participant_id,blendshape_scriptID,average_frame_rate_OF,average_frame_rate_ARKIT,
#     n_frames_OF,n_frame_ARKIT,max_lag,delay_ms,max_corr
# ]
#Title of the .csv : <au_name>-<bs_name>-lagMeasure.csv (AU26_r-JawOpen-lagMeasure.csv for AU26_r and JawOpen)

#Root folder of the dataset, with one sub-folder per participant.
#Set the LAG_MEASURE_DATA_DIR environment variable to run on another machine
#without editing the notebook; the default is the original location.
path = os.environ.get("LAG_MEASURE_DATA_DIR", "C:/Users/Tony/Documents/TestData")
#keep the directories only, and skip those with a "." in their name
with os.scandir(path) as entries:
    list_participants_id_path = [f.path for f in entries if f.is_dir() and "." not in f.name]

def find_corresponding_blendshape_csv(participants_id_path, filepath):
    '''
    returns the path of the blendshape csv that matches an OpenFace csv

    participants_id_path: folder of the participant; the blendshape csvs are
        looked up in its "csv_whole/individual" sub-folder
    filepath: path of the OpenFace csv; its file name is split on '!' and the
        2nd and 3rd fields (emotion intensity and emotion number) are used
        as search keys

    returns the path of the first entry of the sub-folder whose name contains
    both keys
    raises IndexError if the file name has fewer than three '!'-separated
    fields or if no entry of the sub-folder matches
    '''
    filename = os.path.basename(filepath)
    fields = filename.split('!')
    if len(fields) < 3:
        raise IndexError(
            "cannot read the emotion intensity and number from '%s': "
            "expected at least 3 fields separated by '!'" % filename
        )
    emotion_intensity, emotion_number = fields[1], fields[2]

    path_to_bs_csv = participants_id_path+"/csv_whole/individual"
    #stop at the first match; the context manager releases the directory handle right away
    with os.scandir(path_to_bs_csv) as entries:
        for f in entries:
            if emotion_intensity in f.name and emotion_number in f.name:
                return f.path
    raise IndexError(
        "no blendshape csv containing '%s' and '%s' in '%s' (looked up for '%s')"
        % (emotion_intensity, emotion_number, path_to_bs_csv, filename)
    )


def compute_correlation_and_delay(y_au, y_bs, ws_low_pass_filter, ws_savgol_filter):
    '''
    returns the strongest cross correlation between two signals and its lag

    y_au, y_bs: pandas Series holding the two signals to compare
    ws_low_pass_filter: number of samples of the rolling mean applied to both signals
    ws_savgol_filter: window size handed to cross_corr_with_savgol_filter

    returns [corr, lag]: lag is the offset, in samples, at which the absolute
    correlation is the highest and corr is the (signed) correlation at that
    very offset
    '''
    #Low pass filter: rolling mean over ws_low_pass_filter samples
    y_au_smooth = y_au.rolling(ws_low_pass_filter).mean()
    y_bs_smooth = y_bs.rolling(ws_low_pass_filter).mean()
    #the samples without a complete window are NaN; they are replaced by the mean of the
    #valid samples (Series.mean skips NaN) so that the signals keep their length
    y_au_smooth = y_au_smooth.fillna(y_au_smooth.mean())
    y_bs_smooth = y_bs_smooth.fillna(y_bs_smooth.mean())

    #--Cross correlation--
    correlation, lags = cross_corr_with_savgol_filter(y_au_smooth, y_bs_smooth, ws_savgol_filter)
    #We get the lag at the peak of the correlation, when both signals correlate the best.
    #The correlation is read at that same peak so that the two returned values always
    #describe the same offset, even when the strongest peak is a negative one.
    peak = np.argmax(abs(correlation))
    return [correlation[peak], lags[peak]]

def create_lag_measure_dataset(participants_id_path, au_name, bs_name):
    '''
    measures the lag between an OpenFace action unit and a blendshape for every
    script of every participant and writes the result to a csv

    participants_id_path: list of participant folders; the .csv files directly
        inside a folder are read as OpenFace outputs
    au_name: column of the OpenFace csv to use (divided by 5 before the comparison)
    bs_name: column of the blendshape csv to use

    One row is produced per OpenFace csv, with the columns listed in `columns`.
    max_lag is in samples and delay_ms is that lag converted to milliseconds
    with the blendshape framerate. The rows are written to
    "<au_name>-<bs_name>-lagMeasure.csv" in the current working directory.
    Nothing is returned.
    '''
    columns = [
        'participant_id', 'blendshape_scriptID','average_frame_rate_OF','average_frame_rate_ARKIT',
        'n_frames_OF','n_frame_ARKIT','max_lag','delay_ms','max_corr'
    ]
    #rows are collected in a list and turned into a DataFrame once, instead of
    #concatenating a new DataFrame at every iteration
    records = []

    for participant_path in tqdm(participants_id_path):
        #is_file must be called: the bare method object is always truthy and filters nothing
        with os.scandir(participant_path) as entries:
            list_of_openface_csvs = [
                f.path for f in entries if f.is_file() and f.name.endswith('.csv')
            ]
        participant_id = os.path.basename(participant_path)

        for openface_csv in tqdm(list_of_openface_csvs):
            blendshape_csv = find_corresponding_blendshape_csv(participant_path, openface_csv)
            au_df = pd.read_csv(openface_csv, sep=", ", engine='python')
            bs_df = pd.read_csv(blendshape_csv)
            x_au = au_df["timestamp"]
            #normalization between 0 and 1; a new Series is built so au_df is left untouched
            y_au = au_df[au_name] / 5
            y_bs = bs_df[bs_name]
            #timecodes are made relative to the first frame
            x_bs = pd.to_datetime(bs_df["Timecode"], format="%Y-%m-%d %H:%M:%S.%f")
            x_bs = x_bs - x_bs.iloc[0]

            framerate_au = compute_framerate(x_au)
            framerate_bs = compute_framerate(x_bs.dt.total_seconds())
            n_frames_au = x_au.shape[0]
            n_frames_bs = x_bs.shape[0]
            max_corr, max_lag = compute_correlation_and_delay(y_au, y_bs, 6, 4)
            #lag (frames) / framerate (frames per second) is a duration in seconds,
            #hence the factor 1000 to get the milliseconds the column name announces
            #NOTE(review): the lag is counted in samples of two signals that may not share
            #the same framerate; only the blendshape framerate is used here - confirm
            delay_ms = abs(max_lag) / framerate_bs * 1000
            scriptId = os.path.basename(blendshape_csv).rsplit("_", 1)[0]

            records.append({
                "participant_id":participant_id, "blendshape_scriptID": scriptId,
                'average_frame_rate_OF': framerate_au, 'average_frame_rate_ARKIT': framerate_bs,
                'n_frames_OF': n_frames_au, 'n_frame_ARKIT': n_frames_bs, 'max_lag': max_lag,
                'delay_ms': delay_ms, 'max_corr': max_corr
            })

    #columns= keeps the header (and its order) even when no csv was found
    dataframe_res = pd.DataFrame.from_records(records, columns=columns)
    dataframe_name = "%s-%s-lagMeasure.csv" % (au_name, bs_name)
    dataframe_res.to_csv(
        dataframe_name, index=False
    ) #Index=False removes the unnamed index column that is added by default
    

#Build the lag measure csv comparing the AU26_r action unit with the JawOpen blendshape
create_lag_measure_dataset(
    participants_id_path=list_participants_id_path, au_name="AU26_r", bs_name="JawOpen"
)

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]

  0%|          | 0/420 [00:00<?, ?it/s]