In [1]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Initialize an empty DataFrame to store processed data
processed_data = pd.DataFrame(columns=["InterviewID", "Smile Intensity", "Head Nod", "Head Shake"])

# Initialize a StandardScaler
scaler = StandardScaler()

def get_df(directory_path):
    # Iterate through all files in the directory
    for filename in os.listdir(directory_path):
        if os.path.isfile(os.path.join(directory_path, filename)):
            # Load the file into a DataFrame
            file_path = os.path.join(directory_path, filename)
            header_columns = ["Smile Intensity", "Head Nod", "Head Shake", "Unknown1", "Unknown2"]
            df = pd.read_csv(file_path, delimiter=' ', header=None)
            df.columns = header_columns
            df.drop(["Unknown1", "Unknown2"], axis=1, inplace=True)
            smile_intensity_mean = df["Smile Intensity"].mean()
            head_nod_mean = df["Head Nod"].mean()
            head_shake_mean = df["Head Shake"].mean()
            interview_id = filename.split("-")[-1][:-4]
            if "post" in directory_path:
                interview_id = "P" + interview_id
            new_row = pd.DataFrame([{"InterviewID": interview_id, "Smile Intensity": smile_intensity_mean, "Head Nod": head_nod_mean, "Head Shake": head_shake_mean}])
            global processed_data
            processed_data = pd.concat([processed_data, new_row], ignore_index=True)


# Specify the directory path
directory_path_pre = "data/SmileData/pre"
directory_path_post = "data/SmileData/post"
get_df(directory_path_pre)
get_df(directory_path_post)

# Scale the data
processed_data[["Smile Intensity", "Head Nod", "Head Shake"]] = scaler.fit_transform(processed_data[["Smile Intensity", "Head Nod", "Head Shake"]])
processed_data

Unnamed: 0,InterviewID,Smile Intensity,Head Nod,Head Shake
0,P56,0.495387,-0.061620,0.206531
1,P89,0.243248,-0.901884,1.377121
2,P77,-1.292251,-1.034380,-0.296351
3,P67,-0.772395,0.208545,-0.154197
4,P7,-1.143194,-0.681835,-1.034843
...,...,...,...,...
133,PP49,-0.447756,0.820183,0.232010
134,PP79,1.318404,4.508461,0.004228
135,PP25,-0.896228,-0.143658,-0.264959
136,PP37,0.756104,0.281020,-1.007829


In [2]:
output_csv_path = "pp_data/smile_data.csv"
processed_data.to_csv(output_csv_path, index=False)
print(f"Processed data saved to {output_csv_path}")
 

Processed data saved to pp_data/smile_data.csv
