<h1>All Respondents Analysis with Grades</h1>

<h2>Read All Respondents Data</h2>

In [22]:
import os
import pandas as pd

# Specify the folder path where the CSV files are located
folder_path = "all respondents data"

# Initialize an empty dataframe to store the combined data
combined_data = pd.DataFrame()

# Loop through each file in the folder with .csv extension and append to the combined_data dataframe
for filename in os.listdir(folder_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path, low_memory=False)
        respondent_num = filename.split('_')[0]  # Get the first part of the filename before the first '_'
        df['Respondent'] = respondent_num  # Add a new column with the respondent number
        combined_data = pd.concat([combined_data, df])

columns_to_remove = ['Row','Timestamp','EventSource','SlideEvent','StimType','Duration',
                     'CollectionPhase','SourceStimuliName','EventSource','SampleNumber','EventSource.1',
                     'Blink', 'BlinkRate', 'EventSource.2', 'ET_TimeSignal', 'EventSource.3']
combined_data.drop(columns_to_remove, axis=1, inplace=True)

# Print the combined data
print(combined_data)


           Anger  Contempt   Disgust      Fear       Joy   Sadness  Surprise  \
0            NaN       NaN       NaN       NaN       NaN       NaN       NaN   
1            NaN       NaN       NaN       NaN       NaN       NaN       NaN   
2            NaN       NaN       NaN       NaN       NaN       NaN       NaN   
3            NaN       NaN       NaN       NaN       NaN       NaN       NaN   
4            NaN       NaN       NaN       NaN       NaN       NaN       NaN   
...          ...       ...       ...       ...       ...       ...       ...   
147733       NaN       NaN       NaN       NaN       NaN       NaN       NaN   
147734  0.195341  0.187637  0.027562  0.130503  0.043328  0.117269  0.057809   
147735  0.245047  0.204197  0.028027  0.133038  0.041089  0.118906  0.057576   
147736       NaN       NaN       NaN       NaN       NaN       NaN       NaN   
147737       NaN       NaN       NaN       NaN       NaN       NaN       NaN   

        Engagement  Valence  Sentimenta

In [23]:
import pandas as pd

# Read in the Grades.csv file
grades_df = pd.read_csv('Grades.csv')

# Remove the trailing underscore from the "Respondent" column in the grades_df dataframe
grades_df['Respondent'] = grades_df['Respondent'].str.rstrip('_')

# Merge the combined_data and grades_df dataframes based on the "Respondent" column
combined_data_with_grades = pd.merge(combined_data, grades_df[['Respondent', 'Grade']], on='Respondent')

# Print the resulting dataframe with the added "Grade" column
print(combined_data_with_grades)


            Anger  Contempt   Disgust      Fear       Joy   Sadness  Surprise  \
0             NaN       NaN       NaN       NaN       NaN       NaN       NaN   
1             NaN       NaN       NaN       NaN       NaN       NaN       NaN   
2             NaN       NaN       NaN       NaN       NaN       NaN       NaN   
3             NaN       NaN       NaN       NaN       NaN       NaN       NaN   
4             NaN       NaN       NaN       NaN       NaN       NaN       NaN   
...           ...       ...       ...       ...       ...       ...       ...   
1533697       NaN       NaN       NaN       NaN       NaN       NaN       NaN   
1533698  0.195341  0.187637  0.027562  0.130503  0.043328  0.117269  0.057809   
1533699  0.245047  0.204197  0.028027  0.133038  0.041089  0.118906  0.057576   
1533700       NaN       NaN       NaN       NaN       NaN       NaN       NaN   
1533701       NaN       NaN       NaN       NaN       NaN       NaN       NaN   

         Engagement  Valenc

In [24]:
print(combined_data_with_grades.columns)

Index(['Anger', 'Contempt', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise',
       'Engagement', 'Valence', 'Sentimentality', 'Confusion', 'Neutral',
       'Attention', 'Brow Furrow', 'Brow Raise', 'Cheek Raise', 'Chin Raise',
       'Dimpler', 'Eye Closure', 'Eye Widen', 'Inner Brow Raise', 'Jaw Drop',
       'Lip Corner Depressor', 'Lip Press', 'Lip Pucker', 'Lip Stretch',
       'Lip Suck', 'Lid Tighten', 'Mouth Open', 'Nose Wrinkle', 'Smile',
       'Smirk', 'Upper Lip Raise', 'Pitch', 'Yaw', 'Roll',
       'Interocular Distance', 'ET_PupilLeft', 'ET_PupilRight',
       'ET_DistanceLeft', 'ET_DistanceRight', 'ET_GazeLeftx', 'ET_GazeLefty',
       'ET_GazeRightx', 'ET_GazeRighty', 'ET_ValidityLeftEye',
       'ET_ValidityRightEye', 'ET_CameraLeftX', 'ET_CameraLeftY',
       'ET_CameraRightX', 'ET_CameraRightY', 'Gaze X', 'Gaze Y',
       'Interpolated Gaze X', 'Interpolated Gaze Y', 'Interpolated Distance',
       'Gaze Velocity', 'Gaze Acceleration', 'Fixation Index',
       'Fix

<h2>Facial Expressions Analysis</h2>

<h3>Data Pre-processing</h3>

In [25]:
import pandas as pd

facial_expressions_columns = ['Respondent', 'Grade','Anger', 'Contempt', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise',
       'Engagement', 'Valence', 'Sentimentality', 'Confusion', 'Neutral',
       'Attention', 'Brow Furrow', 'Brow Raise', 'Cheek Raise', 'Chin Raise',
       'Dimpler', 'Eye Closure', 'Eye Widen', 'Inner Brow Raise', 'Jaw Drop',
       'Lip Corner Depressor', 'Lip Press', 'Lip Pucker', 'Lip Stretch',
       'Lip Suck', 'Lid Tighten', 'Mouth Open', 'Nose Wrinkle', 'Smile',
       'Smirk', 'Upper Lip Raise', 'Pitch', 'Yaw',
       'Roll']
# Creating a copy df with only the facial expressions data
facial_expressions_data = combined_data_with_grades[facial_expressions_columns].copy()

# Remove rows with all NaN values, except in the 'Respondent' column
facial_expressions_data = facial_expressions_data.dropna(how='all', 
                                                         subset=facial_expressions_data.columns.difference(['Respondent']))

facial_expressions_data.head()



Unnamed: 0,Respondent,Grade,Anger,Contempt,Disgust,Fear,Joy,Sadness,Surprise,Engagement,...,Lip Suck,Lid Tighten,Mouth Open,Nose Wrinkle,Smile,Smirk,Upper Lip Raise,Pitch,Yaw,Roll
0,1,100.0,,,,,,,,,...,,,,,,,,,,
1,1,100.0,,,,,,,,,...,,,,,,,,,,
2,1,100.0,,,,,,,,,...,,,,,,,,,,
3,1,100.0,,,,,,,,,...,,,,,,,,,,
4,1,100.0,,,,,,,,,...,,,,,,,,,,


In [26]:

# Add a new column to the combined_data_with_grades dataframe that indicates whether the grade is above or below 55
combined_data_with_grades['Grade Group'] = combined_data_with_grades['Grade'].apply(lambda x: 'Above 55' if x >= 55 else 'Below 55')

# Describe statistics for each selected column by grade group
selected_columns = ['Anger', 'Contempt', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise',
       'Engagement', 'Valence', 'Sentimentality', 'Confusion']
grouped_stats = combined_data_with_grades.groupby(['Grade Group'])[selected_columns].describe()

# Print the resulting statistics dataframe
print(grouped_stats)

                Anger                                                         \
                count      mean       std  min       25%       50%       75%   
Grade Group                                                                    
Above 55     772163.0  1.857466  6.756095  0.0  0.139024  0.279097  1.192089   
Below 55     164754.0  1.469313  4.383644  0.0  0.141767  0.276596  1.190347   

                        Contempt            ... Sentimentality             \
                   max     count      mean  ...            75%        max   
Grade Group                                 ...                             
Above 55     97.433800  772163.0  1.751553  ...       0.386151  99.858139   
Below 55     95.615936  164754.0  2.367286  ...       0.295878  93.977684   

            Confusion                                                         \
                count      mean       std  min       25%       50%       75%   
Grade Group                                          

In [28]:
import seaborn as sns

# Reshape the dataframe for visualization using a heatmap
grouped_stats_visual = pd.melt(grouped_stats.T.reset_index(), id_vars=['level_1'], value_vars=selected_columns)
grouped_stats_visual['value'] = grouped_stats_visual['value'].astype(float)
grouped_stats_visual['variable'] = grouped_stats_visual['level_1'] + ' (' + grouped_stats_visual['Grade Group'] + ')'
grouped_stats_visual = grouped_stats_visual[['variable', 'value']]
grouped_stats_visual = grouped_stats_visual.pivot(index='variable', columns='Grade Group', values='value')

# Create a heatmap of the statistics using seaborn
sns.heatmap(grouped_stats_visual, cmap='coolwarm', annot=True, fmt='.2f', center=0)


KeyError: "The following 'value_vars' are not present in the DataFrame: ['Anger', 'Confusion', 'Contempt', 'Disgust', 'Engagement', 'Fear', 'Joy', 'Sadness', 'Sentimentality', 'Surprise', 'Valence']"