<h1>All Respondents Analysis with Grades</h1>

<h2>Read All Respondents Data</h2>

In [22]:
import os
import pandas as pd

# Folder that holds one CSV export per respondent
folder_path = "all respondents data"

# Columns that are dropped from the combined frame before analysis
# (the original list named 'EventSource' twice; once is enough)
columns_to_remove = ['Row','Timestamp','EventSource','SlideEvent','StimType','Duration',
                     'CollectionPhase','SourceStimuliName','SampleNumber','EventSource.1',
                     'Blink', 'BlinkRate', 'EventSource.2', 'ET_TimeSignal', 'EventSource.3']

# Read every .csv file in the folder into its own dataframe. The frames are collected in a
# list and concatenated once at the end: calling pd.concat inside the loop re-copies all
# previously loaded rows on every iteration.
# sorted() makes the row order reproducible; os.listdir returns names in arbitrary order.
respondent_frames = []
for filename in sorted(os.listdir(folder_path)):
    if filename.endswith(".csv"):
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path, low_memory=False)
        respondent_num = filename.split('_')[0]  # Part of the file name before the first '_'
        df['Respondent'] = respondent_num  # Tag every row with the respondent it came from
        respondent_frames.append(df)

# Fail with a clear message instead of an obscure error further down
if not respondent_frames:
    raise FileNotFoundError("No .csv files found in {!r}".format(folder_path))

# Each file keeps its own row index, so index labels repeat across respondents
combined_data = pd.concat(respondent_frames).drop(columns=columns_to_remove)

# Print the combined data
print(combined_data)


           Anger  Contempt   Disgust      Fear       Joy   Sadness  Surprise  \
0            NaN       NaN       NaN       NaN       NaN       NaN       NaN   
1            NaN       NaN       NaN       NaN       NaN       NaN       NaN   
2            NaN       NaN       NaN       NaN       NaN       NaN       NaN   
3            NaN       NaN       NaN       NaN       NaN       NaN       NaN   
4            NaN       NaN       NaN       NaN       NaN       NaN       NaN   
...          ...       ...       ...       ...       ...       ...       ...   
147733       NaN       NaN       NaN       NaN       NaN       NaN       NaN   
147734  0.195341  0.187637  0.027562  0.130503  0.043328  0.117269  0.057809   
147735  0.245047  0.204197  0.028027  0.133038  0.041089  0.118906  0.057576   
147736       NaN       NaN       NaN       NaN       NaN       NaN       NaN   
147737       NaN       NaN       NaN       NaN       NaN       NaN       NaN   

        Engagement  Valence  Sentimenta

In [23]:
import pandas as pd

# Load the grades table
grades_df = pd.read_csv('Grades.csv')

# Strip any trailing underscore from the respondent ids so they can match the ids
# that were taken from the CSV file names
grades_df['Respondent'] = grades_df['Respondent'].str.rstrip('_')

# Attach each respondent's grade to their rows.
# The default merge is an inner join: rows whose respondent has no entry in
# Grades.csv are not carried over.
grade_lookup = grades_df[['Respondent', 'Grade']]
combined_data_with_grades = combined_data.merge(grade_lookup, on='Respondent')

# Show the merged dataframe, which now carries the "Grade" column
print(combined_data_with_grades)


            Anger  Contempt   Disgust      Fear       Joy   Sadness  Surprise  \
0             NaN       NaN       NaN       NaN       NaN       NaN       NaN   
1             NaN       NaN       NaN       NaN       NaN       NaN       NaN   
2             NaN       NaN       NaN       NaN       NaN       NaN       NaN   
3             NaN       NaN       NaN       NaN       NaN       NaN       NaN   
4             NaN       NaN       NaN       NaN       NaN       NaN       NaN   
...           ...       ...       ...       ...       ...       ...       ...   
1533697       NaN       NaN       NaN       NaN       NaN       NaN       NaN   
1533698  0.195341  0.187637  0.027562  0.130503  0.043328  0.117269  0.057809   
1533699  0.245047  0.204197  0.028027  0.133038  0.041089  0.118906  0.057576   
1533700       NaN       NaN       NaN       NaN       NaN       NaN       NaN   
1533701       NaN       NaN       NaN       NaN       NaN       NaN       NaN   

         Engagement  Valenc

In [24]:
# List every column available after the merge, to pick the ones used in the analysis below
print(combined_data_with_grades.columns)

Index(['Anger', 'Contempt', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise',
       'Engagement', 'Valence', 'Sentimentality', 'Confusion', 'Neutral',
       'Attention', 'Brow Furrow', 'Brow Raise', 'Cheek Raise', 'Chin Raise',
       'Dimpler', 'Eye Closure', 'Eye Widen', 'Inner Brow Raise', 'Jaw Drop',
       'Lip Corner Depressor', 'Lip Press', 'Lip Pucker', 'Lip Stretch',
       'Lip Suck', 'Lid Tighten', 'Mouth Open', 'Nose Wrinkle', 'Smile',
       'Smirk', 'Upper Lip Raise', 'Pitch', 'Yaw', 'Roll',
       'Interocular Distance', 'ET_PupilLeft', 'ET_PupilRight',
       'ET_DistanceLeft', 'ET_DistanceRight', 'ET_GazeLeftx', 'ET_GazeLefty',
       'ET_GazeRightx', 'ET_GazeRighty', 'ET_ValidityLeftEye',
       'ET_ValidityRightEye', 'ET_CameraLeftX', 'ET_CameraLeftY',
       'ET_CameraRightX', 'ET_CameraRightY', 'Gaze X', 'Gaze Y',
       'Interpolated Gaze X', 'Interpolated Gaze Y', 'Interpolated Distance',
       'Gaze Velocity', 'Gaze Acceleration', 'Fixation Index',
       'Fix

<h2>Facial Expressions Analysis</h2>

<h3>Data Pre-processing</h3>

In [43]:
import pandas as pd

# Columns kept for the facial-expressions analysis: the two identifier columns
# ('Respondent', 'Grade') followed by the measurement columns
facial_expressions_columns = [
    'Respondent', 'Grade',
    'Anger', 'Contempt', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise',
    'Engagement', 'Valence', 'Sentimentality', 'Confusion', 'Neutral',
    'Attention', 'Brow Furrow', 'Brow Raise', 'Cheek Raise', 'Chin Raise',
    'Dimpler', 'Eye Closure', 'Eye Widen', 'Inner Brow Raise', 'Jaw Drop',
    'Lip Corner Depressor', 'Lip Press', 'Lip Pucker', 'Lip Stretch',
    'Lip Suck', 'Lid Tighten', 'Mouth Open', 'Nose Wrinkle', 'Smile',
    'Smirk', 'Upper Lip Raise', 'Pitch', 'Yaw', 'Roll',
]

# Work on an independent copy that holds only those columns
facial_expressions_data = combined_data_with_grades.loc[:, facial_expressions_columns].copy()

# Drop rows in which every measurement is NaN. 'Respondent' and 'Grade' are left out of
# the check because they are filled in on every row and would otherwise keep empty rows alive.
facial_expressions_data = facial_expressions_data.dropna(
    subset=[name for name in facial_expressions_columns if name not in ('Respondent', 'Grade')],
    how='all',
)

facial_expressions_data.head()



Unnamed: 0,Respondent,Grade,Anger,Contempt,Disgust,Fear,Joy,Sadness,Surprise,Engagement,...,Lip Suck,Lid Tighten,Mouth Open,Nose Wrinkle,Smile,Smirk,Upper Lip Raise,Pitch,Yaw,Roll
5,1,100.0,0.125566,0.186393,0.023706,0.125796,0.025453,0.125512,0.055428,0.328766,...,0.000806,0.002685,0.027488,0.008689,0.443331,0.0,0.000345,6.757946,-1.150817,-0.977193
7,1,100.0,0.125833,0.186909,0.023738,0.126169,0.025312,0.125694,0.05551,0.328766,...,0.002314,0.002068,0.027396,0.013201,0.406091,0.0,0.000376,3.458502,-0.160348,0.26783
9,1,100.0,0.126258,0.187818,0.023798,0.126605,0.025114,0.12597,0.055607,0.328766,...,0.003132,0.002222,0.030377,0.019631,0.341483,0.0,0.000493,3.753664,-0.737176,0.388438
11,1,100.0,0.12629,0.188413,0.023834,0.126583,0.024996,0.126124,0.055529,0.328766,...,0.003382,0.002643,0.029414,0.022291,0.300317,0.0,0.000636,4.180299,-0.449494,0.456045
13,1,100.0,0.126344,0.188792,0.023866,0.126641,0.024909,0.126202,0.055519,0.328766,...,0.003895,0.002665,0.025747,0.033456,0.273545,0.0,0.000934,4.626769,-0.389839,0.613328


<h2>Compare Facial Expressions by Grade Group</h2>

In [97]:

from tabulate import tabulate

# Add a 'Grade Group' column to facial_expressions_data that says whether the row's grade
# reaches the 55-point threshold. 'Above 55' includes a grade of exactly 55.
# Rows with a missing grade are left unlabeled (NaN) rather than being counted as
# 'Below 55': a plain `x >= 55` comparison is False for NaN and would misfile them.
grades = facial_expressions_data['Grade']
facial_expressions_data['Grade Group'] = (
    grades.ge(55)
    .map({True: 'Above 55', False: 'Below 55'})
    .where(grades.notna())
)

# Columns that are compared between the two grade groups
selected_columns = ['Anger', 'Contempt', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise',
       'Engagement', 'Valence', 'Sentimentality', 'Confusion']


<h2>Descriptive Statistics Table for Each Emotion</h2>

In [105]:
from pathlib import Path

from scipy.stats import ttest_ind
from selenium import webdriver


# Define a function to add a star to indicate statistical significance
def add_star(pval):
    """Return '*' when ``pval`` is below 0.05, otherwise an empty string."""
    return '*' if pval < 0.05 else ''


# create a new Firefox browser window (used to screenshot the saved HTML tables)
driver = webdriver.Firefox()

# try/finally guarantees the browser is closed even if one of the iterations fails
try:
    # Loop through the emotions and perform a t-test between the grade groups
    for col in selected_columns:
        # Create a DataFrame for the current emotion only
        emotion_data = facial_expressions_data.loc[:, ['Grade Group', col]]

        # Missing values are dropped first; ttest_ind would otherwise return NaN
        above_55 = emotion_data.loc[emotion_data['Grade Group'] == 'Above 55', col].dropna()
        below_55 = emotion_data.loc[emotion_data['Grade Group'] == 'Below 55', col].dropna()

        # equal_var=False selects Welch's t-test (no equal-variance assumption).
        # NOTE(review): every row is treated as an independent sample; if the rows are
        # repeated measurements of the same respondents, consider aggregating per
        # respondent before testing -- TODO confirm.
        t_stat, pval = ttest_ind(above_55, below_55, equal_var=False)
        pval = float(pval)
        pval_star = add_star(pval)

        # Calculate the statistics for the current emotion.
        # The *_diff columns hold (second group - first group) on the second row, 0 on the first.
        emotion_stats = emotion_data.groupby('Grade Group')[col].describe()
        emotion_stats['mean_diff'] = emotion_stats['mean'].diff().fillna(0)
        emotion_stats['median_diff'] = emotion_stats['50%'].diff().fillna(0)

        # Computed once per table instead of once per rendered cell
        highest_mean = emotion_stats['mean'].max()

        # The p-value is reported in the caption so the test result is visible in the
        # table and the screenshot without adding an otherwise empty row.
        caption = '{} (Welch t-test p = {:.3g}{})'.format(col, pval, pval_star)

        # Create a formatted table for the current emotion; cells equal to the larger
        # group mean get a grey background
        emotion_stats_formatted = (
            emotion_stats.style
            .set_table_styles([{'selector': 'th', 'props': [('text-align', 'center')]}])
            .set_caption(caption)
            .format('{:.2f}')
            .set_properties(**{'width': '50px', 'text-align': 'center'})
            .set_table_attributes('style="border-collapse: separate; border-spacing: 20px;"')
            .applymap(lambda x, top=highest_mean: 'background-color: #f2f2f2' if x == top else '')
        )

        # Display the table for the current emotion
        display(emotion_stats_formatted)

        # Save the table as an HTML file and take a screenshot.
        # The browser is pointed at the file that was just written (resolved against the
        # current working directory) rather than at a hard-coded machine-specific folder.
        filename = col + '_grades_stats.html'
        emotion_stats_formatted.to_html(filename)
        driver.get(Path(filename).resolve().as_uri())
        driver.save_screenshot(filename + "_screenshot.png")
finally:
    # close the browser window
    driver.quit()


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,1.86,6.76,0.0,0.14,0.28,1.19,97.43,0.0,0.0,
Below 55,164754.0,1.47,4.38,0.0,0.14,0.28,1.19,95.62,-0.39,-0.0,
,,,,,,,,,,,0.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,1.75,10.14,0.0,0.19,0.19,0.22,99.81,0.0,0.0,
Below 55,164754.0,2.37,11.97,0.0,0.19,0.21,0.32,99.89,0.62,0.01,
,,,,,,,,,,,0.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,0.54,3.84,0.0,0.02,0.04,0.43,99.33,0.0,0.0,
Below 55,164754.0,0.66,4.8,0.0,0.02,0.04,0.37,98.45,0.12,0.0,
,,,,,,,,,,,0.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,1.52,6.38,0.0,0.14,0.24,0.92,99.73,0.0,0.0,
Below 55,164754.0,1.01,5.12,0.0,0.13,0.19,0.85,98.17,-0.51,-0.05,
,,,,,,,,,,,0.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,1.78,12.31,0.0,0.02,0.02,0.03,99.98,0.0,0.0,
Below 55,164754.0,1.14,9.8,0.0,0.02,0.02,0.02,99.98,-0.64,-0.0,
,,,,,,,,,,,0.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,0.84,4.33,0.0,0.13,0.13,0.14,98.61,0.0,0.0,
Below 55,164754.0,2.19,7.68,0.0,0.13,0.14,0.32,99.03,1.34,0.01,
,,,,,,,,,,,0.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,1.61,8.29,0.0,0.06,0.07,0.23,99.99,0.0,0.0,
Below 55,164754.0,0.88,5.87,0.0,0.06,0.06,0.13,99.97,-0.73,-0.01,
,,,,,,,,,,,0.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,26.23,33.64,0.0,0.33,1.62,60.58,99.98,0.0,0.0,
Below 55,164754.0,26.86,33.57,0.0,0.33,2.24,60.28,99.98,0.63,0.62,
,,,,,,,,,,,0.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,-0.42,16.19,-98.87,0.0,0.0,0.0,99.97,0.0,0.0,
Below 55,164754.0,-5.11,17.36,-99.75,0.0,0.0,0.0,99.86,-4.69,0.0,
,,,,,,,,,,,0.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,1.13,5.16,0.0,0.01,0.08,0.39,99.86,0.0,0.0,
Below 55,164754.0,1.11,5.76,0.0,0.02,0.08,0.3,93.98,-0.02,0.0,
,,,,,,,,,,,0.23


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,mean_diff,median_diff,p-value
Grade Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Above 55,772163.0,1.59,6.95,0.0,0.03,0.12,0.53,99.23,0.0,0.0,
Below 55,164754.0,2.21,6.63,0.0,0.13,0.41,1.32,94.99,0.62,0.28,
,,,,,,,,,,,0.0


In [90]:
# Debugging check: `emotion_data` is the variable left over from the last iteration of the
# per-emotion loop, so this cell only works after that loop has run.
print(emotion_data.columns)

Index(['Grade Group', 'Confusion'], dtype='object')
