In [40]:
import pandas as pd
import altair as alt
from scipy import stats
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# Descriptives forced-choice sentiment scores

In [2]:
# Load the forced-choice survey responses from the long-format CSV.
forced_choice_csv = '../clean_data/forced_choice_emotion_uw_students_long_format_lmer.csv'
forced_choice_long_df = pd.read_csv(forced_choice_csv)

In [4]:
# Preview the first five rows to sanity-check the loaded columns.
forced_choice_long_df.head(n=5)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1,Neutral,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,-0.5,0.5,-0.5
1,0,Q163.1,Happiness,white,male,child,happiness,https://uwmadison.co1.qualtrics.com/ControlPan...,0.5574,0.5,-0.5,0.5
2,0,Q77.1,Disgust,bipoc,male,adult,disgust,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5994,0.5,0.5,-0.5
3,0,Q78.1,Disgust,bipoc,male,child,disgust,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5994,0.5,-0.5,-0.5
4,0,Q162.1,Happiness,white,male,adult,happiness,https://uwmadison.co1.qualtrics.com/ControlPan...,0.5574,0.5,0.5,0.5


In [69]:
# Average the sentiment score per photo. The demographic columns are part of
# the grouping keys so that they are carried through to the result.
photo_keys = ["photoId", "ethnicity", "sex", "age"]
forced_grouped_score = (
    forced_choice_long_df
    .groupby(photo_keys)['sentimentScore']
    .mean()
    .reset_index()
)

## Sentiment score histogram overall

In [71]:
source = forced_grouped_score

# Histogram of the per-photo mean sentiment scores (at most 30 bins).
score_axis = alt.X('sentimentScore:Q', bin=alt.Bin(maxbins=30), title="mean sentiment distribution")
histogram = alt.Chart(source).mark_bar().encode(x=score_axis, y='count()')

# Red vertical rule at the mean of the plotted scores.
rule = alt.Chart(source).mark_rule(color='red').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'))

# Overlay the rule on the histogram and render the result as SVG.
hist_forced_all_com = alt.layer(histogram, rule)
hist_forced_all_com.display(renderer='svg')

## Sentiment score by sex

In [72]:
source = forced_grouped_score

# Semi-transparent step-area histograms coloured by sex; stack=None is passed
# so the areas overlap instead of being stacked.
histogram = alt.Chart(source).mark_area(opacity=0.5, interpolate='step').encode(
    x=alt.X('sentimentScore:Q', bin=alt.Bin(maxbins=30), title='mean sentiment distribution'),
    y=alt.Y('count()', stack=None),
    color=alt.Color('sex'))

# Mean rule, also coloured by sex.
# NOTE(review): the colour encoding presumably takes precedence over the
# mark-level color='red' - confirm against the rendered chart.
rule = alt.Chart(source).mark_rule(color='red').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'),
    color=alt.Color('sex'))

hist_forced_sex_com = alt.layer(histogram, rule)
hist_forced_sex_com

## Sentiment score by ethnicity

In [73]:
source = forced_grouped_score

# Semi-transparent step-area histograms coloured by ethnicity; stack=None is
# passed so the areas overlap instead of being stacked.
histogram = alt.Chart(source).mark_area(opacity=0.5, interpolate='step').encode(
    x=alt.X('sentimentScore:Q', bin=alt.Bin(maxbins=30), title='mean sentiment distribution'),
    y=alt.Y('count()', stack=None),
    color=alt.Color('ethnicity'))

# Mean rule, also coloured by ethnicity.
# NOTE(review): the colour encoding presumably takes precedence over the
# mark-level color='red' - confirm against the rendered chart.
rule = alt.Chart(source).mark_rule(color='red').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'),
    color=alt.Color('ethnicity'))

hist_forced_ethnicity_com = alt.layer(histogram, rule)
hist_forced_ethnicity_com

## Sentiment score by age

In [75]:
source = forced_grouped_score

# Semi-transparent step-area histograms coloured by age group; stack=None is
# passed so the areas overlap instead of being stacked.
histogram = alt.Chart(source).mark_area(opacity=0.5, interpolate='step').encode(
    x=alt.X('sentimentScore:Q', bin=alt.Bin(maxbins=30), title='mean sentiment distribution'),
    y=alt.Y('count()', stack=None),
    color=alt.Color('age'))

# Mean rule, also coloured by age group.
# NOTE(review): the colour encoding presumably takes precedence over the
# mark-level color='red' - confirm against the rendered chart.
rule = alt.Chart(source).mark_rule(color='red').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'),
    color=alt.Color('age'))

hist_forced_age_com = alt.layer(histogram, rule)
hist_forced_age_com

## Paired T-test

In [77]:
# Per-photo mean sentiment scores split by each demographic factor.
# The names v1..v6 are read by the t-test cell that follows.

# sex: female vs. male
v1 = forced_grouped_score.loc[forced_grouped_score['sex'] == 'female', 'sentimentScore']
v2 = forced_grouped_score.loc[forced_grouped_score['sex'] == 'male', 'sentimentScore']

# ethnicity: bipoc vs. white
v3 = forced_grouped_score.loc[forced_grouped_score['ethnicity'] == 'bipoc', 'sentimentScore']
v4 = forced_grouped_score.loc[forced_grouped_score['ethnicity'] == 'white', 'sentimentScore']

# age: adult vs. child
v5 = forced_grouped_score.loc[forced_grouped_score['age'] == 'adult', 'sentimentScore']
v6 = forced_grouped_score.loc[forced_grouped_score['age'] == 'child', 'sentimentScore']

In [78]:
# Run each paired t-test once and reuse its statistic and p-value in the
# report line (previously every test was computed twice per line).
# NOTE(review): ttest_rel pairs observations by position; confirm the two
# samples in each comparison really are matched photo-by-photo, otherwise an
# independent-samples test would be the appropriate choice.
comparisons = [('sex', v1, v2), ('ethnicity', v3, v4), ('age', v5, v6)]
for factor, first_sample, second_sample in comparisons:
    t_value, p_value = stats.ttest_rel(first_sample, second_sample)
    print(f'Paired t-test by {factor}: t-value = {t_value}, p-value = {p_value}')

Paired t-test by sex: t-value = 0.5113825345910727, p-value = 0.6102697719386263
Paired t-test by ethnicity: t-value = -0.9032782803104746, p-value = 0.36866295221810297
Paired t-test by age: t-value = -0.9408549831378936, p-value = 0.34916545074344363


# Descriptives free-choice sentiment scores

In [16]:
# Load the free-labeling survey responses from the long-format CSV.
free_choice_csv = '../clean_data/free_labeling_emotion_uw_students_long_format_lmer.csv'
free_choice_long_df = pd.read_csv(free_choice_csv)

In [138]:
# Keep only the part of photoId before the first '.' so that rows can be
# grouped on that prefix.
photo_id_parts = free_choice_long_df['photoId'].str.split('.')
free_choice_long_df['photoIdSplit'] = photo_id_parts.str.get(0)

In [139]:
# Preview the first five rows, including the derived photoIdSplit column.
free_choice_long_df.head(n=5)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC,photoIdSplit
0,0,Q2.1_1,angry,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5106,-0.5,0.5,-0.5,Q2
1,0,Q102.1_1,bored,bipoc,male,child,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.2732,0.5,-0.5,-0.5,Q102
2,0,Q103.1_1,bored,white,female,adult,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.2732,-0.5,0.5,0.5,Q103
3,0,Q194.1_1,funny,white,male,adult,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,0.4404,0.5,0.5,0.5,Q194
4,0,Q104.1_1,scared,white,female,child,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.4404,-0.5,-0.5,0.5,Q104


In [140]:
# Average the sentiment score per photo prefix. The demographic columns are
# part of the grouping keys so that they are carried through to the result.
photo_keys = ["photoIdSplit", "ethnicity", "sex", "age"]
free_grouped_score = (
    free_choice_long_df
    .groupby(photo_keys)['sentimentScore']
    .mean()
    .reset_index()
)

In [131]:
# NOTE(review): disabled check, kept for reference only. It appears to compare
# the photo ids in free_grouped_score with the answer columns of the raw
# survey export. free_grouped_score is grouped on 'photoIdSplit', so the
# 'photoId' lookup below would need updating before this can be re-enabled.
# df_free = pd.read_csv('../clean_data/free_choice_emotion_uw_students.csv')
# df_emo_answers_free = df_free.loc[:, 'Q2.1_1':'Q195.1_4']
# s1 = free_grouped_score['photoId'].tolist()
# s2 = pd.Series(data=df_emo_answers_free.columns).tolist()

# set1 = set(s1)
# set2 = set(s2)

# list(sorted(set2 - set1))

## Sentiment score overall

In [145]:
source = free_grouped_score

# Histogram of the per-photo mean sentiment scores (at most 30 bins).
score_axis = alt.X('sentimentScore:Q', bin=alt.Bin(maxbins=30), title="mean sentiment distribution")
histogram = alt.Chart(source).mark_bar().encode(x=score_axis, y='count()')

# Red vertical rule at the mean of the plotted scores.
rule = alt.Chart(source).mark_rule(color='red').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'))

# Overlay the rule on the histogram and render the result as SVG.
hist_free_all_com = alt.layer(histogram, rule)
hist_free_all_com.display(renderer='svg')

## Sentiment by sex

In [146]:
source = free_grouped_score

# Semi-transparent step-area histograms coloured by sex; stack=None is passed
# so the areas overlap instead of being stacked.
histogram = alt.Chart(source).mark_area(opacity=0.5, interpolate='step').encode(
    x=alt.X('sentimentScore:Q', bin=alt.Bin(maxbins=30), title='mean sentiment distribution'),
    y=alt.Y('count()', stack=None),
    color=alt.Color('sex'))

# Mean rule, also coloured by sex.
# NOTE(review): the colour encoding presumably takes precedence over the
# mark-level color='red' - confirm against the rendered chart.
rule = alt.Chart(source).mark_rule(color='red').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'),
    color=alt.Color('sex'))

hist_free_sex_com = alt.layer(histogram, rule)
hist_free_sex_com

## Sentiment by ethnicity

In [147]:
source = free_grouped_score

# Semi-transparent step-area histograms coloured by ethnicity; stack=None is
# passed so the areas overlap instead of being stacked.
histogram = alt.Chart(source).mark_area(opacity=0.5, interpolate='step').encode(
    x=alt.X('sentimentScore:Q', bin=alt.Bin(maxbins=30), title='mean sentiment distribution'),
    y=alt.Y('count()', stack=None),
    color=alt.Color('ethnicity'))

# Mean rule, also coloured by ethnicity.
# NOTE(review): the colour encoding presumably takes precedence over the
# mark-level color='red' - confirm against the rendered chart.
rule = alt.Chart(source).mark_rule(color='red').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'),
    color=alt.Color('ethnicity'))

hist_free_ethnicity_com = alt.layer(histogram, rule)
hist_free_ethnicity_com

## Sentiment by age

In [148]:
source = free_grouped_score

# Semi-transparent step-area histograms coloured by age group; stack=None is
# passed so the areas overlap instead of being stacked.
histogram = alt.Chart(source).mark_area(opacity=0.5, interpolate='step').encode(
    x=alt.X('sentimentScore:Q', bin=alt.Bin(maxbins=30), title='mean sentiment distribution'),
    y=alt.Y('count()', stack=None),
    color=alt.Color('age'))

# Mean rule, also coloured by age group.
# NOTE(review): the colour encoding presumably takes precedence over the
# mark-level color='red' - confirm against the rendered chart.
rule = alt.Chart(source).mark_rule(color='red').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'),
    color=alt.Color('age'))

hist_free_age_com = alt.layer(histogram, rule)
hist_free_age_com

## Paired t-test 

In [149]:
# Per-photo mean sentiment scores split by each demographic factor.
# The names v1..v6 are read by the t-test cell that follows.

# sex: female vs. male
v1 = free_grouped_score.loc[free_grouped_score['sex'] == 'female', 'sentimentScore']
v2 = free_grouped_score.loc[free_grouped_score['sex'] == 'male', 'sentimentScore']

# ethnicity: bipoc vs. white
v3 = free_grouped_score.loc[free_grouped_score['ethnicity'] == 'bipoc', 'sentimentScore']
v4 = free_grouped_score.loc[free_grouped_score['ethnicity'] == 'white', 'sentimentScore']

# age: adult vs. child
v5 = free_grouped_score.loc[free_grouped_score['age'] == 'adult', 'sentimentScore']
v6 = free_grouped_score.loc[free_grouped_score['age'] == 'child', 'sentimentScore']

In [150]:
# Run each paired t-test once and reuse its statistic and p-value in the
# report line (previously every test was computed twice per line).
# NOTE(review): ttest_rel pairs observations by position; confirm the two
# samples in each comparison really are matched photo-by-photo, otherwise an
# independent-samples test would be the appropriate choice.
comparisons = [('sex', v1, v2), ('ethnicity', v3, v4), ('age', v5, v6)]
for factor, first_sample, second_sample in comparisons:
    t_value, p_value = stats.ttest_rel(first_sample, second_sample)
    print(f'Paired t-test by {factor}: t-value = {t_value}, p-value = {p_value}')

Paired t-test by sex: t-value = 1.1520639657014673, p-value = 0.2521857388920617
Paired t-test by ethnicity: t-value = -0.3258279255847241, p-value = 0.7452711589226413
Paired t-test by age: t-value = -1.2678281579680675, p-value = 0.2079578982653465


# Mean and error bar showing confidence interval

## Forced choice survey

In [251]:
# Build one label per row by joining ethnicity, sex and age with underscores.
forced_grouped_score['groups'] = forced_grouped_score['ethnicity'].str.cat(
    [forced_grouped_score['sex'], forced_grouped_score['age']], sep='_')

In [252]:
source = forced_grouped_score

# Both data layers colour by an "ethnicity-sex" label computed inside the
# chart, so the calculate transform is declared once and shared.
labelled = alt.Chart(source).transform_calculate(
    categories="datum.ethnicity + '-' + datum.sex")

# extent='ci' draws the confidence interval of the mean, so the axis title
# now says so (it previously claimed "std").
error_bars = labelled.mark_errorbar(extent='ci').encode(
    x=alt.X('sentimentScore:Q', scale=alt.Scale(zero=False), title='sentiment score - mean and 95% CI'),
    y=alt.Y('groups:N'),
    color=alt.Color('categories:N'))

# Mean sentiment score of each group.
points = labelled.mark_point(filled=True, color='black').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'),
    y=alt.Y('groups:N'),
    color=alt.Color('categories:N'))

# Red rule at the mean over all rows, as a reference line.
rule = alt.Chart(source).mark_rule(color='red').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'))

In [253]:
# Layer the three charts in drawing order: error bars, points, then the rule.
forced_error_bar_com = alt.layer(error_bars, points, rule)

forced_error_bar_com

## Free choice survey

In [248]:
# Build one label per row by joining ethnicity, sex and age with underscores.
free_grouped_score['groups'] = free_grouped_score['ethnicity'].str.cat(
    [free_grouped_score['sex'], free_grouped_score['age']], sep='_')

In [249]:
source = free_grouped_score

# Both data layers colour by an "ethnicity-sex" label computed inside the
# chart, so the calculate transform is declared once and shared.
labelled = alt.Chart(source).transform_calculate(
    categories="datum.ethnicity + '-' + datum.sex")

# extent='ci' draws the confidence interval of the mean, so the axis title
# now says so (it previously claimed "std").
error_bars = labelled.mark_errorbar(extent='ci').encode(
    x=alt.X('sentimentScore:Q', scale=alt.Scale(zero=False), title='sentiment score - mean and 95% CI'),
    y=alt.Y('groups:N'),
    color=alt.Color('categories:N'))

# Mean sentiment score of each group.
points = labelled.mark_point(filled=True, color='black').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'),
    y=alt.Y('groups:N'),
    color=alt.Color('categories:N'))

# Red rule at the mean over all rows, as a reference line.
rule = alt.Chart(source).mark_rule(color='red').encode(
    x=alt.X('sentimentScore:Q', aggregate='mean'))

In [250]:
# Layer the three charts in drawing order: error bars, points, then the rule.
free_error_bar_com = alt.layer(error_bars, points, rule)

free_error_bar_com