In [68]:
import pandas as pd

# Forced-choice 

In [69]:
# Load the cleaned forced-choice responses. Each row is later treated as one
# participant: its row position becomes `participantId` further down.
df = pd.read_csv('../clean_data/forced_choice_emotion_uw_students.csv')
# Load the per-photo label table; its `photoId` column is the join key used below.
df_labels = pd.read_csv('../data/emotion_labels.csv')

In [70]:
# Keep only the per-photo answer columns. Label-based `.loc` slicing includes
# both endpoints, so every column from 'Q2.1' through 'Q195.1' is selected.
df_emo_answers_wide = df.loc[:, 'Q2.1':'Q195.1']

## Long format DF

In [71]:
# Attach a 0-based participant identifier (the row position) as the first
# column, ahead of the per-photo answer columns.
df_emo_answers_long = (
    df_emo_answers_wide
    .reset_index(drop=True)         # start from a fresh 0..n-1 index
    .rename_axis('participantId')   # name that index ...
    .reset_index()                  # ... and materialise it as a column
)
df_emo_answers_long.head(2)

Unnamed: 0,participantId,Q2.1,Q3.1,Q4.1,Q5.1,Q6.1,Q7.1,Q8.1,Q9.1,Q10.1,...,Q186.1,Q187.1,Q188.1,Q189.1,Q190.1,Q191.1,Q192.1,Q193.1,Q194.1,Q195.1
0,0,Neutral,Anger,Anger,Anger,Anger,Anger,Anger,Happiness,Disgust,...,Surprise,Neutral,Happiness,Happiness,Other,Other,Uncertain,Uncertain,Other,Neutral
1,1,Anger,Anger,Anger,Sadness,Anger,Anger,Fear,Anger,Other,...,Surprise,Surprise,Happiness,Happiness,Disgust,Uncertain,Other,Anger,Uncertain,Uncertain


In [72]:
# Normalise the join key by removing leading/trailing whitespace from the ids ...
df_labels['photoId'] = df_labels['photoId'].str.strip()
# ... then collect them as a plain list to use as melt `value_vars`.
photoIds = df_labels['photoId'].to_list()

In [73]:
# Reshape wide -> long: one row per (participant, photo) pair, with each
# participant's rows grouped together.
# NOTE: the name is rebound to the melted frame, so this cell cannot be re-run
# on its own (the photo columns no longer exist after the first run).
df_emo_answers_long = (
    df_emo_answers_long
    .melt(id_vars=['participantId'], value_vars=photoIds)
    .reset_index(drop=True)
    .sort_values(by=['participantId'])
)

In [74]:
# Give melt's generic output columns meaningful names.
df_emo_answers_long = df_emo_answers_long.rename(
    columns={'variable': 'photoId', 'value': 'emotion'},
)

In [75]:
# Attach the photo metadata to every answer row. `validate` makes pandas raise
# if a photoId occurs more than once in the label table.
df_emo_answers_long_m = df_emo_answers_long.merge(
    df_labels,
    how="left",
    on="photoId",
    validate="many_to_one",
)

In [76]:
# Spot-check the merged frame: answers should now carry the label-table columns.
df_emo_answers_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url
0,0,Q2.1,Neutral,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...
1,0,Q163.1,Happiness,white,male,child,happiness,https://uwmadison.co1.qualtrics.com/ControlPan...


## Sentiment score

In [77]:
from nltk.sentiment import SentimentIntensityAnalyzer

In [78]:
# One shared analyzer instance; it is reused for the free-labeling answers
# further down in this notebook.
# NOTE(review): NLTK's analyzer reads the VADER lexicon resource on
# construction — confirm `vader_lexicon` is downloaded in this environment.
sia = SentimentIntensityAnalyzer()

In [79]:
# Score each chosen emotion word with the analyzer's 'compound' polarity value.
df_emo_answers_long_m['sentimentScore'] = df_emo_answers_long_m['emotion'].map(
    lambda answer: sia.polarity_scores(answer)['compound']
)

In [80]:
# Spot-check the new `sentimentScore` column.
df_emo_answers_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore
0,0,Q2.1,Neutral,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0
1,0,Q163.1,Happiness,white,male,child,happiness,https://uwmadison.co1.qualtrics.com/ControlPan...,0.5574


## Center predictors

In [45]:
# Effect-code the binary photo attributes as -0.5 / +0.5 (presumably as
# centered predictors for the downstream lmer model, per the output file name).
#
# `Series.map` is used rather than `Series.replace`: on an object column,
# `replace` depends on implicit downcasting to produce numbers (deprecated in
# recent pandas) and silently keeps any unexpected category string as-is.
# `map` always returns a float column, and any value that is not a key of the
# mapping becomes NaN, so unexpected categories are visible as missing values.
df_emo_answers_long_m['sexC'] = df_emo_answers_long_m['sex'].map({'female': -0.5, 'male': 0.5})
df_emo_answers_long_m['ageC'] = df_emo_answers_long_m['age'].map({'child': -0.5, 'adult': 0.5})
df_emo_answers_long_m['ethnicityC'] = df_emo_answers_long_m['ethnicity'].map({'bipoc': -0.5, 'white': 0.5})

In [46]:
# Spot-check the coded predictor columns (`sexC`, `ageC`, `ethnicityC`).
df_emo_answers_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1,Neutral,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,-0.5,0.5,-0.5
1,0,Q163.1,Happiness,white,male,child,happiness,https://uwmadison.co1.qualtrics.com/ControlPan...,0.5574,0.5,-0.5,0.5


In [47]:
# Drop the rows whose photo is labelled 'attention', then renumber the rows
# so the exported frame has a contiguous index.
df_emo_answers_long_m_fil = (
    df_emo_answers_long_m
    .loc[lambda frame: frame['label'].ne('attention')]
    .reset_index(drop=True)
)

In [48]:
# Persist the filtered long-format table; `index=False` keeps the row index
# out of the file.
df_emo_answers_long_m_fil.to_csv('../clean_data/forced_choice_emotion_uw_students_long_format_lmer.csv', index=False)

In [49]:
# Display the exported frame (pandas truncates the middle rows when rendering).
df_emo_answers_long_m_fil

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1,Neutral,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0000,-0.5,0.5,-0.5
1,0,Q163.1,Happiness,white,male,child,happiness,https://uwmadison.co1.qualtrics.com/ControlPan...,0.5574,0.5,-0.5,0.5
2,0,Q77.1,Disgust,bipoc,male,adult,disgust,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5994,0.5,0.5,-0.5
3,0,Q78.1,Disgust,bipoc,male,child,disgust,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5994,0.5,-0.5,-0.5
4,0,Q162.1,Happiness,white,male,adult,happiness,https://uwmadison.co1.qualtrics.com/ControlPan...,0.5574,0.5,0.5,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...
9787,50,Q125.1,Other,bipoc,male,adult,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0000,0.5,0.5,-0.5
9788,50,Q124.1,Other,bipoc,female,child,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0000,-0.5,-0.5,-0.5
9789,50,Q123.1,Surprise,bipoc,female,adult,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,0.2732,-0.5,0.5,-0.5
9790,50,Q122.1,Anger,white,male,child,surprise,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5719,0.5,-0.5,0.5


# Free-labeling

In [106]:
# Load the cleaned free-labeling responses. Each row is later treated as one
# participant: its row position becomes `participantId` further down.
df_free = pd.read_csv('../clean_data/free_choice_emotion_uw_students.csv')
# Load the label table for the free-labeling task; a `photoId` column is
# attached to it below from the answer column names.
df_labels_free = pd.read_csv('../data/emotion_labels_free_choice.csv')

In [107]:
# Keep only the columns holding the typed words. Label-based `.loc` slicing
# includes both endpoints, so every column from 'Q2.1_1' through 'Q195.1_4'
# is selected.
df_emo_answers_free = df_free.loc[:, 'Q2.1_1':'Q195.1_4']

In [97]:
# Spot-check the wide free-labeling answers (unused answer slots show as NaN).
df_emo_answers_free.head(2)

Unnamed: 0,Q2.1_1,Q2.1_2,Q2.1_3,Q2.1_4,Q3.1_1,Q3.1_2,Q3.1_3,Q3.1_4,Q4.1_1,Q4.1_2,...,Q193.1_3,Q193.1_4,Q194.1_1,Q194.1_2,Q194.1_3,Q194.1_4,Q195.1_1,Q195.1_2,Q195.1_3,Q195.1_4
0,angry,,,,yelling,,,,yelling,,...,,,funny,,,,silly,,,
1,,,,,mad,,,,mad,,...,,,dazed,,,,silly,,,


In [116]:
# Use the answer column names as the photoId of each label row.
# NOTE(review): the pairing is purely positional, so it assumes the label file
# has one row per answer column, in the same order — confirm against the file.
df_labels_free = df_labels_free.assign(photoId=list(df_emo_answers_free.columns))
df_labels_free.tail(2)

Unnamed: 0,ethnicity,sex,age,label,url,photoId
774,white,male,child,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,Q195.1_3
775,white,male,child,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,Q195.1_4


## Long format DF

In [117]:
# Attach a 0-based participant identifier (the row position) as the first
# column, ahead of the free-labeling answer columns.
df_emo_answers_free_long = (
    df_emo_answers_free
    .reset_index(drop=True)         # start from a fresh 0..n-1 index
    .rename_axis('participantId')   # name that index ...
    .reset_index()                  # ... and materialise it as a column
)
df_emo_answers_free_long.head(2)

Unnamed: 0,participantId,Q2.1_1,Q2.1_2,Q2.1_3,Q2.1_4,Q3.1_1,Q3.1_2,Q3.1_3,Q3.1_4,Q4.1_1,...,Q193.1_3,Q193.1_4,Q194.1_1,Q194.1_2,Q194.1_3,Q194.1_4,Q195.1_1,Q195.1_2,Q195.1_3,Q195.1_4
0,0,angry,,,,yelling,,,,yelling,...,,,funny,,,,silly,,,
1,1,,,,,mad,,,,mad,...,,,dazed,,,,silly,,,


In [118]:
# Collect the free-labeling ids to use as melt `value_vars`. No whitespace
# stripping is applied here: these ids were copied from the DataFrame column
# names rather than read from the label file.
# NOTE: this rebinds `photoIds`, replacing the forced-choice list of the same
# name defined earlier in the notebook.
photoIds = df_labels_free['photoId'].tolist()

In [119]:
# Reshape wide -> long: one row per (participant, answer slot) pair, with each
# participant's rows grouped together.
# NOTE: the name is rebound to the melted frame, so this cell cannot be re-run
# on its own (the answer columns no longer exist after the first run).
df_emo_answers_free_long = (
    df_emo_answers_free_long
    .melt(id_vars=['participantId'], value_vars=photoIds)
    .reset_index(drop=True)
    .sort_values(by=['participantId'])
)

In [123]:
# Give melt's generic output columns meaningful names.
df_emo_answers_free_long = df_emo_answers_free_long.rename(
    columns={'variable': 'photoId', 'value': 'emotion'},
)

In [126]:
# Count missing values per column and report the frame's shape before any
# rows are dropped.
df_emo_answers_free_long.isna().sum(), df_emo_answers_free_long.shape

(participantId        0
 photoId              0
 emotion          24791
 dtype: int64,
 (39576, 3))

In [129]:
# Discard every row that contains a missing value in any column, keeping the
# result under a new name so the unfiltered long frame stays available.
df_emo_answers_free_long_drop = df_emo_answers_free_long.dropna(axis=0, how='any')

# Confirm that no NaN remains and show the reduced shape.
df_emo_answers_free_long_drop.isna().sum(), df_emo_answers_free_long_drop.shape

(participantId    0
 photoId          0
 emotion          0
 dtype: int64,
 (14785, 3))

In [131]:
# Attach the photo metadata to every remaining answer row. `validate` makes
# pandas raise if a photoId occurs more than once in the label table.
df_emo_answers_free_long_m = df_emo_answers_free_long_drop.merge(
    df_labels_free,
    how="left",
    on="photoId",
    validate="many_to_one",
)

In [132]:
# Spot-check the merged frame: answers should now carry the label-table columns.
df_emo_answers_free_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url
0,0,Q2.1_1,angry,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...
1,0,Q102.1_1,bored,bipoc,male,child,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...


## Sentiment score

In [133]:
# Score each typed answer with the 'compound' polarity value from the shared
# `sia` analyzer created in the forced-choice section.
df_emo_answers_free_long_m['sentimentScore'] = df_emo_answers_free_long_m['emotion'].map(
    lambda answer: sia.polarity_scores(answer)['compound']
)

In [134]:
# Spot-check the new `sentimentScore` column.
df_emo_answers_free_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore
0,0,Q2.1_1,angry,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5106
1,0,Q102.1_1,bored,bipoc,male,child,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.2732


## Center predictors

In [135]:
# Effect-code the binary photo attributes as -0.5 / +0.5 (presumably as
# centered predictors for the downstream lmer model, per the output file name).
#
# `Series.map` is used rather than `Series.replace`: on an object column,
# `replace` depends on implicit downcasting to produce numbers (deprecated in
# recent pandas) and silently keeps any unexpected category string as-is.
# `map` always returns a float column, and any value that is not a key of the
# mapping becomes NaN, so unexpected categories are visible as missing values.
df_emo_answers_free_long_m['sexC'] = df_emo_answers_free_long_m['sex'].map({'female': -0.5, 'male': 0.5})
df_emo_answers_free_long_m['ageC'] = df_emo_answers_free_long_m['age'].map({'child': -0.5, 'adult': 0.5})
df_emo_answers_free_long_m['ethnicityC'] = df_emo_answers_free_long_m['ethnicity'].map({'bipoc': -0.5, 'white': 0.5})

In [137]:
# Spot-check the coded predictor columns (`sexC`, `ageC`, `ethnicityC`).
df_emo_answers_free_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1_1,angry,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5106,-0.5,0.5,-0.5
1,0,Q102.1_1,bored,bipoc,male,child,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.2732,0.5,-0.5,-0.5


In [138]:
# Drop the rows whose photo is labelled 'attention', then renumber the rows
# so the exported frame has a contiguous index.
df_emo_answers_free_long_m_fil = (
    df_emo_answers_free_long_m
    .loc[lambda frame: frame['label'].ne('attention')]
    .reset_index(drop=True)
)

In [139]:
# Spot-check the filtered frame before it is written to disk.
df_emo_answers_free_long_m_fil.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1_1,angry,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5106,-0.5,0.5,-0.5
1,0,Q102.1_1,bored,bipoc,male,child,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.2732,0.5,-0.5,-0.5


In [140]:
# Persist the filtered long-format table; `index=False` keeps the row index
# out of the file.
df_emo_answers_free_long_m_fil.to_csv('../clean_data/free_labeling_emotion_uw_students_long_format_lmer.csv', index=False)