In [1]:
import pandas as pd

# Forced-choice 

In [2]:
df = pd.read_csv('../clean_data_mturk/forced_choice_emotion_mturk.csv')
df_labels = pd.read_csv('../data_mturk/emotion_labels.csv')

In [3]:
df_emo_answers_wide = df.loc[:, 'Q2.1':'Q195.1'] # subset photos

## Long format DF

In [4]:
df_emo_answers_long = df_emo_answers_wide.reset_index()
df_emo_answers_long['index'] = df_emo_answers_long.index
df_emo_answers_long = df_emo_answers_long.rename(columns={'index':'participantId'})
df_emo_answers_long.head(2)

Unnamed: 0,participantId,Q2.1,Q3.1,Q4.1,Q5.1,Q6.1,Q7.1,Q8.1,Q9.1,Q10.1,...,Q186.1,Q187.1,Q188.1,Q189.1,Q190.1,Q191.1,Q192.1,Q193.1,Q194.1,Q195.1
0,0,Anger,Anger,Anger,Sadness,Anger,Anger,Neutral,Anger,Other,...,Surprise,Fear,Happiness,Happiness,Fear,Uncertain,Uncertain,Anger,Uncertain,Uncertain
1,1,Anger,Anger,Anger,Fear,Anger,Anger,Anger,Anger,Disgust,...,Surprise,Fear,Happiness,Happiness,Other,Other,Anger,Anger,Other,Other


In [5]:
df_labels['photoId'] =  df_labels['photoId'].str.strip()
photoIds = df_labels['photoId'].tolist()

In [6]:
df_emo_answers_long = pd.melt(df_emo_answers_long, id_vars=['participantId'], value_vars=photoIds).reset_index(drop=True).sort_values(by=['participantId'])

In [7]:
df_emo_answers_long = df_emo_answers_long.rename(columns={'variable':'photoId', 'value':'emotion'})

In [8]:
df_emo_answers_long_m = pd.merge(df_emo_answers_long, df_labels, how="left", on=["photoId"], validate="many_to_one")

In [9]:
df_emo_answers_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url
0,0,Q2.1,Anger,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...
1,0,Q126.1,Neutral,bipoc,male,child,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...


## Sentiment score

In [10]:
from nltk.sentiment import SentimentIntensityAnalyzer

In [11]:
sia = SentimentIntensityAnalyzer()

In [12]:
df_emo_answers_long_m['sentimentScore'] = df_emo_answers_long_m['emotion'].apply(lambda x: sia.polarity_scores(x)['compound'])

In [13]:
df_emo_answers_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore
0,0,Q2.1,Anger,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5719
1,0,Q126.1,Neutral,bipoc,male,child,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0


## Center predictors

In [14]:
df_emo_answers_long_m['sexC'] = df_emo_answers_long_m['sex'].replace({'female': -0.5, 'male': 0.5})
df_emo_answers_long_m['ageC'] = df_emo_answers_long_m['age'].replace({'child': -0.5, 'adult': 0.5})
df_emo_answers_long_m['ethnicityC'] = df_emo_answers_long_m['ethnicity'].replace({'bipoc': -0.5, 'white': 0.5})

In [15]:
df_emo_answers_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1,Anger,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5719,-0.5,0.5,-0.5
1,0,Q126.1,Neutral,bipoc,male,child,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,0.5,-0.5,-0.5


In [16]:
df_emo_answers_long_m_fil = df_emo_answers_long_m[df_emo_answers_long_m['label'] != 'attention']
df_emo_answers_long_m_fil = df_emo_answers_long_m_fil.reset_index(drop=True)

In [17]:
df_emo_answers_long_m_fil.to_csv('../clean_data_mturk/forced_choice_emotion_mturk_long_format_lmer.csv', index=False)

In [18]:
df_emo_answers_long_m_fil.head()

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1,Anger,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.5719,-0.5,0.5,-0.5
1,0,Q126.1,Neutral,bipoc,male,child,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,0.5,-0.5,-0.5
2,0,Q127.1,Surprise,white,female,adult,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,0.2732,-0.5,0.5,0.5
3,0,Q128.1,Fear,white,female,child,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.4939,-0.5,-0.5,0.5
4,0,Q129.1,Other,white,male,adult,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,0.5,0.5,0.5


# Free-labeling

In [19]:
df_free = pd.read_csv('../clean_data_mturk/free_choice_emotion_mturk.csv')
df_labels_free = pd.read_csv('../data_mturk/emotion_labels_free_choice.csv')

In [20]:
df_emo_answers_free = df_free.loc[:, 'Q2.1_1':'Q195.1_4'] # get cols with words only

In [21]:
df_emo_answers_free.head(2)

Unnamed: 0,Q2.1_1,Q2.1_2,Q2.1_3,Q2.1_4,Q3.1_1,Q3.1_2,Q3.1_3,Q3.1_4,Q4.1_1,Q4.1_2,...,Q193.1_3,Q193.1_4,Q194.1_1,Q194.1_2,Q194.1_3,Q194.1_4,Q195.1_1,Q195.1_2,Q195.1_3,Q195.1_4
0,boredom,,,,anger,,,,anger,annoyance,...,,,disgust,boredom,exasperation,,embarrassment,,,
1,mad,,,,mad,shouting,,,anger,yelling,...,,,playing,,,,,,,


In [22]:
df_labels_free['photoId'] = df_emo_answers_free.columns.tolist()
df_labels_free.tail(2)

Unnamed: 0,ethnicity,sex,age,label,url,photoId
774,white,male,child,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,Q195.1_3
775,white,male,child,uncertain,https://uwmadison.co1.qualtrics.com/ControlPan...,Q195.1_4


## Long-form DF

In [23]:
df_emo_answers_free_long = df_emo_answers_free.reset_index()
df_emo_answers_free_long['index'] = df_emo_answers_free_long.index
df_emo_answers_free_long = df_emo_answers_free_long.rename(columns={'index':'participantId'})
df_emo_answers_free_long.head(2)

Unnamed: 0,participantId,Q2.1_1,Q2.1_2,Q2.1_3,Q2.1_4,Q3.1_1,Q3.1_2,Q3.1_3,Q3.1_4,Q4.1_1,...,Q193.1_3,Q193.1_4,Q194.1_1,Q194.1_2,Q194.1_3,Q194.1_4,Q195.1_1,Q195.1_2,Q195.1_3,Q195.1_4
0,0,boredom,,,,anger,,,,anger,...,,,disgust,boredom,exasperation,,embarrassment,,,
1,1,mad,,,,mad,shouting,,,anger,...,,,playing,,,,,,,


In [24]:
# df_labels['photoId'] =  df_labels['photoId'].str.strip()
photoIds = df_labels_free['photoId'].tolist()

In [25]:
df_emo_answers_free_long = pd.melt(df_emo_answers_free_long, id_vars=['participantId'], value_vars=photoIds).reset_index(drop=True).sort_values(by=['participantId'])

In [26]:
df_emo_answers_free_long = df_emo_answers_free_long.rename(columns={'variable':'photoId', 'value':'emotion'})

In [27]:
df_emo_answers_free_long.isna().sum(), df_emo_answers_free_long.shape

(participantId        0
 photoId              0
 emotion          23349
 dtype: int64,
 (38800, 3))

In [28]:
df_emo_answers_free_long_drop = df_emo_answers_free_long.dropna()

df_emo_answers_free_long_drop.isna().sum(), df_emo_answers_free_long_drop.shape

(participantId    0
 photoId          0
 emotion          0
 dtype: int64,
 (15451, 3))

In [29]:
df_emo_answers_free_long_m = pd.merge(df_emo_answers_free_long_drop, df_labels_free, how="left", on=["photoId"], validate="many_to_one")

In [30]:
df_emo_answers_free_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url
0,0,Q2.1_1,boredom,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...
1,0,Q117.1_3,annoyance,bipoc,male,adult,surprise,https://uwmadison.co1.qualtrics.com/ControlPan...


## Sentiment score

In [31]:
df_emo_answers_free_long_m['sentimentScore'] = df_emo_answers_free_long_m['emotion'].apply(lambda x: sia.polarity_scores(x)['compound'])

In [32]:
df_emo_answers_free_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore
0,0,Q2.1_1,boredom,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.3182
1,0,Q117.1_3,annoyance,bipoc,male,adult,surprise,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.3182


## Center predictors

In [33]:
df_emo_answers_free_long_m['sexC'] = df_emo_answers_free_long_m['sex'].replace({'female': -0.5, 'male': 0.5})
df_emo_answers_free_long_m['ageC'] = df_emo_answers_free_long_m['age'].replace({'child': -0.5, 'adult': 0.5})
df_emo_answers_free_long_m['ethnicityC'] = df_emo_answers_free_long_m['ethnicity'].replace({'bipoc': -0.5, 'white': 0.5})

In [34]:
df_emo_answers_free_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1_1,boredom,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.3182,-0.5,0.5,-0.5
1,0,Q117.1_3,annoyance,bipoc,male,adult,surprise,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.3182,0.5,0.5,-0.5


In [35]:
df_emo_answers_free_long_m_fil = df_emo_answers_free_long_m[df_emo_answers_free_long_m['label'] != 'attention']
df_emo_answers_free_long_m_fil = df_emo_answers_free_long_m_fil.reset_index(drop=True)

In [36]:
df_emo_answers_free_long_m_fil.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1_1,boredom,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.3182,-0.5,0.5,-0.5
1,0,Q117.1_3,annoyance,bipoc,male,adult,surprise,https://uwmadison.co1.qualtrics.com/ControlPan...,-0.3182,0.5,0.5,-0.5


In [37]:
df_emo_answers_free_long_m_fil.to_csv('../clean_data_mturk/free_labeling_emotion_mturk_long_format_lmer.csv', index=False)