In [27]:
import pandas as pd
from deep_translator import GoogleTranslator
from spellchecker import SpellChecker
import numpy as np

# Forced-choice 

In [28]:
df_esp = pd.read_csv('../clean_data_mturk_espanol/forced_choice_emotion_mturk_espanol.csv')
df_labels_esp = pd.read_csv('../data_mturk_espanol/emotion_labels_espanol.csv')

## Manual translation

**The VADER sentiment analyzer only works in English**. I had two options:

- Using a bilingual sentiment analyzer
- Translating to English and then use VADER 

Since I couldn't find a "widely used" bilingual sentiment analyzer, and translating was straightforward, I opted fot the latter 

In [29]:
def translation_emotions(df):
    df.replace(['Enfado'], 'Anger', inplace=True)
    df.replace(['Felicidad'], 'Happiness', inplace=True)
    df.replace(['Sorpresa'], 'Surprise', inplace=True)
    df.replace(['Tristeza'], 'Sadness', inplace=True)
    df.replace(['Asco'], 'Disgust', inplace=True)
    df.replace(['Incertidumbre'], 'Uncertain', inplace=True)
    df.replace(['Miedo'], 'Fear', inplace=True)
    df.replace(['Otra'], 'Other', inplace=True)
    
    return df

In [30]:
df = translation_emotions(df_esp).copy(deep=True)
df_labels = df_labels_esp.copy(deep=True)

In [31]:
df_emo_answers_wide = df.loc[:, 'Q2.1':'Q195.1'] # subset photos

## Long format DF

In [32]:
df_emo_answers_long = df_emo_answers_wide.reset_index()
df_emo_answers_long['index'] = df_emo_answers_long.index
df_emo_answers_long = df_emo_answers_long.rename(columns={'index':'participantId'})
df_emo_answers_long.head(2)

Unnamed: 0,participantId,Q2.1,Q3.1,Q4.1,Q5.1,Q6.1,Q7.1,Q8.1,Q9.1,Q10.1,...,Q186.1,Q187.1,Q188.1,Q189.1,Q190.1,Q191.1,Q192.1,Q193.1,Q194.1,Q195.1
0,0,Other,Uncertain,Anger,Other,Anger,Anger,Other,Anger,Disgust,...,Surprise,Neutral,Other,Uncertain,Disgust,Neutral,Other,Other,Uncertain,Neutral
1,1,Neutral,Anger,Anger,Uncertain,Anger,Anger,Neutral,Other,Uncertain,...,Surprise,Neutral,Other,Happiness,Uncertain,Neutral,Uncertain,Other,Other,Neutral


In [33]:
df_labels['photoId'] =  df_labels['photoId'].str.strip()
photoIds = df_labels['photoId'].tolist()

In [34]:
df_emo_answers_long = pd.melt(df_emo_answers_long, id_vars=['participantId'], value_vars=photoIds).reset_index(drop=True).sort_values(by=['participantId'])

In [35]:
df_emo_answers_long = df_emo_answers_long.rename(columns={'variable':'photoId', 'value':'emotion'})

In [36]:
df_emo_answers_long_m = pd.merge(df_emo_answers_long, df_labels, how="left", on=["photoId"], validate="many_to_one")

In [37]:
df_emo_answers_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url
0,0,Q2.1,Other,bipoc,female,adult,enfado,https://uwmadison.co1.qualtrics.com/ControlPan...
1,0,Q99.1,Neutral,bipoc,female,adult,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...


## Sentiment score

In [38]:
from nltk.sentiment import SentimentIntensityAnalyzer

In [39]:
sia = SentimentIntensityAnalyzer()

In [40]:
df_emo_answers_long_m['sentimentScore'] = df_emo_answers_long_m['emotion'].apply(lambda x: sia.polarity_scores(x)['compound'])

In [41]:
df_emo_answers_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore
0,0,Q2.1,Other,bipoc,female,adult,enfado,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0
1,0,Q99.1,Neutral,bipoc,female,adult,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0


## Center predictors

In [42]:
df_emo_answers_long_m['sexC'] = df_emo_answers_long_m['sex'].replace({'female': -0.5, 'male': 0.5})
df_emo_answers_long_m['ageC'] = df_emo_answers_long_m['age'].replace({'child': -0.5, 'adult': 0.5})
df_emo_answers_long_m['ethnicityC'] = df_emo_answers_long_m['ethnicity'].replace({'bipoc': -0.5, 'white': 0.5})

In [43]:
df_emo_answers_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1,Other,bipoc,female,adult,enfado,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,-0.5,0.5,-0.5
1,0,Q99.1,Neutral,bipoc,female,adult,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,-0.5,0.5,-0.5


In [44]:
df_emo_answers_long_m_fil = df_emo_answers_long_m[df_emo_answers_long_m['label'] != 'attention']
df_emo_answers_long_m_fil = df_emo_answers_long_m_fil.reset_index(drop=True)

In [45]:
df_emo_answers_long_m_fil.to_csv('../clean_data_mturk_espanol/forced_choice_emotion_mturk_long_format_lmer_espanol.csv', index=False)

In [46]:
df_emo_answers_long_m_fil.head()

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1,Other,bipoc,female,adult,enfado,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,-0.5,0.5,-0.5
1,0,Q99.1,Neutral,bipoc,female,adult,neutral,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,-0.5,0.5,-0.5
2,0,Q98.1,Happiness,white,male,child,felicidad,https://uwmadison.co1.qualtrics.com/ControlPan...,0.5574,0.5,-0.5,0.5
3,0,Q97.1,Neutral,white,male,adult,felicidad,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,0.5,0.5,0.5
4,0,Q96.1,Happiness,white,female,child,felicidad,https://uwmadison.co1.qualtrics.com/ControlPan...,0.5574,-0.5,-0.5,0.5


# Free-labeling

In [47]:
df_free_esp = pd.read_csv('../clean_data_mturk_espanol/free_choice_emotion_mturk_espanol.csv')
df_labels_free = pd.read_csv('../data_mturk_espanol/emotion_labels_free_choice_mturk_espanol.csv')

In [48]:
df_emo_answers_free = df_free_esp.loc[:, 'Q2.1_1':'Q195.1_4'] # get cols with words only

## Manual word clean up

In [49]:
df_emo_answers_free = df_emo_answers_free.transform(lambda x: x.str.strip(), axis = 1)
df_emo_answers_free = df_emo_answers_free.transform(lambda x: x.str.lower(), axis = 1)
df_emo_answers_free = df_emo_answers_free.transform(lambda x: x.replace({'na':'none'}) , axis = 1)
df_emo_answers_free = df_emo_answers_free.fillna('NAN')

In [50]:
df_emo_answers_free.head(2)

Unnamed: 0,Q2.1_1,Q2.1_2,Q2.1_3,Q2.1_4,Q3.1_1,Q3.1_2,Q3.1_3,Q3.1_4,Q4.1_1,Q4.1_2,...,Q193.1_3,Q193.1_4,Q194.1_1,Q194.1_2,Q194.1_3,Q194.1_4,Q195.1_1,Q195.1_2,Q195.1_3,Q195.1_4
0,molesta,NAN,NAN,NAN,molesta,NAN,NAN,NAN,molesto,NAN,...,NAN,NAN,molesto,NAN,NAN,NAN,molesto,NAN,NAN,NAN
1,enfado,NAN,NAN,NAN,enfado,NAN,NAN,NAN,enfado,NAN,...,NAN,NAN,NAN,NAN,NAN,NAN,neutral,NAN,NAN,NAN


## English translation

In [51]:
df_stack = df_emo_answers_free.stack().reset_index(drop=True) # stack as series
df_stack = df_stack.to_frame(name='emotion') # as DF
emotion_set = list(set(df_stack['emotion'])) # get distinc words

In [52]:
## dic for translation
emotion_dic = {}
for i in emotion_set:
    emotion_dic[i] = GoogleTranslator(source='spanish', target='english').translate(i)
    print(emotion_dic[i])


craziness
complicity
happiness
hostility
flirtatious
nuisance
doubtful
Bad smell
scared
reviewing
drunkenness
confusion
disgust
sensuality
annoying
nerve
indifferent
suffering
dazzled
shocked
provocation
neutral
joy
pissed off
bleak
spoiled
happiness
discomfort
disorientated
fun
approval
confidence
critical
intimidating
despair
aversion
interested
weeping
evaluation
restless
betrayed
caveat
irony
bewilderment
surprise
horrified
smelly
smiling
copy
fighting
inquisitive
threat
shock
man of steel in disgrace
apathetic
remembrance
nice to meet you
bored
Attention
pleasant
silly
introspection
claim
play
coldness
distressed
bitter
Superman
disgust
sympathetic
tearful
desire
fed up
disgust
hysteria
trance
fascination
impressed
Grace
I knew
disappointed
sentimental
irony
falsehood
simulation
pensive
euphoria
observer
annoyed
naive
inconsolable
illness
jocular
disoriented
vain
crying
surprised
nostalgia
sorry
fascinated
laughter
unmotivated
irritation
mystery
harmony
listless
concentration
comp

In [53]:
df_emo_answers_free_en = df_emo_answers_free.replace(emotion_dic)

In [54]:
df_labels_free['photoId'] = df_emo_answers_free.columns.tolist()
df_labels_free.tail(2)

Unnamed: 0,ethnicity,sex,age,label,url,photoId
774,white,male,child,incertidumbre,https://uwmadison.co1.qualtrics.com/ControlPan...,Q195.1_3
775,white,male,child,incertidumbre,https://uwmadison.co1.qualtrics.com/ControlPan...,Q195.1_4


## Long-form DF

In [55]:
df_emo_answers_free_long = df_emo_answers_free_en.reset_index()
df_emo_answers_free_long['index'] = df_emo_answers_free_long.index
df_emo_answers_free_long = df_emo_answers_free_long.rename(columns={'index':'participantId'})
df_emo_answers_free_long.head(2)

Unnamed: 0,participantId,Q2.1_1,Q2.1_2,Q2.1_3,Q2.1_4,Q3.1_1,Q3.1_2,Q3.1_3,Q3.1_4,Q4.1_1,...,Q193.1_3,Q193.1_4,Q194.1_1,Q194.1_2,Q194.1_3,Q194.1_4,Q195.1_1,Q195.1_2,Q195.1_3,Q195.1_4
0,0,upset,,,,upset,,,,upset,...,,,upset,,,,upset,,,
1,1,anger,,,,anger,,,,anger,...,,,,,,,neutral,,,


In [56]:
# df_labels['photoId'] =  df_labels['photoId'].str.strip()
photoIds = df_labels_free['photoId'].tolist()

In [57]:
df_emo_answers_free_long = pd.melt(df_emo_answers_free_long, 
                                   id_vars=['participantId'], 
                                   value_vars=photoIds).reset_index(drop=True).sort_values(by=['participantId'])

In [58]:
df_emo_answers_free_long = df_emo_answers_free_long.rename(columns={'variable':'photoId', 'value':'emotion'})

In [59]:
## replace with np.nan to drop it 
dic_nan = {'nan': np.nan, 'none': np.nan}

df_emo_answers_free_long['emotion'] = df_emo_answers_free_long['emotion'].replace(dic_nan)

In [60]:
df_emo_answers_free_long.head()

Unnamed: 0,participantId,photoId,emotion
0,0,Q2.1_1,upset
15279,0,Q117.1_4,
15312,0,Q118.1_1,afraid
15345,0,Q118.1_2,upset
15378,0,Q118.1_3,


## Check and drop NA

In [61]:
df_emo_answers_free_long.isna().sum(), df_emo_answers_free_long.shape

(participantId        0
 photoId              0
 emotion          16159
 dtype: int64,
 (25608, 3))

In [62]:
## drop np.nan
df_emo_answers_free_long_drop = df_emo_answers_free_long.dropna()

df_emo_answers_free_long_drop.isna().sum(), df_emo_answers_free_long_drop.shape

(participantId    0
 photoId          0
 emotion          0
 dtype: int64,
 (9449, 3))

In [63]:
df_emo_answers_free_long_m = pd.merge(df_emo_answers_free_long_drop, df_labels_free, how="left", on=["photoId"], validate="many_to_one")

In [64]:
df_emo_answers_free_long_m.head(2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url
0,0,Q2.1_1,upset,bipoc,female,adult,enfado,https://uwmadison.co1.qualtrics.com/ControlPan...
1,0,Q118.1_1,afraid,bipoc,male,child,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...


In [65]:
df_emo_answers_free_long_m.isna().sum(), df_emo_answers_free_long_m.shape

(participantId      0
 photoId            0
 emotion            0
 ethnicity        112
 sex              112
 age              112
 label              0
 url                0
 dtype: int64,
 (9449, 8))

## Manual english correction and removing sentences

In [66]:
## we want to get rid of sentences and leave single words

## count length sentences 
df_emo_answers_free_long_m['len_words'] = df_emo_answers_free_long_m['emotion'].str.split().apply(len)

## get list of distinct sentences (>= 2 words)
list_sentences = df_emo_answers_free_long_m[df_emo_answers_free_long_m['len_words'] >= 2].value_counts('emotion').index.tolist()

list_sentences

['Calm down',
 'I laughed',
 'would be',
 'to weigh',
 'sassy Girl',
 'I long',
 'calmed down',
 'hoping for',
 'I disapprove',
 'pissed off',
 'nice to meet you',
 'silly faces',
 'poor appetite',
 'in love',
 'something sucks',
 'to persuade',
 'man of steel in disgrace',
 'Bad smell',
 'in agreement',
 'bowled over',
 'bad move',
 'bad gal',
 'bad attitude',
 'Waiting answer',
 'I knew',
 "I didn't expect it from you",
 'Clark Kent',
 'fed up']

In [67]:
## dic for word replacement

dic_sentences = {}

for i in list_sentences:
    dic_sentences[i] = None
    
dic_sentences

{'Calm down': None,
 'I laughed': None,
 'would be': None,
 'to weigh': None,
 'sassy Girl': None,
 'I long': None,
 'calmed down': None,
 'hoping for': None,
 'I disapprove': None,
 'pissed off': None,
 'nice to meet you': None,
 'silly faces': None,
 'poor appetite': None,
 'in love': None,
 'something sucks': None,
 'to persuade': None,
 'man of steel in disgrace': None,
 'Bad smell': None,
 'in agreement': None,
 'bowled over': None,
 'bad move': None,
 'bad gal': None,
 'bad attitude': None,
 'Waiting answer': None,
 'I knew': None,
 "I didn't expect it from you": None,
 'Clark Kent': None,
 'fed up': None}

In [68]:
## there is no choice but manual translation/fixing for these ones

dic_sentences['go to'] = 'wrath' ## Google Translate is spliting 'ira' (wrath) into 'ir a'
dic_sentences['in tears'] =  'crying'
dic_sentences['Calm down'] =  'calm'
dic_sentences['calmed down'] =  'calm'
dic_sentences['I laughed'] =  'laughing'
dic_sentences['would be'] =  'serious' ## 'serio = serious' 'seria = would be' 
dic_sentences['sassy Girl'] =  'sassy'
dic_sentences['I long'] = 'longing'
dic_sentences['I disapprove'] = 'disapprove'
dic_sentences['something sucks'] = 'sucks'
dic_sentences['silly faces'] = 'silly'
dic_sentences['poor appetite'] = ''
dic_sentences['nice to meet you'] = 'nice'
dic_sentences['in love'] = 'love'
dic_sentences['Bad smell'] = 'smelly'
dic_sentences['fed up'] = 'disgusted'
dic_sentences['bad gal'] = 'bad'
dic_sentences['Waiting answer'] = 'expectant'
dic_sentences['bowled over'] = 'surprised'
dic_sentences['Clark Kent'] = np.nan
dic_sentences['He shouted'] = 'shouting'
dic_sentences['I knew'] = 'know'
dic_sentences['bad attitude'] = 'gloomy'
dic_sentences['bad move'] = np.nan
dic_sentences['in love'] = 'love'
dic_sentences['man of steel in disgrace'] = 'disgraced'
dic_sentences['poor appetite'] = 'satiated'
dic_sentences["I didn't expect it from you"] = np.nan

In [69]:
## to search for 'keys' based on 'value'
print(list(emotion_dic.keys())[list(emotion_dic.values()).index('bad move')]) 

mala jugada


In [70]:
dic_sentences

{'Calm down': 'calm',
 'I laughed': 'laughing',
 'would be': 'serious',
 'to weigh': None,
 'sassy Girl': 'sassy',
 'I long': 'longing',
 'calmed down': 'calm',
 'hoping for': None,
 'I disapprove': 'disapprove',
 'pissed off': None,
 'nice to meet you': 'nice',
 'silly faces': 'silly',
 'poor appetite': 'satiated',
 'in love': 'love',
 'something sucks': 'sucks',
 'to persuade': None,
 'man of steel in disgrace': 'disgraced',
 'Bad smell': 'smelly',
 'in agreement': None,
 'bowled over': 'surprised',
 'bad move': nan,
 'bad gal': 'bad',
 'bad attitude': 'gloomy',
 'Waiting answer': 'expectant',
 'I knew': 'know',
 "I didn't expect it from you": nan,
 'Clark Kent': nan,
 'fed up': 'disgusted',
 'go to': 'wrath',
 'in tears': 'crying',
 'He shouted': 'shouting'}

In [71]:
## replace with new translation

df_emo_answers_free_long_m = df_emo_answers_free_long_m.replace(dic_sentences)

In [72]:
df_emo_answers_free_long_m

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,len_words
0,0,Q2.1_1,upset,bipoc,female,adult,enfado,https://uwmadison.co1.qualtrics.com/ControlPan...,1
1,0,Q118.1_1,afraid,bipoc,male,child,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,1
2,0,Q118.1_2,upset,bipoc,male,child,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,1
3,0,Q119.1_1,surprised,white,female,adult,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,1
4,0,Q120.1_1,neutral,white,female,child,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,1
...,...,...,...,...,...,...,...,...,...
9444,32,Q127.1_1,surprise,white,female,adult,incertidumbre,https://uwmadison.co1.qualtrics.com/ControlPan...,1
9445,32,Q126.1_3,peaceful,bipoc,male,child,incertidumbre,https://uwmadison.co1.qualtrics.com/ControlPan...,1
9446,32,Q126.1_2,calm,bipoc,male,child,incertidumbre,https://uwmadison.co1.qualtrics.com/ControlPan...,1
9447,32,Q126.1_1,calm,bipoc,male,child,incertidumbre,https://uwmadison.co1.qualtrics.com/ControlPan...,2


In [73]:
# clean up text from:
# 1. phrases
# 2. words with numbers 
# 3. words with non alphabetic symbols
# 4. words shorter than three characters
# 5. words that appear just once

In [74]:
df_emo_answers_free_long_m = df_emo_answers_free_long_m.dropna()
df_emo_answers_free_long_m['emotion_txt'] = df_emo_answers_free_long_m['emotion'].str.lower() # as lower case
df_emo_answers_free_long_m['emotion_txt'] = df_emo_answers_free_long_m['emotion_txt'].replace({'na':'none'}) 
df_emo_answers_free_long_m['emotion_txt'] = df_emo_answers_free_long_m['emotion_txt'].astype(str) # as str
df_emo_answers_free_long_m['len_words'] = df_emo_answers_free_long_m['emotion_txt'].str.split().apply(len) # count words per answer
df_emo_answers_free_long_m = df_emo_answers_free_long_m[df_emo_answers_free_long_m['len_words'] <= 1] # keep single word answers
df_emo_answers_free_long_m['len_letters'] = df_emo_answers_free_long_m['emotion_txt'].apply(len) # cont number of letters
df_emo_answers_free_long_m = df_emo_answers_free_long_m[df_emo_answers_free_long_m['len_letters'] >= 3] # keep words with 3 or more letters
df_emo_answers_free_long_m['emotion_txt'] = df_emo_answers_free_long_m['emotion_txt'].str.replace('[^a-zA-Z]', '') # remove non-alphabetic characters
df_emo_answers_free_long_m = df_emo_answers_free_long_m[~df_emo_answers_free_long_m['emotion_txt'].str.contains(r'[0-9]')] #drop words containing numbers
df_emo_answers_free_long_m = df_emo_answers_free_long_m[df_emo_answers_free_long_m.groupby('emotion_txt').emotion_txt.transform(len) > 1] # keep words that ocurr > 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)
  df_emo_answers_free_long_m['emotion_txt'] = df_emo_answers_free_long_m['emotion_txt'].str.replace('[^a-zA-Z]', '') # remove non-alphabetic characters


In [75]:
## recheck NAN values
df_emo_answers_free_long_m.isna().sum(), df_emo_answers_free_long_m.shape

(participantId    0
 photoId          0
 emotion          0
 ethnicity        0
 sex              0
 age              0
 label            0
 url              0
 len_words        0
 emotion_txt      0
 len_letters      0
 dtype: int64,
 (9130, 11))

In [76]:
## drop np.nan
df_emo_answers_free_long_m = df_emo_answers_free_long_m.dropna()

df_emo_answers_free_long_m.isna().sum(), df_emo_answers_free_long_m.shape

(participantId    0
 photoId          0
 emotion          0
 ethnicity        0
 sex              0
 age              0
 label            0
 url              0
 len_words        0
 emotion_txt      0
 len_letters      0
 dtype: int64,
 (9130, 11))

In [77]:
#### recheck sentences

## count length sentences 
df_emo_answers_free_long_m['len_words'] = df_emo_answers_free_long_m['emotion'].str.split().apply(len)

## get list of distinct sentences (>= 2 words)
list_sentences_ = df_emo_answers_free_long_m[df_emo_answers_free_long_m['len_words'] >= 2].value_counts('emotion').index.tolist()

list_sentences_

[]

In [78]:
## remove unnecesary column

df_emo_answers_free_long_m.drop(['len_words', 'len_letters', 'emotion'], axis=1, inplace=True)
df_emo_answers_free_long_m.rename(columns={"emotion_txt": "emotion"}, inplace=True)
df_emo_answers_free_long_m.head()

Unnamed: 0,participantId,photoId,ethnicity,sex,age,label,url,emotion
0,0,Q2.1_1,bipoc,female,adult,enfado,https://uwmadison.co1.qualtrics.com/ControlPan...,upset
1,0,Q118.1_1,bipoc,male,child,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,afraid
2,0,Q118.1_2,bipoc,male,child,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,upset
3,0,Q119.1_1,white,female,adult,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,surprised
4,0,Q120.1_1,white,female,child,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,neutral


## Sentiment score

In [79]:
## compute sentiment score for each word
df_emo_answers_free_long_m['sentimentScore'] = df_emo_answers_free_long_m['emotion'].apply(lambda x: sia.polarity_scores(x)['compound'])

In [80]:
df_emo_answers_free_long_m.head(2)

Unnamed: 0,participantId,photoId,ethnicity,sex,age,label,url,emotion,sentimentScore
0,0,Q2.1_1,bipoc,female,adult,enfado,https://uwmadison.co1.qualtrics.com/ControlPan...,upset,-0.3818
1,0,Q118.1_1,bipoc,male,child,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,afraid,0.0


## Center predictors

In [81]:
df_emo_answers_free_long_m['sexC'] = df_emo_answers_free_long_m['sex'].replace({'female': -0.5, 'male': 0.5})
df_emo_answers_free_long_m['ageC'] = df_emo_answers_free_long_m['age'].replace({'child': -0.5, 'adult': 0.5})
df_emo_answers_free_long_m['ethnicityC'] = df_emo_answers_free_long_m['ethnicity'].replace({'bipoc': -0.5, 'white': 0.5})

In [82]:
df_emo_answers_free_long_m.head(2)

Unnamed: 0,participantId,photoId,ethnicity,sex,age,label,url,emotion,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1_1,bipoc,female,adult,enfado,https://uwmadison.co1.qualtrics.com/ControlPan...,upset,-0.3818,-0.5,0.5,-0.5
1,0,Q118.1_1,bipoc,male,child,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,afraid,0.0,0.5,-0.5,-0.5


In [83]:
df_emo_answers_free_long_m_fil = df_emo_answers_free_long_m[df_emo_answers_free_long_m['label'] != 'attention']
df_emo_answers_free_long_m_fil = df_emo_answers_free_long_m_fil.reset_index(drop=True)

In [84]:
df_emo_answers_free_long_m_fil.head(2)

Unnamed: 0,participantId,photoId,ethnicity,sex,age,label,url,emotion,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1_1,bipoc,female,adult,enfado,https://uwmadison.co1.qualtrics.com/ControlPan...,upset,-0.3818,-0.5,0.5,-0.5
1,0,Q118.1_1,bipoc,male,child,sorpresa,https://uwmadison.co1.qualtrics.com/ControlPan...,afraid,0.0,0.5,-0.5,-0.5


In [85]:
df_emo_answers_free_long_m_fil.to_csv('../clean_data_mturk_espanol/free_labeling_emotion_mturk_long_format_lmer_espanol.csv', index=False)