In [32]:
import pandas as pd

In [33]:
# Read the forced-choice response table and the photo label table.
responses_path = '../clean_data/forced_choice_emotion_uw_students.csv'
labels_path = '../data/emotion_labels.csv'

df = pd.read_csv(responses_path)
df_labels = pd.read_csv(labels_path)

In [34]:
# Keep only the contiguous run of answer columns from 'Q2.1' through 'Q195.1'
# (label-based slicing, so both endpoints are included).
photo_columns = slice('Q2.1', 'Q195.1')
df_emo_answers = df.loc[:, photo_columns]

# Long-format DataFrame

In [35]:
# Use each row's position as its participant id: materialise the index as a
# column, overwrite it with the fresh 0..n-1 index, then give it a proper name.
df_emo_answers_long = (
    df_emo_answers
    .reset_index()
    .assign(index=lambda frame: frame.index)
    .rename(columns={'index': 'participantId'})
)
df_emo_answers_long.head(2)

Unnamed: 0,participantId,Q2.1,Q3.1,Q4.1,Q5.1,Q6.1,Q7.1,Q8.1,Q9.1,Q10.1,...,Q186.1,Q187.1,Q188.1,Q189.1,Q190.1,Q191.1,Q192.1,Q193.1,Q194.1,Q195.1
0,0,Neutral,Anger,Anger,Anger,Anger,Anger,Anger,Happiness,Disgust,...,Surprise,Neutral,Happiness,Happiness,Other,Other,Uncertain,Uncertain,Other,Neutral
1,1,Anger,Anger,Anger,Sadness,Anger,Anger,Fear,Anger,Other,...,Surprise,Surprise,Happiness,Happiness,Disgust,Uncertain,Other,Anger,Uncertain,Uncertain


In [36]:
# Strip surrounding whitespace from the photo ids so they can be matched
# against column names, then keep them as a plain list.
photo_id_clean = df_labels['photoId'].str.strip()
df_labels['photoId'] = photo_id_clean
photoIds = photo_id_clean.tolist()

In [37]:
# Wide -> long: one row per (participant, photo) answer, ordered by participant.
df_emo_answers_long = (
    df_emo_answers_long
    .melt(id_vars=['participantId'], value_vars=photoIds)
    .reset_index(drop=True)
    .sort_values(by=['participantId'])
)

In [38]:
# Replace melt's default column names with descriptive ones.
melt_column_names = {'variable': 'photoId', 'value': 'emotion'}
df_emo_answers_long = df_emo_answers_long.rename(columns=melt_column_names)

In [39]:
# Attach the per-photo label columns to every answer. `validate` makes pandas
# raise if a photoId occurs more than once in df_labels.
df_emo_answers_long_m = df_emo_answers_long.merge(
    df_labels,
    how="left",
    on=["photoId"],
    validate="many_to_one",
)

In [40]:
# Preview the merged long-format frame.
df_emo_answers_long_m.head(n=2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url
0,0,Q2.1,Neutral,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...
1,0,Q163.1,Happiness,white,male,child,happiness,https://uwmadison.co1.qualtrics.com/ControlPan...


# Sentiment score

In [41]:
from nltk.sentiment import SentimentIntensityAnalyzer

In [42]:
# SentimentIntensityAnalyzer loads the VADER lexicon from the local NLTK data
# directory and raises LookupError when it is absent (e.g. on a fresh machine).
# Fetch it only when missing so the notebook runs end to end without manual setup.
import nltk

try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

sia = SentimentIntensityAnalyzer()

In [43]:
# Score each distinct emotion label once with VADER and broadcast the result,
# instead of re-running the analyzer on every row. The compound score is the
# value kept per label. Missing responses are skipped by dropna() and therefore
# map to NaN rather than raising inside the analyzer.
emotion_to_score = {
    emotion: sia.polarity_scores(emotion)['compound']
    for emotion in df_emo_answers_long_m['emotion'].dropna().unique()
}
df_emo_answers_long_m['sentimentScore'] = df_emo_answers_long_m['emotion'].map(emotion_to_score)

In [44]:
# Preview the frame with the new sentimentScore column.
df_emo_answers_long_m.head(n=2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore
0,0,Q2.1,Neutral,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0
1,0,Q163.1,Happiness,white,male,child,happiness,https://uwmadison.co1.qualtrics.com/ControlPan...,0.5574


# Linear mixed-effects regression (LMER)

## Center predictors

In [45]:
# Code each two-level photo attribute as -0.5 / +0.5 and store it in a new
# '<attribute>C' column.
#
# `.map` is used instead of `.replace`: replacing strings with floats relies on
# pandas implicitly downcasting the object column to float, which is deprecated
# since pandas 2.2. Without that downcast the coded columns stay object dtype
# and may be treated as categorical rather than numeric by the model formula.
# With `.map`, any value not listed below becomes NaN instead of being left as
# its original string, and the result is explicitly float.
CENTERED_CODES = {
    'sex': {'female': -0.5, 'male': 0.5},
    'age': {'child': -0.5, 'adult': 0.5},
    'ethnicity': {'bipoc': -0.5, 'white': 0.5},
}
for column, codes in CENTERED_CODES.items():
    df_emo_answers_long_m[f'{column}C'] = df_emo_answers_long_m[column].map(codes).astype(float)

In [46]:
# Preview the frame with the coded predictor columns.
df_emo_answers_long_m.head(n=2)

Unnamed: 0,participantId,photoId,emotion,ethnicity,sex,age,label,url,sentimentScore,sexC,ageC,ethnicityC
0,0,Q2.1,Neutral,bipoc,female,adult,anger,https://uwmadison.co1.qualtrics.com/ControlPan...,0.0,-0.5,0.5,-0.5
1,0,Q163.1,Happiness,white,male,child,happiness,https://uwmadison.co1.qualtrics.com/ControlPan...,0.5574,0.5,-0.5,0.5


In [47]:
# Drop every row whose label is 'attention' and renumber the remaining rows.
is_attention_row = df_emo_answers_long_m['label'].eq('attention')
df_emo_answers_long_m_fil = (
    df_emo_answers_long_m
    .loc[~is_attention_row]
    .reset_index(drop=True)
)

In [48]:
# Persist the filtered long-format table (without the row index) for modelling.
lmer_output_path = '../clean_data/forced_choice_emotion_uw_students_long_format_lmer.csv'
df_emo_answers_long_m_fil.to_csv(lmer_output_path, index=False)

## Run model

In [18]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.tools.sm_exceptions import ConvergenceWarning

In [19]:
# Mixed linear model: sentiment score regressed on the full factorial of the
# three coded photo attributes, with observations grouped by participant.
participant_groups = df_emo_answers_long_m_fil["participantId"]
md = smf.mixedlm(
    "sentimentScore ~  ethnicityC*sexC*ageC",
    data=df_emo_answers_long_m_fil,
    groups=participant_groups,
)

In [20]:
# Fit using only the L-BFGS optimizer.
# NOTE(review): the stored summary output reports Log-Likelihood 'inf', no
# standard error for the intercept, and a sqrt RuntimeWarning from the
# covariance diagonal. The fit looks degenerate and should be checked before
# the coefficients are interpreted.
optimizers = ["lbfgs"]
mdf = md.fit(method=optimizers)



In [21]:
# Show the fitted model's summary table as plain text.
model_summary = mdf.summary()
print(model_summary)

             Mixed Linear Model Regression Results
Model:             MixedLM  Dependent Variable:  sentimentScore
No. Observations:  9792     Method:              REML          
No. Groups:        51       Scale:               0.1682        
Min. group size:   192      Log-Likelihood:      inf           
Max. group size:   192      Converged:           Yes           
Mean group size:   192.0                                       
---------------------------------------------------------------
                     Coef.  Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------------
Intercept             0.140                                    
ethnicityC            0.029    0.008  3.463 0.001  0.012  0.045
sexC                 -0.016    0.008 -1.932 0.053 -0.032  0.000
ethnicityC:sexC       0.041    0.017  2.465 0.014  0.008  0.073
ageC                 -0.030    0.008 -3.561 0.000 -0.046 -0.013
ethnicityC:ageC       0.040    0.017  2.416 0.016  0.

  sdf[0:self.k_fe, 1] = np.sqrt(np.diag(self.cov_params()[0:self.k_fe]))
