Combines the MC (multiple-choice) and LIKERT (confidence coding/scale) data into one file so that the proportions of correct responses can be compared between the MC and LIKERT conditions.
The output file `mturk_hk_MC_LIKERT.csv` is then used downstream in Stata.

In [1]:
import pandas as pd
import janitor
import numpy as np

# Input CSV with the multiple-choice (MC) responses.
mturk_MC_data = "mturk_hk_MC.csv"
# Input CSV with the Likert-scale (confidence coding) responses.
mturk_SCALE_data = "mturk_hk_LIKERT.csv"
# Output CSV that receives the combined MC + Likert rows.
savepath = "mturk_hk_MC_LIKERT.csv"

In [2]:
# Prepare the multiple-choice (MC) rows: label each row with a short question
# name, tag it with condition "mc", and expose the response as `correct`.

# Lookup from the raw `questions` codes to the short question labels.
MC_QUESTION_LABELS = {
    "aca_correct": "aca",
    "aca2_correct": "aca2",
    "gg_correct": "gg",
    "dt_correct": "dt",
}

dfmc = (pd.read_csv(mturk_MC_data)
        .assign(
            # Codes that are not in the lookup become NaN, which is the same
            # fallback the previous `case_when(..., np.nan)` chain produced.
            question=lambda df: df.questions.map(MC_QUESTION_LABELS),
            condition="mc",
            correct=lambda df: df.responses,
        )
        # The raw columns are now redundant with `question` / `correct`.
        .remove_columns(column_names=['questions', 'responses'])
       )
dfmc.head()

Unnamed: 0,respondent,democrat,republican,independent,democrat_noleaners,democrat_leaners,republican_leaners,probe,congenial,response_probe,question,condition,correct
0,3,0.0,0.0,1.0,,,,open,,,aca,mc,0
1,3,0.0,0.0,1.0,,,,open,,0.0,aca2,mc,0
2,3,0.0,0.0,1.0,,,,open,,,gg,mc,0
3,3,0.0,0.0,1.0,,,,open,,1.0,dt,mc,1
4,4,1.0,0.0,0.0,1.0,1.0,0.0,closed,1.0,,aca,mc,0


In [3]:
# Prepare the Likert-scale rows: keep complete cases, label each row with a
# short question name, tag it with condition "scale", and expose
# `scale_mc_c_10` as `correct`.

# Lookup from the raw `questions` codes to the short question labels.
SCALE_QUESTION_LABELS = {
    "rg_s_aca_3": "aca",
    "rg_s_aca2_3": "aca2",
    "rg_s_gg_4": "gg",
    "rg_s_dt_4": "dt",
}

# Read through the `mturk_SCALE_data` constant (same file name as before) so
# the input path is configured in one place, like the MC input.
dfscale = (pd.read_csv(mturk_SCALE_data)
           # Drop rows missing either `congenial` or `scale_mc_c_10`.
           .dropna(subset=["congenial", "scale_mc_c_10"])
           .assign(
               # Codes that are not in the lookup become NaN, which is the
               # same fallback the previous `case_when(..., np.nan)` produced.
               question=lambda df: df.questions.map(SCALE_QUESTION_LABELS),
               condition="scale",
               correct=lambda df: df.scale_mc_c_10,
           )
           # Remove the raw question/response columns and every scale coding
           # column; `correct` already carries `scale_mc_c_10`.
           .remove_columns([
               "questions",
               "responses",
               "scale_mc_c_10",
               "scale_correct_10",
               "scale_correct_7",
               "scale_mc_c_7",
           ])
          )
dfscale.head()

Unnamed: 0,respondent,democrat,republican,independent,democrat_noleaners,democrat_leaners,congenial,question,condition,correct
2,1,0.0,1.0,0.0,0.0,0.0,0.0,aca,scale,0.0
6,1,0.0,1.0,0.0,0.0,0.0,0.0,aca2,scale,0.0
11,1,0.0,1.0,0.0,0.0,0.0,0.0,gg,scale,0.0
19,1,0.0,1.0,0.0,0.0,0.0,1.0,dt,scale,0.0
42,6,0.0,1.0,0.0,0.0,0.0,0.0,aca,scale,0.0


In [4]:
# Stack the MC and scale rows into one table with a fresh 0..n-1 index.
df = (pd.concat([dfmc, dfscale], ignore_index=True)
      # `correct` is displayed as integers for MC rows and as floats for scale
      # rows; cast the whole column to integers in one vectorized step.
      .astype({"correct": "int64"})
     )

# Write the combined table without the index column.
df.to_csv(savepath, index=False)
df

Unnamed: 0,respondent,democrat,republican,independent,democrat_noleaners,democrat_leaners,republican_leaners,probe,congenial,response_probe,question,condition,correct
0,3,0.0,0.0,1.0,,,,open,,,aca,mc,0
1,3,0.0,0.0,1.0,,,,open,,0.0,aca2,mc,0
2,3,0.0,0.0,1.0,,,,open,,,gg,mc,0
3,3,0.0,0.0,1.0,,,,open,,1.0,dt,mc,1
4,4,1.0,0.0,0.0,1.0,1.0,0.0,closed,1.0,,aca,mc,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3911,1054,1.0,0.0,0.0,1.0,1.0,,,0.0,,dt,scale,1
3912,1056,1.0,0.0,0.0,1.0,1.0,,,1.0,,aca,scale,0
3913,1056,1.0,0.0,0.0,1.0,1.0,,,1.0,,aca2,scale,1
3914,1056,1.0,0.0,0.0,1.0,1.0,,,1.0,,gg,scale,0
