In [74]:
import numpy as np
import pandas as pd

In [75]:
df = pd.read_csv("LoP_WordGroups.csv")
# Drop preview responses and participants who did not finish the survey.
finished = df["Finished"] == "1"
not_preview = df["DistributionChannel"] != "preview"
df = df.loc[finished & not_preview]
# Keep only participants who answered yes (coded "1") to all four consent questions.
consented = df["Q5_1"] == "1"
for consent_item in ("Q5_2", "Q5_3", "Q5_4"):
    consented = consented & (df[consent_item] == "1")
df = df.loc[consented]
len(df)

203

In [76]:
# Now exclude everybody who failed the attention checks.
# A row is kept only when its Q1541 answer contains the text "ATTENTION".
# Missing answers (NaN/None) count as a failed check instead of raising a
# TypeError inside the `in` test, and `otypes` lets the vectorized check run
# on an empty frame (np.vectorize cannot infer an output type from zero rows).
imc = np.vectorize(lambda s: (not pd.isnull(s)) and "ATTENTION" in s,
                   otypes = [bool])
df = df.loc[imc(df.Q1541.values)]

In [77]:
# Number of rows left in df at this point.
df.shape[0]

170

In [78]:
# Include measures of party identity and engagement to speculate on whether or not signal
# recovery is a top down (party identity) or bottom-up (political engagement) process
extra_cols = ["list1_party","list2_party","list3_party","list4_party",
              "list5_party","list1","list2","list3","list4","list5",
              "Q612","Q613","Q1430","Q610","Q611","Q619"]
df = df[["1","2","Q1557","4","5","1.1","2.1","3","4.1","Q1442"] + extra_cols]
# Give the two sets of item columns parallel names: "1".."5" and "1.1".."5.1".
# rename() returns a new frame, so nothing is mutated through a selection.
df = df.rename(columns = { "Q1557": "3", "3": "3.1", "Q1442": "5.1" })
df1 = df[["1","2","3","4","5"]]
# Flip the "1".."6" answers in the ".1" columns (1 <-> 6, 2 <-> 5, 3 <-> 4).
# Each cell is looked up exactly once, so no placeholder values are needed to stop
# an already-flipped answer from being flipped again; any value that is not a key
# (including NaN) passes through unchanged.
flip = { "1": "6", "2": "5", "3": "4", "4": "3", "5": "2", "6": "1" }
df2 = df[["1.1","2.1","3.1","4.1","5.1"]].apply(
    lambda col: col.map(lambda v: flip.get(v, v)))
# Line the flipped columns up with df1's labels so fillna can pair them by name.
df2 = df2.rename(columns = { "1.1": "1", "2.1": "2", "3.1": "3", "4.1": "4",
                             "5.1": "5" })
# Wherever df1 has no answer, take the flipped answer from the matching df2 cell,
# then attach the list / party / demographic columns (matched on the row index).
df_ = df1.fillna(df2).join(df[extra_cols])
df_ = df_.rename(columns = { "Q612": "party", "Q613": "party_identity",
                             "Q1430": "political_engagement", "Q610": "age",
                             "Q611": "gender", "Q619": "voted" })


def recode(x):
    """Return int(x) - 4, or NaN when x is the "-99" code or a missing value.

    Missing values (NaN/None) are mapped to NaN explicitly because int() would
    raise on them.
    """
    if pd.isnull(x) or x == "-99":
        return np.nan
    return int(x) - 4


# Series.map gives the same result on Python 2 and 3; the builtin map() only
# yields something assignable to a column where it returns a list.
df_["party_identity"] = df_["party_identity"].map(recode)
df_["political_engagement"] = df_["political_engagement"].map(recode)

In [80]:
# Save df_ to responses.csv using to_csv's default options.
df_.to_csv(path_or_buf = "responses.csv")

In [81]:
# Row counts for party == "2" and party == "1", in that order.
tuple(int((df_["party"] == party_code).sum()) for party_code in ("2", "1"))

(78, 37)

Get valence data.

In [82]:
valence_df = pd.read_csv("LoP_ValenceRatings.csv")
# Drop preview responses and participants who did not finish the survey.
valence_finished = valence_df["Finished"] == "TRUE"
valence_not_preview = valence_df["DistributionChannel"] != "preview"
valence_df = valence_df.loc[valence_finished & valence_not_preview]
# Keep only participants who answered "Yes" to all four consent questions.
valence_consented = valence_df["Q5_1"] == "Yes"
for consent_item in ("Q5_2", "Q5_3", "Q5_4"):
    valence_consented = valence_consented & (valence_df[consent_item] == "Yes")
valence_df = valence_df.loc[valence_consented]
len(valence_df)

41

In [83]:
# Now exclude everybody who failed the attention checks.
# A row is kept only when its Q1440 answer contains the text "ATTENTION".
# Missing answers (NaN/None) count as a failed check instead of raising a
# TypeError inside the `in` test, and `otypes` lets the vectorized check run
# on an empty frame (np.vectorize cannot infer an output type from zero rows).
imc = np.vectorize(lambda s: (not pd.isnull(s)) and "ATTENTION" in s,
                   otypes = [bool])
valence_df = valence_df.loc[imc(valence_df.Q1440.values)]

In [84]:
# Number of rows left in valence_df at this point.
valence_df.shape[0]

26

In [85]:
# Relabel the columns named "1".."25" with the integers 1..25.
int_labels = {str(k): k for k in range(1, 26)}
valence_df = valence_df.rename(columns = int_labels)
# Recode everything in reverse for interpretability
# (higher numbers == higher valence): "2" -> 8, "3" -> 7, ..., "8" -> 2.
reversed_scores = {str(k): 10 - k for k in range(2, 9)}
valence_df = valence_df.replace(reversed_scores)
# The two labelled answers become the ends of the reversed scale.
valence_df = valence_df.replace({ "Happy 1": 9, "Unhappy 9": 1 })

In [86]:
words = pd.read_csv("../study-3a/synonyms.csv")
# Select the rows labelled 0-6 and 8-25 (label 7 is left out): 25 rows in total.
# Each range is turned into a list before concatenating because range objects
# cannot be added together on Python 3.
pair_rows = list(range(7)) + list(range(8, 26))
# .copy() makes valence_data an independent frame, so the column assignments
# below do not act on a selection of `words`.
valence_data = words.loc[pair_rows, ["D","R"]].copy()
# Placeholders for the mean ratings; a scalar NaN fills every row.
valence_data["D_valence"] = np.nan
valence_data["R_valence"] = np.nan
# Keep the original row labels as an "index" column, then number the rows 1..25.
valence_data = valence_data.reset_index()
valence_data.index = valence_data.index + 1

In [87]:
# For every pair number 1..25, average the values in valence_df column <n> over the
# rows whose "pair<n>" entry equals that pair's D entry, and likewise for its R
# entry; store the two means in D_valence / R_valence.
for pair_no in range(1, 26):
    pair_values = valence_df[pair_no]
    pair_entry = valence_df["pair{}".format(pair_no)]
    for side in ("D", "R"):
        matches = pair_entry == valence_data.loc[pair_no, side]
        valence_data.loc[pair_no, side + "_valence"] = np.mean(pair_values[matches])

In [88]:
# Save valence_data to valence_data.csv using to_csv's default options.
valence_data.to_csv(path_or_buf = "valence_data.csv")