In [5]:
import numpy as np
import pandas as pd

In [6]:
# Load data
df_ = pd.read_csv("LoP_Synonyms.csv")
df = df_[2:]

In [7]:
# Check if anyone replied "no" to any of the questions on the consent form
pd.concat([ df[df["Q5_1"] != "Yes"], df[df["Q5_2"] != "Yes"], 
            df[df["Q5_3"] != "Yes"], df[df["Q5_4"] != "Yes"] ])

Unnamed: 0,StartDate,EndDate,Status,Progress,Duration (in seconds),Finished,RecordedDate,DistributionChannel,UserLanguage,Q5_1,...,pair17,pair18,pair19,pair20,pair21,pair22,pair23,pair24,pair25,pair26


In [8]:
# See if anyone didn't finish the survey
unfinished = df.loc[df["Finished"] == "FALSE"]
print "Excluding some participants"*(not unfinished.empty)
df = df.loc[df["Finished"] == "TRUE"]
print len(df)


202


In [9]:
# Combine "Democrat first" and "Republican first" conditions
df1 = df[map(str, range(1, 27))]
df2 = df[map(lambda n: str(n+.1), range(1, 27))]
df2.rename(columns = dict([ (str(i+.1), str(i)) for i in range(1, 27) ]), inplace = True)
df__ = df1.fillna(df2)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [10]:
code = { "I am almost certain the speaker is a Democrat.": 0,
         "I am reasonably sure the speaker is a Democrat.": 1,
         "I am unsure but think that the speaker is a Democrat.": 2,
         "I am unsure but think that the speaker is a Republican.": 3,
         "I am reasonably sure the speaker is a Republican.": 4,
         "I am almost certain the speaker is a Republican.": 5,
         "-99": np.nan
       }

In [11]:
df__ = df__.replace(code)

In [12]:
minidf = df__[map(str, range(1, 27))]
minidf.rename(columns = dict(zip(map(str, range(1, 27)), range(26))), inplace = True)

In [13]:
# Helper functions for cleaning demographic responses
def int_(s):
    if s == "-99":
        return np.nan
    return int(s)

def get_gender(row):
    gender = row["Q611"]
    if "f" in gender.lower():
        return "F"
    if "m" in gender.lower():
        return "M"
    return np.nan

def get_party_identity(row):
    pi = row["Q613"]
    if pi == "-99":
        return np.nan
    return int(pi[0])-4
    
def get_political_engagement(row):
    pe = row["Q1430"]
    if pe == "-99":
        return np.nan
    return int(pe[0])-4
    
def get_education(row):
    edu = row["Q617"]
    if edu == "-99":
        return np.nan
    return {
        "Some high school (no degree)": 0,
        "High school": 1,
        "Some college (no degree)": 2,
        "Associate's/professional/vocational degree": 3,
        "Bachelor's degree": 4,
        "Master's degree": 5,
        "Higher-level graduate degree": 6,
        "Other": np.nan
    }[edu]

def get_voted(row):
    voted = row["Q619"]
    if voted == "Yes":
        return 1
    if voted == "No":
        return 0
    return np.nan

In [14]:
# Add demographics
minidf["age"] = map(int_, df["Q610"])
minidf["gender"] = df["Q611"].replace("-99", np.nan)
minidf["party"] = df["Q612"].replace("-99", np.nan)
minidf["party_identity"] = df.apply(get_party_identity, axis = 1)
minidf["political_engagement"] = df.apply(get_political_engagement,
                                          axis = 1)
minidf["education"] = df.apply(get_education, axis = 1)
minidf["voted"] = df.apply(get_voted, axis = 1)
minidf["political_bubble"] = map(int_, df["Q620_4"])
minidf["atn"] = df["Q1440"]
minidf[map(lambda i: "pair{}".format(i), range(26))] = df[map(lambda i: "pair{}".format(i), range(1,27))]

# Participant-level exclusions

We use a version of the instructional manipulation check (Oppenheimer et al., 2009) as an attention check. We will exclude from analysis all participants who fail the attention check. We will also exclude data from participants who make a judgment about 19 or fewer of the 26 items.

Exclusion based on the instructional manipulation check

In [15]:
imc = np.vectorize(lambda s: "ATTENTION" in s)
minidf = minidf.loc[imc(minidf["atn"])]
print len(minidf)

174


Exclusion based on missing data.

In [16]:
minidf = minidf.loc[np.apply_along_axis(lambda s: len(s[~np.isnan(s)]) > 19, 1, minidf[range(26)])]
print len(minidf)

174


In [17]:
minidf.to_csv("responses.csv")