In [2]:
import os
import pandas as pd
import pickle
from pyspan.config import *
# The version of the Purger at commit b5a767 was used to generate these stimuli. For reproducibility, I've included a
# copy of that version of the Purger module in the study directory.
from Purger import Purger

Since these surveys were designed, we've made a couple of changes to the PKL calculations (see changes.md). The old calculations are saved here so that the surveys remain reproducible.

In [22]:
# Load the archived table that was used when the survey was built.
# It is unpickled as-is; later cells read its "dmetric" and "rmetric" columns.
fn = "version_used_for_LoP_Ratings_2_partial_kls-unigrams"
with open(fn, "rb") as rfh:
    df = pickle.load(rfh)

In [23]:
# Build two rankings of the same table, each keeping only its own metric
# column and ordered from the largest value to the smallest.
dsorted = (
    df.drop(columns=["rmetric"])
      .sort_values(by="dmetric", ascending=False)
)
rsorted = (
    df.drop(columns=["dmetric"])
      .sort_values(by="rmetric", ascending=False)
)

In [24]:
# Purger is the archived local module imported above; it is built for the
# years 2012 through 2017 and supplies the stopword list used in the next cell.
# NOTE(review): the effect of overwrite=True is defined inside Purger -- confirm
# before re-running if it rewrites anything on disk.
purger = Purger(years=range(2012, 2018), overwrite=True)

In [25]:
# Remove stopwords: keep only rows whose index label is not in purger.stopwords.
dsorted = dsorted[[term not in purger.stopwords for term in dsorted.index]]
rsorted = rsorted[[term not in purger.stopwords for term in rsorted.index]]

In [26]:
# Remove procedural terms
# Load the pickled table of categorized terms (indexed by term, with a "class"
# column that the next cell reads). The file is opened in a context manager so
# the handle is closed even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
fn = "categorized"
with open(fn, "rb") as rfh:
    categorized = pickle.load(rfh)

In [27]:
# Terms whose "class" is "P"; these are removed from both rankings below.
# A single vectorised comparison replaces the per-term .loc lookup, which was
# slow and raised on repeated index labels.
exclude = list(categorized[categorized["class"] == "P"].index)

In [28]:
# Display the terms with class "P" that will be excluded, for a visual check.
exclude

['consent',
 'workers',
 'extraneous',
 'material',
 'senate',
 'house',
 'subcommittee',
 'funding',
 'caucus',
 'infrastructure',
 'members',
 'consideration',
 'act',
 'revise',
 'suspend',
 'vote',
 'legislative',
 'working',
 'support',
 'friend',
 'colleagues',
 'rights',
 'budget',
 'unanimous',
 'ms',
 'balance',
 'tax',
 'middle',
 'administration',
 'majority',
 'may',
 'time',
 'remarks',
 'rules',
 'rule',
 'voting',
 'reserve']

In [29]:
# Drop every row whose index label is in `exclude`. Index.isin builds the mask
# in one vectorised pass instead of running a Python-level list lookup per label.
dsorted = dsorted[~dsorted.index.isin(exclude)]
rsorted = rsorted[~rsorted.index.isin(exclude)]

In [30]:
# Preview the 39 highest-ranked rows of the "dmetric" ranking.
dsorted.head(39)

Unnamed: 0_level_0,dmetric
term,Unnamed: 1_level_1
bill,0.000502
women,0.000454
republicans,0.000432
health,0.000382
public,0.000306
million,0.000298
cuts,0.000287
care,0.000256
americans,0.000251
gun,0.00025


In [31]:
# We need 78 words (80 to test, minus 2 for the attention check):
# the top 39 index labels from each of the two rankings.
words = dsorted.index[:39].tolist() + rsorted.index[:39].tolist()

In [32]:
# Every selected word must already appear in categorized.index. When this
# fails, the commented-out lines below write the missing words to
# to_classify.txt so they can be added.
#with open("to_classify.txt", "w") as wfh:
#    wfh.write("\n".join([ w for w in words if w not in categorized.index ]))
assert all(w in categorized.index for w in words), \
    "words missing from categorized: {}".format(
        [w for w in words if w not in categorized.index])

In [33]:
# Include opposite-valenced words: the first ten entries of column "0" and the
# first ten entries of column "1" of the archived antonym table.
# NOTE: `words` is extended in place, so running this cell a second time
# appends the antonyms again.
df_antonyms = pd.read_csv("version_used_for_LoP_Ratings_2_antonyms_modified_with-pkls2.csv")
antonyms = df_antonyms["0"].iloc[:10].tolist() + df_antonyms["1"].iloc[:10].tolist()
words.extend(antonyms)

In [34]:
# 39 + 39 ranked terms plus 10 + 10 antonyms gives 98 terms, all of which must
# be distinct. The messages report the actual counts when a check fails.
assert len(words) == 98, \
    "expected 98 survey terms, found {}".format(len(words))
assert len(set(words)) == 98, \
    "survey terms are not unique: {} distinct out of {}".format(len(set(words)), len(words))

In [35]:
# Save the final term list, one term per line (no newline after the last term).
terms_text = "\n".join(words)
with open("survey_terms.txt", "w") as wfh:
    wfh.write(terms_text)

In [36]:
# Instructions to be included at the top of every page.
# "LOOPN" is a placeholder: write_block replaces it with a number (1000 + item
# index) each time it inserts this text, which it does before every 20th item.
# NOTE(review): "${e://Field/Condition}" looks like a survey-platform piped-text
# field that is presumably filled in when the survey is served -- confirm.
# The text is stimulus material, so it must not be edited.
instructions = """

LOOPN. For each word, imagine you were watching a congressional debate on C-Span and you heard that word.  Please estimate how likely it is that the word is spoken either by a ${e://Field/Condition}.

"""

# Response options, keyed by the party whose answers are listed first.
# Each value holds six answer lines separated by newlines; the two entries
# contain the same six lines in opposite order.
options = {
    # Republican answers first (most certain at the top), then Democrat answers.
    "republican":"""I am <strong>almost certain</strong> the speaker is a <strong>Republican</strong>.
I am <strong>reasonably sure</strong> the speaker is a <strong>Republican</strong>.
I am <strong>unsure but think that</strong> the speaker is a <strong>Republican</strong>.
I am <strong>unsure but think that</strong> the speaker is a <strong>Democrat</strong>.
I am <strong>reasonably sure</strong> the speaker is a <strong>Democrat</strong>.
I am <strong>almost certain</strong> the speaker is a <strong>Democrat</strong>.""",

    # Democrat answers first (most certain at the top), then Republican answers.
    "democrat":"""I am <strong>almost certain</strong> the speaker is a <strong>Democrat</strong>.
I am <strong>reasonably sure</strong> the speaker is a <strong>Democrat</strong>.
I am <strong>unsure but think that</strong> the speaker is a <strong>Democrat</strong>.
I am <strong>unsure but think that</strong> the speaker is a <strong>Republican</strong>.
I am <strong>reasonably sure</strong> the speaker is a <strong>Republican</strong>.
I am <strong>almost certain</strong> the speaker is a <strong>Republican</strong>."""
}

In [37]:
def write_block(party):
    """Write the survey text files for one ordering of the response options.

    Two files are created (or overwritten) in the current working directory:

    * ``atc_<party>_first.txt`` -- one item per entry of ``words``, numbered
      from 99 upward.
    * ``survey_<party>_first.txt`` -- the ``[[Block:Words]]`` header, one item
      per entry of ``words`` numbered from 1 (with the instructions text
      inserted before every 20th item), a ``98.1 BREAK`` marker, and finally
      the full contents of the ``atc`` file.

    The function reads the notebook-level ``words``, ``instructions`` and
    ``options``. The literal 98/99 numbering matches the 98-term list that the
    notebook asserts before this cell.

    Args:
        party: Key of ``options`` ("democrat" or "republican"); selects the
            response-option text attached to every item and names the files.

    Raises:
        KeyError: If ``party`` is not a key of ``options``. The lookup happens
            before any file is opened, so no partial output is left behind.
    """
    # Same text as the original multi-line literals, spelled with escapes so
    # the exact blank lines do not depend on source-file whitespace.
    item_template = "\n\n{}. {}\n\n{}\n\n"
    choices = options[party]

    main_parts = ["[[Block:Words]]"]
    atc_parts = []
    for i, word in enumerate(words):
        if i % 20 == 0:
            # Repeat the instructions with a distinct number (1000, 1020, ...).
            main_parts.append(instructions.replace("LOOPN", str(1000 + i)))
        main_parts.append(item_template.format(i + 1, word, choices))
        atc_parts.append(item_template.format(i + 99, word, choices))
    main_parts.append("\n\n98.1 BREAK\n\n")
    atc_text = "".join(atc_parts)

    # Context managers close both files even if a write fails.
    with open("atc_{}_first.txt".format(party), "w") as atc:
        atc.write(atc_text)

    # Merge attention checks with main blocks: append the atc text directly
    # instead of shelling out to `cat`, which was non-portable and unchecked.
    with open("survey_{}_first.txt".format(party), "w") as wfh:
        wfh.write("".join(main_parts))
        wfh.write(atc_text)

In [38]:
# Generate the survey files for both response-option orderings.
for party in ["democrat", "republican"]:
    write_block(party)