Note: "(1) Process Data" cannot be run publicly as it relies on Prolific data that exposes user IDs. Instead, the output of this file is already included in the repo, so users can skip to the next file.

## Libraries

In [22]:
import pandas as pd
import numpy as np

from mlgear.utils import show, display_columns
from surveyweights import run_weighting_iteration, run_weighting_scheme, normalize_weights
from survey_dud_detector import detect_straightlining, detect_low_incidence


def is_valid_prolific_id(pid):
    if len(pid) != 24:
        return False
    try:
        pid = int(pid, 16)
        return True
    except:
        return False


def sjoin(x):
    return ';'.join(x[x.notnull()].astype(str))


def transform_age(age):
    if age < 18:
        return 'Under 18'
    if age < 24:
        return '18-24'
    if age < 45:
        return '25-44'
    if age < 64:
        return '45-64'
    return '65+'


def transform_income(inc):
    if inc == 'Between $30,000 and $49,999' or inc == 'Between $15,000 and $29,999':
        return 'Between $15,000 and $49,999'
    else:
        return inc


def transform_education(educ):
    if educ in ['1st grade', '2nd grade', '3rd grade', '4th grade', '5th grade', '6th grade', '7th grade',
                '8th grade', '9th grade', '10th grade', '11th grade', 'Did not attend school']: 
        return 'Less than high school'
    if educ in ['1 year of college', '2 years of college', '3 years of college']:
        return 'Some college, no degree'
    if educ == 'Some graduate school':
        return 'Graduated from college'
    return educ


def transform_race(race):
    if race in ['Another race', 'American Indian or Alaska Native', 'Native Hawaiian or other Pacific Islander']:
        return 'Other'
    else:
        return race


def transform_region(state):
    if state in ['Illinois', 'Indiana', 'Iowa', 'Michigan', 'Minnesota', 'Ohio', 'Pennsylvania', 'Wisconsin']:
        return 'Midwest'
    elif state in ['Alaska', 'Idaho', 'Kansas', 'Montana', 'Nebraska', 'North Dakota', 'South Dakota',
                   'Utah', 'Wyoming', 'Oklahoma']:
        return 'Mountains'
    elif state in ['Connecticut', 'Delaware', 'District of Columbia (DC)', 'Maine', 'Maryland', 'Massachusetts',
                   'New Hampshire', 'New Jersey', 'New York', 'Rhode Island', 'Vermont']:
        return 'Northeast'
    elif state in ['California', 'Hawaii', 'Oregon', 'Washington', 'Guam', 'Puerto Rico', 'Virgin Islands']:
        return 'Pacific'
    elif state in ['Missouri', 'Tennessee', 'Alabama', 'Arkansas', 'Kentucky', 'Louisiana', 'Mississippi',
                   'Texas', 'Virginia', 'West Virginia']:
        return 'South'
    elif state in ['Arizona', 'Colorado', 'Nevada', 'New Mexico']:
        return 'Southwest'
    elif state in ['Florida', 'Georgia', 'North Carolina', 'South Carolina']:
        return 'Southeast'
    else:
        return 'Other'


def transform_2016_vote(vote):
    vote = vote.split(',')[0]
    if vote in ['Gary Johnson', 'Jill Stein', 'Another candidate']:
        vote = 'Other'
    return vote


def transform_gss_trust(trust):
    if trust == 'You can’t be too careful':
        return 'Can\'t be too careful'
    elif trust == 'Most people can be trusted':
        return 'Can trust'
    else:
        return trust
    

def transform_gss_bible(bible):
    if bible == 'The Bible is an ancient book of fables, legends, history, and moral precepts recorded by man':
        return 'Book of fables'
    elif bible == 'The Bible is the actual word of God and it is to be taken literally, word for word':
        return 'Word of God'
    elif bible == 'The Bible is the inspired word of God but not everything should be taken literally, word for word':
        return 'Inspired word'
    else:
        return bible
    
    
def simplify_likert(likert):
    likert = likert.replace('’', '\'')
    if likert == 'Strongly agree':
        return 'Agree'
    elif likert == 'Strongly disagree':
        return 'Disagree'
    elif likert == 'Neither agree or disagree':
        return 'Don\'t know'
    elif likert == 'Neither agree nor disagree':
        return 'Don\'t know'
    else:
        return likert

## Load Survey

In [23]:
survey = pd.read_csv('responses.csv')
show(survey)

          Respondent ID     Collector ID              Start Date  \
0                   nan              nan                     NaN   
1    12094965161.000000 395072413.000000  10/20/2020 09:03:45 PM   
2    12094897567.000000 395072413.000000  10/20/2020 11:00:25 PM   
3    12094877530.000000 395072413.000000  10/20/2020 10:46:36 PM   
4    12094874451.000000 395072413.000000  10/20/2020 10:44:17 PM   
...                 ...              ...                     ...   
4726 12093881035.000000 395072413.000000  10/20/2020 03:43:10 PM   
4727 12093867709.000000 395072413.000000  10/20/2020 03:38:39 PM   
4728 12093867293.000000 395072413.000000  10/20/2020 03:37:18 PM   
4729 12093854779.000000 395072413.000000  10/20/2020 03:34:04 PM   
4730 12093809270.000000 395072413.000000  10/20/2020 03:18:53 PM   

                    End Date      IP Address  Email Address  First Name  \
0                        NaN             NaN            nan         nan   
1     10/20/2020 11:54:19 PM     

In [24]:
variable_map = {'prolific_id': 'What is your Prolific ID?',
                'lv_thought': 'How much thought have you given to the upcoming election?',
                'lv_registered': 'Are you registered to vote in the 2020 election?',
                'lv_plan_vote': 'Do you plan to vote in the 2020 election?',
                'lv_already_voted': 'Have you already voted in the 2020 election?',
                'vote_method_plan': 'How are you planning to vote?',
                'lv_likely': 'How likely are you to vote?',
                'vote_method': 'How did you vote?',
                'p5_condition': "Suppose the following measure was on the ballot in your state. The proposed measure would issue a $5.5 billion general obligation bond for the state's stem cell research institute and make changes to the institute's governance structure and programs.Supporters of the initiative argue that such a bond could lead to research that might produce breakthroughs in the quality of life of everyday Americans.Opponents of the initiative argue that such money is ultimately wasted on ineffective treatments.If the election were held today, would you vote for or against this ballot measure?",
                'p5_vote': "Suppose the following measure was on the ballot in your state. The proposed measure would issue a $5.5 billion general obligation bond for the state's stem cell research institute and make changes to the institute's governance structure and programs.Supporters of the initiative argue that such a bond could lead to research that might produce breakthroughs in the quality of life of everyday Americans.Opponents of the initiative argue that such money is ultimately wasted on ineffective treatments.If the election were held today, would you vote for or against this ballot measure?.1",
                'p6_condition': "Suppose the following measure was on the ballot in your state. The proposed measure would amend the state constitution to allow people on parole for felony convictions to vote.Supporters of the initiative argue that felons who have completed their sentences have already paid their debt to society and should be eligible to vote.Opponents of the initiative argue that by committing a felony one has given up one’s right to participate in the democratic process, and we should not be supporting their right to vote.If the election were held today, would you vote for or against this ballot measure?",
                'p6_vote': 'Suppose the following measure was on the ballot in your state. The proposed measure would amend the state constitution to allow people on parole for felony convictions to vote.Supporters of the initiative argue that felons who have completed their sentences have already paid their debt to society and should be eligible to vote.Opponents of the initiative argue that by committing a felony one has given up one’s right to participate in the democratic process, and we should not be supporting their right to vote.If the election were held today, would you vote for or against this ballot measure?.1',
                'p7_condition': 'Suppose the following measure was on the ballot in your state. The proposed measure would allow the manufacture, delivery, administration of psilocybin at supervised, licensed facilities.Supporters of the initiative argue that such a measure would help alleviate the growing mental health crisis by providing another safe and effective treatment to be given by trained facilitators to treat depression, anxiety, and trauma.Opponents of the initiative argue that there already are effective treatments available and such a measure would needlessly introduce a potentially dangerous hallucinogenic drug into our community.If the election were held today, would you vote for or against this ballot measure?',
                'p7_vote': 'Suppose the following measure was on the ballot in your state. The proposed measure would allow the manufacture, delivery, administration of psilocybin at supervised, licensed facilities.Supporters of the initiative argue that such a measure would help alleviate the growing mental health crisis by providing another safe and effective treatment to be given by trained facilitators to treat depression, anxiety, and trauma.Opponents of the initiative argue that there already are effective treatments available and such a measure would needlessly introduce a potentially dangerous hallucinogenic drug into our community.If the election were held today, would you vote for or against this ballot measure?.1',
                'p8_condition': 'Suppose the following measure was on the ballot in your state. The proposed measure would require the Parks and Wildlife Commission to create a plan to reintroduce and manage gray wolves on designated lands by the end of 2023.Supporters of the initiative argue these measures have worked in the past to improve the quality of the local ecosystem.Opponents of the initiative argue such a measure would create significant problems to local livestock and harm outdoor recreational activities.If the election were held today, would you vote for or against this ballot measure?',
                'p8_vote': 'Suppose the following measure was on the ballot in your state. The proposed measure would require the Parks and Wildlife Commission to create a plan to reintroduce and manage gray wolves on designated lands by the end of 2023.Supporters of the initiative argue these measures have worked in the past to improve the quality of the local ecosystem.Opponents of the initiative argue such a measure would create significant problems to local livestock and harm outdoor recreational activities.If the election were held today, would you vote for or against this ballot measure?.1',
                'p9_condition': "Suppose the following measure was on the ballot in your state. The proposed measure would require a search warrant to access a person's electronic data and electronic communications.Supporters of the initiative argue that such a measure would ensure that people’s privacy is adequately protected from police overreach.Opponents of the initiative argue that such a measure would prevent police from being able to solve crimes in a timely manner and may let some guilty people go free.If the election were held today, would you vote for or against this ballot measure?",
                'p9_vote': "Suppose the following measure was on the ballot in your state. The proposed measure would require a search warrant to access a person's electronic data and electronic communications.Supporters of the initiative argue that such a measure would ensure that people’s privacy is adequately protected from police overreach.Opponents of the initiative argue that such a measure would prevent police from being able to solve crimes in a timely manner and may let some guilty people go free.If the election were held today, would you vote for or against this ballot measure?.1",
                'vote_measure_100pct_income_tax': 'Suppose the following measure was on the ballot in your state. The proposed measure would abolish the sales tax but tax all income at 100%, such that any money earned would be taxed by the state.Supporters of the initiative argue that such a measure would help alleviate important budget shortfalls and provide much needed funding to education.Opponents of the initiative argue that such a tax is ludicrous, would completely destroy the economy, and impoverish every citizen as effectively no one could earn money.If the election were held today, would you vote for or against this ballot measure?',
                'p11_condition': 'Suppose the following measure was on the ballot in your state. The proposed measure would adding language to the state Constitution stating that “individuals have a right to stand their ground and may use deadly force in self-defense or in defense of another person”.Supporters of the initiative argue that such a measure would allow people to better protect their homes and avoid being criminalized for self-defense.Opponents of the initiative argue that such a measure would be irresponsible and reckless, leading to a lot of unnecessary death as people take the law into their own hands rather than call the police.If the election were held today, would you vote for or against this ballot measure?',
                'p11_vote': 'Suppose the following measure was on the ballot in your state. The proposed measure would adding language to the state Constitution stating that “individuals have a right to stand their ground and may use deadly force in self-defense or in defense of another person”.Supporters of the initiative argue that such a measure would allow people to better protect their homes and avoid being criminalized for self-defense.Opponents of the initiative argue that such a measure would be irresponsible and reckless, leading to a lot of unnecessary death as people take the law into their own hands rather than call the police.If the election were held today, would you vote for or against this ballot measure?.1',
                'vote2016': 'In the 2016 Presidential election, who did you vote for?',
                'vote_trump_biden': 'In the 2020 Presidential election, who do you intend to vote for?',
                'vote_trump_biden2': 'In the 2020 Presidential election, who did you vote for?',
                'vote_senate': 'In the 2020 election, who do you plan to vote for Senate?',
                'vote_senate2': 'In the 2020 election, who did you vote for Senate?',
                'vote_rep': 'In the 2020 election, who do you plan to vote for Representative in your Congressional district?',
                'vote_rep2': 'In the 2020 election, who did you votefor Representative in your Congressional district?',
                'agree_flies_animal_feed': 'How much do you agree or disagree with the following?', # Flies are a suitable source of protein for use in animal feed
                'agree_eat_animals_fed_insects': 'Unnamed: 38', # I would eat meat if I knew it the animal had been fed insects as part of its feed
                'agree_eat_insects': 'Unnamed: 39', # I would never personally eat insects
                'agree_flies_animal_feed_alt': 'How much do you agree or disagree with the following?.1', # It is acceptable to feed insects to animals that are raised for food
                'agree_eat_animals_fed_insects_alt_reverse': 'Unnamed: 41', #I would not be comfortable eating meat from an animal that was raised on insect feed
                'agree_insect_feed_harms_health': 'Unnamed: 42', # Eating meat from an animal that was raised on feed made from insects poses a risk to human health
                'agree_farming_harms_environment': 'How much do you agree or disagree with the following?.2', # Livestock farming has a negative impact on the environment
                'agree_farming_climate_change': 'Unnamed: 44', # There is a direct connection between climate change and livestock farming
                'agree_farming_covid': 'Unnamed: 45', # There is a direct connection between disease outbreaks like COVID-19 and livestock farming
                'agree_plant_based_is_meat': 'How much do you agree or disagree with the following?.3', # “Plant-based sausage” contains meat from animals
                'agree_stop_buying_if_reneg': 'Unnamed: 47', # If a restaurant or grocery made a public promise to stop using eggs from hens confined in cages, and then that company broke its promise, I would stop buying products from that company
                'agree_vote_cand_oppose_abortion': 'How much do you agree or disagree with the following?.4', # I would be more likely to vote for a candidate if I knew they opposed abortion
                'agree_vote_cand_gun_control': 'Unnamed: 49', # I would be more likely to vote for a candidate if I knew they supported gun control
                'agree_vote_cand_animal_welfare': 'Unnamed: 50', # I would be more likely to vote for a candidate if I knew they supported animal welfare
                'agree_vote_cand_reg_farming': 'How much do you agree or disagree with the following?.5', # I would be more likely to vote for a candidate if I knew they wanted to regulate factory farming
                'agree_vote_veg': 'Unnamed: 52', # I would be more likely to support a candidate if I knew they were vegetarian
                'agree_vote_gay': 'Unnamed: 53', # I would be more likely to support a candidate if I knew they were gay
                'agree_vote_atheist': 'Unnamed: 54', # I would be more likely to support a candidate if I knew they were an atheist
                'agree_pain_dogs': 'How much do you agree or disagree with the following?.6', # Dogs are capable of feeling pain
                'agree_pain_pigs': 'Unnamed: 56', # Pigs are capable of feeling pain
                'agree_pain_chickens': 'Unnamed: 57', # Chickens are capable of feeling pain
                'agree_pain_fish': 'Unnamed: 58', # Fish are capable of feeling pain
                'agree_pain_honeybees': 'Unnamed: 59', # Honeybees are capable of feeling pain
                'agree_pain_termites': 'Unnamed: 60', # Termites are capable of feeling pain
                'agree_pain_ants': 'Unnamed: 61', # Ants are capable of feeling pain
                'gss_trust': "Generally speaking, would you say that most people can be trusted or that you can't be too careful in dealing with people?",
                'gss_bible': 'Which of these statements comes closest to describing your feelings about the Bible?',
                'gss_spanking': 'How much do you agree or disagree with the following?.7', # It is sometimes necessary to discipline a child with a good, hard spanking
                'agree_birth_control_immoral': 'Unnamed: 65', # Birth control is morally wrong
                'agree_11hrs_sleep': 'Unnamed: 66', # I sleep more than 11 hours a night on average
                'agree_40hrs_work': 'Unnamed: 67', # I work more than 40 hours a week on average
                'agree_puerto_rico': 'How much do you agree or disagree with the following?.8', # Puerto Rico should be a US state
                'agree_socialists_no_teach': 'Unnamed: 69', # Socialists should not be allowed to teach in public schools
                'agree_immigrants_work': 'Unnamed: 70', # Immigrants work hard in America
                'agree_need_more_equal_rights': 'Unnamed: 71', # Our country hasn't gone far enough in giving blacks equal rights with whites
                'ffq_chicken': 'Which of the following types of food have you eaten in the past month? Please select all that apply.', # Chicken (fried chicken, in soup, grilled chicken, etc.)
                'ffq_turkey': 'Unnamed: 73', # Turkey (turkey dinner, turkey sandwich, in soup, etc.)
                'ffq_pork': 'Unnamed: 74', # Pork (ham, pork chops, ribs, etc.)
                'ffq_beef': 'Unnamed: 75', # Beef (steak, meatballs, in tacos, etc.)
                'ffq_fish': 'Unnamed: 76', # Fish (salmon, tuna salad, fish and chips, etc.)
                'ffq_seafood': 'Unnamed: 77', # Other seafood (shrimp, crab, mussels, etc.)
                'ffq_other_meat': 'Unnamed: 78', # Other meat (duck, lamb, goat, venison, etc.)
                'ffq_dairy': 'Unnamed: 79', # Dairy products (cheese, milk, yogurt, etc.)
                'ffq_eggs': 'Unnamed: 80', # Eggs (omelet, in salad, in baked goods, etc.)
                'ffq_none': 'Unnamed: 81', # None of the above
                'news_wsj': 'Which of these, if any, do you read, listen, or watch for news?', # The Wall Street Journal
                'news_daily_wire': 'Unnamed: 83', # The Daily Wire
                'news_npr': 'Unnamed: 84', # NPR
                'news_guardian': 'Unnamed: 85', # The Guardian
                'news_msnbc': 'Unnamed: 86', # MSNBC
                'news_cnn': 'Unnamed: 87', # CNN
                'news_john_oliver': 'Unnamed: 88', # Last Week Tonight with John Oliver
                'news_steven_colbert': 'Unnamed: 89', # The Late Show with Steven Colbert
                'news_attention_check': 'Unnamed: 90', # The Current Show with Al Franken
                'news_nyt': 'Unnamed: 91', # The New York Times
                'news_atlantic': 'Unnamed: 92', # The Atlantic
                'news_fox': 'Unnamed: 93', # Fox News
                'news_wp': 'Unnamed: 94', # The Washington Post
                'news_none': 'Unnamed: 95', # None of the above
                'news_amount': 'How often do you read the news?',
                'social_fb': 'Which of these social media networks do you use?', # Facebook
                'social_twitter': 'Unnamed: 98', # Twitter
                'social_instagram': 'Unnamed: 99', # Instagram
                'social_tiktok': 'Unnamed: 100', # TikTok
                'social_pinterest': 'Unnamed: 101', # Pinterest
                'social_none': 'Unnamed: 102', # None of the above
                'gender': 'What is your gender?',
                'birth_year': 'In which year were you born? Please write your answer as 4 digits only',
                'race': 'What is your race?',
                'education': 'What is the highest level of education you have completed?',
                'state': 'In what state or U.S. territorydo you live in?',
                'income': 'What is your annual income?',
                'urban_rural': 'Which of the following best describes the area in which you live?',
                'honesty': 'How honestly have you answered these questions?'}

survey.columns = [c.replace('\xa0', '') for c in survey.columns]
variable_map = {v: k for k, v in variable_map.items()}
survey = survey[variable_map.keys()].rename(variable_map, axis=1)
survey = survey.drop(0)
show(survey)

                   prolific_id     lv_thought lv_registered lv_plan_vote  \
1     5f17dcdca4b99e18ed83100e    Quite a lot           Yes          Yes   
2     5f11d83ebe3dfb0abd270cfe    Quite a lot           Yes          Yes   
3     5f44c203ce8f190184ff6882    Quite a lot           Yes          Yes   
4     5f44c203ce8f190184ff6882    Quite a lot           Yes          Yes   
5     5f44c203ce8f190184ff6882    Quite a lot           Yes          Yes   
...                        ...            ...           ...          ...   
4726  5d4c7acd2799110019400b6a    Quite a lot            No           No   
4727  5ed014837725460d6f673cdd    Quite a lot           Yes          Yes   
4728  5f8f4708a8d10219158c8743    Quite a lot           Yes          Yes   
4729  5dc9a379d210e46baff956fb    Quite a lot           Yes          Yes   
4730                      test  Only a little            No           No   

     lv_already_voted           vote_method_plan    lv_likely vote_method  \
1         

In [25]:
measures = {'p5': {'ca_prop16_affirmative_action': 'Suppose the following measure was on the ballot in your state. The proposed measure would:<br>\n<ul>\n<li>Permit government decision-making policies to consider race, sex, color, ethnicity, or national origin to address diversity.</li>\n<li>Does not alter other state and federal laws guaranteeing equal protection and prohibiting unlawful discrimination.</li>\n</ul>\nSupporters of the initiative argue that including race in decision making is essential to ensure that all races are treated fairly.<br><br>Opponents of the initiative argue that truly fair decisions are race-blind and including race would only lead to unfair discrimination.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'ca_prop22_rideshare': 'Suppose the following measure was on the ballot in your state. The proposed measure would define app-based transportation (rideshare) and delivery drivers as independent contractors and adopt labor and wage policies specific to app-based drivers and companies.<br><br>Supporters of the initiative argue that this measure would protect rideshare drivers and ensure easy access to affordable ridesharing.<br><br>Opponents of the initiative argue such a measure would just boost technology company profits and harm rideshare drivers by denying them access to important benefits such as paid time off.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'ca_prop14_stem_cell': "Suppose the following measure was on the ballot in your state. The proposed measure would\xa0 issue a $5.5 billion general obligation bond for the state's stem cell research institute and make changes to the institute's governance structure and programs.<br><br>Supporters of the initiative argue that such a bond could lead to research that might produce breakthroughs in the quality of life of everyday Americans.<br><br>Opponents of the initiative argue that such money is ultimately wasted on ineffective treatments.<br><br>If the election were held today, would you vote for or against this ballot measure?",
                   'ca_prop15_property_tax': 'Suppose the following measure was on the ballot in your state. The proposed measure would amend the state constitution to require commercial and industrial properties, except those zoned as commercial agriculture, to be taxed based on their market value, rather than their purchase price.<br><br>Supporters of the initiative argue that this measure would require large companies to pay their fair share of taxes.<br><br>Opponents of the initiative argue such a measure would hurt the economy by increasing tax burdens on small businesses.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'ca_prop21_rent_control': 'Suppose the following measure was on the ballot in your state. The proposed measure would:<br>\n<ul>\n<li>Amend state law to allow local governments to establish rent control on residential properties over 15 years old.</li>\n<li>Allow local limits on annual rent increases to differ from current statewide limit.</li>\n<li>Allow rent increases in rent-controlled properties of up to 15 percent over three years at start of new tenancy(above any increase allowed by local ordinance).</li>\n<li>Exempt individuals who own no more than two homes from new rent-control policies.</li>\n<li>Prohibit rent control from violating landlords’ right to fair financial return</li>\n</ul>\nSupporters of the initiative argue that such a measure would help reduce eviction and allow people to afford to live in their own homes.<br><br>Opponents of the initiative argue that such a measure would hurt the economy and prevent the money needed to ensure the development of affordable housing.<br><br>If the election were held today, would you vote for or against this ballot measure?'},
            'p6': {'ca_prop23_dialysis': 'Suppose the following measure was on the ballot in your state. The proposed measure would:<br>\n<ul>\n<li>Require at least one licensed physician on site during treatment at outpatient kidney dialysis clinics</li>\n<li>Authorize the state Department of Public Health to exempt clinics from this requirement if there is a shortage of qualified licensed physicians and the clinic has at least one nurse practitioner or physician assistant on site</li>\n<li>Require clinics to report dialysis-related infection data to state and federal governments.</li>\n<li>Prohibit clinics from closing or reducing services without state approval</li>\n<li>Prohibit clinics from refusing to treat patients based on the source of payment for care</li>\n</ul>\nSupporters of the initiative argue that such a measure would help protect the health of people on dialysis by guaranteeing them access for a licensed physician and increasing monitoring.<br><br>Opponents of the initiative argue that such a measure puts a dangerous and costly burden on dialysis clinics, potentially risking the lives of thousands of people who need dialysis by blocking them from accessing care.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'ca_prop25_bail': 'Suppose the following measure was on the ballot in your state. The proposed measure would:<br>\n<ul>\n<li>Replace the money bail system (for obtaining release from jail before trial) with a system based on a determination of public safety and flight risk</li>\n<li>Limit detention of a person in jail before trial for most misdemeanors</li>\n</ul>\nSupporters of the initiative argue that such a measure would help the thousands of innocent people each year who have to spend a year or more waiting for trial because they can’t afford bail.<br><br>Opponents of the initiative argue that such a measure would involve racially-biased computer algorithms\xa0 deciding who gets stuck in jail and who goes free, which is unfair.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'ca_prop17_felon_vote': 'Suppose the following measure was on the ballot in your state. The proposed measure would amend the state constitution to allow people on parole for felony convictions to vote.<br><br>Supporters of the initiative argue that felons who have completed their sentences have already paid their debt to society and should be eligible to vote.<br><br>Opponents of the initiative argue that by committing a felony one has given up one’s right to participate in the democratic process, and we should not be supporting their right to vote.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'ca_prop20_parole': 'Suppose the following measure was on the ballot in your state. The proposed measure would:<br>\n<ul>\n<li>Limit access to parole programs established for non-violent offenders who have completed the full term of their primary offense by eliminating eligibility for certain offenses.</li>\n<li>Change standards and requirements governing parole decisions under this program.</li>\n<li>Authorize felony charges for specified theft crimes currently chargeable only as misdemeanors, including some theft crimes where the value is between $250 and $950.</li>\n<li>Require persons convicted of specified misdemeanors to submit to collection of DNA samples for state database.</li>\n</ul>\nSupporters of the initiative argue that such a measure would ensure that allegedly “non-violent” but serious crimes, like rape, are adequately punished and not subject to parole.<br><br>Opponents of the initiative argue that such a measure would overburden our prison system and potentially limit access to parole for criminals who have served their time.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'ca_prop18_17yo_vote': 'Suppose the following measure was on the ballot in your state. The proposed measure would amend the state constitution to allow 17-year-olds who will be 18 at the time of the next general election to vote in primary elections and special elections.<br><br>Supporters of the initiative argue that such a measure would expand access to democracy to people who will be old enough to vote at the actual time of the election.<br><br>Opponents of the initiative argue that such a measure would be expanding voting access to people who are technically still children.<br><br>If the election were held today, would you vote for or against this ballot measure?'},
            'p7': {'psilocybin': 'Suppose the following measure was on the ballot in your state. The proposed measure would allow the manufacture, delivery, administration of psilocybin at supervised, licensed facilities.<br><br>Supporters of the initiative argue that such a measure would help alleviate the growing mental health crisis by providing another safe and effective treatment to be given by trained facilitators to treat depression, anxiety, and trauma.<br><br>Opponents of the initiative argue that there already are effective treatments available and such a measure would needlessly introduce a potentially dangerous hallucinogenic drug into our community.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'alt_protein_funding': "Suppose the following measure was on the ballot in your state. The proposed measure would\xa0 support early-stage academic research into 'alternative protein,' a field dedicated to creating meat using plants and cellular agriculture processes.<br><br>Supporters of the initiative argue that such a measure will create jobs and stimulate economic growth, while strengthening and securing the US food supply chain.<br><br>Opponents of the initiative argue such a measure would not be necessary because existing protein from animals is already healthy and cheap.<br><br>If the election were held today, would you vote for or against this ballot measure?",
                   'no_insect_feed': 'Suppose the following measure was on the ballot in your state. The proposed measure would make it illegal to use farmed insects as part of the feed for farmed animals.<br><br>Supporters of the initiative argue that using farmed insects for animal feed is disgusting and unnecessary, as existing animal feed is already efficient.<br><br>Opponents of the initiative argue that such a measure would hurt the environment as insect feed is more sustainable.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'no_labeling': 'Suppose the following measure was on the ballot in your state. The proposed measure would\xa0 prevent companies using terms such as "burger," "sausage," and "bacon" for food products that do not contain meat.<br><br>Supporters of the initiative argue that such a measure will avoid misleading consumers who may accidentally buy plant-based products thinking they contain meat.<br><br>Opponents of the initiative argue that consumers are not confused by these labels and that this measure is unnecessary, unconstitutional, and anti-free market.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'cage_free': 'Suppose the following measure was on the ballot in your state. The proposed measure would ensure that egg-laying hens, mother pigs, and calves used for veal would have enough room to stand up, lie down, turn around, and extend their limbs. It would also ensure that the eggs, pork, and veal sold in your state come from farms that meet this standard, including that the eggs would come from cage-free hens.<br><br>Supporters of the initiative argue confining an animal inside a tiny cage is cruel.<br><br>Opponents of the initiative argue such a measure would increase food prices.<br><br>If the election were held today, would you vote for or against this ballot measure?'},
            'p8': {'co_gray_wolf': 'Suppose the following measure was on the ballot in your state. The proposed measure would require the Parks and Wildlife Commission to create a plan to reintroduce and manage gray wolves on designated lands by the end of 2023.<br><br>Supporters of the initiative argue these measures have worked in the past to improve the quality of the local ecosystem.<br><br>Opponents of the initiative argue such a measure would create significant problems to local livestock and harm outdoor recreational activities.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'utah_hunt': 'Suppose the following measure was on the ballot in your state. The proposed measure would establish a constitutional right to hunt and fish.<br><br>Supporters of the initiative argue that such a measure would protect the state’s heritage of hunting and further safeguard people’s freedom from government overreach.<br><br>Opponents of the initiative argue such a measure would not make any difference in the state and would needlessly reduce Second Amendment rights merely to hunting and fishing.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'mt_firearms': 'Suppose the following measure was on the ballot in your state. The proposed measure would remove a local government unit’s power to regulate the carrying of permitted concealed weapons or to restrict the carrying of unconcealed firearms except in publicly owned and occupied buildings under the local government unit’s jurisdiction. It furthermore repeals a local government unit’s authority to prevent or suppress the possession of firearms by convicted felons, adjudicated mental incompetents, illegal aliens, and minors. Federal and other state firearm restrictions would remain unchanged, including for these individuals.<br><br>Supporters of the initiative argue that gun control is ineffective at deterring crime and only restricts people’s freedom to exercise their Second Amendment rights to protect themselves from harm.<br><br>Opponents of the initiative argue such a measure would represent overreach of the state government, preventing local governments from creating their own laws.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'al_citizen_voting': 'Suppose the following measure was on the ballot in your state. The proposed measure would clarify that the state constitution only permits US citizens to vote.<br><br>Supporters of the initiative argue that such a measure would help protect the vote against illegal voting.<br><br>Opponents of the initiative argue that this measure accomplishes nothing and that there already are effective safeguards in place to protect the vote.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'la_a1_abortion': 'Suppose the following measure was on the ballot in your state. The proposed measure would adding language to the state Constitution stating that “nothing in this constitution shall be construed to secure or protect a right to abortion or require the funding of abortion.”<br><br>Supporters of the initiative argue that such a measure would make it very clear that the state is opposed to abortion and help clear the pathway for more legislation to protect the rights of the unborn.<br><br>Opponents of the initiative argue that the state should not play more of a role in dictating women’s health choices and that existing laws are already clear enough on their own.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'primate_rights': 'Suppose the following measure was on the ballot in your state. The proposed measure would alter the state constitution to give a right to life for non-human primates, and a right to physical and mental integrity for non-human primates.<br><br>Supporters of the initiative argue that such a measure would recognize primates as intelligent and free beings and protect their rights.<br><br>Opponents of the initiative argue that such a measure would harm scientific progress and hurt the economy.<br><br>If the election were held today, would you vote for or against this ballot measure?'},
            'p9': {'ne428_payday_cap': 'Suppose the following measure was on the ballot in your state. The proposed measure would limit the annual interest charged for delayed deposit services (also known as payday lending) to 36% interest.<br><br>Supporters of the initiative argue that such a measure would help protect consumers from being taken advantage of through unfair interest rates.<br><br>Opponents of the initiative argue that such a measure would hurt the economy and prevent people from qualifying for loans who otherwise would qualify and could use the money.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'nvq6_renewables_mandate': 'Suppose the following measure was on the ballot in your state. The proposed measure would amend the state Constitution to require electric utilities to acquire 50 percent of their electricity from renewable resources by 2030.<br><br>Supporters of the initiative argue that such a measure would help protect the environment and solve our climate crisis by transitioning to sustainable fuel sources.<br><br>Opponents of the initiative argue that such a measure would hurt the economy and risks being too inflexible by putting law into the constitution instead of passing through the state legislature.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'marijuana': 'Suppose the following measure was on the ballot in your state. The proposed measure would legalize the possession and use of marijuana for persons who are at least 21 years old, enact a tax on marijuana sales, and require the state Department of Health and Human Services to develop rules to regulate marijuana businesses.<br><br>Supporters of the initiative argue that such a measure would overturn an outdated ban and promote a drug that is safe and effective for treating a variety of problems.<br><br>Opponents of the initiative argue that such a message would put undue stress on the public health system as well as lower overall workplace productivity.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'no_slavery': 'Suppose the following measure was on the ballot in your state. The proposed measure would amend the state Constitution to prevent the use of slavery and involuntary servitude as criminal punishments.<br><br>Supporters of the initiative argue that slavery is always wrong and should not be used as a form of criminal punishment.<br><br>Opponents of the initiative argue that such a measure would hurt the economy and avoid properly punishing prisoners.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'rank_choice': 'Suppose the following measure was on the ballot in your state. The proposed measure would implement a voting system known as “ranked-choice voting,” in which voters rank one or more candidates by order of preference. Ranked-choice voting would be used in primary and general elections for all statewide offices, state legislative offices, federal congressional offices, and certain other offices beginning in 2022.<br><br>Under the proposed law, votes would be counted in a series of rounds. In the first round, if one candidate received more than 50 percent of the first-place votes, that candidate would be declared the winner and no other rounds would be necessary. If no candidate received more than 50 percent of the first-place votes, then the candidate or candidates who received the fewest first-place votes would be eliminated and, in the next round, each vote for an eliminated candidate would instead be counted toward the next highest-ranked candidate on that voter’s ballot.<br><br>Depending on the number of candidates, additional rounds of counting could occur, with the last-place candidate or candidates in each round being eliminated and the votes for an eliminated candidate going to the voter’s next choice out of the remaining candidates. A tie for last place in any round would be broken by comparing the tied candidates’ support in earlier rounds. Ultimately, the candidate who was, out of the remaining candidates, the preference of a majority of voters would be declared the winner.<br><br>Supporters of the initiative argue that such a measure would be an easy way to put more power into the hands of voters to express their opinions and allow voters to vote for third-party candidates without wasting their vote.<br><br>Opponents of the initiative argue that such a measure would be too confusing and would risk introducing errors into the vote.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                   'mi_prop2_electronic_privacy': "Suppose the following measure was on the ballot in your state. The proposed measure would require a search warrant to access a person's electronic data and electronic communications.<br><br>Supporters of the initiative argue that such a measure would ensure that people’s privacy is adequately protected from police overreach.<br><br>Opponents of the initiative argue that such a measure would prevent police from being able to solve crimes in a timely manner and may let some guilty people go free.<br><br>If the election were held today, would you vote for or against this ballot measure?<br>"},
            'p11': {'family_leave': 'Suppose the following measure was on the ballot in your state. The proposed measure would establish a paid family and medical leave program to provide 12 weeks (up to 16 weeks in certain cases) of paid leave (with a maximum benefit of $1,100 per week) funded through a payroll tax to be paid for by employers and employees in a 50/50 split.<br><br>Supporters of the initiative argue that such a measure would allow employees to take critically needed paid time off to care for a newborn baby or a seriously ill family member.<br><br>Opponents of the initiative argue that such a measure puts an unfair tax burden on employees and would hurt the economy.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                    'min_wage': "Suppose the following measure was on the ballot in your state. The proposed measure would increase the state's minimum wage incrementally until reaching $15 per hour in September 2026.<br><br>Supporters of the initiative argue that such a measure would allow employees to be paid a living wage and dramatically reduce poverty throughout the state.<br><br>Opponents of the initiative argue that such a measure hurts the economy and increases unemployment by placing too much of a burden on small businesses, leading them to hire less.<br><br>If the election were held today, would you vote for or against this ballot measure?",
                    'stand_ground': 'Suppose the following measure was on the ballot in your state. The proposed measure would adding language to the state Constitution stating that “individuals have a right to stand their ground and may use deadly force in self-defense or in defense of another person”.<br><br>Supporters of the initiative argue that such a measure would allow people to better protect their homes and avoid being criminalized for self-defense.<br><br>Opponents of the initiative argue that such a measure would be irresponsible and reckless, leading to a lot of unnecessary death as people take the law into their own hands rather than call the police.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                    '22wk_abortion': "Suppose the following measure was on the ballot in your state. The proposed measure would prohibit abortions in your state after a fetus reaches 22-weeks gestational age as calculated from the first day of the woman's last menstrual period.<br><br>Supporters of the initiative argue that such a measure would protect the rights of the unborn.<br><br>Opponents of the initiative argue that the state should not play more of a role in dictating women’s health choices.<br><br>If the election were held today, would you vote for or against this ballot measure?",
                    'income_tax_for_educ': 'Suppose the following measure was on the ballot in your state. The proposed measure would enact a 3.50% income tax, in addition to the existing income tax, on income above $250,000 (single filing) or $500,000 (joint filing) and distribute the revenue from the 3.50% income tax to teacher and classroom support staff salaries, teacher mentoring and retention programs, and career and technical education programs.<br><br>Supporters of the initiative argue that such a measure would raise much needed money for our underfunded schools.<br><br>Opponents of the initiative argue that people are taxed enough already such a measure would hurt the economy.<br><br>If the election were held today, would you vote for or against this ballot measure?',
                    'cig_tax': 'Suppose the following measure was on the ballot in your state. The proposed measure would create a tax on nicotine products such as e-cigarettes, increase cigarette and tobacco taxes, and dedicate revenues to various health and education programs.<br><br>Supporters of the initiative argue that such a measure would both discourage dangerous cigarette use and provide funding to our underfunded schools.<br><br>Opponents of the initiative argue that such a measure restricts the freedom of consumers and hurts the economy by placing too much of a tax burden on our citizens.<br><br>If the election were held today, would you vote for or against this ballot measure?'}}

print('Processing measures...')
for page in ['p5', 'p6', 'p7', 'p8', 'p9', 'p11']:
    measures_ = measures[page]
    measures_ = {v: k for k, v in measures_.items()}
    survey['{}_condition'.format(page)] = survey['{}_condition'.format(page)].apply(lambda m: measures_[m] if (isinstance(m, str) and m != 'nan') else m)
    
    measures_ = {v: k for k, v in measures_.items()}
    for m in measures_.keys():
        survey['vote_measure_{}'.format(m)] = 'Not presented'
        survey.loc[survey['{}_condition'.format(page)] == m, 'vote_measure_{}'.format(m)] = survey.loc[survey['{}_condition'.format(page)] == m, '{}_vote'.format(page)]
        
    survey = survey.drop(['{}_condition'.format(page), '{}_vote'.format(page)], axis=1)

show(survey)

Processing measures...
                   prolific_id     lv_thought lv_registered lv_plan_vote  \
1     5f17dcdca4b99e18ed83100e    Quite a lot           Yes          Yes   
2     5f11d83ebe3dfb0abd270cfe    Quite a lot           Yes          Yes   
3     5f44c203ce8f190184ff6882    Quite a lot           Yes          Yes   
4     5f44c203ce8f190184ff6882    Quite a lot           Yes          Yes   
5     5f44c203ce8f190184ff6882    Quite a lot           Yes          Yes   
...                        ...            ...           ...          ...   
4726  5d4c7acd2799110019400b6a    Quite a lot            No           No   
4727  5ed014837725460d6f673cdd    Quite a lot           Yes          Yes   
4728  5f8f4708a8d10219158c8743    Quite a lot           Yes          Yes   
4729  5dc9a379d210e46baff956fb    Quite a lot           Yes          Yes   
4730                      test  Only a little            No           No   

     lv_already_voted           vote_method_plan    lv_likely vo

## California data

In [26]:
ca_survey = pd.read_csv('ca_responses.csv')
show(ca_survey)

         Respondent ID     Collector ID              Start Date  \
0                  nan              nan                     NaN   
1   12096250439.000000 395077197.000000  10/21/2020 08:42:23 AM   
2   12096233260.000000 395077197.000000  10/21/2020 08:42:15 AM   
3   12096207177.000000 395077197.000000  10/21/2020 08:42:08 AM   
4   12096146314.000000 395077197.000000  10/21/2020 08:41:38 AM   
..                 ...              ...                     ...   
948 12094157021.000000 395077197.000000  10/20/2020 05:26:07 PM   
949 12094132467.000000 395077197.000000  10/20/2020 05:16:40 PM   
950 12094128810.000000 395077197.000000  10/20/2020 05:15:02 PM   
951 12094021792.000000 395077197.000000  10/20/2020 04:35:02 PM   
952 12094012004.000000 395077197.000000  10/20/2020 04:31:24 PM   

                   End Date       IP Address  Email Address  First Name  \
0                       NaN              NaN            nan         nan   
1    10/21/2020 09:15:42 AM       72.17.88.7 

In [27]:
ca_survey.columns

Index(['Respondent ID', 'Collector ID', 'Start Date', 'End Date', 'IP Address',
       'Email Address', 'First Name', 'Last Name', 'Custom Data 1',
       'What is your Prolific ID?',
       'How much thought have you given to the upcoming election?',
       'Are you registered to vote in the 2020 election?',
       'Do you plan to vote in the 2020 election?',
       'Have you already voted in the 2020 election?',
       'How are you planning to vote?', 'How likely are you to vote?',
       'How did you vote?',
       'Suppose the following measure was on the ballot in your state. The proposed measure would  issue a $5.5 billion general obligation bond for the state's stem cell research institute and make changes to the institute's governance structure and programs.Supporters of the initiative argue that such a bond could lead to research that might produce breakthroughs in the quality of life of everyday Americans.Opponents of the initiative argue that such money is ultimately wasted 

In [28]:
ca_variable_map = {'prolific_id': 'What is your Prolific ID?',
                   'lv_thought': 'How much thought have you given to the upcoming election?',
                   'lv_registered': 'Are you registered to vote in the 2020 election?',
                   'lv_plan_vote': 'Do you plan to vote in the 2020 election?',
                   'lv_already_voted': 'Have you already voted in the 2020 election?',
                   'vote_method_plan': 'How are you planning to vote?',
                   'lv_likely': 'How likely are you to vote?',
                   'vote_method': 'How did you vote?',
                   'vote_measure_ca_prop14_stem_cell': "Suppose the following measure was on the ballot in your state. The proposed measure would issue a $5.5 billion general obligation bond for the state's stem cell research institute and make changes to the institute's governance structure and programs.Supporters of the initiative argue that such a bond could lead to research that might produce breakthroughs in the quality of life of everyday Americans.Opponents of the initiative argue that such money is ultimately wasted on ineffective treatments.If the election were held today, would you vote for or against this ballot measure?",
                   'vote_measure_ca_prop16_affirmative_action': 'Suppose the following measure was on the ballot in your state. The proposed measure would:\n\nPermit government decision-making policies to consider race, sex, color, ethnicity, or national origin to address diversity.\nDoes not alter other state and federal laws guaranteeing equal protection and prohibiting unlawful discrimination.\n\nSupporters of the initiative argue that including race in decision making is essential to ensure that all races are treated fairly.Opponents of the initiative argue that truly fair decisions are race-blind and including race would only lead to unfair discrimination.If the election were held today, would you vote for or against this ballot measure?',
                   'vote_measure_ca_prop15_property_tax': 'Suppose the following measure was on the ballot in your state. The proposed measure would amend the state constitution to require commercial and industrial properties, except those zoned as commercial agriculture, to be taxed based on their market value, rather than their purchase price.Supporters of the initiative argue that this measure would require large companies to pay their fair share of taxes.Opponents of the initiative argue such a measure would hurt the economy by increasing tax burdens on small businesses.If the election were held today, would you vote for or against this ballot measure?',
                   'vote_measure_ca_prop22_rideshare': 'Suppose the following measure was on the ballot in your state. The proposed measure would define app-based transportation (rideshare) and delivery drivers as independent contractors and adopt labor and wage policies specific to app-based drivers and companies.Supporters of the initiative argue that this measure would protect rideshare drivers and ensure easy access to affordable ridesharing.Opponents of the initiative argue such a measure would just boost technology company profits and harm rideshare drivers by denying them access to important benefits such as paid time off.If the election were held today, would you vote for or against this ballot measure?',
                   'vote_measure_ca_prop21_rent_control': 'Suppose the following measure was on the ballot in your state. The proposed measure would:\n\nAmend state law to allow local governments to establish rent control on residential properties over 15 years old.\nAllow local limits on annual rent increases to differ from current statewide limit.\nAllow rent increases in rent-controlled properties of up to 15 percent over three years at start of new tenancy(above any increase allowed by local ordinance).\nExempt individuals who own no more than two homes from new rent-control policies.\nProhibit rent control from violating landlords’ right to fair financial return\nSupporters of the initiative argue that such a measure would help reduce eviction and allow people to afford to live in their own homes.\n\nOpponents of the initiative argue that such a measure would hurt the economy and prevent the money needed to ensure the development of affordable housing.If the election were held today, would you vote for or against this ballot measure?',
                   'vote_measure_100pct_income_tax': 'Suppose the following measure was on the ballot in your state. The proposed measure would abolish the sales tax but tax all income at 100%, such that any money earned would be taxed by the state.Supporters of the initiative argue that such a measure would help alleviate important budget shortfalls and provide much needed funding to education.Opponents of the initiative argue that such a tax is ludicrous, would completely destroy the economy, and impoverish every citizen as effectively no one could earn money.If the election were held today, would you vote for or against this ballot measure?',
                   'vote_measure_ca_prop17_felon_vote': 'Suppose the following measure was on the ballot in your state. The proposed measure would amend the state constitution to allow people on parole for felony convictions to vote.Supporters of the initiative argue that felons who have completed their sentences have already paid their debt to society and should be eligible to vote.Opponents of the initiative argue that by committing a felony one has given up one’s right to participate in the democratic process, and we should not be supporting their right to vote.If the election were held today, would you vote for or against this ballot measure?',
                   'vote_measure_ca_prop23_dialysis': 'Suppose the following measure was on the ballot in your state. The proposed measure would:\n\nRequire at least one licensed physician on site during treatment at outpatient kidney dialysis clinics\nAuthorize the state Department of Public Health to exempt clinics from this requirement if there is a shortage of qualified licensed physicians and the clinic has at least one nurse practitioner or physician assistant on site\nRequire clinics to report dialysis-related infection data to state and federal governments.\nProhibit clinics from closing or reducing services without state approval\nProhibit clinics from refusing to treat patients based on the source of payment for care\n\nSupporters of the initiative argue that such a measure would help protect the health of people on dialysis by guaranteeing them access for a licensed physician and increasing monitoring.Opponents of the initiative argue that such a measure puts a dangerous and costly burden on dialysis clinics, potentially risking the lives of thousands of people who need dialysis by blocking them from accessing care.If the election were held today, would you vote for or against this ballot measure?',
                   'vote_measure_ca_prop25_bail': 'Suppose the following measure was on the ballot in your state. The proposed measure would:\n\nReplace the money bail system (for obtaining release from jail before trial) with a system based on a determination of public safety and flight risk\nLimit detention of a person in jail before trial for most misdemeanors\n\nSupporters of the initiative argue that such a measure would help the thousands of innocent people each year who have to spend a year or more waiting for trial because they can’t afford bail.Opponents of the initiative argue that such a measure would involve racially-biased computer algorithms deciding who gets stuck in jail and who goes free, which is unfair.If the election were held today, would you vote for or against this ballot measure?',
                   'vote_measure_ca_prop20_parole': 'Suppose the following measure was on the ballot in your state. The proposed measure would:\n\nLimit access to parole programs established for non-violent offenders who have completed the full term of their primary offense by eliminating eligibility for certain offenses.\nChange standards and requirements governing parole decisions under this program.\nAuthorize felony charges for specified theft crimes currently chargeable only as misdemeanors, including some theft crimes where the value is between $250 and $950.\nRequire persons convicted of specified misdemeanors to submit to collection of DNA samples for state database.\n\nSupporters of the initiative argue that such a measure would ensure that allegedly “non-violent” but serious crimes, like rape, are adequately punished and not subject to parole.Opponents of the initiative argue that such a measure would overburden our prison system and potentially limit access to parole for criminals who have served their time.If the election were held today, would you vote for or against this ballot measure?',
                   'vote_measure_ca_prop18_17yo_vote': 'Suppose the following measure was on the ballot in your state. The proposed measure would amend the state constitution to allow 17-year-olds who will be 18 at the time of the next general election to vote in primary elections and special elections.Supporters of the initiative argue that such a measure would expand access to democracy to people who will be old enough to vote at the actual time of the election.Opponents of the initiative argue that such a measure would be expanding voting access to people who are technically still children.If the election were held today, would you vote for or against this ballot measure?',
                   'vote2016': 'In the 2016 Presidential election, who did you vote for?',
                   'vote_trump_biden': 'In the 2020 Presidential election, who do you intend to vote for?',
                   'vote_trump_biden2': 'In the 2020 Presidential election, who did you vote for?',
                   'vote_senate': 'In the 2020 election, who do you plan to vote for Senate?',
                   'vote_senate2': 'In the 2020 election, who did you vote for Senate?',
                   'vote_rep': 'In the 2020 election, who do you plan to vote for Representative in your Congressional district?',
                   'vote_rep2': 'In the 2020 election, who did you votefor Representative in your Congressional district?',
                   'gss_trust': "Generally speaking, would you say that most people can be trusted or that you can't be too careful in dealing with people?",
                   'gss_bible': 'Which of these statements comes closest to describing your feelings about the Bible?',
                   'gss_spanking': 'How much do you agree or disagree with the following?', # It is sometimes necessary to discipline a child with a good, hard spanking
                   'agree_birth_control_immoral': 'Unnamed: 38', # Birth control is morally wrong
                   'agree_11hrs_sleep': 'Unnamed: 39', # I sleep more than 11 hours a night on average
                   'agree_40hrs_work': 'Unnamed: 40', # I work more than 40 hours a week on average
                   'news_wsj': 'Which of these, if any, do you read, listen, or watch for news?', # The Wall Street Journal
                   'news_daily_wire': 'Unnamed: 42', # The Daily Wire
                   'news_npr': 'Unnamed: 43', # NPR
                   'news_guardian': 'Unnamed: 44', # The Guardian
                   'news_msnbc': 'Unnamed: 45', # MSNBC
                   'news_cnn': 'Unnamed: 46', # CNN
                   'news_john_oliver': 'Unnamed: 47', # Last Week Tonight with John Oliver
                   'news_steven_colbert': 'Unnamed: 48', # The Late Show with Steven Colbert
                   'news_attention_check': 'Unnamed: 49', # The Current Show with Al Franken
                   'news_nyt': 'Unnamed: 50', # The New York Times
                   'news_atlantic': 'Unnamed: 51', # The Atlantic
                   'news_fox': 'Unnamed: 52', # Fox News
                   'news_wp': 'Unnamed: 53', # The Washington Post
                   'news_none': 'Unnamed: 54', # None of the above
                   'news_amount': 'How often do you read the news?',
                   'social_fb': 'Which of these social media networks do you use?', # Facebook
                   'social_twitter': 'Unnamed: 57', # Twitter
                   'social_instagram': 'Unnamed: 58', # Instagram
                   'social_tiktok': 'Unnamed: 59', # TikTok
                   'social_pinterest': 'Unnamed: 60', # Pinterest
                   'social_none': 'Unnamed: 61', # None of the above
                   'gender': 'What is your gender?',
                   'birth_year': 'In which year were you born? Please write your answer as 4 digits only',
                   'race': 'What is your race?',
                   'education': 'What is the highest level of education you have completed?',
                   'state': 'In what state or U.S. territorydo you live in?',
                   'income': 'What is your annual income?',
                   'urban_rural': 'Which of the following best describes the area in which you live?',
                   'honesty': 'How honestly have you answered these questions?'}

ca_survey.columns = [c.replace('\xa0', '') for c in ca_survey.columns]
ca_variable_map = {v: k for k, v in ca_variable_map.items()}
ca_survey = ca_survey[ca_variable_map.keys()].rename(ca_variable_map, axis=1)
ca_survey = ca_survey.drop(0)
show(ca_survey)  

                                           prolific_id     lv_thought  \
1                             5dd48444dd9a7447d224cc71           Some   
2                             5e56457f6370ba0184e041b4  Only a little   
3                             5cb5f1e080ba1c0001790399           Some   
4                             5cbb78d664170f00011d7c4f           Some   
5                             558fe633fdf99b3f6868bf45    Quite a lot   
..                                                 ...            ...   
948                           5a4f2e99c64cfe0001b805f6    Quite a lot   
949  https://www.youtube.com/watch?v=ZD4jnqy5JcE&ab...  Only a little   
950                           5e3c93b7e1be880896b43763    Quite a lot   
951                                               test    Quite a lot   
952                                               test    Quite a lot   

    lv_registered lv_plan_vote lv_already_voted  \
1             Yes          Yes              Yes   
2             Yes    

In [29]:
for c in survey.columns:
    if c not in ca_survey.columns:
        ca_survey[c] = ''

survey = pd.concat((survey, ca_survey))
survey = survey.reset_index(drop=True)
show(survey)

               agree_11hrs_sleep            agree_40hrs_work  \
0              Strongly disagree  Neither agree nor disagree   
1              Strongly disagree                    Disagree   
2              Strongly disagree              Strongly agree   
3                            NaN                         NaN   
4                            NaN                         NaN   
...                          ...                         ...   
5677           Strongly disagree                    Disagree   
5678                    Disagree                    Disagree   
5679  Neither agree nor disagree              Strongly agree   
5680           Strongly disagree                       Agree   
5681                         NaN                         NaN   

     agree_birth_control_immoral agree_eat_animals_fed_insects  \
0                 Strongly agree                    Don't know   
1              Strongly disagree                      Disagree   
2              Strongly disagree 

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  survey = pd.concat((survey, ca_survey))


In [30]:
for c in ['vote_trump_biden', 'vote_senate', 'vote_rep']:
    print('Processing {}...'.format(c))
    survey = survey.rename({'{}2'.format(c): c}, axis=1)
    joint = survey[c].groupby(level=0, axis=1).apply(lambda x: x.apply(sjoin, axis=1))
    survey = survey.drop(c, axis=1)
    survey[c] = joint
    survey[c] = survey[c].apply(lambda x: x.split(';')[0])

print('Processing age...')
survey['birth_year'] = survey['birth_year'].astype(float)
survey['age'] = (2020 - survey['birth_year'].fillna(2020)).astype(float)
survey = survey.drop('birth_year', axis=1)
survey['age'] = survey['age'].apply(transform_age)
survey = survey[survey['age'] != 'Under 18']

show(survey)

Processing vote_trump_biden...
Processing vote_senate...
Processing vote_rep...
Processing age...
               agree_11hrs_sleep            agree_40hrs_work  \
0              Strongly disagree  Neither agree nor disagree   
1              Strongly disagree                    Disagree   
2              Strongly disagree              Strongly agree   
5                          Agree  Neither agree nor disagree   
6                       Disagree              Strongly agree   
...                          ...                         ...   
5676                    Disagree                    Disagree   
5677           Strongly disagree                    Disagree   
5678                    Disagree                    Disagree   
5679  Neither agree nor disagree              Strongly agree   
5680           Strongly disagree                       Agree   

     agree_birth_control_immoral agree_eat_animals_fed_insects  \
0                 Strongly agree                    Don't know   
1

## Data Quality

In [31]:
survey['valid_id'] = survey['prolific_id'].apply(is_valid_prolific_id)
survey['valid_id'].value_counts()

True     5463
False      37
Name: valid_id, dtype: int64

In [32]:
survey['prolific_id'].duplicated().sum()

396

In [33]:
survey['honesty'].value_counts()

Completely honestly    4495
Very honestly           902
Somewhat honestly       100
Not honestly at all       3
Name: honesty, dtype: int64

In [34]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age',
                'vote2016', 'gss_trust', 'gss_bible', 'gss_spanking']
other_low_incidence_checks = ['agree_birth_control_immoral', 'agree_11hrs_sleep']
low_incidence_counts = detect_low_incidence(survey[demographics + other_low_incidence_checks],
                                            low_incidence_threshold=0.04)
survey = survey.reset_index(drop=True)
survey['meta_low_incidence_count'] = low_incidence_counts.reset_index(drop=True)
low_incidence_counts.value_counts()

4159.452747    4191
329.752533      331
281.818872      254
168.392525      147
118.219300      137
121.421151      122
80.953205        80
11.409239        37
46.649498        35
44.330808        34
22.341998        19
9.626010         14
3.003582         12
9.372174          9
2.363155          8
13.349800         7
1.888572          7
5.484890          5
6.417797          5
8.226749          4
3.698273          4
0.127958          3
3.160682          3
0.862787          3
3.514452          3
0.904500          3
0.149722          3
3.451013          2
0.187346          1
4.915650          1
0.033686          1
0.121598          1
0.061515          1
0.250572          1
1.361774          1
0.652199          1
1.325865          1
1.794702          1
0.333054          1
0.068400          1
1.294088          1
0.001364          1
4.786025          1
8.009811          1
0.379426          1
0.434830          1
dtype: int64

In [35]:
(low_incidence_counts < 0.5).value_counts()

False    5484
True       16
dtype: int64

## More processing

In [36]:
print('Processing income...')
survey['income'] = survey['income'].astype(str).apply(transform_income)

print('Processing education...')
survey['education'] = survey['education'].astype(str).apply(transform_education)

for c in survey.columns:
    if c.startswith('news_') or c.startswith('social_') or c.startswith('ffq_'):
        print('Processing {}...'.format(c))
        survey[c] = survey[c].apply(lambda x: isinstance(x, str))

for c in survey.columns:
    if c.startswith('news_') or c.startswith('social_') or c.startswith('ffq_'):
        survey[c] = survey[c].astype(bool)
    elif not c.startswith('meta_'):
        survey[c] = survey[c].astype(str)
        
survey['valid_id'] = survey['valid_id'].astype(bool)

print('Processing diet...')
survey['pescetarian'] = (~survey['ffq_beef'] & ~survey['ffq_chicken'] & ~survey['ffq_other_meat'] & ~survey['ffq_pork'] & ~survey['ffq_turkey']).astype(bool)
survey['vegetarian'] = (~survey['ffq_beef'] & ~survey['ffq_chicken'] & ~survey['ffq_fish'] & ~survey['ffq_other_meat'] & ~survey['ffq_pork'] & ~survey['ffq_seafood'] & ~survey['ffq_turkey']).astype(bool)
survey['vegan'] = (~survey['ffq_beef'] & ~survey['ffq_chicken'] & ~survey['ffq_fish'] & ~survey['ffq_other_meat'] & ~survey['ffq_pork'] & ~survey['ffq_seafood'] & ~survey['ffq_eggs'] & ~survey['ffq_dairy'] & ~survey['ffq_turkey']).astype(bool)

print('Processing race...')
survey['race'] = survey['race'].apply(transform_race)

print('Processing gender...')
survey['gender'] = survey['gender'].fillna('Other')

print('Processing region...')
survey['region'] = survey['state'].apply(transform_region)

print('Processing 2016 vote...')
survey['vote2016'] = survey['vote2016'].apply(transform_2016_vote)

print('Processing GSS trust...')
survey['gss_trust'] = survey['gss_trust'].apply(transform_gss_trust)

print('Processing GSS Bible...')
survey['gss_bible'] = survey['gss_bible'].apply(transform_gss_bible)

print('Processing GSS Spanking...')
survey['gss_spanking'] = survey['gss_spanking'].apply(simplify_likert)

print('Processing voted...')
survey['voted2016'] = (survey['vote2016'] != 'Did not vote')

print('Processing noncollege white...')
survey['race_white'] = (survey['race'] == 'White or Caucasian')
survey['college'] = (~survey['education'].isin(['Some college, no degreee',
                                                'Graduated from high school',
                                                'Less than high school']))
survey['noncollege_white'] = (survey['race_white'] & ~survey['college'])

for c in survey.columns:
    if 'agree_' in c and 'meta' not in c:
        print('Processing {}...'.format(c))
        survey[c] = survey[c].apply(simplify_likert)

show(survey[sorted(list(survey.columns))])

Processing income...
Processing education...
Processing ffq_beef...
Processing ffq_chicken...
Processing ffq_dairy...
Processing ffq_eggs...
Processing ffq_fish...
Processing ffq_none...
Processing ffq_other_meat...
Processing ffq_pork...
Processing ffq_seafood...
Processing ffq_turkey...
Processing news_amount...
Processing news_atlantic...
Processing news_attention_check...
Processing news_cnn...
Processing news_daily_wire...
Processing news_fox...
Processing news_guardian...
Processing news_john_oliver...
Processing news_msnbc...
Processing news_none...
Processing news_npr...
Processing news_nyt...
Processing news_steven_colbert...
Processing news_wp...
Processing news_wsj...
Processing social_fb...
Processing social_instagram...
Processing social_none...
Processing social_pinterest...
Processing social_tiktok...
Processing social_twitter...
Processing diet...
Processing race...
Processing gender...
Processing region...
Processing 2016 vote...
Processing GSS trust...
Processing GSS 

## More data quality

In [37]:
straightlining_vote = detect_straightlining(survey[[c for c in survey.columns if 'vote_measure' in c]])
survey['meta_straightlining_vote_measure'] = straightlining_vote
survey['meta_straightlining_vote_measure'].value_counts()

0.794118    4595
0.676471     904
0.823529       1
Name: meta_straightlining_vote_measure, dtype: int64

In [38]:
survey['news_attention_check'].value_counts()

False    5426
True       74
Name: news_attention_check, dtype: int64

In [39]:
print('Initial survey... N={}'.format(len(survey)))

survey = survey.reset_index(drop=True)
survey = survey[survey['valid_id']]
survey = survey.drop('valid_id', axis=1)
print('Dropping invalid ID... N={}'.format(len(survey)))

survey = survey.drop_duplicates('prolific_id', keep='last')
print('Dropping duplicate ID... N={}'.format(len(survey)))

survey = survey[~survey['honesty'].isin(['Somewhat honestly', 'Not honestly at all'])]
survey = survey.drop('honesty', axis=1)
print('Dropping dishonest... N={}'.format(len(survey)))

survey = survey[~survey['news_attention_check']]
survey = survey.drop('news_attention_check', axis=1)
print('Dropping failed news attention check... N={}'.format(len(survey)))

survey = survey[survey['meta_low_incidence_count'] > 0.5]
survey = survey.drop('meta_low_incidence_count', axis=1)
print('Dropping multiple low incidence... N={}'.format(len(survey)))

Initial survey... N=5500
Dropping invalid ID... N=5500
Dropping duplicate ID... N=5104
Dropping dishonest... N=5011
Dropping failed news attention check... N=4944
Dropping multiple low incidence... N=4933


In [40]:
survey['state'].value_counts()

California                   1022
New York                      388
Texas                         350
Florida                       340
Pennsylvania                  195
Illinois                      187
North Carolina                168
Ohio                          160
New Jersey                    135
Georgia                       131
Michigan                      125
Washington                    121
Massachusetts                 114
Virginia                      106
Arizona                        88
Maryland                       88
Indiana                        86
Oregon                         84
Missouri                       79
Wisconsin                      75
Minnesota                      63
Kentucky                       57
South Carolina                 57
Tennessee                      55
Louisiana                      54
Colorado                       53
Alabama                        51
Nevada                         44
Oklahoma                       42
Connecticut   

In [41]:
survey = survey.drop('prolific_id', axis=1)

In [42]:
survey.to_csv('responses_processed.csv', index=False)