## Libraries

In [2]:
import pandas as pd
import numpy as np
import scipy.stats as stat

from math import sqrt
from mlgear.utils import show, display_columns
from surveyweights import normalize_weights, run_weighting_iteration


def margin_of_error(n=None, sd=None, p=None, type='proportion', interval_size=0.95):
    z_lookup = {0.8: 1.28, 0.85: 1.44, 0.9: 1.65, 0.95: 1.96, 0.99: 2.58}
    if interval_size not in z_lookup.keys():
        raise ValueError('{} not a valid `interval_size` - must be {}'.format(interval_size,
                                                                              ', '.join(list(z_lookup.keys()))))
    if type == 'proportion':
        se = sqrt(p * (1 - p)) / sqrt(n)
    elif type == 'continuous':
        se = sd / sqrt(n)
    else:
        raise ValueError('{} not a valid `type` - must be proportion or continuous')
    
    z = z_lookup[interval_size]
    return se * z


def print_pct(pct, digits=0):
    pct = pct * 100
    pct = np.round(pct, digits)
    if pct >= 100:
        if digits == 0:
            val = '>99.0%'
        else:
            val = '>99.'
            for d in range(digits - 1):
                val += '9'
            val += '9%'
    elif pct <= 0:
        if digits == 0:
            val = '<0.1%'
        else:
            val = '<0.'
            for d in range(digits - 1):
                val += '0'
            val += '1%'
    else:
        val = '{}%'.format(pct)
    return val


def calc_result(for_vote, against_vote, n, state, interval=0.8):
    GENERAL_POLLING_ERROR = 4.0
    ACQUIESENCE_BIAS = -5.0
    N_SIMS = 1000000
    
    margin_needed_for_victory = 0.5
    if state == 'Florida':
        margin_needed_for_victory = 0.6
    elif state == 'Colorado':
        margin_needed_for_victory = 0.55
    
    for_moe = margin_of_error(n=n, p=for_vote/100, interval_size=interval)
    against_moe = margin_of_error(n=n, p=against_vote/100, interval_size=interval)
    undecided = 100 - for_vote - against_vote
    mean = for_vote + undecided * 0.25 + ACQUIESENCE_BIAS
    raw_moe = for_moe * 100 + against_moe * 100
    
    allocate_undecided = undecided * 0.4
    margin = raw_moe + allocate_undecided + GENERAL_POLLING_ERROR
    
    cdf_value = 0.5 + 0.5 * interval
    normed_sigma = stat.norm.ppf(cdf_value)
    sigma = margin / 100 / normed_sigma
    
    sims = np.random.normal(mean / 100, sigma, N_SIMS)
    chance_pass = np.sum([s > margin_needed_for_victory for s in sims]) / N_SIMS
    low, high = np.percentile(sims, [20, 80]) * 100
    
    return {'mean': mean, 'high': high, 'low': low, 'n': n,
            'raw_moe': raw_moe, 'margin': margin, 'sigma': sigma, 'chance_pass': chance_pass}


def print_result(mean, high, low, n, raw_moe, margin, sigma, chance_pass):
    mean = np.round(mean, 1)
    first = np.round(high, 1)
    second = np.round(low, 1)
    sigma = np.round(sigma * 100, 1)
    raw_moe = np.round(raw_moe, 1)
    margin = np.round(margin, 1)
    chance_pass = print_pct(chance_pass, 1)
    if second < first:
        _ = first
        first = second
        second = _
    if second > 100:
        second = 100
    if first < 0:
        first = 0
    print(('Result {} (80% CI: {} to {}) (N={}) (raw_moe={}pts, margin={}pts, '
           'sigma={}pts) ({} likely to pass)').format(mean,
                                                      first,
                                                      second,
                                                      n,
                                                      raw_moe,
                                                      margin,
                                                      sigma,
                                                      chance_pass))
    print(('{} (80% CI: {} to {}) ({})').format(mean,
                                                first,
                                                second,
                                                chance_pass))
    print('-')

## Load Processed Data

In [3]:
survey = pd.read_csv('responses_processed_national_weighted.csv').fillna('Not presented')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


## Other Propositions

In [4]:
POTUS_CENSUS = {'Alabama': {'Hillary Clinton': 0.3436, 'Donald Trump': 0.6208},
                'Alaska': {'Hillary Clinton': 0.3655, 'Donald Trump': 0.5128},
                'Arizona': {'Hillary Clinton': 0.4513, 'Donald Trump': 0.4867},
                'Arkansas': {'Hillary Clinton': 0.3365, 'Donald Trump': 0.6057},
                'California': {'Hillary Clinton': 0.6173, 'Donald Trump': 0.3162},
                'Colorado': {'Hillary Clinton': 0.4816, 'Donald Trump': 0.4325},
                'Connecticut': {'Hillary Clinton': 0.5457, 'Donald Trump': 0.4093},
                'Delaware': {'Hillary Clinton': 0.531, 'Donald Trump': 0.417},
                'Washington DC': {'Hillary Clinton': 0.905, 'Donald Trump': 0.016},
                'Florida': {'Hillary Clinton': 0.478, 'Donald Trump': 0.490},
                'Georgia': {'Hillary Clinton': 0.456, 'Donald Trump': 0.508},
                'Hawaii': {'Hillary Clinton': 0.622, 'Donald Trump': 0.300},
                'Idaho': {'Hillary Clinton': 0.275, 'Donald Trump': 0.593},
                'Illinois': {'Hillary Clinton': 0.558, 'Donald Trump': 0.379},
                'Indiana': {'Hillary Clinton': 0.379, 'Donald Trump': 0.511},
                'Iowa': {'Hillary Clinton': 0.417, 'Donald Trump': 0.512},
                'Kansas': {'Hillary Clinton': 0.361, 'Donald Trump': 0.567},
                'Kentucky': {'Hillary Clinton': 0.327, 'Donald Trump': 0.625},
                'Louisiana': {'Hillary Clinton': 0.385, 'Donald Trump': 0.581},
                'Maine': {'Hillary Clinton': 0.478, 'Donald Trump': 0.449},
                'Maryland': {'Hillary Clinton': 0.603, 'Donald Trump': 0.339},
                'Massachusetts': {'Hillary Clinton': 0.600, 'Donald Trump': 0.328},
                'Michigan': {'Hillary Clinton': 0.473, 'Donald Trump': 0.475},
                'Minnesota': {'Hillary Clinton': 0.464, 'Donald Trump': 0.449},
                'Mississippi': {'Hillary Clinton': 0.401, 'Donald Trump': 0.579},
                'Missouri': {'Hillary Clinton': 0.401, 'Donald Trump': 0.579},
                'Montana': {'Hillary Clinton': 0.381, 'Donald Trump': 0.562},
                'Nebraska': {'Hillary Clinton': 0.337, 'Donald Trump': 0.588},
                'Nevada': {'Hillary Clinton': 0.479, 'Donald Trump': 0.455},
                'New Hampshire': {'Hillary Clinton': 0.470, 'Donald Trump': 0.466},
                'New Jersey': {'Hillary Clinton': 0.555, 'Donald Trump': 0.414},
                'New Mexico': {'Hillary Clinton': 0.483, 'Donald Trump': 0.404},
                'New York': {'Hillary Clinton': 0.590, 'Donald Trump': 0.365},
                'North Carolina': {'Hillary Clinton': 0.462, 'Donald Trump': 0.498},
                'North Dakota': {'Hillary Clinton': 0.272, 'Donald Trump': 0.630},
                'Ohio': {'Hillary Clinton': 0.436, 'Donald Trump': 0.517},
                'Oklahoma': {'Hillary Clinton': 0.289, 'Donald Trump': 0.653},
                'Oregon': {'Hillary Clinton': 0.501, 'Donald Trump': 0.391},
                'Pennsylvania': {'Hillary Clinton': 0.475, 'Donald Trump': 0.481},
                'Rhode Island': {'Hillary Clinton': 0.544, 'Donald Trump': 0.389},
                'South Carolina': {'Hillary Clinton': 0.407, 'Donald Trump': 0.549},
                'South Dakota': {'Hillary Clinton': 0.317, 'Donald Trump': 0.615},
                'Tennessee': {'Hillary Clinton': 0.347, 'Donald Trump': 0.607},
                'Texas': {'Hillary Clinton': 0.432, 'Donald Trump': 0.522},
                'Utah': {'Hillary Clinton': 0.275, 'Donald Trump': 0.454},
                'Vermont': {'Hillary Clinton': 0.567, 'Donald Trump': 0.303},
                'Virginia': {'Hillary Clinton': 0.497, 'Donald Trump': 0.444},
                'Washington': {'Hillary Clinton': 0.525, 'Donald Trump': 0.368},
                'West Virginia': {'Hillary Clinton': 0.264, 'Donald Trump': 0.685},
                'Wisconsin': {'Hillary Clinton': 0.465, 'Donald Trump': 0.472},
                'Wyoming': {'Hillary Clinton': 0.216, 'Donald Trump': 0.674 }}

MEASURES_STATES = {'Oregon Psilocybin Program Initiative': {'var': 'vote_measure_psilocybin', 'state': 'Oregon'},
                   'DC Initiative 81 (Psilocybin)':  {'var': 'vote_measure_psilocybin', 'state': 'Washington DC'},
                   'Colorado Gray Wolf Reintroduction Initiative': {'var': 'vote_measure_co_gray_wolf', 'state': 'Colorado'},
                   'Montana Firearms': {'var': 'vote_measure_mt_firearms', 'state': 'Montana'},
                   'Alabama Amendment 1 (Citizen Voting)': {'var': 'vote_measure_al_citizen_voting', 'state': 'Alabama'},
                   'Utah Right to Hunt and Fish Amendment': {'var': 'vote_measure_utah_hunt', 'state': 'Utah'},
                   'Louisiana Amendment 1 (No Right to Abortion)': {'var': 'vote_measure_la_a1_abortion', 'state': 'Louisiana'},
                   'Michigan Proposal 2 (Search Warrant for Electronic Data Amendment)': {'var': 'vote_measure_mi_prop2_electronic_privacy', 'state': 'Michigan'},
                   'Nebraska Initiative 428 (Payday Lender Interest Rate Cap Initiative)': {'var': 'vote_measure_ne428_payday_cap', 'state': 'Nebraska'},
                   'Nevada Question 6 (Renewable Energy Standards Initiative)': {'var': 'vote_measure_nvq6_renewables_mandate', 'state': 'Nevada'},
                   'Utah Constitutional Amendment C (Remove Slavery as Punishment for a Crime from Constitution)': {'var': 'vote_measure_no_slavery', 'state': 'Utah'},
                   'Massachusetts Question 2 (Ranked Choice Voting)': {'var': 'vote_measure_rank_choice', 'state': 'Massachusetts'},
                   'Arizona Proposition 207 (Legalize Marijiuna)': {'var': 'vote_measure_marijuana', 'state': 'Arizona'},
                   'Alabama Amendment 5 (Stand Your Ground)': {'var': 'vote_measure_stand_ground', 'state': 'Alabama'},
                   'Arizona Proposition 208 (Income Tax for Education)': {'var': 'vote_measure_income_tax_for_educ', 'state': 'Arizona'},
                   'Colorado Proposition 115 (22-Week Abortion Ban Initiative)': {'var': 'vote_measure_22wk_abortion', 'state': 'Colorado'},
                   'Colorado Proposition 118 (Paid Medical and Family Leave Initiative)': {'var': 'vote_measure_family_leave', 'state': 'Colorado'},
                   'Colorado Proposition EE (Tobacco and E-Cigarette Tax Increase for Health and Education Programs Measure)': {'var': 'vote_measure_cig_tax', 'state': 'Colorado'},
                   'Florida Amendment 2 ($15 Minimum Wage Initiative)': {'var': 'vote_measure_min_wage', 'state': 'Florida'}}

options = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']


for measure_name, measure_data in MEASURES_STATES.items():
    print('## {} ##'.format(measure_name))
    state = measure_data['state']
    measure = measure_data['var']
    
    state_survey = survey.copy()
    potus_census = {'vote2016': POTUS_CENSUS[state].copy()}
    potus_census['vote2016']['Other'] = 1 - potus_census['vote2016']['Hillary Clinton'] - potus_census['vote2016']['Donald Trump']
    output = run_weighting_iteration(state_survey, census=potus_census, weigh_on=['vote2016'], verbose=0)
    potus_weights = output['weights']['vote2016']
    potus_weights = state_survey['vote2016'].astype(str).replace(potus_weights)
    state_survey['weight'] = normalize_weights(state_survey['weight'] * potus_weights)
    state_survey['lv_weight'] = normalize_weights(state_survey['weight'] * state_survey['lv_index'])

    state_survey_ = state_survey.loc[state_survey[measure].isin(options)].copy()
    state_survey_['weight'] = normalize_weights(state_survey_['weight'])
    state_survey_['rv_weight'] = normalize_weights(state_survey_['rv_weight'])
    state_survey_['lv_weight'] = normalize_weights(state_survey_['lv_weight'])

    lv_weighted_n = int(np.round(state_survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
    votes = state_survey_[measure].value_counts(normalize=True) * state_survey_.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options] * (100 / votes[options].sum())
    print(votes)
    print('Raw N: {}'.format(len(state_survey_)))
    print_result(**calc_result(for_vote=votes['Vote for / Support'],
                               against_vote=votes['Vote against / Oppose'],
                               state=state,
                               n=lv_weighted_n))
    print('-')

## Oregon Psilocybin Program Initiative ##
Vote for / Support        48.468325
Vote against / Oppose     29.947858
Don’t know / Undecided    21.583817
dtype: float64
Raw N: 797
Result 48.9 (80% CI: 36.3 to 61.5) (N=351) (raw_moe=6.5pts, margin=19.2pts, sigma=15.0pts) (47.1% likely to pass)
48.9 (80% CI: 36.3 to 61.5) (47.1%)
-
-
## DC Initiative 81 (Psilocybin) ##
Vote for / Support        49.524886
Vote against / Oppose     25.903103
Don’t know / Undecided    24.572011
dtype: float64
Raw N: 797
Result 50.7 (80% CI: 37.1 to 64.3) (N=305) (raw_moe=6.9pts, margin=20.7pts, sigma=16.2pts) (51.6% likely to pass)
50.7 (80% CI: 37.1 to 64.3) (51.6%)
-
-
## Colorado Gray Wolf Reintroduction Initiative ##
Vote for / Support        59.911646
Vote against / Oppose     26.520703
Don’t know / Undecided    13.567651
dtype: float64
Raw N: 695
Result 58.3 (80% CI: 47.6 to 69.0) (N=293) (raw_moe=7.0pts, margin=16.4pts, sigma=12.8pts) (60.2% likely to pass)
58.3 (80% CI: 47.6 to 69.0) (60.2%)
-
-
## Mon

## Other Propositions (Alt Weights, Post-Hoc)

In [5]:
for measure_name, measure_data in MEASURES_STATES.items():
    print('## {} ##'.format(measure_name))
    state = measure_data['state']
    measure = measure_data['var']
    
    state_survey = survey.copy()
    potus_census = {'vote2016': POTUS_CENSUS[state].copy()}
    potus_census['vote2016']['Other'] = 1 - potus_census['vote2016']['Hillary Clinton'] - potus_census['vote2016']['Donald Trump']
    output = run_weighting_iteration(state_survey, census=potus_census, weigh_on=['vote2016'], verbose=0)
    potus_weights = output['weights']['vote2016']
    potus_weights = state_survey['vote2016'].astype(str).replace(potus_weights)
    state_survey['weight'] = normalize_weights(state_survey['weight'] * potus_weights)
    state_survey['lv_weight'] = normalize_weights(state_survey['weight'] * state_survey['lv_index'])
    state_survey['lv_weight_alt'] = state_survey['lv_weight']
    state_survey.loc[(~state_survey['voted2016']) & (state_survey['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
    state_survey['lv_weight_alt'] = normalize_weights(state_survey['lv_weight_alt'])
    state_survey_ = state_survey.loc[state_survey[measure].isin(options)].copy()
    state_survey_['lv_weight_alt'] = normalize_weights(state_survey_['lv_weight_alt'])
    lv_weighted_n = int(np.round(state_survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
    votes = state_survey_[measure].value_counts(normalize=True) * state_survey_.groupby(measure)['lv_weight_alt'].mean() * 100
    votes = votes[options] * (100 / votes[options].sum())
    print(votes)
    print('Raw N: {}'.format(len(state_survey_)))
    print_result(**calc_result(for_vote=votes['Vote for / Support'],
                               against_vote=votes['Vote against / Oppose'],
                               state=state,
                               n=lv_weighted_n))
    print('-')

## Oregon Psilocybin Program Initiative ##
Vote for / Support        47.461090
Vote against / Oppose     29.924201
Don’t know / Undecided    22.614709
dtype: float64
Raw N: 797
Result 48.1 (80% CI: 35.2 to 61.0) (N=348) (raw_moe=6.6pts, margin=19.6pts, sigma=15.3pts) (45.1% likely to pass)
48.1 (80% CI: 35.2 to 61.0) (45.1%)
-
-
## DC Initiative 81 (Psilocybin) ##
Vote for / Support        48.549618
Vote against / Oppose     25.982431
Don’t know / Undecided    25.467951
dtype: float64
Raw N: 797
Result 49.9 (80% CI: 36.0 to 63.7) (N=304) (raw_moe=6.9pts, margin=21.1pts, sigma=16.4pts) (49.8% likely to pass)
49.9 (80% CI: 36.0 to 63.7) (49.8%)
-
-
## Colorado Gray Wolf Reintroduction Initiative ##
Vote for / Support        59.526001
Vote against / Oppose     26.998555
Don’t know / Undecided    13.475444
dtype: float64
Raw N: 695
Result 57.9 (80% CI: 47.1 to 68.7) (N=294) (raw_moe=7.0pts, margin=16.4pts, sigma=12.8pts) (59.0% likely to pass)
57.9 (80% CI: 47.1 to 68.7) (59.0%)
-
-
## Mon