## Libraries

In [23]:
import pandas as pd
import numpy as np
import scipy.stats as stat

from math import sqrt
from mlgear.utils import show, display_columns
from surveyweights import normalize_weights


def margin_of_error(n=None, sd=None, p=None, type='proportion', interval_size=0.95):
    z_lookup = {0.8: 1.28, 0.85: 1.44, 0.9: 1.65, 0.95: 1.96, 0.99: 2.58}
    if interval_size not in z_lookup.keys():
        raise ValueError('{} not a valid `interval_size` - must be {}'.format(interval_size,
                                                                              ', '.join(list(z_lookup.keys()))))
    if type == 'proportion':
        se = sqrt(p * (1 - p)) / sqrt(n)
    elif type == 'continuous':
        se = sd / sqrt(n)
    else:
        raise ValueError('{} not a valid `type` - must be proportion or continuous')
    
    z = z_lookup[interval_size]
    return se * z


def print_pct(pct, digits=0):
    pct = pct * 100
    pct = np.round(pct, digits)
    if pct >= 100:
        if digits == 0:
            val = '>99.0%'
        else:
            val = '>99.'
            for d in range(digits - 1):
                val += '9'
            val += '9%'
    elif pct <= 0:
        if digits == 0:
            val = '<0.1%'
        else:
            val = '<0.'
            for d in range(digits - 1):
                val += '0'
            val += '1%'
    else:
        val = '{}%'.format(pct)
    return val


def calc_result(for_vote, against_vote, n, interval=0.8):
    GENERAL_POLLING_ERROR = 8.0
    TIME_SHIFT_ERROR = 4.0
    N_SIMS = 10000000
    
    for_moe = margin_of_error(n=n, p=for_vote/100, interval_size=interval)
    against_moe = margin_of_error(n=n, p=against_vote/100, interval_size=interval)
    undecided = 100 - for_vote - against_vote
    mean = for_vote + undecided * 0.25
    raw_moe = for_moe * 100 + against_moe * 100
    
    allocate_undecided = undecided * 0.4
    margin = raw_moe + allocate_undecided + GENERAL_POLLING_ERROR + TIME_SHIFT_ERROR
    
    cdf_value = 0.5 + 0.5 * interval
    normed_sigma = stat.norm.ppf(cdf_value)
    sigma = margin / 100 / normed_sigma
    
    sims = np.random.normal(mean / 100, sigma, N_SIMS)
    chance_pass = np.sum([s > 0.5 for s in sims]) / N_SIMS
    low, high = np.percentile(sims, [20, 80]) * 100
    
    return {'mean': mean, 'high': high, 'low': low, 'n': n,
            'raw_moe': raw_moe, 'margin': margin, 'sigma': sigma, 'chance_pass': chance_pass}


def print_result(mean, high, low, n, raw_moe, margin, sigma, chance_pass):
    mean = np.round(mean, 1)
    first = np.round(high, 1)
    second = np.round(low, 1)
    sigma = np.round(sigma * 100, 1)
    raw_moe = np.round(raw_moe, 1)
    margin = np.round(margin, 1)
    chance_pass = print_pct(chance_pass, 1)
    if second < first:
        _ = first
        first = second
        second = _
    if second > 100:
        second = 100
    if first < 0:
        first = 0
    print(('Result {} (80% CI: {} to {}) (N={}) (raw_moe={}pts, margin={}pts, '
           'sigma={}pts) ({} likely to pass)').format(mean,
                                                      first,
                                                      second,
                                                      n,
                                                      raw_moe,
                                                      margin,
                                                      sigma,
                                                      chance_pass))
    print(('{} (80% CI: {} to {}) ({})').format(mean,
                                                first,
                                                second,
                                                chance_pass))
    print('-')

## Load Processed Data

In [24]:
survey = pd.read_csv('repsonses_processed_national_weighted.csv').fillna('Not presented')
ca_national_survey = pd.read_csv('repsonses_processed_ca_weighted.csv').fillna('Not presented')

## California Propositions

In [25]:
options = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
ca_measures = [c for c in survey.columns if 'vote_measure_ca_' in c and 'meta' not in c]

for measure in ca_measures:
    survey_ = survey.loc[survey[measure].isin(options)].copy()
    survey_['weight'] = normalize_weights(survey_['weight'])
    survey_['rv_weight'] = normalize_weights(survey_['rv_weight'])
    survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
    
    print('## {} NATIONAL UNWEIGHTED ##'.format(measure))
    n = len(survey_)
    votes = survey_[measure].value_counts(normalize=True) * 100
    votes = votes[options] * (100 / votes[options].sum())
    print(votes)
    print_result(**calc_result(for_vote=votes['Vote for / Support'],
                               against_vote=votes['Vote against / Oppose'],
                               n=n))
    
    print('## {} NATIONAL WEIGHTED ##'.format(measure))
    weighted_n = int(np.round(survey_['weight'].apply(lambda w: 1 if w > 1 else w).sum()))
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['weight'].mean() * 100
    votes = votes[options] * (100 / votes[options].sum())
    print(votes)
    print_result(**calc_result(for_vote=votes['Vote for / Support'],
                               against_vote=votes['Vote against / Oppose'],
                               n=weighted_n))

    print('## {} NATIONAL WEIGHTED + LV ##'.format(measure))
    lv_weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
    votes = survey_[measure].value_counts(normalize=True) * survey.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options] * (100 / votes[options].sum())
    print(votes)
    print_result(**calc_result(for_vote=votes['Vote for / Support'],
                               against_vote=votes['Vote against / Oppose'],
                               n=lv_weighted_n))
    
    if measure != 'vote_measure_100pct_income_tax':
        survey_ = survey.loc[survey[measure].isin(options)].copy()
        survey_.loc[survey_['vote_measure_100pct_income_tax'] == 'Vote for / Support', measure] = 'Don’t know / Undecided'
        survey_['weight'] = normalize_weights(survey_['weight'])
        survey_['rv_weight'] = normalize_weights(survey_['rv_weight'])
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])

        print('## {} NATIONAL CORRECTED + WEIGHTED + LV ##'.format(measure))
        lv_weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
        votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
        votes = votes[options] * (100 / votes[options].sum())
        print(votes)
        print_result(**calc_result(for_vote=votes['Vote for / Support'],
                                   against_vote=votes['Vote against / Oppose'],
                                   n=lv_weighted_n))
        
    survey_ = survey.loc[ca_national_survey[measure].isin(options)].copy()
    survey_['weight'] = normalize_weights(survey_['weight'])
    survey_['rv_weight'] = normalize_weights(survey_['rv_weight'])
    survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
    
    print('## {} NATIONAL CA-WEIGHTED ##'.format(measure))
    weighted_n = int(np.round(survey_['weight'].apply(lambda w: 1 if w > 1 else w).sum()))
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['weight'].mean() * 100
    votes = votes[options] * (100 / votes[options].sum())
    print(votes)
    print_result(**calc_result(for_vote=votes['Vote for / Support'],
                               against_vote=votes['Vote against / Oppose'],
                               n=weighted_n))

    print('## {} NATIONAL CA-WEIGHTED + LV ##'.format(measure))
    lv_weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
    votes = survey_[measure].value_counts(normalize=True) * survey.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options] * (100 / votes[options].sum())
    print(votes)
    print_result(**calc_result(for_vote=votes['Vote for / Support'],
                               against_vote=votes['Vote against / Oppose'],
                               n=lv_weighted_n))
    
    if measure != 'vote_measure_100pct_income_tax':
        survey_ = ca_national_survey.loc[survey[measure].isin(options)].copy()
        survey_.loc[survey_['vote_measure_100pct_income_tax'] == 'Vote for / Support', measure] = 'Don’t know / Undecided'
        survey_['weight'] = normalize_weights(survey_['weight'])
        survey_['rv_weight'] = normalize_weights(survey_['rv_weight'])
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])

        print('## {} NATIONAL CORRECTED + CA-WEIGHTED + LV ##'.format(measure))
        lv_weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
        votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
        votes = votes[options] * (100 / votes[options].sum())
        print(votes)
        print_result(**calc_result(for_vote=votes['Vote for / Support'],
                                   against_vote=votes['Vote against / Oppose'],
                                   n=lv_weighted_n))

    survey_ = ca_national_survey.loc[survey[measure].isin(options)].copy()
    survey_ = survey_.loc[survey['state'] == 'California']
    survey_['weight'] = normalize_weights(survey_['weight'])
    survey_['rv_weight'] = normalize_weights(survey_['rv_weight'])
    survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
    
    print('## {} CALI UNWEIGHTED ##'.format(measure))
    n = len(survey_)
    votes = survey_[measure].value_counts(normalize=True) * 100
    votes = votes[options] * (100 / votes[options].sum())
    print(votes)
    print_result(**calc_result(for_vote=votes['Vote for / Support'],
                               against_vote=votes['Vote against / Oppose'],
                               n=n))
    
    print('## {} CALI CA-WEIGHTED ##'.format(measure))
    weighted_n = int(np.round(survey_['weight'].apply(lambda w: 1 if w > 1 else w).sum()))
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['weight'].mean() * 100
    votes = votes[options] * (100 / votes[options].sum())
    print(votes)
    print_result(**calc_result(for_vote=votes['Vote for / Support'],
                               against_vote=votes['Vote against / Oppose'],
                               n=weighted_n))

    print('## {} CALI CA-WEIGHTED + LV ##'.format(measure))
    lv_weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
    votes = survey_[measure].value_counts(normalize=True) * survey.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options] * (100 / votes[options].sum())
    print(votes)
    print_result(**calc_result(for_vote=votes['Vote for / Support'],
                               against_vote=votes['Vote against / Oppose'],
                               n=lv_weighted_n))
    
    if measure != 'vote_measure_100pct_income_tax':
        survey_ = ca_national_survey.loc[survey[measure].isin(options)].copy()
        survey_ = survey_.loc[survey['state'] == 'California']
        survey_.loc[survey_['vote_measure_100pct_income_tax'] == 'Vote for / Support', measure] = 'Don’t know / Undecided'
        survey_['weight'] = normalize_weights(survey_['weight'])
        survey_['rv_weight'] = normalize_weights(survey_['rv_weight'])
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
        
        print('## {} CALI CORRECTED + CA-WEIGHTED + LV ##'.format(measure))
        lv_weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
        votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
        votes = votes[options] * (100 / votes[options].sum())
        print(votes)
        print_result(**calc_result(for_vote=votes['Vote for / Support'],
                                   against_vote=votes['Vote against / Oppose'],
                                   n=lv_weighted_n))

    print('-')

## vote_measure_ca_prop16_affirmative_action NATIONAL UNWEIGHTED ##
Vote for / Support        42.950237
Vote against / Oppose     38.447867
Don’t know / Undecided    18.601896
Name: vote_measure_ca_prop16_affirmative_action, dtype: float64
Result 47.6 (80% CI: 32.8 to 62.4) (N=1688) (raw_moe=3.1pts, margin=22.5pts, sigma=17.6pts) (44.6% likely to pass)
47.6 (80% CI: 32.8 to 62.4) (44.6%)
-
## vote_measure_ca_prop16_affirmative_action NATIONAL WEIGHTED ##
Vote for / Support        36.724107
Vote against / Oppose     45.900819
Don’t know / Undecided    17.375074
dtype: float64
Result 41.1 (80% CI: 25.5 to 56.6) (N=691) (raw_moe=4.8pts, margin=23.7pts, sigma=18.5pts) (31.5% likely to pass)
41.1 (80% CI: 25.5 to 56.6) (31.5%)
-
## vote_measure_ca_prop16_affirmative_action NATIONAL WEIGHTED + LV ##
Vote for / Support        38.935427
Vote against / Oppose     48.836327
Don’t know / Undecided    12.228246
dtype: float64
Result 42.0 (80% CI: 27.7 to 56.3) (N=667) (raw_moe=4.9pts, margin=21.8p

Result 56.7 (80% CI: 40.6 to 72.8) (N=705) (raw_moe=4.6pts, margin=24.5pts, sigma=19.1pts) (63.7% likely to pass)
56.7 (80% CI: 40.6 to 72.8) (63.7%)
-
## vote_measure_ca_prop14_stem_cell NATIONAL WEIGHTED + LV ##
Vote for / Support        54.137224
Vote against / Oppose     27.831287
Don’t know / Undecided    18.031490
dtype: float64
Result 58.6 (80% CI: 43.0 to 74.3) (N=686) (raw_moe=4.6pts, margin=23.8pts, sigma=18.6pts) (67.9% likely to pass)
58.6 (80% CI: 43.0 to 74.3) (67.9%)
-
## vote_measure_ca_prop14_stem_cell NATIONAL CORRECTED + WEIGHTED + LV ##
Vote for / Support        42.756823
Vote against / Oppose     25.133684
Don’t know / Undecided    32.109493
dtype: float64
Result 50.8 (80% CI: 31.5 to 70.1) (N=686) (raw_moe=4.5pts, margin=29.4pts, sigma=22.9pts) (51.4% likely to pass)
50.8 (80% CI: 31.5 to 70.1) (51.4%)
-
## vote_measure_ca_prop14_stem_cell NATIONAL CA-WEIGHTED ##
Vote for / Support        51.740378
Vote against / Oppose     28.405273
Don’t know / Undecided    19.8

Result 41.6 (80% CI: 20.1 to 63.1) (N=677) (raw_moe=4.5pts, margin=32.7pts, sigma=25.5pts) (37.1% likely to pass)
41.6 (80% CI: 20.1 to 63.1) (37.1%)
-
## vote_measure_ca_prop21_rent_control NATIONAL CA-WEIGHTED ##
Vote for / Support        41.931629
Vote against / Oppose     29.463099
Don’t know / Undecided    28.605272
dtype: float64
Result 49.1 (80% CI: 30.7 to 67.5) (N=708) (raw_moe=4.6pts, margin=28.0pts, sigma=21.9pts) (48.3% likely to pass)
49.1 (80% CI: 30.7 to 67.5) (48.3%)
-
## vote_measure_ca_prop21_rent_control NATIONAL CA-WEIGHTED + LV ##
Vote for / Support        44.672817
Vote against / Oppose     30.332884
Don’t know / Undecided    24.994299
dtype: float64
Result 50.9 (80% CI: 33.4 to 68.5) (N=677) (raw_moe=4.7pts, margin=26.7pts, sigma=20.8pts) (51.8% likely to pass)
50.9 (80% CI: 33.4 to 68.5) (51.8%)
-
## vote_measure_ca_prop21_rent_control NATIONAL CORRECTED + CA-WEIGHTED + LV ##
Vote for / Support        35.296898
Vote against / Oppose     31.379164
Don’t know / Un

Result 62.3 (80% CI: 48.4 to 76.1) (N=683) (raw_moe=4.6pts, margin=21.1pts, sigma=16.5pts) (77.1% likely to pass)
62.3 (80% CI: 48.4 to 76.1) (77.1%)
-
## vote_measure_ca_prop25_bail NATIONAL CORRECTED + CA-WEIGHTED + LV ##
Vote for / Support        53.281910
Vote against / Oppose     23.571457
Don’t know / Undecided    23.146634
dtype: float64
Result 59.1 (80% CI: 42.0 to 76.1) (N=638) (raw_moe=4.7pts, margin=25.9pts, sigma=20.2pts) (67.3% likely to pass)
59.1 (80% CI: 42.0 to 76.1) (67.3%)
-
## vote_measure_ca_prop25_bail CALI UNWEIGHTED ##
Vote for / Support        58.581236
Vote against / Oppose     27.574371
Don’t know / Undecided    13.844394
Name: vote_measure_ca_prop25_bail, dtype: float64
Result 62.0 (80% CI: 47.9 to 76.2) (N=874) (raw_moe=4.1pts, margin=21.6pts, sigma=16.9pts) (76.3% likely to pass)
62.0 (80% CI: 47.9 to 76.2) (76.3%)
-
## vote_measure_ca_prop25_bail CALI CA-WEIGHTED ##
Vote for / Support        60.997766
Vote against / Oppose     29.843650
Don’t know / Undec

Result 49.0 (80% CI: 32.5 to 65.5) (N=872) (raw_moe=4.2pts, margin=25.1pts, sigma=19.6pts) (48.0% likely to pass)
49.0 (80% CI: 32.5 to 65.5) (48.0%)
-
## vote_measure_ca_prop20_parole CALI CA-WEIGHTED ##
Vote for / Support        54.313789
Vote against / Oppose     30.344025
Don’t know / Undecided    15.342186
dtype: float64
Result 58.1 (80% CI: 42.0 to 74.3) (N=361) (raw_moe=6.5pts, margin=24.6pts, sigma=19.2pts) (66.4% likely to pass)
58.1 (80% CI: 42.0 to 74.3) (66.4%)
-
## vote_measure_ca_prop20_parole CALI CA-WEIGHTED + LV ##
Vote for / Support        52.153317
Vote against / Oppose     30.690647
Don’t know / Undecided    17.156036
dtype: float64
Result 56.4 (80% CI: 39.7 to 73.2) (N=346) (raw_moe=6.6pts, margin=25.5pts, sigma=19.9pts) (62.7% likely to pass)
56.4 (80% CI: 39.7 to 73.2) (62.7%)
-
## vote_measure_ca_prop20_parole CALI CORRECTED + CA-WEIGHTED + LV ##
Vote for / Support        47.208192
Vote against / Oppose     26.874203
Don’t know / Undecided    25.917604
dtype: fl