## Libraries

In [48]:
import pandas as pd
import numpy as np
import scipy.stats as stat

from math import sqrt
from mlgear.utils import show, display_columns
from surveyweights import normalize_weights, run_weighting_scheme, get_census


def margin_of_error(n=None, sd=None, p=None, type='proportion', interval_size=0.95):
    z_lookup = {0.8: 1.28, 0.85: 1.44, 0.9: 1.65, 0.95: 1.96, 0.99: 2.58}
    if interval_size not in z_lookup.keys():
        raise ValueError('{} not a valid `interval_size` - must be {}'.format(interval_size,
                                                                              ', '.join(list(z_lookup.keys()))))
    if type == 'proportion':
        se = sqrt(p * (1 - p)) / sqrt(n)
    elif type == 'continuous':
        se = sd / sqrt(n)
    else:
        raise ValueError('{} not a valid `type` - must be proportion or continuous')
    
    z = z_lookup[interval_size]
    return se * z


def print_pct(pct, digits=0):
    pct = pct * 100
    pct = np.round(pct, digits)
    if pct >= 100:
        if digits == 0:
            val = '>99.0%'
        else:
            val = '>99.'
            for d in range(digits - 1):
                val += '9'
            val += '9%'
    elif pct <= 0:
        if digits == 0:
            val = '<0.1%'
        else:
            val = '<0.'
            for d in range(digits - 1):
                val += '0'
            val += '1%'
    else:
        val = '{}%'.format(pct)
    return val


def calc_result(biden_vote, trump_vote, n, interval=0.8):
    GENERAL_POLLING_ERROR = 0 # Just use raw MOE
    BIDEN_BIAS = 0
    N_SIMS = 100000
    
    biden_moe = margin_of_error(n=n, p=biden_vote/100, interval_size=interval)
    trump_moe = margin_of_error(n=n, p=trump_vote/100, interval_size=interval)
    undecided = (100 - biden_vote - trump_vote) / 2

    biden_mean = biden_vote + undecided * 0.25
    biden_raw_moe = biden_moe * 100
    biden_allocate_undecided = undecided * 0.4
    biden_margin = biden_raw_moe + biden_allocate_undecided + GENERAL_POLLING_ERROR
    
    trump_mean = trump_vote + undecided * 0.25
    trump_raw_moe = trump_moe * 100
    trump_allocate_undecided = undecided * 0.4
    trump_margin = trump_raw_moe + trump_allocate_undecided + GENERAL_POLLING_ERROR
    
    cdf_value = 0.5 + 0.5 * interval
    normed_sigma = stat.norm.ppf(cdf_value)
    
    biden_sigma = biden_margin / 100 / normed_sigma
    biden_sims = np.random.normal(biden_mean / 100, biden_sigma, N_SIMS)
    
    trump_sigma = trump_margin / 100 / normed_sigma
    trump_sims = np.random.normal(trump_mean / 100, trump_sigma, N_SIMS)
    
    chance_pass = np.sum([sim[0] + BIDEN_BIAS / 100 > sim[1] for sim in zip(biden_sims, trump_sims)]) / N_SIMS
    
    low, high = np.percentile(biden_sims - trump_sims, [20, 80]) * 100
    
    return {'mean': biden_mean - trump_mean, 'high': high, 'low': low, 'n': n,
            'raw_moe': biden_raw_moe + trump_raw_moe,
            'margin': (biden_margin + trump_margin) / 2,
            'sigma': (biden_sigma + trump_sigma) / 2,
            'chance_pass': chance_pass}


def print_result(mean, high, low, n, raw_moe, margin, sigma, chance_pass):
    mean = np.round(mean, 1)
    first = np.round(high, 1)
    second = np.round(low, 1)
    sigma = np.round(sigma * 100, 1)
    raw_moe = np.round(raw_moe, 1)
    margin = np.round(margin, 1)
    chance_pass = print_pct(chance_pass, 1)
    if second < first:
        _ = first
        first = second
        second = _
    if second > 100:
        second = 100
    if first < -100:
        first = -100
    print(('Result Biden +{} (80% CI: {} to {}) (Weighted N={}) (raw_moe={}pts, margin={}pts, '
           'sigma={}pts) (Biden {} likely to win)').format(mean,
                                                           first,
                                                           second,
                                                           n,
                                                           raw_moe,
                                                           margin,
                                                           sigma,
                                                           chance_pass))
    print('-')

    
def calc_result_gcb(dem_vote, rep_vote, n, interval=0.8):
    GENERAL_POLLING_ERROR = 0  # Just use raw MOE
    N_SIMS = 100000
    
    dem_moe = margin_of_error(n=n, p=dem_vote/100, interval_size=interval)
    rep_moe = margin_of_error(n=n, p=rep_vote/100, interval_size=interval)
    undecided = 100 - dem_vote - rep_vote

    dem_mean = dem_vote + undecided * 0.25
    dem_raw_moe = dem_moe * 100
    dem_allocate_undecided = undecided * 0.4
    dem_margin = dem_raw_moe + dem_allocate_undecided + GENERAL_POLLING_ERROR
    
    rep_mean = rep_vote + undecided * 0.25
    rep_raw_moe = rep_moe * 100
    rep_allocate_undecided = undecided * 0.4
    rep_margin = rep_raw_moe + rep_allocate_undecided + GENERAL_POLLING_ERROR
    
    cdf_value = 0.5 + 0.5 * interval
    normed_sigma = stat.norm.ppf(cdf_value)
    
    dem_sigma = dem_margin / 100 / normed_sigma
    dem_sims = np.random.normal(dem_mean / 100, dem_sigma, N_SIMS)
    
    rep_sigma = rep_margin / 100 / normed_sigma
    rep_sims = np.random.normal(rep_mean / 100, rep_sigma, N_SIMS)
    
    chance_pass = np.sum([sim[0] > sim[1] for sim in zip(dem_sims, rep_sims)]) / N_SIMS
    
    low, high = np.percentile(dem_sims - rep_sims, [20, 80]) * 100
    
    return {'mean': dem_mean - rep_mean, 'high': high, 'low': low, 'n': n,
            'raw_moe': dem_raw_moe + rep_raw_moe,
            'margin': (dem_margin + rep_margin) / 2,
            'sigma': (dem_sigma + rep_sigma) / 2,
            'chance_pass': chance_pass}


def print_result_gcb(mean, high, low, n, raw_moe, margin, sigma, chance_pass):
    mean = np.round(mean, 1)
    first = np.round(high, 1)
    second = np.round(low, 1)
    sigma = np.round(sigma * 100, 1)
    raw_moe = np.round(raw_moe, 1)
    margin = np.round(margin, 1)
    chance_pass = print_pct(chance_pass, 1)
    if second < first:
        _ = first
        first = second
        second = _
    if second > 100:
        second = 100
    if first < -100:
        first = -100
    print(('Result Dems {}{} (80% CI: {} to {}) (Weighted N={}) (raw_moe={}pts, margin={}pts, '
           'sigma={}pts) (Dems {} likely to win)').format('+' if mean > 0 else '',
                                                          mean,
                                                          first,
                                                          second,
                                                          n,
                                                          raw_moe,
                                                          margin,
                                                          sigma,
                                                          chance_pass))
    print('-')


def calc_result_measure(for_vote, against_vote, n, state=None, interval=0.8):
    GENERAL_POLLING_ERROR = 4.0
    ACQUIESENCE_BIAS = -5.0
    N_SIMS = 1000000
    
    margin_needed_for_victory = 0.5
    if state == 'Florida':
        margin_needed_for_victory = 0.6
    elif state == 'Colorado':
        margin_needed_for_victory = 0.55
    
    for_moe = margin_of_error(n=n, p=for_vote/100, interval_size=interval)
    against_moe = margin_of_error(n=n, p=against_vote/100, interval_size=interval)
    undecided = 100 - for_vote - against_vote
    mean = for_vote + undecided * 0.25 + ACQUIESENCE_BIAS
    raw_moe = for_moe * 100 + against_moe * 100
    
    allocate_undecided = undecided * 0.4
    margin = raw_moe + allocate_undecided + GENERAL_POLLING_ERROR
    
    cdf_value = 0.5 + 0.5 * interval
    normed_sigma = stat.norm.ppf(cdf_value)
    sigma = margin / 100 / normed_sigma
    
    sims = np.random.normal(mean / 100, sigma, N_SIMS)
    chance_pass = np.sum([s > margin_needed_for_victory for s in sims]) / N_SIMS
    low, high = np.percentile(sims, [20, 80]) * 100
    
    return {'mean': mean, 'high': high, 'low': low, 'n': n,
            'raw_moe': raw_moe, 'margin': margin, 'sigma': sigma, 'chance_pass': chance_pass}


def print_result_measure(mean, high, low, n, raw_moe, margin, sigma, chance_pass):
    mean = np.round(mean, 1)
    first = np.round(high, 1)
    second = np.round(low, 1)
    sigma = np.round(sigma * 100, 1)
    raw_moe = np.round(raw_moe, 1)
    margin = np.round(margin, 1)
    chance_pass = print_pct(chance_pass, 1)
    if second < first:
        _ = first
        first = second
        second = _
    if second > 100:
        second = 100
    if first < 0:
        first = 0
    print(('Result {} (80% CI: {} to {}) (N={}) (raw_moe={}pts, margin={}pts, '
           'sigma={}pts) ({} likely to pass)').format(mean,
                                                      first,
                                                      second,
                                                      n,
                                                      raw_moe,
                                                      margin,
                                                      sigma,
                                                      chance_pass))
    print(('{} (80% CI: {} to {}) ({})').format(mean,
                                                first,
                                                second,
                                                chance_pass))
    print('-')
    

def print_veg(veg_rate, n, interval=0.8):
    veg_moe = margin_of_error(n=n, p=veg_rate/100, interval_size=interval)
    veg_rate_lo = np.round(veg_rate - veg_moe * 100, 1)
    veg_rate_hi = np.round(veg_rate + veg_moe * 100, 1)
    if veg_rate_lo < 0:
        veg_rate_lo = 0
    veg_rate = np.round(veg_rate, 1)
    print('% Veg: {}% to {}% (mean {}%)'.format(veg_rate_lo, veg_rate_hi, veg_rate))

In [49]:
survey = pd.read_csv('responses_processed_national_weighted.csv')

In [50]:
census = get_census()
survey['social_twitter'] = survey['social_twitter'].astype(str)
census['social_twitter'] = {'False': 0.8, 'True': 0.22}
survey['social_fb'] = survey['social_fb'].astype(str)
census['social_fb'] = {'False': 0.31, 'True': 0.69}

In [53]:
survey_ = survey.copy()
    
print('## UNWEIGHTED ##')
n = len(survey_)
options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = survey['vegetarian'].value_counts(normalize=True)[True] * 100
print_veg(veg_rate, n=n)

## UNWEIGHTED ##
Joe Biden, the Democrat         69.267760
Donald Trump, the Republican    20.896175
Another candidate                4.349727
Not decided                      5.486339
Name: vote_trump_biden, dtype: float64
Result Biden +48.4 (80% CI: 45.8 to 50.9) (Weighted N=4933) (raw_moe=1.6pts, margin=2.8pts, sigma=2.2pts) (Biden >99.9% likely to win)
-
A Democratic candidate    65.279028
A Republican candidate    21.062106
Another candidate          2.025203
Not decided               11.633663
Name: vote_rep, dtype: float64
Result Dems +44.2 (80% CI: 38.5 to 50.1) (Weighted N=4933) (raw_moe=1.6pts, margin=6.3pts, sigma=4.9pts) (Dems >99.9% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        39.593114
Vote against / Oppose     32.394366
Don’t know / Undecided    28.012520
Name: vote_measure_primate_rights, dtype: float64
Result 41.6 (80% CI: 30.5 to 52.7) (N=4933) (raw_moe=1.7pts, margin=16.9pts, sigma=13.2pts) (26.2% likely to pass)
41.6 (80% CI: 30.5 to 

In [55]:
demographics = ['region']
survey_ = survey.copy()
output = run_weighting_scheme(survey_, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
    
print('## WEIGH ON REGION ##')
weighted_n = int(np.round(survey_['weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['weight'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['weight'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['weight'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

## WEIGH ON REGION ##
Joe Biden, the Democrat         68.641568
Donald Trump, the Republican    21.466726
Another candidate                4.421561
Not decided                      5.470145
dtype: float64
Result Biden +47.2 (80% CI: 44.5 to 49.8) (Weighted N=4091) (raw_moe=1.8pts, margin=2.9pts, sigma=2.2pts) (Biden >99.9% likely to win)
-
A Democratic candidate    65.580454
A Republican candidate    21.690070
Another candidate          1.983747
Not decided               10.745729
dtype: float64
Result Dems +43.9 (80% CI: 38.3 to 49.4) (Weighted N=4091) (raw_moe=1.8pts, margin=6.0pts, sigma=4.7pts) (Dems >99.9% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        39.057955
Vote against / Oppose     32.433387
Don’t know / Undecided    28.508658
dtype: float64
Result 41.2 (80% CI: 30.0 to 52.4) (N=4933) (raw_moe=1.7pts, margin=17.1pts, sigma=13.4pts) (25.5% likely to pass)
41.2 (80% CI: 30.0 to 52.4) (25.5%)
-
## vote_measure_cage_free ##
Vote for / Support       

In [56]:
demographics = ['gender', 'race', 'age', 'income', 'region']
survey_ = survey.copy()
output = run_weighting_scheme(survey_, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION ##')
weighted_n = int(np.round(survey_['weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['weight'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['weight'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['weight'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION ##
Joe Biden, the Democrat         64.267214
Donald Trump, the Republican    26.999073
Another candidate                4.104656
Not decided                      4.629057
dtype: float64
Result Biden +37.3 (80% CI: 34.6 to 39.9) (Weighted N=2752) (raw_moe=2.3pts, margin=2.9pts, sigma=2.2pts) (Biden >99.9% likely to win)
-
A Democratic candidate    60.505772
A Republican candidate    29.016064
Another candidate          2.008026
Not decided                8.470137
dtype: float64
Result Dems +31.5 (80% CI: 26.5 to 36.5) (Weighted N=2752) (raw_moe=2.3pts, margin=5.3pts, sigma=4.2pts) (Dems >99.9% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        36.203093
Vote against / Oppose     43.464551
Don’t know / Undecided    20.332356
dtype: float64
Result 36.3 (80% CI: 27.2 to 45.5) (N=4933) (raw_moe=1.8pts, margin=13.9pts, sigma=10.9pts) (10.3% likely to pass)
36.3 (80% CI: 27.2 to 45.5) (10.3%)
-
## vote_measure_cage_fr

In [60]:
demographics = ['gender', 'race', 'age', 'income', 'region']
survey_ = survey.copy()
output = run_weighting_scheme(survey_, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey_['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + LV ##
Joe Biden, the Democrat         65.383467
Donald Trump, the Republican    27.626023
Another candidate                3.927906
Not decided                      3.062604
dtype: float64
Result Biden +37.8 (80% CI: 35.4 to 40.2) (Weighted N=2630) (raw_moe=2.3pts, margin=2.5pts, sigma=2.0pts) (Biden >99.9% likely to win)
-
A Democratic candidate    61.884362
A Republican candidate    29.597162
Another candidate          1.991830
Not decided                6.526645
dtype: float64
Result Dems +32.3 (80% CI: 28.0 to 36.5) (Weighted N=2630) (raw_moe=2.4pts, margin=4.6pts, sigma=3.6pts) (Dems >99.9% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        35.971173
Vote against / Oppose     43.947600
Don’t know / Undecided    20.081227
dtype: float64
Result 36.0 (80% CI: 26.9 to 45.1) (N=4933) (raw_moe=1.8pts, margin=13.8pts, sigma=10.8pts) (9.7% likely to pass)
36.0 (80% CI: 26.9 to 45.1) (9.7%)
-
## vote_measure_cage

In [59]:
demographics = ['gender', 'race', 'education', 'income', 'region', 'age']
survey_ = survey.copy()
output = run_weighting_scheme(survey_, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey_['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + LV ##
Joe Biden, the Democrat         58.081238
Donald Trump, the Republican    34.609248
Another candidate                4.020539
Not decided                      3.288976
dtype: float64
Result Biden +23.5 (80% CI: 20.8 to 26.1) (Weighted N=2046) (raw_moe=2.7pts, margin=2.8pts, sigma=2.2pts) (Biden >99.9% likely to win)
-
A Democratic candidate    55.106502
A Republican candidate    36.205419
Another candidate          2.191872
Not decided                6.496208
dtype: float64
Result Dems +18.9 (80% CI: 14.4 to 23.4) (Weighted N=2046) (raw_moe=2.8pts, margin=4.9pts, sigma=3.8pts) (Dems >99.9% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        41.280644
Vote against / Oppose     39.813629
Don’t know / Undecided    18.905727
dtype: float64
Result 41.0 (80% CI: 32.2 to 49.8) (N=4933) (raw_moe=1.8pts, margin=13.4pts, sigma=10.4pts) (19.4% likely to pass)
41.0 (80% CI: 32.2 to 49.8) (19.4%)
-
## vot

In [61]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'region']
survey_ = survey.copy()
output = run_weighting_scheme(survey_, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])

print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + LV ##
Joe Biden, the Democrat         51.346667
Donald Trump, the Republican    41.493585
Another candidate                3.583590
Not decided                      3.576158
dtype: float64
Result Biden +9.9 (80% CI: 7.1 to 12.5) (Weighted N=1867) (raw_moe=2.9pts, margin=2.9pts, sigma=2.3pts) (Biden 99.9% likely to win)
-
A Democratic candidate    47.881171
A Republican candidate    42.609703
Another candidate          2.471681
Not decided                7.037445
dtype: float64
Result Dems +5.3 (80% CI: 0.3 to 10.1) (Weighted N=1867) (raw_moe=2.9pts, margin=5.3pts, sigma=4.1pts) (Dems 81.5% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        38.042912
Vote against / Oppose     39.756416
Don’t know / Undecided    22.200672
dtype: float64
Result 38.6 (80% CI: 29.0 to 48.2) (N=4933) (raw_moe=1.8pts, margin=14.7pts, sigma=11.4pts) (15.9% likely to pass)
38.6 (80% CI: 29.0 to 48.2) (15.9%)


In [62]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'vote2016', 'region']
output = run_weighting_scheme(survey, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_['lv_weight_alt'] = survey_['lv_weight']
survey_.loc[(~survey['voted2016']) & (survey_['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

print('-')
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + LV ##')
weighted_n = int(np.round(survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight_alt'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight_alt'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight_alt'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight_alt'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + LV ##
Joe Biden, the Democrat         55.739464
Donald Trump, the Republican    37.586728
Another candidate                3.240410
Not decided                      3.433398
dtype: float64
Result Biden +18.2 (80% CI: 15.6 to 20.7) (Weighted N=2017) (raw_moe=2.8pts, margin=2.7pts, sigma=2.1pts) (Biden >99.9% likely to win)
-
A Democratic candidate    52.611596
A Republican candidate    38.684420
Another candidate          1.954918
Not decided                6.749067
dtype: float64
Result Dems +13.9 (80% CI: 9.4 to 18.4) (Weighted N=2017) (raw_moe=2.8pts, margin=4.9pts, sigma=3.8pts) (Dems 99.5% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        42.407383
Vote against / Oppose     37.976178
Don’t know / Undecided    19.616439
dtype: float64
Result 42.3 (80% CI: 33.3 to 51.3) (N=4933) (raw_moe=1.8pts, margin=13.6pts, sigma=10.6pts) (23.5% likely to pass)
42.3 (80% CI: 33.3 t

In [64]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'vote2016', 'region', 'gss_trust']
output = run_weighting_scheme(survey, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_['lv_weight_alt'] = survey_['lv_weight']
survey_.loc[(~survey['voted2016']) & (survey_['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

print('-')
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + LV ##')
weighted_n = int(np.round(survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight_alt'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight_alt'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight_alt'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight_alt'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + LV ##
Joe Biden, the Democrat         55.691480
Donald Trump, the Republican    37.684857
Another candidate                3.247778
Not decided                      3.375885
dtype: float64
Result Biden +18.0 (80% CI: 15.5 to 20.5) (Weighted N=2002) (raw_moe=2.8pts, margin=2.7pts, sigma=2.1pts) (Biden >99.9% likely to win)
-
A Democratic candidate    52.493168
A Republican candidate    38.631866
Another candidate          2.041546
Not decided                6.833420
dtype: float64
Result Dems +13.9 (80% CI: 9.2 to 18.5) (Weighted N=2002) (raw_moe=2.8pts, margin=5.0pts, sigma=3.9pts) (Dems 99.4% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        41.074604
Vote against / Oppose     38.015518
Don’t know / Undecided    20.909878
dtype: float64
Result 41.3 (80% CI: 32.0 to 50.6) (N=4933) (raw_moe=1.8pts, margin=14.1pts, sigma=11.0pts) (21.5% likely to pass)
41.3 

In [65]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'vote2016', 'region', 'gss_trust',
                'gss_bible']
output = run_weighting_scheme(survey, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_['lv_weight_alt'] = survey_['lv_weight']
survey_.loc[(~survey['voted2016']) & (survey_['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

print('-')
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + RELIGION + LV ##')
weighted_n = int(np.round(survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight_alt'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight_alt'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight_alt'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight_alt'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + LV ##
Joe Biden, the Democrat         53.217488
Donald Trump, the Republican    39.812979
Another candidate                3.280012
Not decided                      3.689522
dtype: float64
Result Biden +13.4 (80% CI: 10.8 to 16.0) (Weighted N=1959) (raw_moe=2.9pts, margin=2.8pts, sigma=2.2pts) (Biden >99.9% likely to win)
-
A Democratic candidate    49.614080
A Republican candidate    41.425231
Another candidate          2.118772
Not decided                6.841917
dtype: float64
Result Dems +8.2 (80% CI: 3.6 to 12.9) (Weighted N=1959) (raw_moe=2.9pts, margin=5.0pts, sigma=3.9pts) (Dems 93.1% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        40.376836
Vote against / Oppose     39.104103
Don’t know / Undecided    20.519061
dtype: float64
Result 40.5 (80% CI: 31.3 to 49.7) (N=4933) (raw_moe=1.8pts, margin=14.0pts, sigma=10.9pts) (19.2% likely to p

In [66]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'vote2016', 'region', 'gss_trust',
                'gss_bible', 'gss_spanking']
output = run_weighting_scheme(survey, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_['lv_weight_alt'] = survey_['lv_weight']
survey_.loc[(~survey['voted2016']) & (survey_['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + SPANKING + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

print('-')
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + RELIGION + SPANKING + LV ##')
weighted_n = int(np.round(survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight_alt'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight_alt'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight_alt'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight_alt'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + SPANKING + LV ##
Joe Biden, the Democrat         51.346667
Donald Trump, the Republican    41.493585
Another candidate                3.583590
Not decided                      3.576158
dtype: float64
Result Biden +9.9 (80% CI: 7.1 to 12.5) (Weighted N=1867) (raw_moe=2.9pts, margin=2.9pts, sigma=2.3pts) (Biden 99.9% likely to win)
-
A Democratic candidate    47.881171
A Republican candidate    42.609703
Another candidate          2.471681
Not decided                7.037445
dtype: float64
Result Dems +5.3 (80% CI: 0.4 to 10.2) (Weighted N=1867) (raw_moe=2.9pts, margin=5.3pts, sigma=4.1pts) (Dems 81.8% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        38.042912
Vote against / Oppose     39.756416
Don’t know / Undecided    22.200672
dtype: float64
Result 38.6 (80% CI: 29.0 to 48.2) (N=4933) (raw_moe=1.8pts, margin=14.7pts, sigma=11.4pts) (15.9% lik

In [67]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'vote2016', 'region', 'gss_trust',
                'gss_bible', 'gss_spanking', 'social_twitter', 'social_fb']
output = run_weighting_scheme(survey, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_['lv_weight_alt'] = survey_['lv_weight']
survey_.loc[(~survey['voted2016']) & (survey_['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + SPANKING + SOCIAL MEDIA + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

print('-')
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + RELIGION + SPANKING + SOCIAL MEDIA + LV ##')
weighted_n = int(np.round(survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight_alt'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

options2 = ['A Democratic candidate', 'A Republican candidate', 'Another candidate', 'Not decided']
votes = survey_['vote_rep'].value_counts(normalize=True) * survey_.groupby('vote_rep')['lv_weight_alt'].mean() * 100
votes = votes[options2] * (100 / votes[options2].sum())
print(votes)
print_result_gcb(**calc_result_gcb(dem_vote=votes['A Democratic candidate'],
                                   rep_vote=votes['A Republican candidate'],
                                   n=weighted_n))

measures = ['vote_measure_primate_rights', 'vote_measure_cage_free', 'vote_measure_no_labeling',
            'vote_measure_marijuana', 'vote_measure_min_wage']
for measure in measures:
    print('## {} ##'.format(measure))
    options3 = ['Vote for / Support', 'Vote against / Oppose', 'Don’t know / Undecided']
    votes = survey_[measure].value_counts(normalize=True) * survey_.groupby(measure)['lv_weight_alt'].mean() * 100
    votes = votes[options3] * (100 / votes[options3].sum())
    print(votes)
    print_result_measure(**calc_result_measure(for_vote=votes['Vote for / Support'],
                                               against_vote=votes['Vote against / Oppose'],
                                               n=n))

veg_rate = (survey['vegetarian'].value_counts(normalize=True) * survey_.groupby('vegetarian')['lv_weight_alt'].mean() * 100)[True]
print_veg(veg_rate, n=weighted_n)

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + SPANKING + SOCIAL MEDIA + LV ##
Joe Biden, the Democrat         50.103315
Donald Trump, the Republican    42.185069
Another candidate                4.295353
Not decided                      3.416263
dtype: float64
Result Biden +7.9 (80% CI: 5.1 to 10.8) (Weighted N=1713) (raw_moe=3.1pts, margin=3.1pts, sigma=2.4pts) (Biden 99.0% likely to win)
-
A Democratic candidate    45.887465
A Republican candidate    44.297832
Another candidate          2.678113
Not decided                7.136590
dtype: float64
Result Dems +1.6 (80% CI: -3.4 to 6.6) (Weighted N=1713) (raw_moe=3.1pts, margin=5.5pts, sigma=4.3pts) (Dems 60.6% likely to win)
-
## vote_measure_primate_rights ##
Vote for / Support        35.426384
Vote against / Oppose     41.633905
Don’t know / Undecided    22.939711
dtype: float64
Result 36.2 (80% CI: 26.3 to 46.0) (N=4933) (raw_moe=1.8pts, margin=14.9pts, sigma=11.7