## Libraries

In [87]:
import pandas as pd
import numpy as np
import scipy.stats as stat

from math import sqrt
from mlgear.utils import show, display_columns
from surveyweights import normalize_weights


def margin_of_error(n=None, sd=None, p=None, type='proportion', interval_size=0.95):
    z_lookup = {0.8: 1.28, 0.85: 1.44, 0.9: 1.65, 0.95: 1.96, 0.99: 2.58}
    if interval_size not in z_lookup.keys():
        raise ValueError('{} not a valid `interval_size` - must be {}'.format(interval_size,
                                                                              ', '.join(list(z_lookup.keys()))))
    if type == 'proportion':
        se = sqrt(p * (1 - p)) / sqrt(n)
    elif type == 'continuous':
        se = sd / sqrt(n)
    else:
        raise ValueError('{} not a valid `type` - must be proportion or continuous')
    
    z = z_lookup[interval_size]
    return se * z


def print_pct(pct, digits=0):
    pct = pct * 100
    pct = np.round(pct, digits)
    if pct >= 100:
        if digits == 0:
            val = '>99.0%'
        else:
            val = '>99.'
            for d in range(digits - 1):
                val += '9'
            val += '9%'
    elif pct <= 0:
        if digits == 0:
            val = '<0.1%'
        else:
            val = '<0.'
            for d in range(digits - 1):
                val += '0'
            val += '1%'
    else:
        val = '{}%'.format(pct)
    return val


def calc_result(biden_vote, trump_vote, n, interval=0.8):
    GENERAL_POLLING_ERROR = 0
    BIDEN_BIAS = 0
    N_SIMS = 100000
    
    biden_moe = margin_of_error(n=n, p=biden_vote/100, interval_size=interval)
    trump_moe = margin_of_error(n=n, p=trump_vote/100, interval_size=interval)
    undecided = (100 - biden_vote - trump_vote) / 2

    biden_mean = biden_vote + undecided * 0.25
    biden_raw_moe = biden_moe * 100
    biden_allocate_undecided = undecided * 0.4
    biden_margin = biden_raw_moe + biden_allocate_undecided + GENERAL_POLLING_ERROR
    
    trump_mean = trump_vote + undecided * 0.25
    trump_raw_moe = trump_moe * 100
    trump_allocate_undecided = undecided * 0.4
    trump_margin = trump_raw_moe + trump_allocate_undecided + GENERAL_POLLING_ERROR
    
    cdf_value = 0.5 + 0.5 * interval
    normed_sigma = stat.norm.ppf(cdf_value)
    
    biden_sigma = biden_margin / 100 / normed_sigma
    biden_sims = np.random.normal(biden_mean / 100, biden_sigma, N_SIMS)
    
    trump_sigma = trump_margin / 100 / normed_sigma
    trump_sims = np.random.normal(trump_mean / 100, trump_sigma, N_SIMS)
    
    chance_pass = np.sum([sim[0] + BIDEN_BIAS / 100 > sim[1] for sim in zip(biden_sims, trump_sims)]) / N_SIMS
    
    low, high = np.percentile(biden_sims - trump_sims, [20, 80]) * 100
    
    return {'mean': biden_mean - trump_mean, 'high': high, 'low': low, 'n': n,
            'raw_moe': biden_raw_moe + trump_raw_moe,
            'margin': (biden_margin + trump_margin) / 2,
            'sigma': (biden_sigma + trump_sigma) / 2,
            'chance_pass': chance_pass}


def print_result(mean, high, low, n, raw_moe, margin, sigma, chance_pass):
    mean = np.round(mean, 1)
    first = np.round(high, 1)
    second = np.round(low, 1)
    sigma = np.round(sigma * 100, 1)
    raw_moe = np.round(raw_moe, 1)
    margin = np.round(margin, 1)
    chance_pass = print_pct(chance_pass, 1)
    if second < first:
        _ = first
        first = second
        second = _
    if second > 100:
        second = 100
    if first < -100:
        first = -100
    print(('Result Biden +{} (80% CI: {} to {}) (Weighted N={}) (raw_moe={}pts, margin={}pts, '
           'sigma={}pts) (Biden {} likely to win)').format(mean,
                                                           first,
                                                           second,
                                                           n,
                                                           raw_moe,
                                                           margin,
                                                           sigma,
                                                           chance_pass))
    print('-')

In [88]:
survey = pd.read_csv('responses_processed_national_weighted.csv')

In [89]:
census = get_census()
survey['social_twitter'] = survey['social_twitter'].astype(str)
census['social_twitter'] = {'False': 0.8, 'True': 0.22}
survey['social_fb'] = survey['social_fb'].astype(str)
census['social_fb'] = {'False': 0.31, 'True': 0.69}

In [90]:
survey_ = survey.copy()
    
print('## UNWEIGHTED ##')
n = len(survey_)
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=n))

## UNWEIGHTED ##
Joe Biden, the Democrat         69.267760
Donald Trump, the Republican    20.896175
Another candidate                4.349727
Not decided                      5.486339
Name: vote_trump_biden, dtype: float64
Result Biden +48.4 (80% CI: 45.8 to 51.0) (Weighted N=4933) (raw_moe=1.6pts, margin=2.8pts, sigma=2.2pts) (Biden >99.9% likely to win)
-


In [109]:
demographics = ['region']
survey_ = survey.copy()
output = run_weighting_scheme(survey_, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
    
print('## WEIGH ON REGION ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

## WEIGH ON REGION ##
Joe Biden, the Democrat         68.641568
Donald Trump, the Republican    21.466726
Another candidate                4.421561
Not decided                      5.470145
dtype: float64
Result Biden +47.2 (80% CI: 44.1 to 50.2) (Weighted N=1921) (raw_moe=2.6pts, margin=3.3pts, sigma=2.5pts) (Biden >99.9% likely to win)
-


In [96]:
demographics = ['gender', 'race', 'age', 'income', 'region']
survey_ = survey.copy()
output = run_weighting_scheme(survey_, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION ##
Joe Biden, the Democrat         64.267214
Donald Trump, the Republican    26.999073
Another candidate                4.104656
Not decided                      4.629057
dtype: float64
Result Biden +37.3 (80% CI: 34.4 to 40.1) (Weighted N=2113) (raw_moe=2.6pts, margin=3.0pts, sigma=2.4pts) (Biden >99.9% likely to win)
-


In [97]:
demographics = ['gender', 'race', 'age', 'income', 'region']
survey_ = survey.copy()
output = run_weighting_scheme(survey_, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey_['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + LV ##
Joe Biden, the Democrat         65.383467
Donald Trump, the Republican    27.626023
Another candidate                3.927906
Not decided                      3.062604
dtype: float64
Result Biden +37.8 (80% CI: 35.4 to 40.1) (Weighted N=2630) (raw_moe=2.3pts, margin=2.5pts, sigma=2.0pts) (Biden >99.9% likely to win)
-


In [98]:
demographics = ['gender', 'race', 'education', 'income', 'region', 'age']
survey_ = survey.copy()
output = run_weighting_scheme(survey_, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey_['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + LV ##
Joe Biden, the Democrat         58.081238
Donald Trump, the Republican    34.609248
Another candidate                4.020539
Not decided                      3.288976
dtype: float64
Result Biden +23.5 (80% CI: 20.9 to 26.1) (Weighted N=2046) (raw_moe=2.7pts, margin=2.8pts, sigma=2.2pts) (Biden >99.9% likely to win)
-


In [99]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'region']
survey_ = survey.copy()
output = run_weighting_scheme(survey_, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])

print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + LV ##
Joe Biden, the Democrat         58.189414
Donald Trump, the Republican    34.525090
Another candidate                4.014695
Not decided                      3.270801
dtype: float64
Result Biden +23.7 (80% CI: 21.0 to 26.3) (Weighted N=2046) (raw_moe=2.7pts, margin=2.8pts, sigma=2.2pts) (Biden >99.9% likely to win)
-


In [100]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'vote2016', 'region']
output = run_weighting_scheme(survey, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_['lv_weight_alt'] = survey_['lv_weight']
survey_.loc[(~survey['voted2016']) & (survey_['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])

options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + LV ##')
weighted_n = int(np.round(survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight_alt'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + LV ##
Joe Biden, the Democrat         55.739464
Donald Trump, the Republican    37.586728
Another candidate                3.240410
Not decided                      3.433398
dtype: float64
Result Biden +18.2 (80% CI: 15.6 to 20.7) (Weighted N=2017) (raw_moe=2.8pts, margin=2.7pts, sigma=2.1pts) (Biden >99.9% likely to win)
-
## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + LV ##
Joe Biden, the Democrat         54.488340
Donald Trump, the Republican    38.987652
Another candidate                3.167676
Not decided                      3.356333
dtype: float64
Result Biden +15.5 (80% CI: 13.0 to 18.0) (Weighted N=2015) (raw_moe=2.8pts, margin=2.7pts, sigma=2.1pts) (Biden >99.9% likely to win)
-


In [101]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'vote2016', 'region', 'gss_trust']
output = run_weighting_scheme(survey, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_['lv_weight_alt'] = survey_['lv_weight']
survey_.loc[(~survey['voted2016']) & (survey_['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])

options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + LV ##')
weighted_n = int(np.round(survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight_alt'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + LV ##
Joe Biden, the Democrat         55.691480
Donald Trump, the Republican    37.684857
Another candidate                3.247778
Not decided                      3.375885
dtype: float64
Result Biden +18.0 (80% CI: 15.5 to 20.5) (Weighted N=2002) (raw_moe=2.8pts, margin=2.7pts, sigma=2.1pts) (Biden >99.9% likely to win)
-
## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + LV ##
Joe Biden, the Democrat         54.421671
Donald Trump, the Republican    39.105692
Another candidate                3.173726
Not decided                      3.298912
dtype: float64
Result Biden +15.3 (80% CI: 12.8 to 17.8) (Weighted N=2000) (raw_moe=2.8pts, margin=2.7pts, sigma=2.1pts) (Biden >99.9% likely to win)
-


In [102]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'vote2016', 'region', 'gss_trust',
                'gss_bible']
output = run_weighting_scheme(survey, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_['lv_weight_alt'] = survey_['lv_weight']
survey_.loc[(~survey['voted2016']) & (survey_['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])

options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + RELIGION + LV ##')
weighted_n = int(np.round(survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight_alt'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + LV ##
Joe Biden, the Democrat         53.217488
Donald Trump, the Republican    39.812979
Another candidate                3.280012
Not decided                      3.689522
dtype: float64
Result Biden +13.4 (80% CI: 10.8 to 16.0) (Weighted N=1959) (raw_moe=2.9pts, margin=2.8pts, sigma=2.2pts) (Biden >99.9% likely to win)
-
## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + RELIGION + LV ##
Joe Biden, the Democrat         51.750668
Donald Trump, the Republican    41.471898
Another candidate                3.189606
Not decided                      3.587829
dtype: float64
Result Biden +10.3 (80% CI: 7.7 to 12.9) (Weighted N=1950) (raw_moe=2.9pts, margin=2.8pts, sigma=2.2pts) (Biden 99.9% likely to win)
-


In [103]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'vote2016', 'region', 'gss_trust',
                'gss_bible', 'gss_spanking']
output = run_weighting_scheme(survey, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_['lv_weight_alt'] = survey_['lv_weight']
survey_.loc[(~survey['voted2016']) & (survey_['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])

options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + SPANKING + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + RELIGION + SPANKING + LV ##')
weighted_n = int(np.round(survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight_alt'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + SPANKING + LV ##
Joe Biden, the Democrat         51.346667
Donald Trump, the Republican    41.493585
Another candidate                3.583590
Not decided                      3.576158
dtype: float64
Result Biden +9.9 (80% CI: 7.2 to 12.5) (Weighted N=1867) (raw_moe=2.9pts, margin=2.9pts, sigma=2.3pts) (Biden 99.9% likely to win)
-
## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + RELIGION + SPANKING + LV ##
Joe Biden, the Democrat         49.702741
Donald Trump, the Republican    43.366739
Another candidate                3.468857
Not decided                      3.461663
dtype: float64
Result Biden +6.3 (80% CI: 3.7 to 9.0) (Weighted N=1857) (raw_moe=3.0pts, margin=2.9pts, sigma=2.2pts) (Biden 97.8% likely to win)
-


In [104]:
demographics = ['gender', 'race', 'education', 'urban_rural', 'income', 'age', 'vote2016', 'region', 'gss_trust',
                'gss_bible', 'gss_spanking', 'social_twitter', 'social_fb']
output = run_weighting_scheme(survey, iters=50, weigh_on=demographics, census='US', verbose=0, early_terminate=False)
survey_ = output['final_df']
survey['lv_weight'] = normalize_weights(survey_['weight'] * survey_['lv_index'])
survey_['lv_weight_alt'] = survey_['lv_weight']
survey_.loc[(~survey['voted2016']) & (survey_['vote_trump_biden'] == 'Donald Trump, the Republican'), 'lv_weight_alt'] *= 1.662
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])

options = ['Joe Biden, the Democrat', 'Donald Trump, the Republican', 'Another candidate', 'Not decided']
survey_ = survey_.loc[survey_['vote_trump_biden'].isin(options)].copy()
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
survey_['lv_weight_alt'] = normalize_weights(survey_['lv_weight_alt'])
    
print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + SPANKING + SOCIAL MEDIA + LV ##')
weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

print('## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + RELIGION + SPANKING + SOCIAL MEDIA + LV ##')
weighted_n = int(np.round(survey_['lv_weight_alt'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_trump_biden'].value_counts(normalize=True) * survey_.groupby('vote_trump_biden')['lv_weight_alt'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
print_result(**calc_result(biden_vote=votes['Joe Biden, the Democrat'],
                           trump_vote=votes['Donald Trump, the Republican'],
                           n=weighted_n))

## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE + SOCIAL TRUST + RELIGION + SPANKING + SOCIAL MEDIA + LV ##
Joe Biden, the Democrat         50.103315
Donald Trump, the Republican    42.185069
Another candidate                4.295353
Not decided                      3.416263
dtype: float64
Result Biden +7.9 (80% CI: 5.0 to 10.8) (Weighted N=1713) (raw_moe=3.1pts, margin=3.1pts, sigma=2.4pts) (Biden 99.0% likely to win)
-
## WEIGH ON GENDER + RACE + AGE + INCOME + REGION + EDUCATION + URBAN/RURAL + 2016 VOTE (W/ NONVOTER ADJUSTMENT) + SOCIAL TRUST + RELIGION + SPANKING + SOCIAL MEDIA + LV ##
Joe Biden, the Democrat         48.342596
Donald Trump, the Republican    44.216788
Another candidate                4.144407
Not decided                      3.296210
dtype: float64
Result Biden +4.1 (80% CI: 1.3 to 6.9) (Weighted N=1700) (raw_moe=3.1pts, margin=3.0pts, sigma=2.4pts) (Biden 89.0% likely to win)
-
