In [222]:
import pandas as pd
import numpy as np
import scipy.stats as stat

from math import sqrt
from mlgear.utils import show, display_columns
from surveyweights import run_weighting_iteration, run_weighting_scheme, normalize_weights


def margin_of_error(n=None, sd=None, p=None, type='proportion', interval_size=0.95):
    z_lookup = {0.8: 1.28, 0.85: 1.44, 0.9: 1.65, 0.95: 1.96, 0.99: 2.58}
    if interval_size not in z_lookup.keys():
        raise ValueError('{} not a valid `interval_size` - must be {}'.format(interval_size,
                                                                              ', '.join(list(z_lookup.keys()))))
    if type == 'proportion':
        se = sqrt(p * (1 - p)) / sqrt(n)
    elif type == 'continuous':
        se = sd / sqrt(n)
    else:
        raise ValueError('{} not a valid `type` - must be proportion or continuous')
    
    z = z_lookup[interval_size]
    return se * z


def print_pct(pct, digits=0):
    pct = pct * 100
    pct = np.round(pct, digits)
    if pct >= 100:
        if digits == 0:
            val = '>99.0%'
        else:
            val = '>99.'
            for d in range(digits - 1):
                val += '9'
            val += '9%'
    elif pct <= 0:
        if digits == 0:
            val = '<0.1%'
        else:
            val = '<0.'
            for d in range(digits - 1):
                val += '0'
            val += '1%'
    else:
        val = '{}%'.format(pct)
    return val


def calc_result(dem_vote, gop_vote, n, interval=0.8):
    GENERAL_POLLING_ERROR = 3
    N_SIMS = 100000
    
    dem_moe = margin_of_error(n=n, p=dem_vote/100, interval_size=interval)
    gop_moe = margin_of_error(n=n, p=gop_vote/100, interval_size=interval)
    undecided = (100 - dem_vote - gop_vote) / 2

    dem_mean = dem_vote + undecided * 0.25
    dem_raw_moe = dem_moe * 100
    dem_allocate_undecided = undecided * 0.4
    dem_margin = dem_raw_moe + dem_allocate_undecided + GENERAL_POLLING_ERROR
    
    gop_mean = gop_vote + undecided * 0.25
    gop_raw_moe = gop_moe * 100
    gop_allocate_undecided = undecided * 0.4
    gop_margin = gop_raw_moe + gop_allocate_undecided + GENERAL_POLLING_ERROR
    
    cdf_value = 0.5 + 0.5 * interval
    normed_sigma = stat.norm.ppf(cdf_value)
    
    dem_sigma = dem_margin / 100 / normed_sigma
    dem_sims = np.random.normal(dem_mean / 100, dem_sigma, N_SIMS)
    
    gop_sigma = gop_margin / 100 / normed_sigma
    gop_sims = np.random.normal(gop_mean / 100, gop_sigma, N_SIMS)
    
    chance_pass = np.sum([sim[0] / 100 > sim[1] / 100 for sim in zip(dem_sims, gop_sims)]) / N_SIMS
    
    low, high = np.percentile(dem_sims - gop_sims, [20, 80]) * 100
    
    return {'mean': dem_mean - gop_mean, 'high': high, 'low': low, 'n': n,
            'raw_moe': dem_raw_moe + gop_raw_moe,
            'margin': (dem_margin + gop_margin) / 2,
            'sigma': (dem_sigma + gop_sigma) / 2,
            'chance_pass': chance_pass}


def print_result(mean, high, low, n, raw_moe, margin, sigma, chance_pass, dem_name, gop_name=None):
    mean = np.round(mean, 1)
    first = np.round(high, 1)
    second = np.round(low, 1)
    sigma = np.round(sigma * 100, 1)
    raw_moe = np.round(raw_moe, 1)
    margin = np.round(margin, 1)
    chance_pass = print_pct(chance_pass, 1)
    if second < first:
        _ = first
        first = second
        second = _
    if second > 100:
        second = 100
    if first < -100:
        first = -100
    print(('Result {} {}{} (80% CI: {} to {}) (Weighted N={}) (raw_moe={}pts, margin={}pts, '
           'sigma={}pts) ({} {} likely to win)').format(dem_name,
                                                        '+' if mean > 0 else '',
                                                        mean,
                                                        first,
                                                        second,
                                                        n,
                                                        raw_moe,
                                                        margin,
                                                        sigma,
                                                        dem_name,
                                                        chance_pass))
    print('-')

In [223]:
survey = pd.read_csv('responses_processed.csv')

In [224]:
survey.shape

(1405, 34)

In [225]:
GA_CENSUS = {'survey_method': {'Online': 0.288, 'IVR': 0.712},
             'gender': {'Female': 0.511,
                        'Male': 0.483,
                        'Other': 0.006}, # Male-Female from 2010 US Census https://www.census.gov/prod/cen2010/briefs/c2010br-03.pdf, other from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5227946/
             'race': {'White, not Hispanic': 0.520,
                      'Black, non-Hispanic': 0.326,
                      'Hispanic': 0.099,
                      'Other': 0.055}, # https://www.census.gov/quickfacts/GA
             'education': {'Graduated from college': 0.183,
                           'Some college, no degree': 0.284,
                           'Completed graduate school': 0.091,
                           'Graduated from high school': 0.301,
                           'Less than high school': 0.141}, # https://statisticalatlas.com/state/Georgia/Educational-Attainment
             'income': {'Under $15,000': 0.1376,
                        'Between $15,000 and $49,999': 0.3524,
                        'Between $50,000 and $74,999': 0.1801,
                        'Between $75,000 and $99,999': 0.116,
                        'Between $100,000 and $150,000': 0.1207,
                        'Over $150,000': 0.0932}, # https://statisticalatlas.com/state/Georgia/Household-Income
             'age': {'18-34': 0.278,
                     '35-54': 0.359,
                     '55-64': 0.167,
                     '65 or older': 0.196}, # https://www.statista.com/statistics/910774/georgia-population-share-age-group/
             'vote2016': {'Donald Trump': 0.504,
                          'Hillary Clinton': 0.453,
                          'Other': 0.043},
             'vote2020': {'Joe Biden': 0.495,
                          'Donald Trump': 0.493,
                          'Other': 0.012},
             'loc_county': {'Fulton County, GA': 0.1,
                            'Cobb County, GA': 0.072,
                            'Gwinnett County, GA': 0.088,
                            'DeKalb County, GA': 0.0715,
                            'Another county in Georgia': 0.6685},
             'gss_trust': {'Can trust': 0.331,
                           'Can\'t be too careful': 0.669 }, # From GSS 2018 https://gssdataexplorer.norc.org/variables/441/vshow
             'gss_bible': {'Word of God': 0.41,
                           'Inspired word': 0.46,
                           'Book of fables': 0.13}, # From GSS 2018 https://gssdataexplorer.norc.org/variables/364/vshow (Region=South)
             'gss_spanking': {'Agree': 0.677, 'Disagree': 0.323},  # From GSS 2018 https://gssdataexplorer.norc.org/trends/Gender%20&%20Marriage?measure=spanking
             'social_fb': {'No': 0.31, 'Yes': 0.69}} # https://www.pewresearch.org/internet/fact-sheet/social-media/

# https://www.foxnews.com/elections/2020/general-results/voter-analysis?race=S&state=GA
FOX_NEWS_WEIGHTS = {'survey_method': {'Online': 0.288, 'IVR': 0.712},
                    'gender': {'Female': 0.524,
                               'Male': 0.47,
                               'Other': 0.006},
                    'race': {'White, not Hispanic': 0.63,
                             'Black, non-Hispanic': 0.29,
                             'Hispanic': 0.03,
                             'Other': 0.05},
                    'education': {'Completed graduate school': 0.15,
                                  'Graduated from college': 0.25,
                                  'Some college, no degree': 0.33,
                                  'Graduated from high school': 0.17,
                                  'Less than high school': 0.1},
                    'income': {'Under $15,000': 0.1376,
                               'Between $15,000 and $49,999': 0.3524,
                               'Between $50,000 and $74,999': 0.1801,
                               'Between $75,000 and $99,999': 0.116,
                               'Between $100,000 and $150,000': 0.1207,
                               'Over $150,000': 0.0932},
                    'age': {'18-34': 0.278,
                            '35-54': 0.359,
                            '55-64': 0.167,
                            '65 or older': 0.196},
                    'vote2016': {'Donald Trump': 0.504,
                                 'Hillary Clinton': 0.453,
                                 'Other': 0.043},
                    'vote2020': {'Joe Biden': 0.495,
                                 'Donald Trump': 0.493,
                                 'Other': 0.012},
                    'loc_county': {'Fulton County, GA': 0.1,
                                   'Cobb County, GA': 0.072,
                                   'Gwinnett County, GA': 0.088,
                                   'DeKalb County, GA': 0.0715,
                                   'Another county in Georgia': 0.6685},
                    'gss_trust': {'Can trust': 0.331,
                                  'Can\'t be too careful': 0.669 }, # From GSS 2018 https://gssdataexplorer.norc.org/variables/441/vshow
                    'gss_bible': {'Word of God': 0.41,
                                  'Inspired word': 0.46,
                                  'Book of fables': 0.13}, # From GSS 2018 https://gssdataexplorer.norc.org/variables/364/vshow (Region=South)
                    'gss_spanking': {'Agree': 0.677, 'Disagree': 0.323},  # From GSS 2018 https://gssdataexplorer.norc.org/trends/Gender%20&%20Marriage?measure=spanking
                    'social_fb': {'No': 0.31, 'Yes': 0.69}} # https://www.pewresearch.org/internet/fact-sheet/social-media/

DEFAULT_WEIGHTS = ['loc_county', 'gender', 'race', 'education', 'income', 'age', 'vote2016', 'vote2020']
EXPERIMENTAL_WEIGHTS = DEFAULT_WEIGHTS + ['gss_trust', 'gss_bible', 'gss_spanking', 'social_fb']
    
weigh_on = DEFAULT_WEIGHTS  # can switch to use EXPIERMENTAL_WEIGHTS if desired
weight_to = GA_CENSUS       # can switch to use FOX_NEWS_WEIGHTS if desired
run_weighting_iteration(survey, weigh_on=weigh_on, census=weight_to)


## loc_county ##
Another county in Georgia    0.645715
Cobb County, GA              0.605109
DeKalb County, GA            2.130488
Fulton County, GA            0.678139
Gwinnett County, GA          0.940550
dtype: float64
ERROR: 0.35324570374958486
-
-
## gender ##
Female               1.075522
Male                 1.225079
Other                0.563080
Prefer not to say    1.136319
dtype: float64
ERROR: 0.1468926532157499
-
-
## race ##
Black, non-Hispanic    0.710423
Hispanic               2.623421
Other                  0.306188
White, not Hispanic    0.359968
dtype: float64
ERROR: 0.5862569586062975
-
-
## education ##
Completed graduate school     0.341333
Graduated from college        0.392915
Graduated from high school    1.156902
Less than high school         2.484733
Some college, no degree       0.624117
dtype: float64
ERROR: 0.5015363665753937
-
-
## income ##
Between $100,000 and $150,000    1.275756
Between $15,000 and $49,999      0.880656
Between $50,000 and $74,999     

{'errors': [0.35324570374958486,
  0.1468926532157499,
  0.5862569586062975,
  0.5015363665753937,
  0.1744838528302456,
  0.4720599263156817,
  0.06549888918025813,
  0.159555328306894],
 'error_table': {'race': 1.6234208649704267,
  'education': 1.484732937472617,
  'loc_county': 1.1304875964632113,
  'age': 0.6536341994498072,
  'gender': 0.4369202697281025,
  'vote2020': 0.40707647991582774,
  'income': 0.35034967358962765,
  'vote2016': 0.1536527568970506},
 'weights': {'loc_county': Another county in Georgia    0.645715
  Cobb County, GA              0.605109
  DeKalb County, GA            2.130488
  Fulton County, GA            0.678139
  Gwinnett County, GA          0.940550
  dtype: float64,
  'gender': Female               1.075522
  Male                 1.225079
  Other                0.563080
  Prefer not to say    1.136319
  dtype: float64,
  'race': Black, non-Hispanic    0.710423
  Hispanic               2.623421
  Other                  0.306188
  White, not Hispanic   

In [226]:
 output = run_weighting_scheme(survey, iters=100, weigh_on=weigh_on, census=weight_to, verbose=1, early_terminate=False)

ITER 1/1 - initialization - ERROR 100
ITER 2/100 - weight loc_county - ERROR 2.4595296787801053
ITER 3/100 - weight gender - ERROR 2.101820123627781
ITER 4/100 - weight race - ERROR 1.9861293198563104
ITER 5/100 - weight education - ERROR 1.8710287109223458
ITER 6/100 - weight income - ERROR 1.7766716016204525
ITER 7/100 - weight age - ERROR 1.434701831232264
ITER 8/100 - weight vote2016 - ERROR 1.3364642463420258
ITER 9/100 - weight vote2020 - ERROR 1.034698115189538
ITER 10/100 - weight education - ERROR 1.0588916007918199
ITER 11/100 - weight gender - ERROR 0.6883149962780006
ITER 12/100 - weight race - ERROR 0.41928512936981843
ITER 13/100 - weight income - ERROR 0.34824318510490926
ITER 14/100 - weight vote2020 - ERROR 0.377978490932975
ITER 15/100 - weight vote2016 - ERROR 0.3275131632030449
ITER 16/100 - weight gender - ERROR 0.29261780633132206
ITER 17/100 - weight education - ERROR 0.3252931879377654
ITER 18/100 - weight vote2020 - ERROR 0.36733405253906476
ITER 19/100 - weigh

In [227]:
survey = output['final_df']
_ = run_weighting_iteration(survey, weigh_on=weigh_on, census=weight_to)

## loc_county ##
Another county in Georgia    0.999771
Cobb County, GA              0.999870
DeKalb County, GA            1.000622
Fulton County, GA            1.000190
Gwinnett County, GA          0.999547
dtype: float64
ERROR: 0.00024094966890593933
-
-
## gender ##
Female               0.999834
Male                 0.999710
Other                0.999931
Prefer not to say    1.000526
dtype: float64
ERROR: 0.00022088453690216354
-
-
## race ##
Black, non-Hispanic    0.999965
Hispanic               1.000136
Other                  1.000240
White, not Hispanic    0.999660
dtype: float64
ERROR: 0.0002622304551906532
-
-
## education ##
Completed graduate school     0.999718
Graduated from college        0.999780
Graduated from high school    1.000228
Less than high school         1.000313
Some college, no degree       0.999960
dtype: float64
ERROR: 0.0001811596664886946
-
-
## income ##
Between $100,000 and $150,000    1.000036
Between $15,000 and $49,999      0.999758
Between $50,000 and

In [228]:
survey['lv_index'] = 0
survey.loc[(survey['lv_likely'] == 'Very likely'), 'lv_index'] += 1
survey.loc[(survey['lv_likely'] == 'Already voted'), 'lv_index'] += 1
survey.loc[(survey['lv_likely'] == 'Likely'), 'lv_index'] += 0.7
survey.loc[(survey['lv_likely'] == 'Somewhat likely'), 'lv_index'] += 0.4
survey.loc[(survey['lv_likely'] == 'Neither likely nor unlikely'), 'lv_index'] += 0.2
survey.loc[(survey['lv_likely'] == 'Somewhat unlikely'), 'lv_index'] += 0.1
survey.loc[(survey['lv_likely'] == 'Unlikely'), 'lv_index'] += 0.05

survey.loc[(survey['vote2020'] != 'Did not vote'), 'lv_index'] += 0.5

max_pts = 1.5

survey['lv_index'].value_counts()


1.5    1189
0.5     148
1.2      25
0.9      19
1.0      16
0.0       5
0.4       3
Name: lv_index, dtype: int64

In [229]:
# https://www.pewresearch.org/methods/2016/01/07/measuring-the-likelihood-to-vote/
perry_gallup_loadings = {7: 0.83, 6: 0.63, 5: 0.59, 4: 0.4, 3: 0.34, 2: 0.23, 1: 0.13, 0: 0.11}
survey['lv_index'] = survey['lv_index'].apply(lambda l: perry_gallup_loadings[min(7, int(np.round(l * 7/max_pts)))])
survey['lv_weight'] = normalize_weights(survey['weight'] * survey['lv_index'])
survey['lv_index'].value_counts()

0.83    1189
0.23     151
0.63      25
0.40      19
0.59      16
0.11       5
Name: lv_index, dtype: int64

In [230]:
survey.to_csv('responses_processed_with_default_weights.csv', index=False)

In [231]:
print('## 2016 VOTE - DEMOGRAPHIC WEIGHTS ##')
options = ['Hillary Clinton', 'Donald Trump', 'Other', 'Did not vote']
survey_ = survey.loc[survey['vote2016'].isin(options)].copy()
survey_['weight'] = normalize_weights(survey_['weight'])
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
weighted_n = int(np.round(survey_['weight'].apply(lambda w: 1 if w > 1 else w).sum()))
lv_weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote2016'].value_counts(normalize=True) * survey_.groupby('vote2016')['weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
data = calc_result(dem_vote=votes['Hillary Clinton'],
                   gop_vote=votes['Donald Trump'],
                   n=weighted_n)
data['dem_name'] = 'Clinton'
print_result(**data)

## 2016 VOTE - DEMOGRAPHIC WEIGHTS ##
Hillary Clinton    39.506817
Donald Trump       44.005986
Other               3.752234
Did not vote       12.734964
dtype: float64
Result Clinton -4.5 (80% CI: -12.5 to 3.5) (Weighted N=743) (raw_moe=4.6pts, margin=8.6pts, sigma=6.7pts) (Clinton 31.9% likely to win)
-


In [232]:
print('## 2016 VOTE - DEMOGRAPHIC WEIGHTS + 2020 RUNOFF LIKELY VOTERS ##')
votes = survey_['vote2016'].value_counts(normalize=True) * survey_.groupby('vote2016')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
data = calc_result(dem_vote=votes['Hillary Clinton'],
                   gop_vote=votes['Donald Trump'],
                   n=lv_weighted_n)
data['dem_name'] = 'Clinton'
print_result(**data)

## 2016 VOTE - DEMOGRAPHIC WEIGHTS + 2020 RUNOFF LIKELY VOTERS ##
Hillary Clinton    38.706985
Donald Trump       47.323214
Other               3.769950
Did not vote       10.199851
dtype: float64
Result Clinton -8.6 (80% CI: -16.2 to -1.1) (Weighted N=725) (raw_moe=4.7pts, margin=8.1pts, sigma=6.4pts) (Clinton 16.7% likely to win)
-


In [233]:
print('## 2020 PREZ VOTE - DEMOGRAPHIC WEIGHTS ##')
options = ['Joe Biden', 'Donald Trump', 'Other', 'Did not vote']
survey_ = survey.loc[survey['vote2020'].isin(options)].copy()
survey_['weight'] = normalize_weights(survey_['weight'])
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
weighted_n = int(np.round(survey_['weight'].apply(lambda w: 1 if w > 1 else w).sum()))
lv_weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote2020'].value_counts(normalize=True) * survey_.groupby('vote2020')['weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
data = calc_result(dem_vote=votes['Joe Biden'],
                   gop_vote=votes['Donald Trump'],
                   n=weighted_n)
data['dem_name'] = 'Biden'
print_result(**data)

## 2020 PREZ VOTE - DEMOGRAPHIC WEIGHTS ##
Joe Biden       48.654448
Donald Trump    48.457865
Other            1.179502
Did not vote     1.708185
dtype: float64
Result Biden +0.2 (80% CI: -5.3 to 5.7) (Weighted N=743) (raw_moe=4.7pts, margin=5.9pts, sigma=4.6pts) (Biden 51.3% likely to win)
-


In [234]:
print('## 2020 PREZ VOTE - DEMOGRAPHIC WEIGHTS + 2020 RUNOFF LIKELY VOTERS ##')
votes = survey_['vote2020'].value_counts(normalize=True) * survey_.groupby('vote2020')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
data = calc_result(dem_vote=votes['Joe Biden'],
                   gop_vote=votes['Donald Trump'],
                   n=lv_weighted_n)
data['dem_name'] = 'Biden'
print_result(**data)

## 2020 PREZ VOTE - DEMOGRAPHIC WEIGHTS + 2020 RUNOFF LIKELY VOTERS ##
Joe Biden       45.821788
Donald Trump    52.166336
Other            1.125207
Did not vote     0.886670
dtype: float64
Result Biden -6.3 (80% CI: -11.8 to -1.0) (Weighted N=725) (raw_moe=4.7pts, margin=5.8pts, sigma=4.5pts) (Biden 15.9% likely to win)
-


In [235]:
print('## OSSOFF vs. PERDUE - DEMOGRAPHIC WEIGHTS ##')
options = ['Jon Ossoff', 'David Perdue', 'Undecided']
survey_ = survey.loc[survey['vote_ossoff_perdue'].isin(options)].copy()
survey_['weight'] = normalize_weights(survey_['weight'])
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
weighted_n = int(np.round(survey_['weight'].apply(lambda w: 1 if w > 1 else w).sum()))
lv_weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_ossoff_perdue'].value_counts(normalize=True) * survey_.groupby('vote_ossoff_perdue')['weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
data = calc_result(dem_vote=votes['Jon Ossoff'],
                   gop_vote=votes['David Perdue'],
                   n=weighted_n)
data['dem_name'] = 'Ossoff'
print_result(**data)

## OSSOFF vs. PERDUE - DEMOGRAPHIC WEIGHTS ##
Jon Ossoff      48.874874
David Perdue    46.196310
Undecided        4.928816
dtype: float64
Result Ossoff +2.7 (80% CI: -3.2 to 8.6) (Weighted N=743) (raw_moe=4.7pts, margin=6.3pts, sigma=4.9pts) (Ossoff 65.2% likely to win)
-


In [236]:
print('## OSSOFF vs. PERDUE - DEMOGRAPHIC WEIGHTS + 2020 RUNOFF LIKELY VOTERS ##')
votes = survey_['vote_ossoff_perdue'].value_counts(normalize=True) * survey_.groupby('vote_ossoff_perdue')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
data = calc_result(dem_vote=votes['Jon Ossoff'],
                   gop_vote=votes['David Perdue'],
                   n=lv_weighted_n)
data['dem_name'] = 'Ossoff'
print_result(**data)

## OSSOFF vs. PERDUE - DEMOGRAPHIC WEIGHTS + 2020 RUNOFF LIKELY VOTERS ##
Jon Ossoff      45.562568
David Perdue    49.511783
Undecided        4.925649
dtype: float64
Result Ossoff -3.9 (80% CI: -9.9 to 2.0) (Weighted N=725) (raw_moe=4.7pts, margin=6.4pts, sigma=5.0pts) (Ossoff 28.7% likely to win)
-


In [237]:
print('## WARNOCK vs. LOEFFLER - DEMOGRAPHIC WEIGHTS ##')
options = ['Raphael Warnock', 'Kelly Loeffler', 'Undecided']
survey_ = survey.loc[survey['vote_warnock_loeffler'].isin(options)].copy()
survey_['weight'] = normalize_weights(survey_['weight'])
survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
weighted_n = int(np.round(survey_['weight'].apply(lambda w: 1 if w > 1 else w).sum()))
lv_weighted_n = int(np.round(survey_['lv_weight'].apply(lambda w: 1 if w > 1 else w).sum()))
votes = survey_['vote_warnock_loeffler'].value_counts(normalize=True) * survey_.groupby('vote_warnock_loeffler')['weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
data = calc_result(dem_vote=votes['Raphael Warnock'],
                   gop_vote=votes['Kelly Loeffler'],
                   n=weighted_n)
data['dem_name'] = 'Warnock'
print_result(**data)

## WARNOCK vs. LOEFFLER - DEMOGRAPHIC WEIGHTS ##
Raphael Warnock    49.406443
Kelly Loeffler     46.914578
Undecided           3.678979
dtype: float64
Result Warnock +2.5 (80% CI: -3.1 to 8.2) (Weighted N=743) (raw_moe=4.7pts, margin=6.1pts, sigma=4.7pts) (Warnock 64.5% likely to win)
-


In [238]:
print('## WARNOCK vs. LOEFFLER - DEMOGRAPHIC WEIGHTS + 2020 LIKELY VOTER ##')
votes = survey_['vote_warnock_loeffler'].value_counts(normalize=True) * survey_.groupby('vote_warnock_loeffler')['lv_weight'].mean() * 100
votes = votes[options] * (100 / votes[options].sum())
print(votes)
data = calc_result(dem_vote=votes['Raphael Warnock'],
                   gop_vote=votes['Kelly Loeffler'],
                   n=lv_weighted_n)
data['dem_name'] = 'Warnock'
print_result(**data)

## WARNOCK vs. LOEFFLER - DEMOGRAPHIC WEIGHTS + 2020 LIKELY VOTER ##
Raphael Warnock    46.200395
Kelly Loeffler     50.342543
Undecided           3.457061
dtype: float64
Result Warnock -4.1 (80% CI: -9.7 to 1.5) (Weighted N=725) (raw_moe=4.7pts, margin=6.1pts, sigma=4.7pts) (Warnock 27.1% likely to win)
-
