In [114]:
import pandas as pd

from mlgear.utils import display_columns
from surveyweights import normalize_weights

In [115]:
# Load the processed survey responses (one row per respondent) that every
# later cell in this notebook reads from.
survey = pd.read_csv(
    filepath_or_buffer='responses_processed_with_default_weights.csv',
)

## Raw Demographics

In [116]:
# Columns whose answer distributions are reported in the cells below.
demographics = [
    'loc_county', 'gender', 'race', 'education',
    'income', 'age', 'vote2016', 'vote2020',
]

# Unweighted distribution: percentage of rows giving each answer.
for column in demographics:
    heading = '## {} ##'.format(column)
    raw_share = survey[column].value_counts(normalize=True)
    print(heading)
    print(raw_share * 100)
    # Two dash lines separate consecutive sections in the printed output.
    print('-', '-', sep='\n')

## loc_county ##
Another county in Georgia    72.455516
Fulton County, GA            10.320285
Cobb County, GA               8.327402
Gwinnett County, GA           6.548043
DeKalb County, GA             2.348754
Name: loc_county, dtype: float64
-
-
## gender ##
Female               53.950178
Male                 44.768683
Other                 1.209964
Prefer not to say     0.071174
Name: gender, dtype: float64
-
-
## race ##
White, not Hispanic    68.113879
Black, non-Hispanic    21.637011
Other                   8.469751
Hispanic                1.779359
Name: race, dtype: float64
-
-
## education ##
Graduated from college        30.960854
Some college, no degree       30.249110
Completed graduate school     17.722420
Graduated from high school    17.295374
Less than high school          3.772242
Name: education, dtype: float64
-
-
## income ##
Between $15,000 and $49,999      36.725979
Between $50,000 and $74,999      19.928826
Under $15,000                    16.441281
Between $75,0

## Demographics After Weighting

In [117]:
# Weighted distribution of each demographic column.
# Per answer: (fraction of rows with that answer) x (mean `weight` of those rows),
# expressed as a percentage.
for column in demographics:
    print('## {} ##'.format(column))
    row_fraction = survey[column].value_counts(normalize=True)
    mean_weight = survey.groupby(column)['weight'].mean()
    # Both series are indexed by answer, so the product aligns on the answer label.
    print(row_fraction * mean_weight * 100)
    print('-', '-', sep='\n')

## loc_county ##
Another county in Georgia    66.856037
Cobb County, GA               7.199936
DeKalb County, GA             7.144560
Fulton County, GA             9.996708
Gwinnett County, GA           8.802759
dtype: float64
-
-
## gender ##
Female               51.060622
Male                 48.268777
Other                 0.599480
Prefer not to say     0.071121
dtype: float64
-
-
## race ##
Black, non-Hispanic    32.595866
Hispanic                9.897053
Other                   5.497792
White, not Hispanic    52.009290
dtype: float64
-
-
## education ##
Completed graduate school      9.102887
Graduated from college        18.304673
Graduated from high school    30.094212
Less than high school         14.096090
Some college, no degree       28.402138
dtype: float64
-
-
## income ##
Between $100,000 and $150,000    12.068808
Between $15,000 and $49,999      35.246324
Between $50,000 and $74,999      18.006331
Between $75,000 and $99,999      11.599689
Over $150,000                  

## Expected Vote Demographics (Demographics after Weighting + Likely Voter Model)

In [118]:
# Distribution of each demographic column using the `lv_weight` column instead
# of `weight`.
# Per answer: (fraction of rows with that answer) x (mean `lv_weight` of those rows),
# expressed as a percentage.
for column in demographics:
    print('## {} ##'.format(column))
    row_fraction = survey[column].value_counts(normalize=True)
    mean_lv_weight = survey.groupby(column)['lv_weight'].mean()
    # Both series are indexed by answer, so the product aligns on the answer label.
    print(row_fraction * mean_lv_weight * 100)
    print('-', '-', sep='\n')

## loc_county ##
Another county in Georgia    69.893732
Cobb County, GA               6.830933
DeKalb County, GA             5.205877
Fulton County, GA             9.341776
Gwinnett County, GA           8.727682
dtype: float64
-
-
## gender ##
Female               50.082035
Male                 49.305544
Other                 0.552167
Prefer not to say     0.060254
dtype: float64
-
-
## race ##
Black, non-Hispanic    32.572761
Hispanic                9.147083
Other                   5.312267
White, not Hispanic    52.967888
dtype: float64
-
-
## education ##
Completed graduate school      8.956373
Graduated from college        17.569730
Graduated from high school    30.169418
Less than high school         15.581855
Some college, no degree       27.722624
dtype: float64
-
-
## income ##
Between $100,000 and $150,000    12.359966
Between $15,000 and $49,999      34.366229
Between $50,000 and $74,999      17.516048
Between $75,000 and $99,999      12.149610
Over $150,000                  

## Demographic by Vote

In [None]:
# For every answer to every demographic question, print how that subgroup
# splits on each of the four vote questions, weighted by `lv_weight`.

# (heading template, column) pairs; the template receives the demographic
# answer currently being profiled.
vote_questions = [
    ('------ "{}" 2016 vote', 'vote2016'),
    ('------ "{}" 2020 vote', 'vote2020'),
    ('------ "{}" Ossoff-Perdue vote', 'vote_ossoff_perdue'),
    ('------ "{}" Warnock-Loeffler vote', 'vote_warnock_loeffler'),
]

for var in demographics:
    print('## {} ##'.format(var))
    # Missing answers are dropped: `survey[var] == NaN` never matches, so a NaN
    # option could only ever produce an empty subset.
    options = survey[var].dropna().unique()
    for option in options:
        print('-')
        print('---- vote choice for "{}"'.format(option))
        # Explicit copy: the assignment below must write to an independent
        # frame, not to a slice of `survey` (avoids SettingWithCopyWarning).
        survey_ = survey[survey[var] == option].copy()
        # NOTE(review): normalize_weights presumably rescales the weights
        # relative to this subgroup only — confirm against surveyweights.
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
        for heading, question in vote_questions:
            print(heading.format(option))
            row_fraction = survey_[question].value_counts(normalize=True)
            mean_lv_weight = survey_.groupby(question)['lv_weight'].mean()
            print(row_fraction * mean_lv_weight * 100)
    print('-')
    print('-')
    print('-')
    print('-')

## loc_county ##
-
---- vote choice for "Another county in Georgia"
------ "Another county in Georgia" 2016 vote
Did not vote        7.544310
Donald Trump       56.708254
Hillary Clinton    32.278083
Other               3.469352
dtype: float64
------ "Another county in Georgia" 2020 vote
Did not vote     1.015949
Donald Trump    59.899854
Joe Biden       37.602152
Other            1.482045
dtype: float64
------ "Another county in Georgia" Ossoff-Perdue vote
David Perdue    56.262463
Jon Ossoff      37.718859
Undecided        6.018678
dtype: float64
------ "Another county in Georgia" Warnock-Loeffler vote
Kelly Loeffler     57.772963
Raphael Warnock    38.354989
Undecided           3.872048
dtype: float64
-
---- vote choice for "Cobb County, GA"
------ "Cobb County, GA" 2016 vote
Did not vote        8.800234
Donald Trump       40.636977
Hillary Clinton    40.294460
Other              10.268329
dtype: float64
------ "Cobb County, GA" 2020 vote
Did not vote     1.167102
Donald Trump    51

## Vote by Demographic

In [None]:
# For each runoff question and each answer to it, print the demographic
# make-up of the respondents who gave that answer, weighted by `lv_weight`.
for vote in ['vote_ossoff_perdue', 'vote_warnock_loeffler']:
    # Missing answers are dropped: `== NaN` never matches, so a NaN choice
    # could only ever produce an empty subset.
    for choice in survey[vote].dropna().unique():
        # The subset depends only on (vote, choice), so build and normalize it
        # once instead of once per demographic column. The explicit copy keeps
        # the assignment from writing into a slice of `survey`.
        survey_ = survey[survey[vote] == choice].copy()
        # NOTE(review): normalize_weights presumably rescales the weights
        # relative to this subset only — confirm against surveyweights.
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
        for var in demographics:
            print('{} = {}, broken down by {}'.format(vote, choice, var))
            row_fraction = survey_[var].value_counts(normalize=True)
            mean_lv_weight = survey_.groupby(var)['lv_weight'].mean()
            print(row_fraction * mean_lv_weight * 100)
            print('-')
        print('-')
        print('-')

## Demographic by Likely Voter

In [None]:
# From this cell on, the two runoff vote questions are profiled alongside the
# demographic columns. The list is extended in place (later cells rely on it),
# but only with entries that are missing, so re-running this cell does not
# append duplicates and repeat sections in every later breakdown.
for extra_column in ['vote_warnock_loeffler', 'vote_ossoff_perdue']:
    if extra_column not in demographics:
        demographics.append(extra_column)

# For every answer to every profiled column, print the `lv_likely`
# distribution of that subgroup, weighted by `weight`.
for var in demographics:
    print('## {} ##'.format(var))
    # Missing answers are dropped: `survey[var] == NaN` never matches, so a NaN
    # option could only ever produce an empty subset.
    options = survey[var].dropna().unique()
    for option in options:
        print('---- likely voter breakdown for "{}"'.format(option))
        # Explicit copy: the assignment below must write to an independent
        # frame, not to a slice of `survey` (avoids SettingWithCopyWarning).
        survey_ = survey[survey[var] == option].copy()
        # NOTE(review): normalize_weights presumably rescales the weights
        # relative to this subgroup only — confirm against surveyweights.
        survey_['weight'] = normalize_weights(survey_['weight'])
        row_fraction = survey_['lv_likely'].value_counts(normalize=True)
        mean_weight = survey_.groupby('lv_likely')['weight'].mean()
        print(row_fraction * mean_weight * 100)
        print('-')

## Likely Voter by Demographic

In [None]:
# For each `lv_likely` answer, print the make-up of the respondents who gave
# it across every profiled column, weighted by `weight`.
for lvx in survey['lv_likely'].unique():
    # Only string answers are profiled; anything else (e.g. NaN for a missing
    # answer) is skipped.
    if not isinstance(lvx, str):
        continue
    # The subset depends only on `lvx`, so build and normalize it once rather
    # than once per column. The explicit copy keeps the assignment from
    # writing into a slice of `survey` (avoids SettingWithCopyWarning).
    survey_ = survey[survey['lv_likely'] == lvx].copy()
    # NOTE(review): normalize_weights presumably rescales the weights relative
    # to this subset only — confirm against surveyweights.
    survey_['weight'] = normalize_weights(survey_['weight'])
    for var in demographics:
        print('lv_likely = {}, broken down by {}'.format(lvx, var))
        row_fraction = survey_[var].value_counts(normalize=True)
        mean_weight = survey_.groupby(var)['weight'].mean()
        print(row_fraction * mean_weight * 100)
        print('-')

## Demographic by Likely Voter Score

In [None]:
# For every answer to every profiled column, print the `lv_index`
# distribution of that subgroup, weighted by `weight`.
for var in demographics:
    print('## {} ##'.format(var))
    # Missing answers are dropped: `survey[var] == NaN` never matches, so a NaN
    # option could only ever produce an empty subset.
    options = survey[var].dropna().unique()
    for option in options:
        print('---- likely voter score for "{}"'.format(option))
        # Explicit copy: the assignment below must write to an independent
        # frame, not to a slice of `survey` (avoids SettingWithCopyWarning).
        survey_ = survey[survey[var] == option].copy()
        # NOTE(review): normalize_weights presumably rescales the weights
        # relative to this subgroup only — confirm against surveyweights.
        survey_['weight'] = normalize_weights(survey_['weight'])
        row_fraction = survey_['lv_index'].value_counts(normalize=True)
        mean_weight = survey_.groupby('lv_index')['weight'].mean()
        print(row_fraction * mean_weight * 100)
        print('-')

## Likely Voter Score by Demographic

In [None]:
# For each `lv_index` value, from highest to lowest, print the make-up of the
# respondents with that value across every profiled column, weighted by `weight`.
# NaN is dropped before sorting: it has no defined sort position and
# `== NaN` never matches, so it could only produce an empty subset.
for lvx in sorted(survey['lv_index'].dropna().unique(), reverse=True):
    # The subset depends only on `lvx`, so build and normalize it once rather
    # than once per column. The explicit copy keeps the assignment from
    # writing into a slice of `survey` (avoids SettingWithCopyWarning).
    survey_ = survey[survey['lv_index'] == lvx].copy()
    # NOTE(review): normalize_weights presumably rescales the weights relative
    # to this subset only — confirm against surveyweights.
    survey_['weight'] = normalize_weights(survey_['weight'])
    for var in demographics:
        print('lv_index = {}, broken down by {}'.format(lvx, var))
        row_fraction = survey_[var].value_counts(normalize=True)
        mean_weight = survey_.groupby(var)['weight'].mean()
        print(row_fraction * mean_weight * 100)
        print('-')

## Demographic by Enthusiasm

In [None]:
# For every answer to every profiled column, print the `enthusiasm`
# distribution of that subgroup, weighted by `weight`.
for var in demographics:
    print('## {} ##'.format(var))
    # Missing answers are dropped: `survey[var] == NaN` never matches, so a NaN
    # option could only ever produce an empty subset.
    options = survey[var].dropna().unique()
    for option in options:
        print('---- enthusiasm breakdown for "{}"'.format(option))
        # Explicit copy: the assignment below must write to an independent
        # frame, not to a slice of `survey` (avoids SettingWithCopyWarning).
        survey_ = survey[survey[var] == option].copy()
        # NOTE(review): normalize_weights presumably rescales the weights
        # relative to this subgroup only — confirm against surveyweights.
        survey_['weight'] = normalize_weights(survey_['weight'])
        row_fraction = survey_['enthusiasm'].value_counts(normalize=True)
        mean_weight = survey_.groupby('enthusiasm')['weight'].mean()
        print(row_fraction * mean_weight * 100)
        print('-')

## Enthusiasm by Demographic

In [None]:
# For each `enthusiasm` value, in descending sort order, print the make-up of
# the respondents with that value across every profiled column, weighted by
# `weight`.
# NaN is dropped before sorting: it has no defined sort position (and cannot be
# compared with strings at all), and `== NaN` never matches anyway.
for enth in sorted(survey['enthusiasm'].dropna().unique(), reverse=True):
    # The subset depends only on `enth`, so build and normalize it once rather
    # than once per column. The explicit copy keeps the assignment from
    # writing into a slice of `survey` (avoids SettingWithCopyWarning).
    survey_ = survey[survey['enthusiasm'] == enth].copy()
    # NOTE(review): normalize_weights presumably rescales the weights relative
    # to this subset only — confirm against surveyweights.
    survey_['weight'] = normalize_weights(survey_['weight'])
    for var in demographics:
        print('enthusiasm = {}, broken down by {}'.format(enth, var))
        row_fraction = survey_[var].value_counts(normalize=True)
        mean_weight = survey_.groupby(var)['weight'].mean()
        print(row_fraction * mean_weight * 100)
        print('-')

## Demographic by Vote Method

In [None]:
# For every answer to every profiled column, print the `vote_method`
# distribution of that subgroup, weighted by `lv_weight`.
for var in demographics:
    print('## {} ##'.format(var))
    # Missing answers are dropped: `survey[var] == NaN` never matches, so a NaN
    # option could only ever produce an empty subset.
    options = survey[var].dropna().unique()
    for option in options:
        print('---- vote method breakdown for "{}"'.format(option))
        # Explicit copy: the assignment below must write to an independent
        # frame, not to a slice of `survey` (avoids SettingWithCopyWarning).
        survey_ = survey[survey[var] == option].copy()
        # NOTE(review): normalize_weights presumably rescales the weights
        # relative to this subgroup only — confirm against surveyweights.
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
        row_fraction = survey_['vote_method'].value_counts(normalize=True)
        mean_lv_weight = survey_.groupby('vote_method')['lv_weight'].mean()
        print(row_fraction * mean_lv_weight * 100)
        print('-')

## Vote Method by Demographic

In [None]:
# For each `vote_method` answer, in descending sort order, print the make-up
# of the respondents who gave it across every profiled column, weighted by
# `lv_weight`.
# NaN is dropped before sorting: a float NaN cannot be ordered against string
# answers (sorted() raises TypeError), and `== NaN` never matches anyway.
for method in sorted(survey['vote_method'].dropna().unique(), reverse=True):
    # The subset depends only on `method`, so build and normalize it once
    # rather than once per column. The explicit copy keeps the assignment from
    # writing into a slice of `survey` (avoids SettingWithCopyWarning).
    survey_ = survey[survey['vote_method'] == method].copy()
    # NOTE(review): normalize_weights presumably rescales the weights relative
    # to this subset only — confirm against surveyweights.
    survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
    for var in demographics:
        print('vote method = "{}", broken down by {}'.format(method, var))
        row_fraction = survey_[var].value_counts(normalize=True)
        # Aggregate the column that was just normalized (`lv_weight`). The
        # previous code averaged the un-normalized `weight` column here, which
        # disagreed with the normalization step above and with the companion
        # "Demographic by Vote Method" cell.
        mean_lv_weight = survey_.groupby(var)['lv_weight'].mean()
        print(row_fraction * mean_lv_weight * 100)
        print('-')