In [144]:
import pandas as pd

from mlgear.utils import display_columns
from surveyweights import normalize_weights

In [145]:
survey = pd.read_csv('responses_processed_with_default_weights.csv')

## Raw Demographics

In [146]:
demographics = ['loc_county', 'gender', 'race', 'education', 'income', 'age', 'vote2016', 'vote2020']
for var in demographics:
    print('## {} ##'.format(var))
    print(survey[var].value_counts(normalize=True) * 100)
    print('-')
    print('-')

## loc_county ##
Another county in Georgia    72.455516
Fulton County, GA            10.320285
Cobb County, GA               8.327402
Gwinnett County, GA           6.548043
DeKalb County, GA             2.348754
Name: loc_county, dtype: float64
-
-
## gender ##
Female               53.950178
Male                 44.768683
Other                 1.209964
Prefer not to say     0.071174
Name: gender, dtype: float64
-
-
## race ##
White, not Hispanic    68.113879
Black, non-Hispanic    21.637011
Other                   8.469751
Hispanic                1.779359
Name: race, dtype: float64
-
-
## education ##
Graduated from college        30.960854
Some college, no degree       30.249110
Completed graduate school     17.722420
Graduated from high school    17.295374
Less than high school          3.772242
Name: education, dtype: float64
-
-
## income ##
Between $15,000 and $49,999      36.725979
Between $50,000 and $74,999      19.928826
Under $15,000                    16.441281
Between $75,0

## Demographics after weighting

In [147]:
for var in demographics:
    print('## {} ##'.format(var))
    print(survey[var].value_counts(normalize=True) * survey.groupby(var)['weight'].mean() * 100)
    print('-')
    print('-')

## loc_county ##
Another county in Georgia    66.836368
Cobb County, GA               7.209118
DeKalb County, GA             7.138179
Fulton County, GA             9.997984
Gwinnett County, GA           8.818350
dtype: float64
-
-
## gender ##
Female               51.056062
Male                 48.274189
Other                 0.598637
Prefer not to say     0.071112
dtype: float64
-
-
## race ##
Black, non-Hispanic    32.575141
Hispanic                9.897447
Other                   5.487773
White, not Hispanic    52.039639
dtype: float64
-
-
## education ##
Completed graduate school      9.119156
Graduated from college        18.333012
Graduated from high school    30.075030
Less than high school         14.067680
Some college, no degree       28.405122
dtype: float64
-
-
## income ##
Between $100,000 and $150,000    12.076122
Between $15,000 and $49,999      35.234993
Between $50,000 and $74,999      18.003221
Between $75,000 and $99,999      11.610271
Over $150,000                  

## Expected Vote Demographics (Demographics after Weighting + Likely Voter Model)

In [148]:
for var in demographics:
    print('## {} ##'.format(var))
    print(survey[var].value_counts(normalize=True) * survey.groupby(var)['lv_weight'].mean() * 100)
    print('-')
    print('-')

## loc_county ##
Another county in Georgia    68.822592
Cobb County, GA               6.997357
DeKalb County, GA             5.760525
Fulton County, GA             9.646536
Gwinnett County, GA           8.772989
dtype: float64
-
-
## gender ##
Female               50.364356
Male                 49.141186
Other                 0.436245
Prefer not to say     0.058213
dtype: float64
-
-
## race ##
Black, non-Hispanic    32.583761
Hispanic                9.259468
Other                   5.350309
White, not Hispanic    52.806462
dtype: float64
-
-
## education ##
Completed graduate school      9.112139
Graduated from college        17.868721
Graduated from high school    29.946849
Less than high school         15.095818
Some college, no degree       27.976473
dtype: float64
-
-
## income ##
Between $100,000 and $150,000    12.362446
Between $15,000 and $49,999      34.353384
Between $50,000 and $74,999      17.779579
Between $75,000 and $99,999      12.025944
Over $150,000                  

## Demographic by Vote

In [149]:
for var in demographics:
    print('## {} ##'.format(var))
    options = survey[var].unique()
    for option in options:
        print('-')
        print('---- vote choice for "{}"'.format(option))
        survey_ = survey[survey[var] == option]
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
        print('------ "{}" 2016 vote'.format(option))
        print(survey_['vote2016'].value_counts(normalize=True) * survey_.groupby('vote2016')['lv_weight'].mean() * 100)
        print('------ "{}" 2020 vote'.format(option))
        print(survey_['vote2020'].value_counts(normalize=True) * survey_.groupby('vote2020')['lv_weight'].mean() * 100)
        print('------ "{}" Ossoff-Perdue vote'.format(option))
        print(survey_['vote_ossoff_perdue'].value_counts(normalize=True) * survey_.groupby('vote_ossoff_perdue')['lv_weight'].mean() * 100)
        print('------ "{}" Warnock-Loeffler vote'.format(option))
        print(survey_['vote_warnock_loeffler'].value_counts(normalize=True) * survey_.groupby('vote_warnock_loeffler')['lv_weight'].mean() * 100)
    print('-')
    print('-')
    print('-')
    print('-')

## loc_county ##
-
---- vote choice for "Another county in Georgia"
------ "Another county in Georgia" 2016 vote
Did not vote        7.750357
Donald Trump       56.413235
Hillary Clinton    32.334045
Other               3.502364
dtype: float64
------ "Another county in Georgia" 2020 vote
Did not vote     0.717223
Donald Trump    59.393290
Joe Biden       38.376633
Other            1.512854
dtype: float64
------ "Another county in Georgia" Ossoff-Perdue vote
David Perdue    55.606177
Jon Ossoff      38.295047
Undecided        6.098776
dtype: float64
------ "Another county in Georgia" Warnock-Loeffler vote
Kelly Loeffler     57.114737
Raphael Warnock    39.066555
Undecided           3.818709
dtype: float64
-
---- vote choice for "Cobb County, GA"
------ "Cobb County, GA" 2016 vote
Did not vote       10.401862
Donald Trump       39.157145
Hillary Clinton    40.232280
Other              10.208713
dtype: float64
------ "Cobb County, GA" 2020 vote
Did not vote     0.824986
Donald Trump    49

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])



------ "Graduated from high school" Warnock-Loeffler vote
Kelly Loeffler     54.175218
Raphael Warnock    43.421703
Undecided           2.403078
dtype: float64
-
---- vote choice for "Graduated from college"
------ "Graduated from college" 2016 vote
Did not vote       11.790614
Donald Trump       31.560659
Hillary Clinton    51.692331
Other               4.956396
dtype: float64
------ "Graduated from college" 2020 vote
Did not vote     0.718612
Donald Trump    45.546812
Joe Biden       53.128643
Other            0.605932
dtype: float64
------ "Graduated from college" Ossoff-Perdue vote
David Perdue    43.790355
Jon Ossoff      53.791472
Undecided        2.418173
dtype: float64
------ "Graduated from college" Warnock-Loeffler vote
Kelly Loeffler     44.502756
Raphael Warnock    53.749824
Undecided           1.747420
dtype: float64
-
---- vote choice for "Some college, no degree"
------ "Some college, no degree" 2016 vote
Did not vote       15.237980
Donald Trump       46.695165
Hillary

dtype: float64
-
---- vote choice for "Donald Trump"
------ "Donald Trump" 2016 vote
Donald Trump    100.0
dtype: float64
------ "Donald Trump" 2020 vote
Did not vote     0.106697
Donald Trump    96.558530
Joe Biden        3.289246
Other            0.045527
dtype: float64
------ "Donald Trump" Ossoff-Perdue vote
David Perdue    90.957085
Jon Ossoff       5.042123
Undecided        4.000792
dtype: float64
------ "Donald Trump" Warnock-Loeffler vote
Kelly Loeffler     91.951192
Raphael Warnock     6.255275
Undecided           1.793533
dtype: float64
-
---- vote choice for "Other"
------ "Other" 2016 vote
Other    100.0
dtype: float64
------ "Other" 2020 vote
Did not vote     2.602827
Donald Trump    43.461970
Joe Biden       33.604492
Other           20.330712
dtype: float64
------ "Other" Ossoff-Perdue vote
David Perdue    36.438589
Jon Ossoff      31.541284
Undecided       32.020127
dtype: float64
------ "Other" Warnock-Loeffler vote
Kelly Loeffler     34.707898
Raphael Warnock    32.86

## Vote by Demographic

In [150]:
for vote in ['vote_ossoff_perdue', 'vote_warnock_loeffler']:
    for choice in survey[vote].unique():
        for var in demographics:
            print('{} = {}, broken down by {}'.format(vote, choice, var))
            survey_ = survey[survey[vote] == choice]
            survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
            print(survey_[var].value_counts(normalize=True) * survey_.groupby(var)['lv_weight'].mean() * 100)
            print('-')
        print('-')
        print('-')

vote_ossoff_perdue = Jon Ossoff, broken down by loc_county
Another county in Georgia    56.560060
Cobb County, GA               7.346536
DeKalb County, GA            10.756736
Fulton County, GA            14.631887
Gwinnett County, GA          10.704781
dtype: float64
-
vote_ossoff_perdue = Jon Ossoff, broken down by gender
Female               54.168026
Male                 45.349363
Other                 0.357684
Prefer not to say     0.124927
dtype: float64
-
vote_ossoff_perdue = Jon Ossoff, broken down by race
Black, non-Hispanic    57.191195
Hispanic                8.358398
Other                   6.810838
White, not Hispanic    27.639570
dtype: float64
-
vote_ossoff_perdue = Jon Ossoff, broken down by education
Completed graduate school     11.541432
Graduated from college        20.627336
Graduated from high school    25.253007
Less than high school         13.597336
Some college, no degree       28.980889
dtype: float64
-
vote_ossoff_perdue = Jon Ossoff, broken down by income
B

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])



-
vote_warnock_loeffler = Kelly Loeffler, broken down by vote2016
Did not vote        4.561303
Donald Trump       86.635370
Hillary Clinton     6.174854
Other               2.628472
dtype: float64
-
vote_warnock_loeffler = Kelly Loeffler, broken down by vote2020
Did not vote     0.735547
Donald Trump    96.096937
Joe Biden        3.055934
Other            0.111582
dtype: float64
-
-
-


## Demographic by Likely Voter

In [151]:
demographics += ['vote_warnock_loeffler', 'vote_ossoff_perdue']
for var in demographics:
    print('## {} ##'.format(var))
    options = survey[var].unique()
    for option in options:
        print('---- likely voter breakdown for "{}"'.format(option))
        survey_ = survey[survey[var] == option]
        survey_['weight'] = normalize_weights(survey_['weight'])
        print(survey_['lv_likely'].value_counts(normalize=True) * survey_.groupby('lv_likely')['weight'].mean() * 100)
        print('-')

## loc_county ##
---- likely voter breakdown for "Another county in Georgia"
Already voted      59.150365
Likely              1.621199
Somewhat likely     1.168048
Very likely        40.716662
dtype: float64
-
---- likely voter breakdown for "Cobb County, GA"
Already voted      56.969284
Likely              0.534410
Somewhat likely     1.241100
Very likely        37.094769
dtype: float64
-
---- likely voter breakdown for "DeKalb County, GA"
Likely              0.686027
Somewhat likely     2.546646
Very likely        90.341492
dtype: float64
-
---- likely voter breakdown for "Fulton County, GA"
Already voted      41.808485
Likely              0.563972
Somewhat likely     0.577118
Very likely        58.275725
dtype: float64
-
---- likely voter breakdown for "Gwinnett County, GA"
Already voted      42.523079
Likely              1.955894
Somewhat likely     0.664384
Very likely        54.491970
dtype: float64
-
## gender ##
---- likely voter breakdown for "Female"
Already voted      47.390

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])


## Likely Voter by Demographic

In [152]:
for lvx in survey['lv_likely'].unique():
    if isinstance(lvx, str):
        for var in demographics:
            print('lv_likely = {}, broken down by {}'.format(lvx, var))
            survey_ = survey[survey['lv_likely'] == lvx]
            survey_['weight'] = normalize_weights(survey_['weight'])
            print(survey_[var].value_counts(normalize=True) * survey_.groupby(var)['weight'].mean() * 100)
            print('-')

lv_likely = Very likely, broken down by loc_county
Another county in Georgia    62.765224
Cobb County, GA               5.848104
DeKalb County, GA             9.313659
Fulton County, GA            11.591402
Gwinnett County, GA          10.481611
dtype: float64
-
lv_likely = Very likely, broken down by gender
Female    54.553268
Male      45.194839
Other      0.251894
dtype: float64
-
lv_likely = Very likely, broken down by race
Black, non-Hispanic    29.602903
Hispanic                7.997968
Other                   5.573242
White, not Hispanic    56.825887
dtype: float64
-
lv_likely = Very likely, broken down by education


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])


Completed graduate school      5.590623
Graduated from college        20.620116
Graduated from high school    29.604378
Less than high school          9.043805
Some college, no degree       35.141078
dtype: float64
-
lv_likely = Very likely, broken down by income
Between $100,000 and $150,000    13.137030
Between $15,000 and $49,999      33.787391
Between $50,000 and $74,999      17.575521
Between $75,000 and $99,999      14.621868
Over $150,000                    10.500993
Under $15,000                    10.377197
dtype: float64
-
lv_likely = Very likely, broken down by age
18-34          27.185037
35-54          48.142626
55-64          12.755983
65 or older    11.916354
dtype: float64
-
lv_likely = Very likely, broken down by vote2016
Did not vote       13.818683
Donald Trump       54.110018
Hillary Clinton    30.134085
Other               1.937214
dtype: float64
-
lv_likely = Very likely, broken down by vote2020
Did not vote     1.360128
Donald Trump    60.117670
Joe Biden       3

## Demographic by Likely Voter Score

In [153]:
for var in demographics:
    print('## {} ##'.format(var))
    options = survey[var].unique()
    for option in options:
        print('---- likely voter score for "{}"'.format(option))
        survey_ = survey[survey[var] == option]
        survey_['weight'] = normalize_weights(survey_['weight'])
        print(survey_['lv_index'].value_counts(normalize=True) * survey_.groupby('lv_index')['weight'].mean() * 100)
        print('-')

## loc_county ##
---- likely voter score for "Another county in Georgia"
0.11     0.769977
0.13     0.126850
0.40     6.010693
0.59     0.947111
0.63     1.490611
0.83    90.654757
dtype: float64
-
---- likely voter score for "Cobb County, GA"
0.13     0.665447
0.40    17.771933
0.59     0.416537
0.63     0.465896
0.83    80.680187
dtype: float64
-
---- likely voter score for "DeKalb County, GA"
0.40    47.587074
0.59     1.466251
0.63     0.394985
0.83    50.551690
dtype: float64
-
---- likely voter score for "Fulton County, GA"
0.40    19.874984
0.59     0.457714
0.63     0.447288
0.83    79.220014
dtype: float64
-
---- likely voter score for "Gwinnett County, GA"


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])


0.11     0.732022
0.40    12.628564
0.59     0.577725
0.63     1.700778
0.83    84.360912
dtype: float64
-
## gender ##
---- likely voter score for "Female"
0.11     1.134394
0.13     0.217858
0.40    13.664324
0.59     0.435074
0.63     0.667893
0.83    83.880456
dtype: float64
-
---- likely voter score for "Male"
0.13     0.044590
0.40     9.216519
0.59     1.330490
0.63     1.722268
0.83    87.686133
dtype: float64
-
---- likely voter score for "Other"
0.40    61.880910
0.63     1.541738
0.83    36.577351
dtype: float64
-
---- likely voter score for "Prefer not to say"
0.63    100.0
dtype: float64
-
## race ##
---- likely voter score for "White, not Hispanic"
0.11     0.815908
0.13     0.133549
0.40     8.389525
0.59     1.306109
0.63     1.521718
0.83    87.833191
dtype: float64
-
---- likely voter score for "Black, non-Hispanic"
0.11     0.474538
0.13     0.194187
0.40    12.637090
0.59     0.092527
0.63     0.430768
0.83    86.170890
dtype: float64
-
---- likely voter score for "

## Likely Voter Score by Demographic

In [154]:
for lvx in sorted(survey['lv_index'].unique())[::-1]:
    for var in demographics:
        print('lv_index = {}, broken down by {}'.format(lvx, var))
        survey_ = survey[survey['lv_index'] == lvx]
        survey_['weight'] = normalize_weights(survey_['weight'])
        print(survey_[var].value_counts(normalize=True) * survey_.groupby(var)['weight'].mean() * 100)
        print('-')

lv_index = 0.83, broken down by loc_county
Another county in Georgia    70.969833
Cobb County, GA               6.812702
DeKalb County, GA             4.226622
Fulton County, GA             9.277217
Gwinnett County, GA           8.713627
dtype: float64
-
lv_index = 0.83, broken down by gender
Female    50.162415
Male      49.581110
Other      0.256476
dtype: float64
-
lv_index = 0.83, broken down by race
Black, non-Hispanic    32.878895
Hispanic                8.499787
Other                   5.083179
White, not Hispanic    53.538139
dtype: float64
-
lv_index = 0.83, broken down by education
Completed graduate school      8.997678
Graduated from college        17.276275
Graduated from high school    30.177008
Less than high school         16.192179
Some college, no degree       27.356860
dtype: float64
-
lv_index = 0.83, broken down by income
Between $100,000 and $150,000    12.656617
Between $15,000 and $49,999      33.545749
Between $50,000 and $74,999      17.514344
Between $75,000 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])



-
lv_index = 0.13, broken down by vote_warnock_loeffler
Raphael Warnock    47.649279
Undecided          52.350721
dtype: float64
-
lv_index = 0.13, broken down by vote_ossoff_perdue
Jon Ossoff    47.649279
Undecided     52.350721
dtype: float64
-
lv_index = 0.11, broken down by loc_county
Another county in Georgia    88.854486
Gwinnett County, GA          11.145514
dtype: float64
-
lv_index = 0.11, broken down by gender
Female    100.0
dtype: float64
-
lv_index = 0.11, broken down by race
Black, non-Hispanic    26.689842
White, not Hispanic    73.310158
dtype: float64
-
lv_index = 0.11, broken down by education
Graduated from high school    81.602022
Some college, no degree       18.397978
dtype: float64
-
lv_index = 0.11, broken down by income
Between $15,000 and $49,999    41.041444
Between $50,000 and $74,999    32.268714
Under $15,000                  26.689842
dtype: float64
-
lv_index = 0.11, broken down by age
18-34    77.356534
35-54    22.643466
dtype: float64
-
lv_index = 0.

## Demographic by Enthusiasm

In [155]:
for var in demographics:
    print('## {} ##'.format(var))
    options = survey[var].unique()
    for option in options:
        print('---- enthusiasm breakdown for "{}"'.format(option))
        survey_ = survey[survey[var] == option]
        survey_['weight'] = normalize_weights(survey_['weight'])
        print(survey_['enthusiasm'].value_counts(normalize=True) * survey_.groupby('enthusiasm')['weight'].mean() * 100)
        print('-')

## loc_county ##
---- enthusiasm breakdown for "Another county in Georgia"
About the same       13.879291
Less enthusiastic    14.862420
More enthusiastic    71.258289
dtype: float64
-
---- enthusiasm breakdown for "Cobb County, GA"
About the same       23.389811
Less enthusiastic    18.848827
More enthusiastic    57.761363
dtype: float64
-
---- enthusiasm breakdown for "DeKalb County, GA"
About the same       31.642241
Less enthusiastic     1.459160
More enthusiastic    66.898599
dtype: float64
-
---- enthusiasm breakdown for "Fulton County, GA"
About the same       13.616859
Less enthusiastic    10.623790
More enthusiastic    75.759351
dtype: float64
-
---- enthusiasm breakdown for "Gwinnett County, GA"
About the same       11.923260
Less enthusiastic     8.216472
More enthusiastic    79.860268
dtype: float64
-
## gender ##
---- enthusiasm breakdown for "Female"
About the same       17.127393
Less enthusiastic    15.551277
More enthusiastic    67.321330
dtype: float64
-
---- enthusia

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])


## Enthusiasm by Demographic

In [156]:
for enth in sorted(survey['enthusiasm'].unique())[::-1]:
    for var in demographics:
        print('enthusiasm = {}, broken down by {}'.format(enth, var))
        survey_ = survey[survey['enthusiasm'] == enth]
        survey_['weight'] = normalize_weights(survey_['weight'])
        print(survey_[var].value_counts(normalize=True) * survey_.groupby(var)['weight'].mean() * 100)
        print('-')

enthusiasm = More enthusiastic, broken down by loc_county
Another county in Georgia    66.907393
Cobb County, GA               5.849860
DeKalb County, GA             6.708576
Fulton County, GA            10.640807
Gwinnett County, GA           9.893364
dtype: float64

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])



-
enthusiasm = More enthusiastic, broken down by gender
Female    48.286517
Male      50.910270
Other      0.803214
dtype: float64
-
enthusiasm = More enthusiastic, broken down by race
Black, non-Hispanic    33.808535
Hispanic                8.418279
Other                   5.942238
White, not Hispanic    51.830948
dtype: float64
-
enthusiasm = More enthusiastic, broken down by education
Completed graduate school      9.550499
Graduated from college        19.066325
Graduated from high school    28.835499
Less than high school         15.867393
Some college, no degree       26.680285
dtype: float64
-
enthusiasm = More enthusiastic, broken down by income
Between $100,000 and $150,000    14.344167
Between $15,000 and $49,999      34.663313
Between $50,000 and $74,999      16.391669
Between $75,000 and $99,999      10.359263
Over $150,000                    10.446555
Under $15,000                    13.795033
dtype: float64
-
enthusiasm = More enthusiastic, broken down by age
18-34      

## Demographic by Vote Method

In [157]:
for var in demographics:
    print('## {} ##'.format(var))
    options = survey[var].unique()
    for option in options:
        print('---- vote method breakdown for "{}"'.format(option))
        survey_ = survey[survey[var] == option]
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
        print(survey_['vote_method'].value_counts(normalize=True) * survey_.groupby('vote_method')['lv_weight'].mean() * 100)
        print('-')

## loc_county ##
---- vote method breakdown for "Another county in Georgia"
Absentee by mail               33.547243
Early vote in person           47.652757
I will vote on election day    18.799999
dtype: float64
-
---- vote method breakdown for "Cobb County, GA"
Absentee by mail               50.716129
Early vote in person           33.862338
I will vote on election day    15.421533
dtype: float64
-
---- vote method breakdown for "DeKalb County, GA"
Absentee by mail               54.444071
Early vote in person           31.181514
I will vote on election day    14.374415
dtype: float64
-
---- vote method breakdown for "Fulton County, GA"
Absentee by mail               32.939703
Early vote in person           46.869472
I will vote on election day    20.190825
dtype: float64
-
---- vote method breakdown for "Gwinnett County, GA"
Absentee by mail               36.713096
Early vote in person           44.471178
I will vote on election day    18.815726
dtype: float64
-
## gender ##
---- vo

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])



-
---- vote method breakdown for "Hispanic"
Absentee by mail               32.113756
Early vote in person           50.021780
I will vote on election day    17.864464
dtype: float64
-
---- vote method breakdown for "Other"
Absentee by mail               31.179864
Early vote in person           53.050254
I will vote on election day    15.769882
dtype: float64
-
## education ##
---- vote method breakdown for "Completed graduate school"
Absentee by mail               43.031612
Early vote in person           46.533744
I will vote on election day    10.434644
dtype: float64
-
---- vote method breakdown for "Graduated from high school"
Absentee by mail               43.194551
Early vote in person           38.075494
I will vote on election day    18.729955
dtype: float64
-
---- vote method breakdown for "Graduated from college"
Absentee by mail               28.327088
Early vote in person           52.223197
I will vote on election day    19.449714
dtype: float64
-
---- vote method breakdow

## Vote Method by Demographic

In [158]:
for method in sorted(survey['vote_method'].unique())[::-1]:
    for var in demographics:
        print('vote method = "{}", broken down by {}'.format(method, var))
        survey_ = survey[survey['vote_method'] == method]
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
        print(survey_[var].value_counts(normalize=True) * survey_.groupby(var)['weight'].mean() * 100)
        print('-')

vote method = "I will vote on election day", broken down by loc_county
Another county in Georgia    90.314362
Cobb County, GA               7.828920
DeKalb County, GA             5.727782
Fulton County, GA            13.354684
Gwinnett County, GA          11.376176
dtype: float64
-
vote method = "I will vote on election day", broken down by gender
Female    63.052638
Male      65.549286
dtype: float64

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])



-
vote method = "I will vote on election day", broken down by race
Black, non-Hispanic    22.076401
Hispanic               11.341858
Other                   5.849370
White, not Hispanic    89.334295
dtype: float64
-
vote method = "I will vote on election day", broken down by education
Completed graduate school      6.663138
Graduated from college        24.116917
Graduated from high school    38.727664
Less than high school         16.029835
Some college, no degree       43.064370
dtype: float64
-
vote method = "I will vote on election day", broken down by income
Between $100,000 and $150,000    15.612657
Between $15,000 and $49,999      36.720956
Between $50,000 and $74,999      17.893626
Between $75,000 and $99,999      19.995126
Over $150,000                    22.697814
Under $15,000                    15.681744
dtype: float64
-
vote method = "I will vote on election day", broken down by age
18-34          18.678473
35-54          72.277893
55-64          19.650058
65 or older    