In [192]:
import pandas as pd

from mlgear.utils import display_columns
from surveyweights import normalize_weights

In [193]:
survey = pd.read_csv('responses_processed_with_default_weights.csv')

## Raw Demographics

In [194]:
demographics = ['loc_county', 'gender', 'race', 'education', 'income', 'age', 'vote2016', 'vote2020']
for var in demographics:
    print('## {} ##'.format(var))
    print(survey[var].value_counts(normalize=True) * 100)
    print('-')
    print('-')

## loc_county ##
Another county in Georgia    72.455516
Fulton County, GA            10.320285
Cobb County, GA               8.327402
Gwinnett County, GA           6.548043
DeKalb County, GA             2.348754
Name: loc_county, dtype: float64
-
-
## gender ##
Female               53.950178
Male                 44.768683
Other                 1.209964
Prefer not to say     0.071174
Name: gender, dtype: float64
-
-
## race ##
White, not Hispanic    68.113879
Black, non-Hispanic    21.637011
Other                   8.469751
Hispanic                1.779359
Name: race, dtype: float64
-
-
## education ##
Graduated from college        30.960854
Some college, no degree       30.249110
Completed graduate school     17.722420
Graduated from high school    17.295374
Less than high school          3.772242
Name: education, dtype: float64
-
-
## income ##
Between $15,000 and $49,999      36.725979
Between $50,000 and $74,999      19.928826
Under $15,000                    16.441281
Between $75,0

## Demographics after weighting

In [195]:
for var in demographics:
    print('## {} ##'.format(var))
    print(survey[var].value_counts(normalize=True) * survey.groupby(var)['weight'].mean() * 100)
    print('-')
    print('-')

## loc_county ##
Another county in Georgia    66.836368
Cobb County, GA               7.209118
DeKalb County, GA             7.138179
Fulton County, GA             9.997984
Gwinnett County, GA           8.818350
dtype: float64
-
-
## gender ##
Female               51.056062
Male                 48.274189
Other                 0.598637
Prefer not to say     0.071112
dtype: float64
-
-
## race ##
Black, non-Hispanic    32.575141
Hispanic                9.897447
Other                   5.487773
White, not Hispanic    52.039639
dtype: float64
-
-
## education ##
Completed graduate school      9.119156
Graduated from college        18.333012
Graduated from high school    30.075030
Less than high school         14.067680
Some college, no degree       28.405122
dtype: float64
-
-
## income ##
Between $100,000 and $150,000    12.076122
Between $15,000 and $49,999      35.234993
Between $50,000 and $74,999      18.003221
Between $75,000 and $99,999      11.610271
Over $150,000                  

## Expected Vote Demographics (Demographics after Weighting + Likely Voter Model)

In [196]:
for var in demographics:
    print('## {} ##'.format(var))
    print(survey[var].value_counts(normalize=True) * survey.groupby(var)['lv_weight'].mean() * 100)
    print('-')
    print('-')

## loc_county ##
Another county in Georgia    68.822592
Cobb County, GA               6.997357
DeKalb County, GA             5.760525
Fulton County, GA             9.646536
Gwinnett County, GA           8.772989
dtype: float64
-
-
## gender ##
Female               50.364356
Male                 49.141186
Other                 0.436245
Prefer not to say     0.058213
dtype: float64
-
-
## race ##
Black, non-Hispanic    32.583761
Hispanic                9.259468
Other                   5.350309
White, not Hispanic    52.806462
dtype: float64
-
-
## education ##
Completed graduate school      9.112139
Graduated from college        17.868721
Graduated from high school    29.946849
Less than high school         15.095818
Some college, no degree       27.976473
dtype: float64
-
-
## income ##
Between $100,000 and $150,000    12.362446
Between $15,000 and $49,999      34.353384
Between $50,000 and $74,999      17.779579
Between $75,000 and $99,999      12.025944
Over $150,000                  

## Demographic by Vote

In [197]:
for var in demographics:
    print('## {} ##'.format(var))
    options = survey[var].unique()
    for option in options:
        print('-')
        print('---- vote choice for "{}"'.format(option))
        survey_ = survey[survey[var] == option]
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
        print('------ "{}" 2016 vote'.format(option))
        print(survey_['vote2016'].value_counts(normalize=True) * survey_.groupby('vote2016')['lv_weight'].mean() * 100)
        print('------ "{}" 2020 vote'.format(option))
        print(survey_['vote2020'].value_counts(normalize=True) * survey_.groupby('vote2020')['lv_weight'].mean() * 100)
        print('------ "{}" Ossoff-Perdue vote'.format(option))
        print(survey_['vote_ossoff_perdue'].value_counts(normalize=True) * survey_.groupby('vote_ossoff_perdue')['lv_weight'].mean() * 100)
        print('------ "{}" Warnock-Loeffler vote'.format(option))
        print(survey_['vote_warnock_loeffler'].value_counts(normalize=True) * survey_.groupby('vote_warnock_loeffler')['lv_weight'].mean() * 100)
    print('-')
    print('-')
    print('-')
    print('-')

## loc_county ##
-
---- vote choice for "Another county in Georgia"
------ "Another county in Georgia" 2016 vote
Did not vote        7.750357
Donald Trump       56.413235
Hillary Clinton    32.334045
Other               3.502364
dtype: float64
------ "Another county in Georgia" 2020 vote
Did not vote     0.717223
Donald Trump    59.393290
Joe Biden       38.376633
Other            1.512854
dtype: float64
------ "Another county in Georgia" Ossoff-Perdue vote
David Perdue    55.606177
Jon Ossoff      38.295047
Undecided        6.098776
dtype: float64
------ "Another county in Georgia" Warnock-Loeffler vote
Kelly Loeffler     57.114737
Raphael Warnock    39.066555
Undecided           3.818709
dtype: float64
-
---- vote choice for "Cobb County, GA"
------ "Cobb County, GA" 2016 vote
Did not vote       10.401862
Donald Trump       39.157145
Hillary Clinton    40.232280
Other              10.208713
dtype: float64
------ "Cobb County, GA" 2020 vote
Did not vote     0.824986
Donald Trump    49

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])



Hillary Clinton    100.0
dtype: float64
------ "Prefer not to say" 2020 vote
Joe Biden    100.0
dtype: float64
------ "Prefer not to say" Ossoff-Perdue vote
Jon Ossoff    100.0
dtype: float64
------ "Prefer not to say" Warnock-Loeffler vote
Raphael Warnock    100.0
dtype: float64
-
-
-
-
## race ##
-
---- vote choice for "White, not Hispanic"
------ "White, not Hispanic" 2016 vote
Did not vote        7.808255
Donald Trump       69.564047
Hillary Clinton    19.373770
Other               3.253928
dtype: float64
------ "White, not Hispanic" 2020 vote
Did not vote     0.429446
Donald Trump    74.472513
Joe Biden       24.803522
Other            0.294519
dtype: float64
------ "White, not Hispanic" Ossoff-Perdue vote
David Perdue    72.487102
Jon Ossoff      24.389785
Undecided        3.123112
dtype: float64
------ "White, not Hispanic" Warnock-Loeffler vote
Kelly Loeffler     72.792661
Raphael Warnock    24.454244
Undecided           2.753095
dtype: float64
-
---- vote choice for "Black, n

dtype: float64
------ "Under $15,000" 2020 vote
Did not vote     0.660769
Donald Trump    36.752175
Joe Biden       60.050361
Other            2.536696
dtype: float64
------ "Under $15,000" Ossoff-Perdue vote
David Perdue    34.541796
Jon Ossoff      54.290522
Undecided       11.167682
dtype: float64
------ "Under $15,000" Warnock-Loeffler vote
Kelly Loeffler     42.125875
Raphael Warnock    53.078049
Undecided           4.796077
dtype: float64
-
---- vote choice for "Between $75,000 and $99,999"
------ "Between $75,000 and $99,999" 2016 vote
Did not vote        2.136437
Donald Trump       54.093474
Hillary Clinton    41.124038
Other               2.646051
dtype: float64
------ "Between $75,000 and $99,999" 2020 vote
Did not vote     0.390205
Donald Trump    64.657180
Joe Biden       34.719565
Other            0.233050
dtype: float64
------ "Between $75,000 and $99,999" Ossoff-Perdue vote
David Perdue    55.431769
Jon Ossoff      42.902935
Undecided        1.665297
dtype: float64
-----

## Vote by Demographic

In [198]:
for vote in ['vote_ossoff_perdue', 'vote_warnock_loeffler']:
    for choice in survey[vote].unique():
        for var in demographics:
            print('{} = {}, broken down by {}'.format(vote, choice, var))
            survey_ = survey[survey[vote] == choice]
            survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
            print(survey_[var].value_counts(normalize=True) * survey_.groupby(var)['lv_weight'].mean() * 100)
            print('-')
        print('-')
        print('-')

vote_ossoff_perdue = Jon Ossoff, broken down by loc_county
Another county in Georgia    56.560060
Cobb County, GA               7.346536
DeKalb County, GA            10.756736
Fulton County, GA            14.631887
Gwinnett County, GA          10.704781
dtype: float64
-
vote_ossoff_perdue = Jon Ossoff, broken down by gender
Female               54.168026
Male                 45.349363
Other                 0.357684
Prefer not to say     0.124927
dtype: float64
-
vote_ossoff_perdue = Jon Ossoff, broken down by race
Black, non-Hispanic    57.191195
Hispanic                8.358398
Other                   6.810838
White, not Hispanic    27.639570
dtype: float64
-
vote_ossoff_perdue = Jon Ossoff, broken down by education
Completed graduate school     11.541432
Graduated from college        20.627336
Graduated from high school    25.253007
Less than high school         13.597336
Some college, no degree       28.980889
dtype: float64
-
vote_ossoff_perdue = Jon Ossoff, broken down by income
B

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])


Black, non-Hispanic    57.705027
Hispanic                8.235923
Other                   6.752503
White, not Hispanic    27.306547
dtype: float64
-
vote_warnock_loeffler = Raphael Warnock, broken down by education
Completed graduate school     11.390618
Graduated from college        20.309349
Graduated from high school    27.496883
Less than high school         12.135282
Some college, no degree       28.667868
dtype: float64
-
vote_warnock_loeffler = Raphael Warnock, broken down by income
Between $100,000 and $150,000    12.298416
Between $15,000 and $49,999      36.101831
Between $50,000 and $74,999      16.956646
Between $75,000 and $99,999      10.801747
Over $150,000                     8.387485
Under $15,000                    15.453875
dtype: float64
-
vote_warnock_loeffler = Raphael Warnock, broken down by age
18-34          32.267302
35-54          31.865955
55-64          17.493578
65 or older    18.373165
dtype: float64
-
vote_warnock_loeffler = Raphael Warnock, broken down 

## Demographic by Likely Voter

In [199]:
demographics += ['vote_warnock_loeffler', 'vote_ossoff_perdue']
for var in demographics:
    print('## {} ##'.format(var))
    options = survey[var].unique()
    for option in options:
        print('---- likely voter breakdown for "{}"'.format(option))
        survey_ = survey[survey[var] == option]
        survey_['weight'] = normalize_weights(survey_['weight'])
        print(survey_['lv_likely'].value_counts(normalize=True) * survey_.groupby('lv_likely')['weight'].mean() * 100)
        print('-')

## loc_county ##
---- likely voter breakdown for "Another county in Georgia"
Already voted      59.150365
Likely              1.621199
Somewhat likely     1.168048
Very likely        40.716662
dtype: float64
-
---- likely voter breakdown for "Cobb County, GA"
Already voted      56.969284
Likely              0.534410
Somewhat likely     1.241100
Very likely        37.094769
dtype: float64
-
---- likely voter breakdown for "DeKalb County, GA"
Likely              0.686027
Somewhat likely     2.546646
Very likely        90.341492
dtype: float64
-
---- likely voter breakdown for "Fulton County, GA"
Already voted      41.808485
Likely              0.563972
Somewhat likely     0.577118
Very likely        58.275725
dtype: float64
-
---- likely voter breakdown for "Gwinnett County, GA"
Already voted      42.523079
Likely              1.955894
Somewhat likely     0.664384
Very likely        54.491970
dtype: float64
-
## gender ##
---- likely voter breakdown for "Female"
Already voted      47.390

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])


Already voted      48.432991
Likely              0.235828
Somewhat likely     1.027049
Very likely        50.957168
dtype: float64
-
---- likely voter breakdown for "Other"
Already voted      77.345116
Likely              2.574614
Somewhat likely     4.966643
Very likely        23.892072
dtype: float64
-
## vote2020 ##
---- likely voter breakdown for "Joe Biden"
Already voted      56.543714
Likely              1.796543
Somewhat likely     0.649863
Very likely        37.057945
dtype: float64
-
---- likely voter breakdown for "Donald Trump"
Already voted      47.386994
Likely              0.534573
Somewhat likely     1.227777
Very likely        51.165998
dtype: float64
-
---- likely voter breakdown for "Did not vote"
Already voted      33.574720
Somewhat likely     9.816883
Very likely        40.095591
dtype: float64
-
---- likely voter breakdown for "Other"
Already voted      70.339655
Likely             28.998653
Somewhat likely     2.467397
Very likely        15.674640
dtype: float64


## Likely Voter by Demographic

In [200]:
for lvx in survey['lv_likely'].unique():
    if isinstance(lvx, str):
        for var in demographics:
            print('lv_likely = {}, broken down by {}'.format(lvx, var))
            survey_ = survey[survey['lv_likely'] == lvx]
            survey_['weight'] = normalize_weights(survey_['weight'])
            print(survey_[var].value_counts(normalize=True) * survey_.groupby(var)['weight'].mean() * 100)
            print('-')

lv_likely = Very likely, broken down by loc_county
Another county in Georgia    62.765224
Cobb County, GA               5.848104
DeKalb County, GA             9.313659
Fulton County, GA            11.591402
Gwinnett County, GA          10.481611
dtype: float64
-
lv_likely = Very likely, broken down by gender
Female    54.553268
Male      45.194839
Other      0.251894
dtype: float64
-
lv_likely = Very likely, broken down by race
Black, non-Hispanic    29.602903
Hispanic                7.997968
Other                   5.573242
White, not Hispanic    56.825887
dtype: float64
-
lv_likely = Very likely, broken down by education
Completed graduate school      5.590623
Graduated from college        20.620116
Graduated from high school    29.604378
Less than high school          9.043805
Some college, no degree       35.141078
dtype: float64
-
lv_likely = Very likely, broken down by income
Between $100,000 and $150,000    13.137030
Between $15,000 and $49,999      33.787391
Between $50,000 and

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])



-
lv_likely = Already voted, broken down by race
Black, non-Hispanic    35.343260
Hispanic                9.534948
Other                   4.910288
White, not Hispanic    50.211504
dtype: float64
-
lv_likely = Already voted, broken down by education
Completed graduate school     11.759263
Graduated from college        14.571043
Graduated from high school    31.163545
Less than high school         21.972956
Some college, no degree       20.533193
dtype: float64
-
lv_likely = Already voted, broken down by income
Between $100,000 and $150,000    11.973672
Between $15,000 and $49,999      34.165624
Between $50,000 and $74,999      17.218224
Between $75,000 and $99,999      10.271158
Over $150,000                     9.736065
Under $15,000                    16.635256
dtype: float64
-
lv_likely = Already voted, broken down by age
18-34          17.706019
35-54          27.591495
55-64          23.730673
65 or older    30.971813
dtype: float64
-
lv_likely = Already voted, broken down by vot

## Demographic by Likely Voter Score

In [201]:
for var in demographics:
    print('## {} ##'.format(var))
    options = survey[var].unique()
    for option in options:
        print('---- likely voter score for "{}"'.format(option))
        survey_ = survey[survey[var] == option]
        survey_['weight'] = normalize_weights(survey_['weight'])
        print(survey_['lv_index'].value_counts(normalize=True) * survey_.groupby('lv_index')['weight'].mean() * 100)
        print('-')

## loc_county ##
---- likely voter score for "Another county in Georgia"
0.11     0.769977
0.13     0.126850
0.40     6.010693
0.59     0.947111
0.63     1.490611
0.83    90.654757
dtype: float64
-
---- likely voter score for "Cobb County, GA"
0.13     0.665447
0.40    17.771933
0.59     0.416537
0.63     0.465896
0.83    80.680187
dtype: float64
-
---- likely voter score for "DeKalb County, GA"
0.40    47.587074
0.59     1.466251
0.63     0.394985
0.83    50.551690
dtype: float64
-
---- likely voter score for "Fulton County, GA"
0.40    19.874984
0.59     0.457714
0.63     0.447288
0.83    79.220014
dtype: float64
-
---- likely voter score for "Gwinnett County, GA"
0.11     0.732022
0.40    12.628564
0.59     0.577725
0.63     1.700778
0.83    84.360912
dtype: float64
-
## gender ##
---- likely voter score for "Female"
0.11     1.134394
0.13     0.217858
0.40    13.664324
0.59     0.435074
0.63     0.667893
0.83    83.880456
dtype: float64
-
---- likely voter score for "Male"
0.13    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])


0.11     0.364638
0.13     0.133384
0.40    10.045774
0.59     0.600875
0.63     1.211082
0.83    87.644248
dtype: float64
-
---- likely voter score for "55-64"
0.13     0.378532
0.40     2.413916
0.59     0.821862
0.63     0.119319
0.83    96.266370
dtype: float64
-
---- likely voter score for "65 or older"
0.40     1.022045
0.59     1.786609
0.83    97.191346
dtype: float64
-
## vote2016 ##
---- likely voter score for "Hillary Clinton"
0.40    14.662443
0.59     0.463737
0.63     1.227149
0.83    83.646670
dtype: float64
-
---- likely voter score for "Did not vote"
0.11     4.553675
0.13     1.043763
0.40    31.536471
0.59     0.677924
0.63     4.605375
0.83    57.582791
dtype: float64
-
---- likely voter score for "Donald Trump"
0.40     3.582630
0.59     0.986039
0.63     0.226412
0.83    95.204919
dtype: float64
-
---- likely voter score for "Other"
0.40    11.272938
0.59     4.278954
0.63     2.218129
0.83    82.229979
dtype: float64
-
## vote2020 ##
---- likely voter score for "

## Likely Voter Score by Demographic

In [202]:
for lvx in sorted(survey['lv_index'].unique())[::-1]:
    for var in demographics:
        print('lv_index = {}, broken down by {}'.format(lvx, var))
        survey_ = survey[survey['lv_index'] == lvx]
        survey_['weight'] = normalize_weights(survey_['weight'])
        print(survey_[var].value_counts(normalize=True) * survey_.groupby(var)['weight'].mean() * 100)
        print('-')

lv_index = 0.83, broken down by loc_county
Another county in Georgia    70.969833
Cobb County, GA               6.812702
DeKalb County, GA             4.226622
Fulton County, GA             9.277217
Gwinnett County, GA           8.713627
dtype: float64
-
lv_index = 0.83, broken down by gender
Female    50.162415
Male      49.581110
Other      0.256476
dtype: float64
-
lv_index = 0.83, broken down by race
Black, non-Hispanic    32.878895
Hispanic                8.499787
Other                   5.083179
White, not Hispanic    53.538139
dtype: float64
-
lv_index = 0.83, broken down by education
Completed graduate school      8.997678
Graduated from college        17.276275
Graduated from high school    30.177008
Less than high school         16.192179
Some college, no degree       27.356860
dtype: float64
-
lv_index = 0.83, broken down by income
Between $100,000 and $150,000    12.656617
Between $15,000 and $49,999      33.545749
Between $50,000 and $74,999      17.514344
Between $75,000 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])


David Perdue    52.148176
Jon Ossoff      28.112164
Undecided       19.739660
dtype: float64
-
lv_index = 0.4, broken down by loc_county
Another county in Georgia    34.056393
Cobb County, GA              10.861207
DeKalb County, GA            28.796368
Fulton County, GA            16.845368
Gwinnett County, GA           9.440665
dtype: float64
-
lv_index = 0.4, broken down by gender
Female    59.142101
Male      37.717523
Other      3.140376
dtype: float64
-
lv_index = 0.4, broken down by race
Black, non-Hispanic    34.897526
Hispanic               20.665071
Other                   7.426222
White, not Hispanic    37.011181
dtype: float64
-
lv_index = 0.4, broken down by education
Completed graduate school      9.731217
Graduated from college        27.436348
Graduated from high school    28.383453
Some college, no degree       34.448981
dtype: float64
-
lv_index = 0.4, broken down by income
Between $100,000 and $150,000     9.377761
Between $15,000 and $49,999      49.202558
Between $

## Demographic by Enthusiasm

In [203]:
for var in demographics:
    print('## {} ##'.format(var))
    options = survey[var].unique()
    for option in options:
        print('---- enthusiasm breakdown for "{}"'.format(option))
        survey_ = survey[survey[var] == option]
        survey_['weight'] = normalize_weights(survey_['weight'])
        print(survey_['enthusiasm'].value_counts(normalize=True) * survey_.groupby('enthusiasm')['weight'].mean() * 100)
        print('-')

## loc_county ##
---- enthusiasm breakdown for "Another county in Georgia"
About the same       13.879291
Less enthusiastic    14.862420
More enthusiastic    71.258289
dtype: float64
-
---- enthusiasm breakdown for "Cobb County, GA"
About the same       23.389811
Less enthusiastic    18.848827
More enthusiastic    57.761363
dtype: float64
-
---- enthusiasm breakdown for "DeKalb County, GA"
About the same       31.642241
Less enthusiastic     1.459160
More enthusiastic    66.898599
dtype: float64
-
---- enthusiasm breakdown for "Fulton County, GA"
About the same       13.616859
Less enthusiastic    10.623790
More enthusiastic    75.759351
dtype: float64
-
---- enthusiasm breakdown for "Gwinnett County, GA"
About the same       11.923260
Less enthusiastic     8.216472
More enthusiastic    79.860268
dtype: float64
-
## gender ##
---- enthusiasm breakdown for "Female"
About the same       17.127393
Less enthusiastic    15.551277
More enthusiastic    67.321330
dtype: float64
-
---- enthusia

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])


About the same       20.273052
Less enthusiastic     7.830951
More enthusiastic    71.895997
dtype: float64
-
---- enthusiasm breakdown for "35-54"
About the same       18.011436
Less enthusiastic    16.112237
More enthusiastic    65.876327
dtype: float64
-
---- enthusiasm breakdown for "55-64"
About the same        5.614022
Less enthusiastic    19.425756
More enthusiastic    74.960222
dtype: float64
-
---- enthusiasm breakdown for "65 or older"
About the same       13.252120
Less enthusiastic    10.056917
More enthusiastic    76.690964
dtype: float64
-
## vote2016 ##
---- enthusiasm breakdown for "Hillary Clinton"
About the same       13.370779
Less enthusiastic     5.808944
More enthusiastic    80.820277
dtype: float64
-
---- enthusiasm breakdown for "Did not vote"
About the same       22.214289
Less enthusiastic    12.446033
More enthusiastic    65.339678
dtype: float64
-
---- enthusiasm breakdown for "Donald Trump"
About the same       15.996731
Less enthusiastic    17.788327
More 

## Enthusiasm by Demographic

In [204]:
for enth in sorted(survey['enthusiasm'].unique())[::-1]:
    for var in demographics:
        print('enthusiasm = {}, broken down by {}'.format(enth, var))
        survey_ = survey[survey['enthusiasm'] == enth]
        survey_['weight'] = normalize_weights(survey_['weight'])
        print(survey_[var].value_counts(normalize=True) * survey_.groupby(var)['weight'].mean() * 100)
        print('-')

enthusiasm = More enthusiastic, broken down by loc_county
Another county in Georgia    66.907393
Cobb County, GA               5.849860
DeKalb County, GA             6.708576
Fulton County, GA            10.640807
Gwinnett County, GA           9.893364
dtype: float64
-
enthusiasm = More enthusiastic, broken down by gender
Female    48.286517
Male      50.910270
Other      0.803214
dtype: float64
-
enthusiasm = More enthusiastic, broken down by race
Black, non-Hispanic    33.808535
Hispanic                8.418279
Other                   5.942238
White, not Hispanic    51.830948
dtype: float64
-
enthusiasm = More enthusiastic, broken down by education
Completed graduate school      9.550499
Graduated from college        19.066325
Graduated from high school    28.835499
Less than high school         15.867393
Some college, no degree       26.680285
dtype: float64
-
enthusiasm = More enthusiastic, broken down by income
Between $100,000 and $150,000    14.344167
Between $15,000 and $49,999

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['weight'] = normalize_weights(survey_['weight'])



-
enthusiasm = About the same, broken down by income
Between $100,000 and $150,000     7.638728
Between $15,000 and $49,999      41.622637
Between $50,000 and $74,999      23.176195
Between $75,000 and $99,999      12.244880
Over $150,000                     5.146928
Under $15,000                    10.170633
dtype: float64
-
enthusiasm = About the same, broken down by age
18-34          35.954012
35-54          41.434946
55-64           6.000733
65 or older    16.610310
dtype: float64
-
enthusiasm = About the same, broken down by vote2016
Did not vote       18.072055
Donald Trump       45.115006
Hillary Clinton    33.727589
Other               3.085350
dtype: float64
-
enthusiasm = About the same, broken down by vote2020
Did not vote     4.360222
Donald Trump    47.757837
Joe Biden       46.195693
Other            1.686247
dtype: float64
-
enthusiasm = About the same, broken down by vote_warnock_loeffler
Kelly Loeffler     46.876662
Raphael Warnock    46.118254
Undecided           7.

## Demographic by Vote Method

In [205]:
print(survey['vote_method'].value_counts(normalize=True) * survey.groupby('vote_method')['lv_weight'].mean() * 100)

Absentee by mail               36.171512
Early vote in person           45.384283
I will vote on election day    18.444205
dtype: float64


In [206]:
for var in demographics:
    print('## {} ##'.format(var))
    options = survey[var].unique()
    for option in options:
        print('---- vote method breakdown for "{}"'.format(option))
        survey_ = survey[survey[var] == option]
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
        print(survey_['vote_method'].value_counts(normalize=True) * survey_.groupby('vote_method')['lv_weight'].mean() * 100)
        print('-')

## loc_county ##
---- vote method breakdown for "Another county in Georgia"
Absentee by mail               33.547243
Early vote in person           47.652757
I will vote on election day    18.799999
dtype: float64
-
---- vote method breakdown for "Cobb County, GA"
Absentee by mail               50.716129
Early vote in person           33.862338
I will vote on election day    15.421533
dtype: float64
-
---- vote method breakdown for "DeKalb County, GA"
Absentee by mail               54.444071
Early vote in person           31.181514
I will vote on election day    14.374415
dtype: float64
-
---- vote method breakdown for "Fulton County, GA"
Absentee by mail               32.939703
Early vote in person           46.869472
I will vote on election day    20.190825
dtype: float64
-
---- vote method breakdown for "Gwinnett County, GA"
Absentee by mail               36.713096
Early vote in person           44.471178
I will vote on election day    18.815726
dtype: float64
-
## gender ##
---- vo

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])


Absentee by mail               38.915137
Early vote in person           50.502691
I will vote on election day    10.582172
dtype: float64
-
---- vote method breakdown for "35-54"
Absentee by mail               24.781569
Early vote in person           46.652472
I will vote on election day    28.565958
dtype: float64
-
---- vote method breakdown for "55-64"
Absentee by mail               34.091713
Early vote in person           50.047131
I will vote on election day    15.861156
dtype: float64
-
---- vote method breakdown for "65 or older"
Absentee by mail               54.492230
Early vote in person           33.118958
I will vote on election day    12.388812
dtype: float64
-
## vote2016 ##
---- vote method breakdown for "Hillary Clinton"
Absentee by mail               52.196838
Early vote in person           43.749525
I will vote on election day     4.053637
dtype: float64
-
---- vote method breakdown for "Did not vote"
Absentee by mail               27.771261
Early vote in person      

## Vote Method by Demographic

In [207]:
for method in sorted(survey['vote_method'].unique())[::-1]:
    for var in demographics:
        print('vote method = "{}", broken down by {}'.format(method, var))
        survey_ = survey[survey['vote_method'] == method]
        survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])
        print(survey_[var].value_counts(normalize=True) * survey_.groupby(var)['lv_weight'].mean() * 100)
        print('-')

vote method = "I will vote on election day", broken down by loc_county
Another county in Georgia    70.150200
Cobb County, GA               5.850616
DeKalb County, GA             4.489442
Fulton County, GA            10.560039
Gwinnett County, GA           8.949703
dtype: float64
-
vote method = "I will vote on election day", broken down by gender
Female    49.061002
Male      50.938998
dtype: float64
-
vote method = "I will vote on election day", broken down by race
Black, non-Hispanic    17.295468
Hispanic                8.968423
Other                   4.574540
White, not Hispanic    69.161569
dtype: float64
-
vote method = "I will vote on election day", broken down by education
Completed graduate school      5.155111
Graduated from college        18.842857
Graduated from high school    30.410806
Less than high school         12.443163
Some college, no degree       33.148063
dtype: float64
-
vote method = "I will vote on election day", broken down by income
Between $100,000 and $150

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_['lv_weight'] = normalize_weights(survey_['lv_weight'])



-
vote method = "Absentee by mail", broken down by gender
Female    49.587904
Male      50.027251
Other      0.384845
dtype: float64
-
vote method = "Absentee by mail", broken down by race
Black, non-Hispanic    37.293554
Hispanic                8.220732
Other                   4.611970
White, not Hispanic    49.873745
dtype: float64
-
vote method = "Absentee by mail", broken down by education
Completed graduate school     10.840299
Graduated from college        13.993577
Graduated from high school    35.761312
Less than high school         18.025442
Some college, no degree       21.379370
dtype: float64
-
vote method = "Absentee by mail", broken down by income
Between $100,000 and $150,000    10.204454
Between $15,000 and $49,999      43.084117
Between $50,000 and $74,999      13.362282
Between $75,000 and $99,999       6.299921
Over $150,000                     6.789448
Under $15,000                    20.259779
dtype: float64
-
vote method = "Absentee by mail", broken down by age
1