In [45]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import maup
from zipfile import ZipFile
import numpy as np
import os
import shutil

# US House

## Final File - Load

In [50]:
final = gpd.read_file('./va_2018_ushouse/va_2018_ushouse.shp')

In [51]:
final

Unnamed: 0,COUNTYFP,LOCALITY,VTDST,PRECINCT,CON_DIST,G18HORDEM,G18HORREP,G18HORLIB,G18HORWRI,geometry
0,001,Accomack County,000101,Chincoteague,2,533,1039,0,1,"POLYGON Z ((-75.42507 37.89957 0.00000, -75.42..."
1,001,Accomack County,000201,Atlantic,2,171,482,0,2,"POLYGON Z ((-75.59978 37.87664 0.00000, -75.59..."
2,001,Accomack County,000202,Greenbackville,2,307,595,0,1,"POLYGON Z ((-75.49919 37.93416 0.00000, -75.49..."
3,001,Accomack County,000301,New Church,2,583,490,0,0,"POLYGON Z ((-75.64987 37.92702 0.00000, -75.64..."
4,001,Accomack County,000401,Bloxom,2,118,286,0,0,"POLYGON Z ((-75.71556 37.87513 0.00000, -75.71..."
...,...,...,...,...,...,...,...,...,...,...
2458,179,Stafford County,000401,AQUIA,1,615,247,0,0,"POLYGON Z ((-77.39221 38.48583 0.00000, -77.39..."
2459,195,Wise County,000302,East Stone Gap,9,338,1079,0,3,"POLYGON Z ((-82.80429 36.83206 0.00000, -82.80..."
2460,195,Wise County,000301,Big Stone Gap,9,416,954,0,1,"POLYGON Z ((-82.77275 36.84962 0.00000, -82.77..."
2461,089,Henry County,000503,Stanleytown,9,201,855,0,1,"POLYGON Z ((-79.96638 36.76286 0.00000, -79.96..."


# Election Results: US Senate

### Read in election results and append since files split accross districts

In [52]:
district = [str(1), str(2), str(3), str(4), str(5), str(6), str(7), str(8), str(9), str(10), str(11)]

In [53]:
#Party info from: https://ballotpedia.org/United_States_House_of_Representatives_elections_in_Virginia,_2018

candidate_to_party_dict = {'Robert Joseph Wittman': 'G18HORREP',
                           'Lavangelene Aereka Williams': 'G18HORDEM', 
                           'All Others': 'G18HORWRI',  
                           'Elaine Goodman Luria': 'G18HORDEM', 
                           'Scott William Taylor': 'G18HORREP',
                           'Robert Cortez Scott': 'G18HORDEM', 
                           'A. Donald McEachin': 'G18HORDEM', 
                           'Ryan Andrew McAdams': 'G18HORREP',
                           'Peter Joseph Wells': 'G18HORLIB', 
                           'Denver Lee Riggleman, III': 'G18HORREP',
                           'Leslie Corkill Cockburn': 'G18HORDEM', 
                           'Benjamin Lee Cline': 'G18HORREP', 
                           'Jennifer Lynn Lewis': 'G18HORDEM',
                           'Abigail Anne Davis Spanberger': 'G18HORDEM', 
                           'David A. Brat': 'G18HORREP',
                           'Joseph Buckler Walton': 'G18HORLIB', 
                           'Donald Sternoff Beyer, Jr.': 'G18HORDEM',
                           'Thomas Siyoung Oh': 'G18HORREP', 
                           'Howard Morgan Griffith': 'G18HORREP',
                           'Anthony Jude Flaccavento': 'G18HORDEM', 
                           'Jennifer Tosini Wexton': 'G18HORDEM',
                           'Barbara Jean Comstock': 'G18HORREP', 
                           'Gerald Edward Connolly': 'G18HORDEM',
                           'Jeffery Anthony Dove, Jr': 'G18HORREP', 
                           'Stevan Michael Porter': 'G18HORLIB'}

#candidate_list = []

#### Peter peer coding
 - if trouble standardizing, just do it by hand
 - to change the column header names, use .rename()
 - can use loop to print out the 11 names

In [54]:
#og:
path = ('/Users/lilyfalk/sandbox/pdv-va/vest_va/us_house_results_by_district_2018/')
os.chdir(path)
elections_by_district = []
for i in district:
    file_name = 'Virginia_Elections_Database__2018_U_S_House_General_Election_District_'+ i +'_including_precincts.csv'
    results = pd.read_csv(file_name)
    print(i, ' added to list')
    results['CON_DIST'] = i
    for col in results.columns:
        if col in candidate_to_party_dict.keys():
            col_new_name = candidate_to_party_dict.get(col)
            results = results.rename(columns = {col:col_new_name})
    elections_by_district.append(results)

1  added to list
2  added to list
3  added to list
4  added to list
5  added to list
6  added to list
7  added to list
8  added to list
9  added to list
10  added to list
11  added to list


In [55]:
#more efficient mod:
path = ('/Users/lilyfalk/sandbox/pdv-va/vest_va/us_house_results_by_district_2018/')
os.chdir(path)
elections_by_district = []
for i in range(1, 12):
    file_name = 'Virginia_Elections_Database__2018_U_S_House_General_Election_District_'+ str(i) +'_including_precincts.csv'
    results = pd.read_csv(file_name)
    print(i, ' added to list')
    results['CON_DIST'] = i
    elections_by_district.append(results.rename(columns = {col: candidate_to_party_dict.get(col) for col in results.columns if col in candidate_to_party_dict.keys()}
))

1  added to list
2  added to list
3  added to list
4  added to list
5  added to list
6  added to list
7  added to list
8  added to list
9  added to list
10  added to list
11  added to list


In [56]:
df = pd.concat(elections_by_district, axis = 0)

In [57]:
df

Unnamed: 0,County/City,Ward,Pct,G18HORREP,G18HORDEM,G18HORWRI,Total Votes Cast,CON_DIST,G18HORLIB
0,,,,Republican,Democratic,,,1,
1,Caroline County,-,101 - Bowling Green,675,331,0,1006,1,
2,Caroline County,-,102 - Sparta,482,283,1,766,1,
3,Caroline County,-,201 - North Madison,398,469,0,867,1,
4,Caroline County,-,202 - South Madison,596,482,0,1078,1,
...,...,...,...,...,...,...,...,...,...
163,Prince William County,-,711 - Grayson,269,1236,0,1526,11,21
164,Prince William County,-,712 - Leesylvania,477,1075,2,1575,11,21
165,Prince William County,-,Ab - Central Absentee Precinct,1757,5992,5,7858,11,104
166,Prince William County,-,Provisional,6,41,1,48,11,0


In [58]:
set(df['County/City']) - set(final['LOCALITY'])

{'TOTALS', nan}

In [59]:
#Remove totals column from df
df = df[df['County/City'] != 'TOTALS']

In [60]:
df['LOCALITY'] = df['County/City']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['LOCALITY'] = df['County/City']


In [61]:
df = df.iloc[1:]

In [62]:
df = df.drop(['County/City', 'Ward', 'Total Votes Cast'], axis = 1)

In [63]:
df = df.fillna(value = 0)

In [64]:
df = df[(df['G18HORDEM'] != 'Democratic') & (df['G18HORREP'] != 'Republican') & (df['G18HORLIB'] != 'Libertarian')]

In [65]:
county_dict = pd.Series(final['COUNTYFP'].values, index = final['LOCALITY']).to_dict()

In [66]:
set(final['LOCALITY']) - set(df['LOCALITY'])

set()

In [67]:
df['COUNTYFP'] = df['LOCALITY'].map(county_dict)

In [68]:
df['COUNTYFP'].value_counts()

059    250
153    102
810    102
107    100
087     94
      ... 
735      5
830      4
530      3
678      3
720      2
Name: COUNTYFP, Length: 133, dtype: int64

In [69]:
df['G18HORDEM'].value_counts()

0        32
1        24
2        16
3        12
5         9
         ..
1,792     1
1,096     1
869       1
1,305     1
1,300     1
Name: G18HORDEM, Length: 1305, dtype: int64

In [70]:
df['G18HORDEM'] = df['G18HORDEM'].map(lambda x: str(x).replace(',', ''))
df['G18HORREP'] = df['G18HORREP'].map(lambda x: str(x).replace(',', ''))
df['G18HORLIB'] = df['G18HORLIB'].map(lambda x: str(x).replace(',', ''))
df['G18HORWRI'] = df['G18HORWRI'].map(lambda x: str(x).replace(',', ''))

In [71]:
df['G18HORDEM'] = df['G18HORDEM'].astype(str).astype(float).astype(int)
df['G18HORREP'] = df['G18HORREP'].astype(str).astype(float).astype(int)
df['G18HORLIB'] = df['G18HORLIB'].astype(str).astype(float).astype(int)
df['G18HORWRI'] = df['G18HORWRI'].astype(str).astype(float).astype(int)

In [72]:
df['CON_DIST'] = df['CON_DIST'].astype(str)

In [194]:
df[df['LOCALITY'] == '0']

Unnamed: 0,Pct,G18HORREP,G18HORDEM,G18HORWRI,CON_DIST,G18HORLIB,LOCALITY,COUNTYFP


### Need to re allocate absentee votes

In [73]:
def get_absentee_frac(prec_tot, county_tot, absentee_tot):
    '''A spec specifies in first line what function will do
    Returns fraction of county absentee votes for a precinct for a candidate
    
    Keyword arguments: (put inputs to funtions and say what they should be)
    precinct vote total for given candidate (int)
    county vote total for given candidate, absentee and provisional votes not included (int)
    absentee vote total for county for given candidate (int)
    
    "Pure function" bc only relies on inputs and outputs - only variables are ones that were passed as arguments
    '''
    if county_tot != 0:
        prec_frac = prec_tot/county_tot
        absentee_frac = prec_frac*absentee_tot
    else:
        absentee_frac = 0
    return absentee_frac

In [74]:
absentee_and_prov = df[(df['Pct'].map(lambda x: 'Absentee' in str(x))) | (df['Pct'].map(lambda x: 'Provisional' in str(x)))]
absentee_and_prov

Unnamed: 0,Pct,G18HORREP,G18HORDEM,G18HORWRI,CON_DIST,G18HORLIB,LOCALITY,COUNTYFP
13,Ab - Central Absentee Precinct,395,396,1,1,0,Caroline County,033
14,Provisional,5,12,0,1,0,Caroline County,033
19,Ab - Central Absentee Precinct,206,197,0,1,0,Essex County,057
20,Provisional,0,2,0,1,0,Essex County,057
25,Ab - Central Absentee Precinct,232,162,0,1,0,Fauquier County,061
...,...,...,...,...,...,...,...,...
117,Provisional,50,193,1,11,9,Fairfax County,059
124,Ab - Central Absentee Precinct,401,1167,4,11,22,Fairfax City,600
125,Provisional,2,6,1,11,0,Fairfax City,600
165,Ab - Central Absentee Precinct,1757,5992,5,11,104,Prince William County,153


In [127]:
absentee_and_prov[['G18HORREP', 'G18HORDEM', 'G18HORWRI', 'G18HORLIB']].sum().sum()

333609

In [75]:
groupby_absentee_and_prov_tot = absentee_and_prov.groupby(['COUNTYFP']).sum()
groupby_absentee_and_prov_tot

Unnamed: 0_level_0,G18HORREP,G18HORDEM,G18HORWRI,G18HORLIB
COUNTYFP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
001,439,735,0,0
003,1522,3974,11,0
005,162,132,0,0
007,298,186,1,1
009,431,332,0,0
...,...,...,...,...
800,269,2188,110,15
810,6809,8326,16,0
820,253,275,0,0
830,217,460,0,0


In [76]:
groupby_county_df_tot = df.groupby(['COUNTYFP']).sum()
df_no_absent_or_provisional = df[(df['Pct'].map(lambda x: 'Absentee' not in str(x))) & (df['Pct'].map(lambda x: 'Provisional' not in str(x)))
                                & (df['LOCALITY'] != 'TOTALS')]
groupby_county_tot_no_absentee = df_no_absent_or_provisional.groupby('COUNTYFP').sum()

In [128]:
df_no_absent_or_provisional[['G18HORREP', 'G18HORDEM', 'G18HORWRI', 'G18HORLIB']].sum().sum()

2979602

In [77]:
groupby_absentee_and_prov_tot

Unnamed: 0_level_0,G18HORREP,G18HORDEM,G18HORWRI,G18HORLIB
COUNTYFP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
001,439,735,0,0
003,1522,3974,11,0
005,162,132,0,0
007,298,186,1,1
009,431,332,0,0
...,...,...,...,...
800,269,2188,110,15
810,6809,8326,16,0
820,253,275,0,0
830,217,460,0,0


In [189]:
df_with_absentee_reallocated = df_no_absent_or_provisional.copy()
sum_allocated = 0
prec_sum_w_ab = 0
for index, precinct_total_row_by_candidate in df_no_absent_or_provisional.iterrows():
    #print(precinct_total_row_by_candidate)
    county_fips = precinct_total_row_by_candidate['COUNTYFP']
    county_total_row_by_candidate_no_absentee = groupby_county_tot_no_absentee.loc[county_fips]
    county_absentee_and_provisional_by_candidate = groupby_absentee_and_prov_tot.loc[county_fips]
    #print(precinct_total_row_by_candidate, county_total_row_by_candidate_no_absentee, county_absentee_and_provisional_by_candidate)
    for candidate, votes in county_total_row_by_candidate_no_absentee.to_dict().items():
        prec_tot = precinct_total_row_by_candidate[candidate]
        #print('precinct total: ', prec_tot)
        county_tot = votes
        #print('county total: ', county_tot)
        absentee_tot = county_absentee_and_provisional_by_candidate[candidate]
        #print('absentee total: ', absentee_tot)
        absentee_frac = get_absentee_frac(prec_tot, county_tot, absentee_tot)
        #print('absentee votes to allocate to precinct: ', absentee_frac)
        prec_with_absentee_allocation = prec_tot + absentee_frac
        #print('new precinct total: ', prec_with_absentee_allocation)
        print(df_with_absentee_reallocated[['G18HORREP', 'G18HORDEM', 'G18HORWRI', 'G18HORLIB']].sum().sum())
        df_with_absentee_reallocated.loc[index, candidate] = round(prec_with_absentee_allocation) #index is row, candidate is column

        print(df_with_absentee_reallocated[['G18HORREP', 'G18HORDEM', 'G18HORWRI', 'G18HORLIB']].sum().sum())
        sum_allocated = sum_allocated + absentee_frac
        prec_sum_w_ab = prec_sum_w_ab + prec_with_absentee_allocation
        break
    break
        ##So, the number of absentee reallocated looks right... must be a problem with the sum of the prec_tot and absentee_frac?
#print(sum_allocated)
#print(prec_tot)
print(prec_sum_w_ab)
df_with_absentee_reallocated.loc[index, candidate]
#df_with_absentee_reallocated.loc[index, candidate]

2979602
2981691.0
725.2325581395348


1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
Name: G18HORREP, dtype: float64

In [190]:
df_with_absentee_reallocated.loc[index, candidate]

1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
1    725.0
Name: G18HORREP, dtype: float64

In [185]:
(2981691.0 - 2979602)

2.8813793103448275

In [161]:
county_total_row_by_candidate_no_absentee.to_dict().items()

dict_items([('G18HORREP', 51398), ('G18HORDEM', 87186), ('G18HORWRI', 275), ('G18HORLIB', 837)])

In [175]:
groupby_county_final[groupby_county_final.index == '033']

Unnamed: 0_level_0,G18HORDEM,G18HORREP,G18HORLIB,G18HORWRI
COUNTYFP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
33,5575,5775,0,6


compute sum of all votes in the 

In [136]:
df_no_absent_or_provisional[['G18HORREP', 'G18HORDEM', 'G18HORWRI', 'G18HORLIB']].sum().sum()

2979602

In [154]:
df_with_absentee_reallocated['LOCALITY']

Unnamed: 0,Pct,G18HORREP,G18HORDEM,G18HORWRI,CON_DIST,G18HORLIB,LOCALITY,COUNTYFP
1,101 - Bowling Green,602.0,1154.0,1.0,1,25.0,Caroline County,33
2,102 - Sparta,1015.0,2290.0,3.0,1,60.0,Caroline County,33
3,201 - North Madison,548.0,1687.0,2.0,1,37.0,Caroline County,33
4,202 - South Madison,583.0,1153.0,1.0,1,44.0,Caroline County,33
5,301 - Port Royal,338.0,694.0,1.0,1,22.0,Caroline County,33


In [157]:
final.head()

Unnamed: 0,COUNTYFP,LOCALITY,VTDST,PRECINCT,CON_DIST,G18HORDEM,G18HORREP,G18HORLIB,G18HORWRI,geometry
0,1,Accomack County,101,Chincoteague,2,533,1039,0,1,"POLYGON Z ((-75.42507 37.89957 0.00000, -75.42..."
1,1,Accomack County,201,Atlantic,2,171,482,0,2,"POLYGON Z ((-75.59978 37.87664 0.00000, -75.59..."
2,1,Accomack County,202,Greenbackville,2,307,595,0,1,"POLYGON Z ((-75.49919 37.93416 0.00000, -75.49..."
3,1,Accomack County,301,New Church,2,583,490,0,0,"POLYGON Z ((-75.64987 37.92702 0.00000, -75.64..."
4,1,Accomack County,401,Bloxom,2,118,286,0,0,"POLYGON Z ((-75.71556 37.87513 0.00000, -75.71..."


In [152]:
df_with_absentee_reallocated[['G18HORREP', 'G18HORDEM', 'G18HORWRI', 'G18HORLIB']].sum().sum()

4314522.0

In [150]:
final[['G18HORREP', 'G18HORDEM', 'G18HORWRI', 'G18HORLIB']].sum().sum()

3313211

In [79]:
groupby_df_with_absentee_reallocated = df_with_absentee_reallocated.groupby('COUNTYFP').sum()

In [80]:
groupby_df_with_absentee_reallocated

Unnamed: 0_level_0,G18HORREP,G18HORDEM,G18HORWRI,G18HORLIB
COUNTYFP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
001,10861.0,23253.0,65.0,637.0
003,18631.0,42605.0,109.0,1154.0
005,7717.0,17113.0,47.0,465.0
007,3086.0,6978.0,8.0,188.0
009,6976.0,15069.0,38.0,403.0
...,...,...,...,...
800,24337.0,20147.0,78.0,0.0
810,52318.0,135155.0,282.0,2946.0
820,1284.0,621.0,1.0,0.0
830,1445.0,2474.0,11.0,0.0


In [121]:
groupby_df_with_absentee_reallocated.sum().sum()

4314522.0

In [123]:
groupby_county_final = final.groupby(['COUNTYFP']).sum()

In [125]:
groupby_county_final.sum().sum()

3313211

In [83]:
df_absentee_reallocate_with_final = pd.merge(groupby_df_with_absentee_reallocated, groupby_county_final, on = 'COUNTYFP', how = 'inner', suffixes = ('_x', '_y'))

In [86]:
df_absentee_reallocate_with_final

Unnamed: 0_level_0,G18HORREP_x,G18HORDEM_x,G18HORWRI_x,G18HORLIB_x,G18HORDEM_y,G18HORREP_y,G18HORLIB_y,G18HORWRI_y
COUNTYFP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
001,10861.0,23253.0,65.0,637.0,5729,7025,0,12
003,18631.0,42605.0,109.0,1154.0,34409,18807,0,117
005,7717.0,17113.0,47.0,465.0,1892,3563,0,2
007,3086.0,6978.0,8.0,188.0,1926,3879,51,4
009,6976.0,15069.0,38.0,403.0,4217,8326,0,8
...,...,...,...,...,...,...,...,...
800,24337.0,20147.0,78.0,0.0,23477,5453,150,1922
810,52318.0,135155.0,282.0,2946.0,86822,82586,0,217
820,1284.0,621.0,1.0,0.0,3840,3875,0,4
830,1445.0,2474.0,11.0,0.0,4408,1787,0,16


In [87]:
df.columns

Index(['Pct', 'G18HORREP', 'G18HORDEM', 'G18HORWRI', 'CON_DIST', 'G18HORLIB',
       'LOCALITY', 'COUNTYFP'],
      dtype='object')

In [88]:
column_list = ['G18HORREP', 'G18HORDEM', 'G18HORWRI', 'G18HORLIB']

In [107]:
def sum_checker(df, column_list):
    list_max = 0
    for i in column_list:
        left_sum = df[i+'_x'].sum()
        right_sum = df[i+'_y'].sum()
        if (left_sum == right_sum):
            print('column: ', i, ' have the same sums')
        
        else:
            diff = abs(left_sum - right_sum)
            percent = abs(left_sum - right_sum)/left_sum
            print('column: ', i)
            print('RDH sum is ', str(left_sum))
            print('Partner sum is ', str(right_sum))
            print('their (absolute) difference is ', str(diff))
            print('As a percentage of DF1 this is ', str(round(percent*100,5)) + '%')
            if (percent>list_max):
                list_max = percent
            print('')
    print('The max difference (as a percent of the total of on DF column is: ', str(round(list_max,5)))

In [109]:
sum_checker(df_absentee_reallocate_with_final, column_list)

column:  G18HORREP
RDH sum is  1433850.0
Partner sum is  1408701
their (absolute) difference is  25149.0
As a percentage of DF1 this is  1.75395%

column:  G18HORDEM
RDH sum is  2818213.0
Partner sum is  1867061
their (absolute) difference is  951152.0
As a percentage of DF1 this is  33.75018%

column:  G18HORWRI
RDH sum is  6633.0
Partner sum is  23454
their (absolute) difference is  16821.0
As a percentage of DF1 this is  253.59566%

column:  G18HORLIB
RDH sum is  55826.0
Partner sum is  13995
their (absolute) difference is  41831.0
As a percentage of DF1 this is  74.93104%

The max difference (as a percent of the total of on DF column is:  2.53596


In [99]:
df_cd_groupby = df.groupby(['CON_DIST']).sum() #method=function

In [116]:
df_cd_groupby.sum().sum()

3313211

In [118]:
df.shape

(2754, 8)

In [100]:
final_cd_groupby = final.groupby(['CON_DIST']).sum()

In [119]:
final.shape

(2463, 10)

In [117]:
final_cd_groupby.sum().sum()

3313211

In [104]:
join_df_cd_groupby_with_final = pd.merge(df_cd_groupby, final_cd_groupby, on = 'CON_DIST')

In [105]:
join_df_cd_groupby_with_final

Unnamed: 0_level_0,G18HORREP_x,G18HORDEM_x,G18HORWRI_x,G18HORLIB_x,G18HORDEM_y,G18HORREP_y,G18HORLIB_y,G18HORWRI_y
CON_DIST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,183250,148464,413,0,148464,183250,0,413
10,160841,206356,618,0,206356,160841,0,618
11,83023,219191,513,5546,219191,83023,5546,513
2,133458,139571,399,0,139571,133458,0,399
3,0,198615,19177,0,198615,0,0,19177
4,107706,187642,288,4233,187642,107706,4233,288
5,165339,145040,550,0,145040,165339,0,550
6,167957,113133,318,0,113133,167957,0,318
7,169295,176079,241,4216,176079,169295,4216,241
8,76899,247137,714,0,247137,76899,0,714


In [106]:
sum_checker(join_df_cd_groupby_with_final, column_list)

column:  G18HORREP  have the same sums
column:  G18HORDEM  have the same sums
column:  G18HORWRI  have the same sums
column:  G18HORLIB  have the same sums
The max difference (as a percent of the total of on DF column is:  0


In [112]:
absentee_reall_con_groupby = df_with_absentee_reallocated.groupby(['CON_DIST']).sum()

In [113]:
absent_final_groupby_con = pd.merge()

Unnamed: 0_level_0,G18HORREP,G18HORDEM,G18HORWRI,G18HORLIB
CON_DIST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,134409.0,250556.0,610.0,4565.0
10,122775.0,270522.0,642.0,5336.0
11,84174.0,222318.0,496.0,5542.0
2,95702.0,234421.0,508.0,5132.0
3,120729.0,257761.0,599.0,5105.0
4,149788.0,272746.0,661.0,4952.0
5,178326.0,279905.0,674.0,4917.0
6,135626.0,261229.0,627.0,4823.0
7,128552.0,263225.0,623.0,5250.0
8,88648.0,227613.0,514.0,5429.0


In [114]:
df

Unnamed: 0,Pct,G18HORREP,G18HORDEM,G18HORWRI,CON_DIST,G18HORLIB,LOCALITY,COUNTYFP
1,101 - Bowling Green,675,331,0,1,0,Caroline County,033
2,102 - Sparta,482,283,1,1,0,Caroline County,033
3,201 - North Madison,398,469,0,1,0,Caroline County,033
4,202 - South Madison,596,482,0,1,0,Caroline County,033
5,301 - Port Royal,262,182,0,1,0,Caroline County,033
...,...,...,...,...,...,...,...,...
162,710 - Powells Creek,189,1154,1,11,17,Prince William County,153
163,711 - Grayson,269,1236,0,11,21,Prince William County,153
164,712 - Leesylvania,477,1075,2,11,21,Prince William County,153
165,Ab - Central Absentee Precinct,1757,5992,5,11,104,Prince William County,153


In [115]:
final

Unnamed: 0,COUNTYFP,LOCALITY,VTDST,PRECINCT,CON_DIST,G18HORDEM,G18HORREP,G18HORLIB,G18HORWRI,geometry
0,001,Accomack County,000101,Chincoteague,2,533,1039,0,1,"POLYGON Z ((-75.42507 37.89957 0.00000, -75.42..."
1,001,Accomack County,000201,Atlantic,2,171,482,0,2,"POLYGON Z ((-75.59978 37.87664 0.00000, -75.59..."
2,001,Accomack County,000202,Greenbackville,2,307,595,0,1,"POLYGON Z ((-75.49919 37.93416 0.00000, -75.49..."
3,001,Accomack County,000301,New Church,2,583,490,0,0,"POLYGON Z ((-75.64987 37.92702 0.00000, -75.64..."
4,001,Accomack County,000401,Bloxom,2,118,286,0,0,"POLYGON Z ((-75.71556 37.87513 0.00000, -75.71..."
...,...,...,...,...,...,...,...,...,...,...
2458,179,Stafford County,000401,AQUIA,1,615,247,0,0,"POLYGON Z ((-77.39221 38.48583 0.00000, -77.39..."
2459,195,Wise County,000302,East Stone Gap,9,338,1079,0,3,"POLYGON Z ((-82.80429 36.83206 0.00000, -82.80..."
2460,195,Wise County,000301,Big Stone Gap,9,416,954,0,1,"POLYGON Z ((-82.77275 36.84962 0.00000, -82.77..."
2461,089,Henry County,000503,Stanleytown,9,201,855,0,1,"POLYGON Z ((-79.96638 36.76286 0.00000, -79.96..."
