In [1]:
!pip install numpy
!pip install pandas
!pip install geopandas



In [2]:
import numpy as np
import pandas as pd
import geopandas as gpd
import json
import requests
import urllib.parse
import subprocess
import os

## Cloning Data

In [3]:
gh_api = 'https://api.github.com/'
gh_entity = 'orgs/'
gh_account = 'mggg-states/'
gh_endpoint = 'repos'

gh_api_url = gh_api + gh_entity + gh_account + gh_endpoint

raw_response = requests.get(gh_api_url)
response = json.loads(raw_response.text)
cmds = [['git', 'clone', repo['clone_url']] for repo in response]

cmds

[['git', 'clone', 'https://github.com/mggg-states/PA-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/MA-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/WI-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/AK-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/OH-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/TX-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/GA-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/IL-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/NC-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/UT-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/VA-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/VT-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/MI-shapefiles.git'],
 ['git', 'clone', 'https://github.com/mggg-states/IA-shapefiles.git'],
 ['git

In [4]:
# TODO: add return code checking
list(map(lambda cmd : subprocess.run(cmd), cmds))

[CompletedProcess(args=['git', 'clone', 'https://github.com/mggg-states/PA-shapefiles.git'], returncode=0),
 CompletedProcess(args=['git', 'clone', 'https://github.com/mggg-states/MA-shapefiles.git'], returncode=0),
 CompletedProcess(args=['git', 'clone', 'https://github.com/mggg-states/WI-shapefiles.git'], returncode=0),
 CompletedProcess(args=['git', 'clone', 'https://github.com/mggg-states/AK-shapefiles.git'], returncode=0),
 CompletedProcess(args=['git', 'clone', 'https://github.com/mggg-states/OH-shapefiles.git'], returncode=0),
 CompletedProcess(args=['git', 'clone', 'https://github.com/mggg-states/TX-shapefiles.git'], returncode=0),
 CompletedProcess(args=['git', 'clone', 'https://github.com/mggg-states/GA-shapefiles.git'], returncode=0),
 CompletedProcess(args=['git', 'clone', 'https://github.com/mggg-states/IL-shapefiles.git'], returncode=0),
 CompletedProcess(args=['git', 'clone', 'https://github.com/mggg-states/NC-shapefiles.git'], returncode=0),
 CompletedProcess(args=['git

## Summing Data

In [5]:
# Grabbing files
files_to_sum = []
subdirs = []
for _, dirs, _ in os.walk('.'):
    for directory in dirs:
        if not directory.startswith('.'):
            subdirs.append(directory)

for directory in subdirs:
    for path, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.zip'):
                files_to_sum.append(os.path.join(path, file))

files_to_sum

['MN-shapefiles/MN12.zip',
 'MN-shapefiles/MN16.zip',
 'MN-shapefiles/MN14.zip',
 'MN-shapefiles/MN12_18.zip',
 'MN-shapefiles/Archived/MN_16.zip',
 'MN-shapefiles/Archived/MN_14.zip',
 'MN-shapefiles/Archived/MN_10.zip',
 'MN-shapefiles/Archived/MN_12.zip',
 'MN-shapefiles/Archived/MN_08.zip',
 'MN-shapefiles/Archived/mn_08_16.zip',
 'VT-shapefiles/VT_towns.zip',
 'OH-shapefiles/OH_precincts.zip',
 'AK-shapefiles/AK_precincts.zip',
 'GA-shapefiles/GA_precincts.zip',
 'HI-shapefiles/HI_precincts.zip',
 'WI-shapefiles/WI_wards_12_16.zip',
 'AZ-shapefiles/az_precincts.zip',
 'CO-shapefiles/CO_precincts.zip',
 'MA-shapefiles/MA_no_islands_02_10.zip',
 'MA-shapefiles/MA_precincts_12_16.zip',
 'MA-shapefiles/MA_no_islands_12_16.zip',
 'MA-shapefiles/MA_precincts_02_10.zip',
 'MD-shapefiles/MD_precincts_abs.zip',
 'MD-shapefiles/MD_precincts.zip',
 'NC-shapefiles/NC_VTD.zip',
 'UT-shapefiles/UT_precincts.zip',
 'MI-shapefiles/MI_precincts.zip',
 'CT-shapefiles/CT_precincts.zip',
 'RI-shapefi

### File load

In [8]:
mggg_path_to_file = ''

all_totals = {}

offices_to_sum = ['PRES', 'SEN']
years_to_sum = ['18', '16', '14', '12']
parties_to_sum = ['D', 'R']

generated_columns = {office + year + party for office in offices_to_sum 
                                           for year in years_to_sum 
                                           for party in parties_to_sum}
generated_columns

{'PRES12D',
 'PRES12R',
 'PRES14D',
 'PRES14R',
 'PRES16D',
 'PRES16R',
 'PRES18D',
 'PRES18R',
 'SEN12D',
 'SEN12R',
 'SEN14D',
 'SEN14R',
 'SEN16D',
 'SEN16R',
 'SEN18D',
 'SEN18R'}

### Summation Functions

In [15]:
def sum_values(possible_columns, mggg_gdf):
    columns_to_sum = possible_columns.intersection(mggg_gdf.columns)
    totals = [(column, mggg_gdf[column].sum()) for column in columns_to_sum]
    totals.sort(key=lambda x : x[0])
    return totals

def get_mggg_gdf(path_to_zip):
    return gpd.read_file('zip://' + path_to_zip)

# TODO: consistency check of columns? requires a complete list

### Sums

Note: Done manually one-by-one due to inconsistencies across files

In [13]:
# Alaska
mggg_gdf = get_mggg_gdf( 'AK-shapefiles/AK_precincts.zip')
mggg_gdf.columns # for file validation and column checking

Index(['ID', 'AREA', 'DISTRICT', 'NAME', 'POPULATION', 'USH14D', 'USH14R',
       'USH14L', 'PRES16D', 'PRES16R', 'PRES16L', 'PRES16G', 'PRES16C',
       'SEN16D', 'SEN16R', 'SEN16L', 'USH16D', 'USH16R', 'USH16L', 'GOV18D',
       'GOV18R', 'GOV18L', 'USH18D', 'USH18R', 'HDIST', 'TOTPOP', 'WHITE',
       'BLACK', 'AMIN', 'ASIAN', 'NHPI', 'OTHER', 'VAP', 'WVAP', 'BVAP',
       'AMINVAP', 'ASIANVAP', 'NHPIVAP', 'OTHERVAP', '2MOREVAP', '2MORE',
       'geometry'],
      dtype='object')

In [14]:
all_totals['Alaska'] = sum_values(generated_columns, mggg_gdf.head())
all_totals

{'Alaska': [('PRES16D', 771),
  ('PRES16R', 1071),
  ('SEN16D', 219),
  ('SEN16R', 985)]}

In [16]:
# Arizona
mggg_gdf = get_mggg_gdf( 'AZ-shapefiles/az_precincts.zip')
mggg_gdf.columns

Index(['COUNTY', 'CNTYABV', 'CODE', 'PRECINCT', 'PCTNAME', 'AG18D', 'AG18R',
       'GOV18D', 'GOV18R', 'SOS18D', 'SOS18R', 'SSEN18D', 'SSEN18R', 'TRE18D',
       'TRE18R', 'USH18D', 'USH18G', 'USH18R', 'SEN18D', 'SEN18R', 'TOTPOP',
       'NH_WHITE', 'NH_BLACK', 'NH_AMIN', 'NH_ASIAN', 'NH_NHPI', 'NH_OTHER',
       'NH_2MORE', 'HISP', 'H_WHITE', 'H_BLACK', 'H_AMIN', 'H_ASIAN', 'H_NHPI',
       'H_OTHER', 'H_2MORE', 'VAP', 'HVAP', 'WVAP', 'BVAP', 'AMINVAP',
       'ASIANVAP', 'NHPIVAP', 'OTHERVAP', '2MOREVAP', 'CD', 'HDIST', 'SEND',
       'geometry'],
      dtype='object')

In [17]:
all_totals['Arizona'] = sum_values(generated_columns, mggg_gdf.head())
all_totals

{'Alaska': [('PRES16D', 771),
  ('PRES16R', 1071),
  ('SEN16D', 219),
  ('SEN16R', 985)],
 'Arizona': [('SEN18D', 19), ('SEN18R', 23)]}

## Clean Directory

In [None]:
!echo y | rm -r ./*-shapefiles/