In [63]:
import os
import json
import pandas as pd
import geojson

In [64]:
import us
from us import states

In [65]:
d = '../../third_parties/elections-api/data/congress_legislators/'
state_folders = [os.path.join(d, o) for o in os.listdir(d) 
                    if os.path.isdir(os.path.join(d,o))]

In [66]:
congress2data = {}

for state_folder in state_folders:
    state = state_folder.split('/')[-1]
    for filename in os.listdir(state_folder):
        if filename.endswith(".json"):
            with open(os.path.join(state_folder, filename)) as f:
                data = json.load(f)
                aclu_id = data["id"]["aclu_id"]
                name = data["id"].get("ballotpedia")
                official_name = data["name"].get("official_full")
                party = data["terms"][-1]['party']
                district = str(data["terms"][-1].get('district'))
                state = data["terms"][-1]['state']
                legislator_type = data["terms"][-1].get('type')
                congress2data[aclu_id] = [aclu_id, name, official_name, party, district, state, legislator_type]

In [67]:
legislator_df = pd.DataFrame(congress2data.values(), columns=['aclu_id', 'name', 'official_name', 'party', 'district', 'state', 'legislator_type'])


In [68]:
house_legislator_df = legislator_df[legislator_df.legislator_type=='rep'].copy()

In [69]:
def make_code_complete(x):
    if len(x) == 1:
        return "0"+x
    else:
        return x

house_legislator_df['zeroed_district'] = house_legislator_df['district'].map(make_code_complete)

In [70]:
def add_state_fips(x):
    state = x['state']
    return states.__dict__[state].fips + x['zeroed_district']

house_legislator_df['congressional_district'] = house_legislator_df.apply(add_state_fips, axis=1)

In [71]:
house_legislator_df

Unnamed: 0,aclu_id,name,official_name,party,district,state,legislator_type,zeroed_district,congressional_district
1,aclu/elections-api/congress_legislator:1012,Peter Welch,Peter Welch,Democrat,0,VT,rep,00,5000
3,aclu/elections-api/congress_legislator:1000,,"Donald S. Beyer, Jr.",Democrat,8,VA,rep,08,5108
4,aclu/elections-api/congress_legislator:11604,,Abigail Davis Spanberger,Democrat,7,VA,rep,07,5107
5,aclu/elections-api/congress_legislator:11607,Elaine Luria,Elaine G. Luria,Democrat,2,VA,rep,02,5102
6,aclu/elections-api/congress_legislator:11606,,Denver Riggleman,Republican,5,VA,rep,05,5105
...,...,...,...,...,...,...,...,...,...
637,aclu/elections-api/congress_legislator:949,,"John J. Duncan, Jr.",Republican,2,TN,rep,02,4702
638,aclu/elections-api/congress_legislator:946,,David P. Roe,Republican,1,TN,rep,01,4701
639,aclu/elections-api/congress_legislator:947,,Diane Black,Republican,6,TN,rep,06,4706
642,aclu/elections-api/congress_legislator:952,,Scott DesJarlais,Republican,4,TN,rep,04,4704


In [72]:
with open('../../data/congress/cb_2018_us_cd116_500k.geojson') as f:
    districts = geojson.load(f)
    
geoid2dist = {}

for feat in districts['features']:
    coords = feat["geometry"]['coordinates']
    geotype = feat["geometry"]['type']
    geoid = feat['properties']['GEOID']
    geoid2dist[geoid] = feat["geometry"]

In [73]:
def get_dist(x):
    try:
        return geoid2dist[x]
    except:
        print(x)
        return None

house_legislator_df['geometry'] = house_legislator_df.congressional_district.apply(lambda x: get_dist(x))

7800
6600
6600
7200
6900
1100
6000


In [74]:
house_legislator_df[house_legislator_df.geometry.notnull()]

Unnamed: 0,aclu_id,name,official_name,party,district,state,legislator_type,zeroed_district,congressional_district,geometry
1,aclu/elections-api/congress_legislator:1012,Peter Welch,Peter Welch,Democrat,0,VT,rep,00,5000,"{'type': 'Polygon', 'coordinates': [[[-73.4377..."
3,aclu/elections-api/congress_legislator:1000,,"Donald S. Beyer, Jr.",Democrat,8,VA,rep,08,5108,"{'type': 'Polygon', 'coordinates': [[[-77.2457..."
4,aclu/elections-api/congress_legislator:11604,,Abigail Davis Spanberger,Democrat,7,VA,rep,07,5107,"{'type': 'Polygon', 'coordinates': [[[-78.3600..."
5,aclu/elections-api/congress_legislator:11607,Elaine Luria,Elaine G. Luria,Democrat,2,VA,rep,02,5102,"{'type': 'MultiPolygon', 'coordinates': [[[[-7..."
6,aclu/elections-api/congress_legislator:11606,,Denver Riggleman,Republican,5,VA,rep,05,5105,"{'type': 'Polygon', 'coordinates': [[[-80.2319..."
...,...,...,...,...,...,...,...,...,...,...
637,aclu/elections-api/congress_legislator:949,,"John J. Duncan, Jr.",Republican,2,TN,rep,02,4702,"{'type': 'MultiPolygon', 'coordinates': [[[[-8..."
638,aclu/elections-api/congress_legislator:946,,David P. Roe,Republican,1,TN,rep,01,4701,"{'type': 'Polygon', 'coordinates': [[[-83.7916..."
639,aclu/elections-api/congress_legislator:947,,Diane Black,Republican,6,TN,rep,06,4706,"{'type': 'Polygon', 'coordinates': [[[-87.1502..."
642,aclu/elections-api/congress_legislator:952,,Scott DesJarlais,Republican,4,TN,rep,04,4704,"{'type': 'Polygon', 'coordinates': [[[-87.2967..."


In [75]:
house_legislator_df.to_csv("../../data/congress_data.csv", index=False)