# Geo-Network of Electees

Jerry Xu

Updated: 02/17/2020

TODO: this notebook needs more documentation. (02/17/2020)

Given a congressional district, how do state legislator districts overlap with it? With this information, we can build a coalition of state legislators within targeted House Members' Congressional Districts. 

In [1]:
import os
import json
import pandas as pd
import geojson
import requests
import io

In [2]:
import us
from us import states

## 1. Federal Data

`elections-api/data/congress_legislators/` has U.S. congress member information. For each congress legislator, we extract:
- name
- party
- district
- state
- geo shape of congressional district

In [3]:
# Base directory (relative to this notebook) that holds the external data sources.
external_path = '../../external/'
# Sub-path, under external_path, of the per-state congressional legislator folders.
congress_legislators_path = 'elections-api/data/congress_legislators/'

In [4]:
# Every sub-directory of the congressional legislator folder is treated as one state folder.
d = os.path.join(external_path, congress_legislators_path)

state_folders = [
    folder
    for folder in (os.path.join(d, name) for name in os.listdir(d))
    if os.path.isdir(folder)
]

In [5]:
# Collect one summary row per congressional legislator, keyed by ACLU id.
congress2data = {}

for state_folder in state_folders:
    for filename in os.listdir(state_folder):
        if not filename.endswith(".json"):
            continue

        # Decode explicitly as UTF-8 instead of relying on the platform default,
        # so non-ASCII names load the same way on every machine.
        with open(os.path.join(state_folder, filename), encoding="utf-8") as f:
            data = json.load(f)

        # Only the last entry of "terms" is used (presumably the most recent
        # term -- confirm the ordering of the source data).
        latest_term = data["terms"][-1]

        aclu_id = data["id"]["aclu_id"]
        official_name = data["name"].get("official_full")
        party = latest_term['party']
        # Stored as a string; a term without a district becomes the string "None".
        district = str(latest_term.get('district'))
        state = latest_term['state']
        legislator_type = latest_term.get('type')
        congress2data[aclu_id] = [aclu_id, official_name, party, district, state, legislator_type]

In [6]:
# Column order mirrors the per-legislator lists stored in congress2data.
legislator_columns = [
    'aclu_id',
    'official_name',
    'party',
    'district',
    'state',
    'legislator_type',
]
legislator_df = pd.DataFrame(congress2data.values(), columns=legislator_columns)

In [7]:
# Keep only rows whose legislator_type is 'rep'; copy so later column
# assignments do not write into legislator_df.
is_house_member = legislator_df['legislator_type'] == 'rep'
house_legislator_df = legislator_df[is_house_member].copy()

In [8]:
# Spot-check five random House rows (no random_state is set, so the rows differ between runs).
house_legislator_df.sample(5)

Unnamed: 0,aclu_id,official_name,party,district,state,legislator_type
438,aclu/elections-api/congress_legislator:11661,Deborah K. Ross,Democrat,2,NC,rep
373,aclu/elections-api/congress_legislator:988,Ted Poe,Republican,2,TX,rep
233,aclu/elections-api/congress_legislator:580,Mark DeSaulnier,Democrat,11,CA,rep
389,aclu/elections-api/congress_legislator:965,Joe Barton,Republican,6,TX,rep
625,aclu/elections-api/congress_legislator:908,Suzanne Bonamici,Democrat,1,OR,rep


Given `state` and `district`, let's build the `congressional_district` code.

In [9]:
def make_congressional_district(x):
    """Build the congressional-district code for one legislator row.

    The code is the ``fips`` attribute of the ``us.states`` entry named by
    the row's ``state`` value, followed by the row's ``district`` string.
    A district string that is exactly one character long is prefixed with
    "0" first.

    Parameters
    ----------
    x : mapping-like row
        Must provide ``'state'`` and ``'district'`` (a string).

    Returns
    -------
    str
        FIPS code concatenated with the (padded) district string.
    """
    state_abbr = x['state']
    raw_district = x['district']

    padded_district = "0" + raw_district if len(raw_district) == 1 else raw_district

    return vars(states)[state_abbr].fips + padded_district

In [10]:
# Row-wise: derive each member's congressional_district code.
house_legislator_df['congressional_district'] = house_legislator_df.apply(
    make_congressional_district,
    axis='columns',
)

In [11]:
# Preview the first rows, which now carry the congressional_district column.
house_legislator_df.head()

Unnamed: 0,aclu_id,official_name,party,district,state,legislator_type,congressional_district
1,aclu/elections-api/congress_legislator:1012,Peter Welch,Democrat,0,VT,rep,5000
3,aclu/elections-api/congress_legislator:1000,"Donald S. Beyer, Jr.",Democrat,8,VA,rep,5108
4,aclu/elections-api/congress_legislator:11604,Abigail Davis Spanberger,Democrat,7,VA,rep,5107
5,aclu/elections-api/congress_legislator:11607,Elaine G. Luria,Democrat,2,VA,rep,5102
6,aclu/elections-api/congress_legislator:11606,Denver Riggleman,Republican,5,VA,rep,5105


In [12]:
# Load the congressional-district boundary file. Decode explicitly as UTF-8
# rather than relying on the platform default encoding.
with open('../../data/census/cb_2018_us_cd116_500k.geojson', encoding='utf-8') as f:
    districts = geojson.load(f)

# Map each feature's GEOID property to its GeoJSON geometry object.
# Only the geometry itself is stored, so a feature with a null geometry is
# kept as None instead of raising while reading fields that are never used.
geoid2dist = {}

for feat in districts['features']:
    geoid = feat['properties']['GEOID']
    geoid2dist[geoid] = feat["geometry"]

In [13]:
# Attach the district geometry; codes with no entry in geoid2dist yield None.
house_legislator_df['geometry'] = house_legislator_df['congressional_district'].apply(geoid2dist.get)

In [14]:
# Lightweight export: every column except geometry, without the index.
congress_lite_df = house_legislator_df.drop(columns='geometry')
congress_lite_df.to_csv("../../data/congress_data_lite.csv", index=False)

uncomment the following line to save all columns (including geometry)

In [15]:
# house_legislator_df.to_csv("../../data/congress_data.csv", index=False)

## 2. State

`elections-api/data/state_leg/` has state legislative district information for each state. For each state legislative district, we extract multiple attributes and the geo data of the district. 

In [16]:
external_path = '../../external/'
state_legislators_path = 'elections-api/data/state_leg/'

# Every sub-directory of the state_leg folder is treated as one state folder.
d = os.path.join(external_path, state_legislators_path)
all_state_folders = [
    folder
    for folder in (os.path.join(d, name) for name in os.listdir(d))
    if os.path.isdir(folder)
]

In [17]:
# state2files: folder name -> list of district GeoJSON paths found in it.
# records: one row per district file (attributes plus raw geometry).
state2files = {}
records = []

for state_folder in all_state_folders:
    # Keep the folder-derived key in its own variable. It must not be
    # overwritten by the per-file state value read below, otherwise later
    # appends for the same folder could target a different (or missing) key.
    folder_state = os.path.basename(state_folder)
    state2files[folder_state] = []

    for filename in os.listdir(state_folder):
        # Skip non-GeoJSON files and the "*display.geojson" variants.
        if not filename.endswith(".geojson") or filename.endswith('display.geojson'):
            continue

        file_path = os.path.join(state_folder, filename)
        state2files[folder_state].append(file_path)

        # Decode explicitly as UTF-8 rather than the platform default.
        with open(file_path, encoding='utf-8') as f:
            data = geojson.load(f)

        props = data['properties']
        records.append([
            props['geoid'],
            props['district_num'],
            props['state'],
            props['aclu_id'],
            props['chamber'],
            data['geometry'],
            props['name'],
        ])

In [18]:
# Column order mirrors the per-district lists collected in records.
geo_columns = ['geoid', 'district_num', 'state', 'aclu_id', 'chamber', 'geometry', 'name']
geo_df = pd.DataFrame.from_records(records, columns=geo_columns)

In [19]:
people_path = 'people/data/'

# Every sub-directory of the people data folder is treated as one state folder.
d = os.path.join(external_path, people_path)
state_folders = [
    folder
    for folder in (os.path.join(d, name) for name in os.listdir(d))
    if os.path.isdir(folder)
]

In [20]:
# Map each state folder name to the .yml files in its "legislature" sub-folder.
state2files = {}

for state_folder in state_folders:
    state = state_folder.split('/')[-1]
    legislature_dir = os.path.join(state_folder, 'legislature')
    # Build the paths from the directory that was actually listed, so every
    # stored path points at an existing file.
    state2files[state] = [
        os.path.join(legislature_dir, filename)
        for filename in os.listdir(legislature_dir)
        if filename.endswith(".yml")
    ]

In [21]:
url_pattern = 'https://data.openstates.org/people/current/{}.csv'

# Download one CSV per state key and tag every row with that key.
datas = []
for st in state2files.keys():
    url = url_pattern.format(st)
    # Bound the wait so one stalled request cannot hang the notebook.
    r = requests.get(url, timeout=60)
    # Fail loudly on HTTP errors instead of parsing an error page as CSV.
    r.raise_for_status()
    tmp_df = pd.read_csv(io.BytesIO(r.content), encoding='utf8')
    tmp_df['state'] = st
    datas.append(tmp_df)

In [22]:
# Stack the per-state frames collected in datas into one table (each frame keeps its own index).
people_df = pd.concat(datas)

In [23]:
def get_district_from_people(row):
    """Return the ``state__chamber__district`` key for one people row.

    Reads ``row['state']``, ``row['current_chamber']`` and
    ``row['current_district']`` (the district is converted with ``str``)
    and joins them with double underscores.
    """
    parts = (row['state'], row['current_chamber'], str(row['current_district']))
    return "{}__{}__{}".format(*parts)

# Row-wise: compute the join key used to match people to districts.
people_df['oen_district'] = people_df.apply(get_district_from_people, axis=1)

In [24]:
# Raw GitHub URLs of the state house / state senate elected-officials roster CSVs.
house_mapping_csv_url = 'https://raw.githubusercontent.com/democrats/data/master/elected-officials-roster/state_house_elected_officials.csv'
senate_mapping_csv_url = 'https://raw.githubusercontent.com/democrats/data/master/elected-officials-roster/state_senate_elected_officials.csv'

In [25]:
# Download both roster CSVs. Each request is bounded by a timeout and checked
# for HTTP errors, so an error page is never parsed as CSV data.
r = requests.get(house_mapping_csv_url, timeout=60)
r.raise_for_status()
house_mapping_df = pd.read_csv(io.BytesIO(r.content), encoding='utf8')

r = requests.get(senate_mapping_csv_url, timeout=60)
r.raise_for_status()
senate_mapping_df = pd.read_csv(io.BytesIO(r.content), encoding='utf8')

# Stack the house and senate rosters into one mapping table.
state_mapping_df = pd.concat([house_mapping_df, senate_mapping_df])

In [26]:
# Prefix four-character geoids with "0" (presumably restoring a dropped
# leading zero -- confirm against the roster source); others pass through.
state_mapping_df['complete_geoid'] = state_mapping_df['geoid'].apply(
    lambda geoid: geoid if len(geoid) != 4 else "0" + geoid
)

In [27]:
# Lookup from completed geoid to the roster's district value (a later
# duplicate geoid overwrites an earlier one).
geoid2district = {
    geoid: district
    for geoid, district in zip(state_mapping_df['complete_geoid'], state_mapping_df['district'])
}

In [28]:
# Translate each geoid to its roster district; geoids without an entry yield None.
geo_df['district'] = geo_df['geoid'].map(geoid2district.get)

In [29]:
def get_oen_district_from_geo(row):
    """Return the ``state__chamber__district`` key for one district row.

    Reads ``row['state']``, ``row['chamber']`` and ``row['district']``
    (the district is converted with ``str``) and joins them with double
    underscores.
    """
    parts = (row['state'], row['chamber'], str(row['district']))
    return "{}__{}__{}".format(*parts)

# Row-wise: compute the join key used to match districts to people.
geo_df['oen_district'] = geo_df.apply(get_oen_district_from_geo, axis=1)

In [30]:
# Join districts with people on oen_district. Right-hand columns whose names
# collide are suffixed "_DROP" and then filtered out, so the district-side
# columns win.
state_df = (
    geo_df
    .merge(people_df, on='oen_district', suffixes=('', '_DROP'))
    .filter(regex='^(?!.*_DROP)')
)

In [31]:
# Preview the merged district/legislator table.
state_df.head()

Unnamed: 0,geoid,district_num,state,aclu_id,chamber,geometry,name,district,oen_district,id,...,capitol_address,capitol_voice,capitol_fax,district_address,district_voice,district_fax,twitter,youtube,instagram,facebook
0,50ECO,ECO,vt,aclu/elections-api/state_leg:6904,lower,"{'type': 'Polygon', 'coordinates': [[[-72.1141...",Essex-Caledonia-Orleans State House District,Essex-Caledonia-Orleans,vt__lower__Essex-Caledonia-Orleans,ocd-person/dab673f8-4a06-4563-b1ed-6180675cd37c,...,Vermont State House;115 State Street;Montpelie...,,,"P.O. Box 397;Island Pond, VT 05846",802-467-8338,,,,,
1,50E-C,E-C,vt,aclu/elections-api/state_leg:6903,lower,"{'type': 'Polygon', 'coordinates': [[[-71.9581...",Essex-Caledonia State House District,Essex-Caledonia,vt__lower__Essex-Caledonia,ocd-person/3ccfc509-4351-477a-869a-849eb6f10a5d,...,Vermont State House;115 State Street;Montpelie...,,,"115 State St;Montpelier, VT 05633",802-535-4704,,,,,
2,50BEN,BEN,vt,aclu/elections-api/state_leg:6967,upper,"{'type': 'Polygon', 'coordinates': [[[-73.2909...",Bennington State Senate District,Bennington,vt__upper__Bennington,ocd-person/5e6451d0-e410-48e5-854a-bcd0a0a5b3c0,...,Vermont State House;115 State Street;Montpelie...,,,"1292 West Rd.;Bennington, VT 05201",,,,,,
3,50BEN,BEN,vt,aclu/elections-api/state_leg:6967,upper,"{'type': 'Polygon', 'coordinates': [[[-73.2909...",Bennington State Senate District,Bennington,vt__upper__Bennington,ocd-person/ffba43ff-20d1-492d-a5f7-22855c1fbba4,...,Vermont State House;115 State Street;Montpelie...,,,"343 Matteson Rd.;North Bennington, VT 05257",,,,,,
4,50WAC,WAC,vt,aclu/elections-api/state_leg:6953,lower,"{'type': 'Polygon', 'coordinates': [[[-73.0326...",Washington-Chittenden State House District,Washington-Chittenden,vt__lower__Washington-Chittenden,ocd-person/f68d3f17-24ca-4b87-8837-6a30737d21d8,...,Vermont State House;115 State Street;Montpelie...,,,"115 State St.;Montpelier, VT 05633",,,,,,


In [32]:
# Lightweight export: every column except geometry, without the index.
state_lite_df = state_df.drop(columns='geometry')
state_lite_df.to_csv("../../data/state_legsislature_data_lite.csv", index=False)

uncomment the following line to save all columns (including geometry)

In [33]:
# state_df.to_csv("../../data/state_legsislature_data.csv", index=False)

## 3. Geometry

``` I have a pen, I have pineapple / Uh! Pineapple-Pen!```

- I have U.S. Congressional Districts, I have state legislator districts
- Uh! Let's see how they overlap

In [34]:
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.geometry import shape

In [35]:
def get_polygon(x):
    """Build a shapely ``MultiPolygon`` from a geometry mapping.

    The nesting depth of ``x['coordinates']`` is not assumed. Each level is
    first tried as input to ``Polygon``; when that fails, its elements are
    tried recursively. Every level that ``Polygon`` accepts becomes one
    member of the result.

    Parameters
    ----------
    x : mapping
        Must provide a ``'coordinates'`` entry.

    Returns
    -------
    MultiPolygon
        All polygons that could be constructed, in traversal order.
    """
    collect = []

    def make_polygons(coordinates):
        try:
            polygon = Polygon(coordinates)
        except Exception:
            # Not directly usable as a polygon: descend one nesting level.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            for nested in coordinates:
                make_polygons(nested)
        else:
            collect.append(polygon)

    make_polygons(x['coordinates'])
    return MultiPolygon(collect)

In [36]:
# Check which class the stored district geometries have.
type(house_legislator_df.geometry.iloc[3])

geojson.geometry.MultiPolygon

In [37]:
# Minimum intersection area, in the squared units of the geometry coordinates
# (presumably degrees -- confirm), for two districts to count as overlapping.
# The threshold filters out districts whose boundaries merely touch.
MIN_OVERLAP_AREA = 0.001

# congressional_district code -> list of overlapping state district keys.
records = {}

house_with_geometry_df = house_legislator_df[house_legislator_df.geometry.notnull()]

for _, congress_person in house_with_geometry_df.iterrows():
    congressional_district = congress_person['congressional_district']
    if congressional_district in records:
        # Several legislators can share one district code, and the geometry
        # was looked up from that code, so the result would be identical.
        continue

    state = congress_person['state'].lower()
    overlaps = records[congressional_district] = []

    state_subset_df = state_df[state_df.state == state]
    map_obj = shape(congress_person['geometry'])
    for _, state_person in state_subset_df.iterrows():
        oen_district = state_person['oen_district']
        if oen_district in overlaps:
            # state_df has one row per legislator, so a district with several
            # legislators appears repeatedly; list each district only once.
            continue

        state_map_obj = shape(state_person['geometry'])
        if (map_obj.intersects(state_map_obj)
                and map_obj.intersection(state_map_obj).area > MIN_OVERLAP_AREA):
            overlaps.append(oen_district)

In [38]:
# One [district code, comma-separated overlapping districts] pair per entry of records.
records_list = [
    [district_code, ", ".join(overlapping)]
    for district_code, overlapping in records.items()
]

In [39]:
# Tabulate the overlap pairs built in records_list.
overlap_columns = ['congressional_district', 'overlapping_oen_district']
df = pd.DataFrame.from_records(records_list, columns=overlap_columns)

In [40]:
# Preview the district-overlap table.
df.head()

Unnamed: 0,congressional_district,overlapping_oen_district
0,5000,"vt__lower__Essex-Caledonia-Orleans, vt__lower_..."
1,5108,"va__lower__49, va__lower__48, va__upper__32, v..."
2,5107,"va__upper__15, va__lower__66, va__upper__4, va..."
3,5102,"va__lower__82, va__upper__14, va__lower__83, v..."
4,5105,"va__lower__22, va__upper__23, va__lower__14, v..."


In [41]:
# Persist the overlap table.
# NOTE(review): unlike the other CSV exports in this notebook, no index=False is
# passed here, so the index is written as an extra column -- confirm that is intended.
df.to_csv('../../data/overlapping.csv')