In [195]:
import os
import geojson

In [196]:
d = '../../third_parties/elections-api/data/state_leg/'
state_folders = [os.path.join(d, o) for o in os.listdir(d) 
                    if os.path.isdir(os.path.join(d,o))]

In [197]:
state2files = {}
records = []

for state_folder in state_folders:
    state = state_folder.split('/')[-1]
    state2files[state] = []
    for filename in os.listdir(state_folder):
        if filename.endswith(".geojson") and not filename.endswith('display.geojson'):
            state2files[state].append(os.path.join(state_folder, filename))
            with open(os.path.join(state_folder, filename)) as f:
                data = geojson.load(f)
                chamber = data['properties']['chamber']
                district_num = data['properties']['district_num']
                geoid = data['properties']['geoid']
                state = data['properties']['state']
                aclu_id = data['properties']['aclu_id']
                name = data['properties']['name']
                records.append([geoid, district_num, state, aclu_id, chamber, data['geometry'], name])

In [198]:
geo_df = pd.DataFrame.from_records(records, columns=['geoid', 
                                                     'district_num', 
                                                     'state', 
                                                     'aclu_id', 
                                                     'chamber', 
                                                     'geometry',
                                                     'name'])

In [199]:
state2files.keys()

dict_keys(['vt', 'va', 'sd', 'sc', 'ut', 'ga', 'ms', 'mt', 'mo', 'ma', 'ak', 'ky', 'al', 'nh', 'mn', 'mi', 'ok', 'in', 'co', 'ca', 'ia', 'ct', 'fl', 'wv', 'ri', 'wy', 'tx', 'pr', 'pa', 'nc', 'nd', 'nm', 'nj', 'me', 'ar', 'nv', 'dc', 'md', 'ks', 'ne', 'hi', 'de', 'az', 'ny', 'id', 'oh', 'or', 'il', 'la', 'wi', 'wa', 'tn'])

In [200]:
pd.set_option('display.max_rows', 300)

In [201]:
people_df.shape

(7461, 27)

In [202]:
d = '../../third_parties/people/data/'
state_folders = [os.path.join(d, o) for o in os.listdir(d) 
                    if os.path.isdir(os.path.join(d,o))]

In [203]:
state2files = {}

for state_folder in state_folders:
    state = state_folder.split('/')[-1]
    state2files[state] = []
    for filename in os.listdir(os.path.join(state_folder, 'legislature')):
        if filename.endswith(".yml"):
            state2files[state].append(os.path.join(state_folder, filename))

In [204]:
import yaml
from tqdm import tqdm
import requests
import io
import pandas as pd

In [205]:
url_pattern = 'https://data.openstates.org/people/current/{}.csv'

datas = []
for st in state2files.keys():
    url = url_pattern.format(st)
    r = requests.get(url)
    tmp_df = pd.read_csv(io.BytesIO(r.content), encoding='utf8')
    tmp_df['state'] = st
    datas.append(tmp_df)

In [206]:
people_df = pd.concat(datas)

In [207]:
people_df.shape

(7461, 26)

In [209]:
def get_district_from_people(row):
    return "{}__{}__{}".format(row['state'], row['current_chamber'], str(row['current_district']))

people_df['oen_district'] = people_df.apply(lambda x: get_district_from_people(x), axis=1)

In [210]:
house_mapping_csv_url = 'https://raw.githubusercontent.com/democrats/data/master/elected-officials-roster/state_house_elected_officials.csv'
senate_mapping_csv_url = 'https://raw.githubusercontent.com/democrats/data/master/elected-officials-roster/state_senate_elected_officials.csv'

In [211]:
r = requests.get(house_mapping_csv_url)
house_mapping_df = pd.read_csv(io.BytesIO(r.content), encoding='utf8')

r = requests.get(senate_mapping_csv_url)
senate_mapping_df = pd.read_csv(io.BytesIO(r.content), encoding='utf8')

state_mapping_df = pd.concat([house_mapping_df, senate_mapping_df])

In [212]:
state_mapping_df['complete_geoid'] = state_mapping_df['geoid'].apply(lambda x: "0"+x if len(x)==4 else x)

In [213]:
geoid2district = dict(zip(state_mapping_df.complete_geoid, state_mapping_df.district))

In [214]:
def get_district_from_geoid(x):
    try:
        return geoid2district[x]
    except:
        return None

In [215]:
geo_df['district'] = geo_df['geoid'].map(lambda x: get_district_from_geoid(x))

In [216]:
def get_oen_district_from_geo(row):
    return "{}__{}__{}".format(row['state'], row['chamber'], str(row['district']))

geo_df['oen_district'] = geo_df.apply(lambda x: get_oen_district_from_geo(x), axis=1)

In [217]:
people_df

Unnamed: 0,id,name,current_party,current_district,current_chamber,given_name,family_name,gender,email,biography,...,capitol_fax,district_address,district_voice,district_fax,twitter,youtube,instagram,facebook,state,oen_district
0,ocd-person/24194dba-4d25-4b8c-be33-8ff50fcd8a8f,Alice M. Emmons,Democratic,Windsor-3-2,lower,Alice,Emmons,,aemmons61@hotmail.com,,...,,"318 Summer St.;Springfield, VT 05156",802-885-5893,,,,,,vt,vt__lower__Windsor-3-2
1,ocd-person/c28e9c1f-b40f-48ef-943f-dfe03317b7d3,Alice W. Nitka,Democratic,Windsor,upper,Alice,Nitka,,anitka@leg.state.vt.us,,...,,"P.O. Box 136;Ludlow, VT 05149",802-228-8432,,,,,,vt,vt__upper__Windsor
2,ocd-person/a366bc32-20d8-41a8-a2cf-939650b7a2c2,Alison Clarkson,Democratic,Windsor,upper,Alison,Clarkson,,AClarkson@leg.state.vt.us,,...,,"18 Golf Ave.;Woodstock, VT 05091",802-457-4627,,,,,,vt,vt__upper__Windsor
3,ocd-person/cd4ceb1c-8960-4b10-b6f8-bd0b8ae0c68c,Alyssa Black,Democratic,Chittenden-8-3,lower,,,,,,...,,"115 State St.;Montpelier, VT 05633",802-598-1026,,,,,,vt,vt__lower__Chittenden-8-3
4,ocd-person/f490d01d-80b7-40ae-9577-572bd1c8567a,Amy Sheldon,Democratic,Addison-1,lower,Amy,Sheldon,,asheldon@leg.state.vt.us,,...,,"P.O. Box 311;East Middlebury, VT 05740",802-377-1110,,,,,,vt,vt__lower__Addison-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,ocd-person/37db89c5-6814-450a-8259-5e6d1a046e4f,Tom Leatherwood,Republican,99,lower,Tom,Leatherwood,,rep.tom.leatherwood@capitol.tn.gov,,...,615-253-0294,,,,,,,,tn,tn__lower__99
128,ocd-person/8036dedc-9be1-4935-9a37-df3f95033110,Torrey C. Harris,Democratic,90,lower,Torrey,Harris,,rep.torrey.harris@capitol.tn.gov,,...,615-253-0292,,,,,,,,tn,tn__lower__90
129,ocd-person/ee804d8a-2f07-4271-83a7-0179f4c3445c,Vincent Dixie,Democratic,54,lower,Vincent,Dixie,,rep.vincent.dixie@capitol.tn.gov,,...,615-253-0361,,,,,,,,tn,tn__lower__54
130,ocd-person/5026d19f-d4ec-47b2-b106-f318a45e8585,William Lamberth,Republican,44,lower,William,Lamberth,,rep.william.lamberth@capitol.tn.gov,,...,615-253-0336,,,,,,,,tn,tn__lower__44


In [218]:
state_df = pd.merge(geo_df, people_df, on='oen_district', suffixes=('', '_DROP')).filter(regex='^(?!.*_DROP)')

In [219]:
state_df.to_csv("../../data/state_data.csv", index=False)