In [30]:
import pandas as pd
from congress import Congress
import time
import sys
import os
import geopandas
import numpy as np

In [2]:
# Prefer a key supplied through the CONGRESS_API_KEY environment variable so a
# real credential never has to be saved inside the notebook; 'XXXX' is kept as
# the placeholder fallback used when the variable is not set.
api_key = os.environ.get('CONGRESS_API_KEY', 'XXXX')
congress = Congress(api_key)

In [3]:
def get_house_data(congress_obj, start_congress, end_congress=116, delay=3):
    """Download House membership lists and write one CSV per congress.

    For every congress number from ``start_congress`` through
    ``end_congress`` (inclusive) this calls
    ``congress_obj.members.filter(chamber='house', congress=n)``, takes the
    ``'members'`` list of the first element of the result and writes it to
    ``../congress-data/house/house_N.csv`` (relative to the current working
    directory).

    Parameters
    ----------
    congress_obj : object
        API client exposing ``members.filter(chamber=..., congress=...)``.
    start_congress : int
        First congress number to fetch.
    end_congress : int, default 116
        Last congress number to fetch (inclusive).
    delay : float, default 3
        Seconds to sleep after each congress has been written.

    Raises
    ------
    KeyError
        If a member record lacks one of the required fields (everything
        except ``district``, ``at_large`` and ``geoid``).
    """
    columns = ['id',
               'first_name', 'last_name', 'dob',
               'gender', 'party', 'leadership_role',
               'govtrack_id', 'cspan_id', 'seniority',
               'total_votes', 'missed_votes', 'total_present',
               'state', 'district', 'at_large', 'geoid']
    out_dir = '../congress-data/house'

    for i in range(start_congress, end_congress + 1):
        chamber_data = congress_obj.members.filter(chamber='house', congress=i)
        members = chamber_data[0]['members']

        # Collect plain dicts and build the frame once: DataFrame.append was
        # quadratic in a loop and no longer exists in pandas >= 2.0.
        records = []
        for member in members:
            records.append({
                'id': member['id'],
                'first_name': member['first_name'],
                'last_name': member['last_name'],
                'dob': member['date_of_birth'],
                'gender': member['gender'],
                'party': member['party'],
                'leadership_role': member['leadership_role'],
                'govtrack_id': member['govtrack_id'],
                'cspan_id': member['cspan_id'],
                'seniority': member['seniority'],
                'total_votes': member['total_votes'],
                'missed_votes': member['missed_votes'],
                'total_present': member['total_present'],
                'state': member['state'],
                # These keys are not always present; missing ones become None.
                'district': member.get('district'),
                'at_large': member.get('at_large'),
                'geoid': member.get('geoid'),
            })

        # dtype=object keeps every value exactly as received (e.g. an integer
        # column containing None is not silently promoted to float).
        df = pd.DataFrame(records, columns=columns, dtype=object)

        os.makedirs(out_dir, exist_ok=True)
        df.to_csv('%s/house_%s.csv' % (out_dir, str(i)))
        if delay:
            time.sleep(delay)

In [4]:
def get_senate_data(congress_obj, start_congress, end_congress=116, delay=3):
    """Download Senate membership lists and write one CSV per congress.

    For every congress number from ``start_congress`` through
    ``end_congress`` (inclusive) this calls
    ``congress_obj.members.filter(chamber='senate', congress=n)``, takes the
    ``'members'`` list of the first element of the result and writes it to
    ``../congress-data/senate/senate_N.csv`` (relative to the current working
    directory).

    Parameters
    ----------
    congress_obj : object
        API client exposing ``members.filter(chamber=..., congress=...)``.
    start_congress : int
        First congress number to fetch.
    end_congress : int, default 116
        Last congress number to fetch (inclusive).
    delay : float, default 3
        Seconds to sleep after each congress has been written.

    Raises
    ------
    KeyError
        If a member record lacks any of the fields copied below.
    """
    columns = ['id',
               'first_name', 'last_name', 'dob',
               'gender', 'party', 'leadership_role',
               'govtrack_id', 'cspan_id', 'seniority',
               'total_votes', 'missed_votes', 'total_present',
               'state', 'senate_class']
    out_dir = '../congress-data/senate'

    for i in range(start_congress, end_congress + 1):
        chamber_data = congress_obj.members.filter(chamber='senate', congress=i)
        members = chamber_data[0]['members']

        # Collect plain dicts and build the frame once: DataFrame.append was
        # quadratic in a loop and no longer exists in pandas >= 2.0.
        records = [
            {
                'id': member['id'],
                'first_name': member['first_name'],
                'last_name': member['last_name'],
                'dob': member['date_of_birth'],
                'gender': member['gender'],
                'party': member['party'],
                'leadership_role': member['leadership_role'],
                'govtrack_id': member['govtrack_id'],
                'cspan_id': member['cspan_id'],
                'seniority': member['seniority'],
                'total_votes': member['total_votes'],
                'missed_votes': member['missed_votes'],
                'total_present': member['total_present'],
                'state': member['state'],
                'senate_class': member['senate_class'],
            }
            for member in members
        ]

        # dtype=object keeps every value exactly as received (e.g. an integer
        # column containing None is not silently promoted to float).
        df = pd.DataFrame(records, columns=columns, dtype=object)

        os.makedirs(out_dir, exist_ok=True)
        df.to_csv('%s/senate_%s.csv' % (out_dir, str(i)))
        if delay:
            time.sleep(delay)

In [5]:
def set_of_all_ids():
    """Collect every member id found in the saved house and senate CSVs.

    Reads each ``.csv`` file under ``../congress-data/house`` and
    ``../congress-data/senate`` (relative to the current working directory)
    and gathers the values of its ``id`` column.

    Returns
    -------
    set
        The distinct ``id`` values across all files.

    Raises
    ------
    FileNotFoundError
        If one of the two chamber directories does not exist.
    KeyError
        If a CSV file has no ``id`` column.
    """
    # os.path.join supplies the separator that plain concatenation with
    # os.getcwd() was missing (it produced '<cwd>../congress-data/...').
    base_dir = os.path.join(os.getcwd(), '..', 'congress-data')
    chambers = ['house', 'senate']
    id_set = set()
    for chamber in chambers:
        chamber_dir = os.path.join(base_dir, chamber)
        for fname in os.listdir(chamber_dir):
            # Skip anything that is not a data file (checkpoint folders,
            # OS metadata files, ...), which read_csv would choke on.
            if not fname.endswith('.csv'):
                continue
            df = pd.read_csv(os.path.join(chamber_dir, fname), index_col=0)
            id_set.update(df['id'].values)

    return id_set

In [6]:
def get_member_data(congress_obj, member_ids, delay=3):
    """Download each member's role history and write one CSV per member.

    For every id in ``member_ids`` this calls
    ``congress_obj.members.get(member_id)`` and writes one row per entry of
    the member's ``'roles'`` list to ``../congress-data/members/ID.csv``
    (relative to the current working directory), where ID is the ``'id'``
    field of the returned record. Member-level fields (name, date of birth,
    gender, current party, external ids) are repeated on every row.

    Parameters
    ----------
    congress_obj : object
        API client exposing ``members.get(member_id)``.
    member_ids : iterable
        Member ids to fetch, e.g. the result of ``set_of_all_ids()``.
    delay : float, default 3
        Seconds to sleep after each member has been written.

    Raises
    ------
    KeyError
        If a member or role record lacks one of the required fields. Only
        ``district``, ``senate_class``, ``votes_with_party_pct``,
        ``votes_against_party_pct``, ``bills_sponsored`` and
        ``bills_cosponsored`` are optional and default to None.
    """
    columns = ['id',
               'first_name',
               'last_name',
               'dob',
               'gender',
               'current_party',
               'govtrack_id',
               'cspan_id',
               'congress',
               'chamber',
               'state',
               'district',
               'senate_class',
               'party',
               'leadership_role',
               'seniority',
               'total_votes',
               'missed_votes',
               'total_present',
               'bills_sponsored',
               'bills_cosponsored',
               'votes_with_party_pct',
               # The comma after the next entry was missing, which fused it
               # with 'committees' into one bogus column name.
               'votes_against_party_pct',
               'committees',
               'subcommittees']
    out_dir = '../congress-data/members'

    for member_id in member_ids:
        member = congress_obj.members.get(member_id)

        mem_id = member['id']
        # Fields shared by every row written for this member.
        person = {
            'id': mem_id,
            'first_name': member['first_name'],
            'last_name': member['last_name'],
            'dob': member['date_of_birth'],
            'gender': member['gender'],
            'current_party': member['current_party'],
            'govtrack_id': member['govtrack_id'],
            'cspan_id': member['cspan_id'],
        }

        # Collect plain dicts and build the frame once: DataFrame.append was
        # quadratic in a loop and no longer exists in pandas >= 2.0.
        records = []
        for role in member['roles']:
            record = dict(person)
            record.update({
                'congress': role['congress'],
                'chamber': role['chamber'],
                'state': role['state'],
                'district': role.get('district'),
                'senate_class': role.get('senate_class'),
                'party': role['party'],
                'leadership_role': role['leadership_role'],
                'seniority': role['seniority'],
                'total_votes': role['total_votes'],
                'missed_votes': role['missed_votes'],
                'total_present': role['total_present'],
                'bills_sponsored': role.get('bills_sponsored'),
                'bills_cosponsored': role.get('bills_cosponsored'),
                'votes_with_party_pct': role.get('votes_with_party_pct'),
                'votes_against_party_pct': role.get('votes_against_party_pct'),
                # Only the number of (sub)committees is stored, not the lists.
                'committees': len(role['committees']),
                'subcommittees': len(role['subcommittees']),
            })
            records.append(record)

        # dtype=object keeps every value exactly as received (e.g. an integer
        # column containing None is not silently promoted to float).
        df = pd.DataFrame(records, columns=columns, dtype=object)

        os.makedirs(out_dir, exist_ok=True)
        df.to_csv('%s/%s.csv' % (out_dir, str(mem_id)))
        if delay:
            time.sleep(delay)
        

In [34]:
def add_state_abbrevs(df):
    """Attach a ``'STATE ABBREV'`` column derived from ``df['STATENAME']``.

    Each value of the ``STATENAME`` column is looked up in a fixed table of
    the 50 states plus ``'District Of Columbia'`` and replaced by its
    two-letter code. The frame is modified in place and also returned.

    Raises ``KeyError`` if ``STATENAME`` is missing or contains a name that
    is not in the table.
    """
    abbrev_by_name = {
        'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
        'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT',
        'Delaware': 'DE', 'District Of Columbia': 'DC', 'Florida': 'FL',
        'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
        'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY',
        'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
        'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN',
        'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
        'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH',
        'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY',
        'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
        'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA',
        'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD',
        'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT',
        'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV',
        'Wisconsin': 'WI', 'Wyoming': 'WY',
    }

    state_names = df['STATENAME'].values

    # Direct item lookup (not .get) so an unknown name raises KeyError.
    codes = list(map(abbrev_by_name.__getitem__, state_names))
    df['STATE ABBREV'] = np.array(codes)

    return df