In [472]:
# importing the requests library
import requests
import json
import numpy as np
from functools import reduce
from tqdm import *

In [462]:
def get_result(url):
    # defining a params dict for the parameters to be sent to the API
    header = { 'X-API-KEY':"gXhJxaWsNLUTfL9UeGwak4hf1F8fcunI7AR7HfCV"}
    # sending get request and saving the response as response object
    r = requests.get(url = url, headers = header)
    # extracting data in json format
    try:
        data = r.json()['results']
    except Exception:
        print("Error getting data")
        return None
    return data

def generate_urls_recent_votes(number):
    urls = []
    offset = 0
    for i in range(0, number):
        url = "https://api.propublica.org/congress/v1/house/votes/recent.json?offset={}".format(i * 20)
        urls.append(url)
    return np.array(urls)

In [463]:
# GET BILL URLS
urls = generate_urls_recent_votes(500) # 10000 BILLS
urls[:-10]

array(['https://api.propublica.org/congress/v1/house/votes/recent.json?offset=0',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=20',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=40',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=60',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=80',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=100',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=120',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=140',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=160',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=180',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=200',
       'https://api.propublica.org/congress/v1/house/votes/recent.json?offset=220'

In [464]:
recent_votes_results = [get_result(url) for url in urls]
removed_nones_recent_votes_results = [x for x in recent_votes_results if x is not None]
recent_votes_results_flat = [vote for votes in removed_nones_recent_votes_results for vote in votes['votes']]

In [476]:
# BILLS INFORMATION TABLE

def get_bill(vote):
    bill_uri = vote['bill']['api_uri']
    if bill_uri is None:
        return None
    bill = get_result(bill_uri)[0]
    if bill is not None:
        return bill

In [477]:
# PROCESS VOTES

def process_vote(vote):    
    bill = get_bill(vote)
    if bill is None:
        return None, None
    
    bill_slug = bill['bill_slug']
    bill_summary = bill['summary']
    bill_date = vote['date']
    bill_type = bill['bill_type']
    congress = bill['congress']
    bill_uri = bill['bill_uri']
    subjects = bill['primary_subject']
    
    cols = ['bill_slug', 'summary', 'date', 'bill_type', 'congress', 'bill_uri', 'subjects']
    values = [[bill_slug, bill_summary,bill_date,bill_type, congress, bill_uri, subjects]]
    bill_row_df = pd.DataFrame(values, columns=cols)
    
    actual_vote = get_result(vote['vote_uri'])
    positions = actual_vote['votes']['vote']['positions']
    
    vote_df = pd.DataFrame(positions)
    vote_df = vote_df.rename(columns={'vote_position': bill_slug}).drop('dw_nominate', 1)
    return vote_df, bill_row_df

In [478]:
# HACK TO CREATE DUMMY TABLE - SKETCH
vote_df, bill_row = process_vote(recent_votes_results_flat[0])
vote_df.drop(vote_df.columns[len(vote_df.columns)-1], axis=1, inplace=True)
vote_df 

Unnamed: 0,district,member_id,name,party,state
0,5,A000374,Ralph Abraham,R,LA
1,12,A000370,Alma Adams,D,NC
2,4,A000055,Robert B. Aderholt,R,AL
3,31,A000371,Pete Aguilar,D,CA
4,12,A000372,Rick Allen,R,GA
5,3,A000367,Justin Amash,R,MI
6,2,A000369,Mark Amodei,R,NV
7,19,A000375,Jodey Arrington,R,TX
8,36,B001291,Brian Babin,R,TX
9,2,B001298,Don Bacon,R,NE


In [480]:
votes_df = vote_df
bills_df = pd.DataFrame()
count = 0
for vote in tqdm(recent_votes_results_flat):
    try:
        vote_df, bill_row = process_vote(vote)
        votes_df = pd.merge(votes_df, vote_df, on=['member_id', 'name', 'district', 'state', 'party'], how='outer')
        bills_df = bills_df.append(bill_row)
    except:
        continue


  0%|          | 0/9600 [00:00<?, ?it/s][A
  0%|          | 1/9600 [00:00<1:05:34,  2.44it/s][A
  0%|          | 2/9600 [00:00<58:20,  2.74it/s]  [A
  0%|          | 3/9600 [00:01<58:37,  2.73it/s][A
  0%|          | 4/9600 [00:01<59:42,  2.68it/s][A
  0%|          | 5/9600 [00:01<57:39,  2.77it/s][A
  0%|          | 6/9600 [00:02<57:52,  2.76it/s][A
  0%|          | 7/9600 [00:02<1:01:11,  2.61it/s][A
  0%|          | 8/9600 [00:02<1:00:35,  2.64it/s][A
  0%|          | 9/9600 [00:03<55:17,  2.89it/s]  [A
  0%|          | 11/9600 [00:03<57:18,  2.79it/s][A
  0%|          | 12/9600 [00:04<1:12:47,  2.20it/s][A
  0%|          | 14/9600 [00:05<1:06:11,  2.41it/s][A
  0%|          | 15/9600 [00:05<1:17:34,  2.06it/s][A
  0%|          | 16/9600 [00:06<1:10:44,  2.26it/s][A
  0%|          | 17/9600 [00:06<1:07:59,  2.35it/s][A
  0%|          | 18/9600 [00:07<1:06:32,  2.40it/s][A
  0%|          | 19/9600 [00:07<59:22,  2.69it/s]  [A
  0%|          | 20/9600 [00:07<1:11:26

In [469]:
votes_df

Unnamed: 0,district,member_id,name,party,state
0,5,A000374,Ralph Abraham,R,LA
1,12,A000370,Alma Adams,D,NC
2,4,A000055,Robert B. Aderholt,R,AL
3,31,A000371,Pete Aguilar,D,CA
4,12,A000372,Rick Allen,R,GA
5,3,A000367,Justin Amash,R,MI
6,2,A000369,Mark Amodei,R,NV
7,19,A000375,Jodey Arrington,R,TX
8,36,B001291,Brian Babin,R,TX
9,2,B001298,Don Bacon,R,NE


In [470]:
bills_df.loc[bills_df['bill_slug'].isin(['hres619'])]

KeyError: 'bill_slug'

In [471]:
votes_df.to_csv('./voting_data_2000.csv')
bills_df.to_csv('./bill_data_2000.csv')

In [317]:
# FROM HERE ON ONLY TRASH

ob = get_result(recent_votes_results_flat[0]['bill']['api_uri'])

In [328]:
ob[0].keys()

dict_keys(['bill_id', 'bill_slug', 'congress', 'bill', 'bill_type', 'number', 'bill_uri', 'title', 'short_title', 'sponsor_title', 'sponsor', 'sponsor_id', 'sponsor_uri', 'sponsor_party', 'sponsor_state', 'gpo_pdf_uri', 'congressdotgov_url', 'govtrack_url', 'introduced_date', 'active', 'last_vote', 'house_passage', 'senate_passage', 'enacted', 'vetoed', 'cosponsors', 'cosponsors_by_party', 'withdrawn_cosponsors', 'primary_subject', 'committees', 'committee_codes', 'subcommittee_codes', 'latest_major_action_date', 'latest_major_action', 'house_passage_vote', 'senate_passage_vote', 'summary', 'summary_short', 'versions', 'actions', 'votes'])

In [230]:
# REPRESENTATIVES VS VOTES TABLE

def get_vote_table(vote):
    actual_vote = get_result(vote['vote_uri'])
    positions = actual_vote['votes']['vote']['positions']
    bill_id = actual_vote['votes']['vote']['bill']['bill_id']
    vote_df = pd.DataFrame(positions)
    vote_df = vote_df.rename(columns={'vote_position': bill_id}).drop('dw_nominate', 1)
    return vote_df

frames = [get_vote_table(vote) for vote in recent_votes_results_flat]

In [239]:
frames[7]

Unnamed: 0,district,member_id,name,party,state,hr2874-115
0,5,A000374,Ralph Abraham,R,LA,No
1,12,A000370,Alma Adams,D,NC,No
2,4,A000055,Robert B. Aderholt,R,AL,Yes
3,31,A000371,Pete Aguilar,D,CA,No
4,12,A000372,Rick Allen,R,GA,Yes
5,3,A000367,Justin Amash,R,MI,No
6,2,A000369,Mark Amodei,R,NV,Yes
7,19,A000375,Jodey Arrington,R,TX,Yes
8,36,B001291,Brian Babin,R,TX,Yes
9,2,B001298,Don Bacon,R,NE,Yes


In [242]:
def merge_votes(votes_table_list):
    return reduce(lambda left,right: pd.merge(left,right,on=['member_id', 'name', 'district', 'state', 'party'], how='outer'), votes_table_list)

final_df = merge_votes(frames)
final_df.to_csv('./initial_data.csv')

In [253]:
bill_slugs = [bill['bill_slug'] for bill in bills]
bill_slugs
bills

[{'active': False,
  'bill_id': 'hr5858-113',
  'bill_slug': 'hr5858',
  'bill_type': 'hr',
  'bill_uri': 'https://api.propublica.org/congress/v1/113/bills/hr5858.json',
  'committee_codes': ['HSJU'],
  'committees': 'House Judiciary Committee',
  'congressdotgov_url': 'https://www.congress.gov/bill/113th-congress/house-bill/5858',
  'cosponsors': 0,
  'cosponsors_by_party': {},
  'enacted': None,
  'govtrack_url': 'https://www.govtrack.us/congress/bills/113/hr5858',
  'gpo_pdf_uri': None,
  'house_passage': None,
  'introduced_date': '2014-12-11',
  'last_vote': None,
  'latest_major_action': 'Referred to the House Committee on the Judiciary.',
  'latest_major_action_date': '2014-12-11',
  'number': 'H.R.5858',
  'primary_subject': 'Crime and Law Enforcement',
  'senate_passage': None,
  'short_title': 'Build TRUST Act of 2014',
  'sponsor_id': 'J000032',
  'sponsor_name': 'Sheila Jackson Lee',
  'sponsor_party': 'D',
  'sponsor_state': 'TX',
  'sponsor_title': 'Rep.',
  'sponsor_uri'