In [None]:
import pandas as pd
import numpy as np
import requests
import json
import os
import re

# Root directory for everything this notebook downloads. Paths elsewhere in the
# notebook are built by plain string concatenation, so the trailing slash is required.
data_folder = 'data/'

In [None]:
# Load the API key from a local file so it is never hard-coded in the notebook.
# The value is stripped: a trailing newline left by an editor would otherwise end up
# inside the HTTP header value and make the request invalid.
with open('api_key.txt') as f:
    API_KEY = f.read().strip()

# Header sent with every request in this notebook.
HEADERS = {"X-Api-Key": API_KEY}

In [None]:
def create_folder(path):
    """Create the directory `path` (and any missing parents) if it does not exist yet.

    Calling it again for an existing directory is a no-op.

    Raises:
        FileExistsError: if `path` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(path, exist_ok=True)

# Build the output directory tree: the root first, then one sub-folder per dataset.
create_folder(data_folder)
for subfolder in ('commitees', 'senate_members', 'votes', 'lobby', 'house_members'):
    create_folder(data_folder + subfolder)

In [None]:
# Fetch a single page of recent Senate votes (offset 460) and pretty-print the decoded JSON.
req_str = "https://api.propublica.org/congress/v1/senate/votes/recent.json?offset={offset}"

sample_url = req_str.format(offset=460)
res = requests.get(sample_url, headers=HEADERS)

pretty_payload = json.dumps(res.json(), indent=4)
print(pretty_payload)

# Fetching senate members (congress 80 to 115)

In [None]:
# The chamber is a path segment of the members endpoint. The files written here are the
# Senate rosters read back later in this notebook, so the Senate (not the House) is requested.
u = "https://api.propublica.org/congress/v1/{congress}/senate/members.json"

# Congresses 80 through 115 inclusive: this is the range the roster-loading cell further
# down expects to find on disk.
for i in range(80, 115 + 1):
    results = requests.get(url=u.format(congress=i), headers=HEADERS)
    # The member list sits under results[0]['members'] in the response body.
    df = pd.io.json.json_normalize(results.json()['results'][0]['members'])
    df.to_csv(data_folder + "senate_members/senate_members_{congress}.csv".format(congress=i))

In [None]:
# Preview the first rows of `df`; when the cells are run in order this is the member
# table of the last congress processed by the loop in the previous cell.
df.head()

# Fetching vote data from senate

In [None]:
# Request config
u = "https://api.propublica.org/congress/v1/senate/votes/recent.json?offset={offset}"

# Offset increment between two consecutive requests.
PAGE_SIZE = 20
# Give up after this many failed pages in a row, so a persistent API error
# (bad key, rate limit, outage) cannot keep the loop running forever.
MAX_CONSECUTIVE_ERRORS = 5

votes_list = []
data_available = True
request_offset = 0
consecutive_errors = 0

while data_available:
    res = requests.get(url=u.format(offset=request_offset), headers=HEADERS)
    payload = res.json()  # decode the body once instead of on every access

    if 'results' in payload:
        consecutive_errors = 0
        # Paging ends with the first page that reports zero results.
        data_available = int(payload['results']['num_results']) > 0

        if data_available:
            votes = payload['results']['votes']
            votes_list.append(pd.io.json.json_normalize(votes, record_prefix=True))
    else:
        # Best effort: report the failed page and move on to the next offset.
        print(str(request_offset) + ' - Error: ' + str(payload.get('error', payload)))
        consecutive_errors += 1
        if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
            print(str(request_offset) + ' - Error: too many consecutive failures, stopping')
            break

    request_offset += PAGE_SIZE

In [None]:
# Stack every downloaded page into one table (columns sorted) and write it to disk.
votes_csv_path = data_folder + "votes/votes.csv"
df = pd.concat(votes_list, sort=True)
df.to_csv(votes_csv_path)

In [None]:
# Preview the first rows of `df`; when the cells are run in order this is the
# concatenated vote table built in the previous cell.
df.head()

# Fetching committees from the Senate

In [None]:
# Request config
u = "https://api.propublica.org/congress/v1/{congress}/senate/committees.json"

# For each congress: list the committees, then download the current members of every
# sub-committee and store them all in one CSV per congress.
for i in range(114, 115 + 1):
    congress_dir = data_folder + 'commitees/commitees_' + str(i)
    create_folder(congress_dir)

    results_commitee = requests.get(url=u.format(congress=i), headers=HEADERS)
    df = pd.io.json.json_normalize(results_commitee.json()['results'][0]['committees'])

    df_list = []
    # Walk committee ids and their sub-committee lists together instead of
    # re-filtering the whole frame once per committee.
    for commitee_id, subcommittees in zip(df['id'], df['subcommittees']):
        for subcommitee in subcommittees:
            results_sub = requests.get(url=subcommitee['api_uri'], headers=HEADERS)
            sub_payload = results_sub.json()  # decode once
            # Sub-committees whose response carries no 'results' are skipped.
            if 'results' in sub_payload:
                df_sub = pd.io.json.json_normalize(sub_payload['results'][0]['current_members'])
                df_sub['subcomitee'] = subcommitee['id']
                df_sub['comitee'] = commitee_id
                df_list.append(df_sub)

    if len(df_list) > 0:
        df_merged = pd.concat(df_list)
        # The path depends only on the congress; the previous format call also passed
        # an undefined name (`elem`), which raised NameError before anything was saved.
        df_merged.to_csv(congress_dir + '/members.csv')

# Fetching Lobbying from senate

In [None]:
# Pattern: a parenthesised span that contains a double-quoted run of word characters/spaces.
# Written as a raw string so `\(`, `\w` and `\)` are not treated as (invalid) string escapes;
# the compiled pattern text is unchanged.
# NOTE(review): the result is not bound to a name or used by any visible cell.
re.compile(r'\((.*)"([\w ]+)"(.*)\)')

In [None]:
# Request config
u = "https://api.propublica.org/congress/v1/lobbying/latest.json?offset={offset}"

# Offset increment between two consecutive requests.
PAGE_SIZE = 20
# Give up after this many failed pages in a row, so a persistent failure cannot
# keep the loop running forever.
MAX_CONSECUTIVE_ERRORS = 10

# Kept under the name `votes_list` because the following cell concatenates that variable.
votes_list = []
data_available = True
request_offset = 0
consecutive_errors = 0

while data_available:
    res = requests.get(url=u.format(offset=request_offset), headers=HEADERS)
    # Clean-up applied to the raw body before decoding: drop \" sequences, drop every
    # remaining backslash, then turn `")` into `)` and `("` into `(`.
    # NOTE(review): presumably a workaround for badly escaped quotes in the feed — confirm.
    j = res.text.replace("\\\"", "").replace("\\", "").replace("\")", ")").replace("(\"", "(")

    page_ok = False
    try:
        payload = json.loads(j)
        if 'results' in payload:
            result = payload['results'][0]
            # Paging ends with the first page that reports zero results.
            data_available = int(result['num_results']) > 0

            if data_available:
                votes = result['lobbying_representations']
                votes_list.append(pd.io.json.json_normalize(votes, record_prefix=True))
            page_ok = True
        else:
            print(str(request_offset) + ' - Error: ' + str(payload.get('error', payload)))
    except ValueError:
        # json.JSONDecodeError is a ValueError: the page is skipped, as before.
        print(str(request_offset) + ' - Error: Json File badly encoded')
    except (KeyError, IndexError, TypeError, AttributeError) as exc:
        # Decodable JSON with an unexpected layout: still best effort, but reported as
        # what it is. Anything else (e.g. KeyboardInterrupt) now propagates.
        print(str(request_offset) + ' - Error: unexpected response layout (' + repr(exc) + ')')

    consecutive_errors = 0 if page_ok else consecutive_errors + 1
    if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
        print(str(request_offset) + ' - Error: too many consecutive failures, stopping')
        break

    request_offset += PAGE_SIZE

In [None]:
# Stack every downloaded page into one table (columns sorted) and write it to disk.
lobby_csv_path = data_folder + "lobby/lobby.csv"
df = pd.concat(votes_list, sort=True)
df.to_csv(lobby_csv_path)

In [None]:
# Preview the first rows of `df`; when the cells are run in order this is the
# concatenated lobbying table built in the previous cell.
df.head()

# Getting voting position by member

In [None]:
# Load every per-congress roster CSV (congress 115 down to 80) and tag each row
# with the congress it came from.
senator_frames = []
for i in range(115, 80 - 1, -1):
    df = pd.read_csv(data_folder + "senate_members/senate_members_{congress}.csv".format(congress=i))
    df['congress'] = i
    senator_frames.append(df)

# Concatenate once at the end: growing the frame inside the loop re-copies all
# previously loaded rows on every iteration.
raw_senators = pd.concat(senator_frames, sort=False)

# Distinct member ids across all loaded congresses.
senators_id = raw_senators['id'].unique()

In [None]:
u = "https://api.propublica.org/congress/v1/members/{member_id}/votes.json?offset={offset}"

# Only every SHARD_COUNT-th id, starting at index SHARD_INDEX, is processed by this run.
# NOTE(review): presumably the id list is split across several runs — confirm.
# Set SHARD_INDEX = 0 and SHARD_COUNT = 1 to process every id.
SHARD_INDEX = 2
SHARD_COUNT = 4
# Offset increment between two consecutive requests.
PAGE_SIZE = 20

# Files already present in the votes folder; members with a saved CSV are skipped,
# which makes the cell resumable.
completed_ids = set(os.listdir(data_folder + "votes"))

for senator_id in senators_id[SHARD_INDEX::SHARD_COUNT]:

    if "votes_{id}.csv".format(id=senator_id) in completed_ids:
        continue

    error_raised = False
    votes_list = []
    data_available = True
    request_offset = 0

    while data_available:
        res = requests.get(url=u.format(member_id=senator_id, offset=request_offset), headers=HEADERS)

        # Check the status before touching the body: an error response is not guaranteed
        # to be JSON, and decoding it first would crash instead of taking this path.
        if res.status_code != 200:
            print(str(res.status_code))
            error_raised = True
            break

        # Newlines are replaced by spaces before decoding.
        # NOTE(review): presumably some responses contain raw newlines inside strings — confirm.
        jObj = json.loads(res.text.replace('\n', ' '))
        # Paging ends with the first page that reports zero results.
        data_available = int(jObj['results'][0]['num_results']) > 0

        if data_available:
            print(str(senator_id) + " offset: " + str(request_offset), end='\r')
            votes = jObj['results'][0]['votes']
            votes_list.append(pd.io.json.json_normalize(votes, record_prefix=True))

        request_offset += PAGE_SIZE

    # Nothing is written for a member whose download failed part-way, so the next run
    # retries that member from scratch.
    if not error_raised and len(votes_list) > 0:
        df = pd.concat(votes_list, sort=False)
        df.to_csv(data_folder + "votes/votes_{id}.csv".format(id=senator_id), index=False)
