In [1]:
import os
import pandas as pd
import numpy as np
import requests

import pickle

import matplotlib.pyplot as plt
%matplotlib inline 

In [2]:
# API key sent as the X-API-Key header; read from the SENATORS environment
# variable and left as None when that variable is not set.
API_KEY = os.environ.get('SENATORS')
# Base directory for the data files read and written by the cells below.
DATA_PATH = '../data/all-votes'

In [3]:
# Calendar range to query: every month (1-12) of every year from 1997 through 2018.
years = np.arange(1997, 2019)
months = np.arange(1, 13)

## Get general information about all laws voted on in the last 20 years

In [4]:
# Download one summary record per Senate roll-call vote, month by month, and
# cache the table as CSV. The download only runs when the CSV is missing, so
# the notebook can be re-run top to bottom without hitting the API again;
# delete the file to force a refresh.
laws_csv = DATA_PATH + '/laws_20years.csv'

if os.path.exists(laws_csv):
    df = pd.read_csv(laws_csv)
else:
    laws = []
    for y in years:
        for m in months:
            url = 'https://api.propublica.org/congress/v1/senate/votes/{}/{}.json'.format(y, m)
            # The request must actually be issued here: without it `req` is
            # undefined (or a stale response left over from another cell).
            req = requests.get(url, headers={'X-API-Key': API_KEY}).json()
            for r in req['results']['votes']:
                laws.append({
                    'year': y,
                    'month': m,
                    'congress': r['congress'],
                    'vote_uri': r['vote_uri'],
                    # Not every vote carries a bill URI, hence .get().
                    'bill_api_uri': r['bill'].get('api_uri', None),
                    'vote_type': r['vote_type'],
                    'result': r['result'],
                    'date': r['date'],
                    'total_yes': r['total']['yes'],
                    'total_no': r['total']['no'],
                    'total_not_voting': r['total']['not_voting'],
                    'democrats_no': r['democratic']['no'],
                    'democrats_yes': r['democratic']['yes'],
                    'republican_no': r['republican']['no'],
                    'republican_yes': r['republican']['yes'],
                    'independent_no': r['independent']['no'],
                    'independent_yes': r['independent']['yes'],
                })

    df = pd.DataFrame(laws)
    df.to_csv(laws_csv, index=False)

print(df.shape)

(6834, 16)


In [4]:
# Reload the vote summaries saved as laws_20years.csv.
all_laws = pd.read_csv(filepath_or_buffer=DATA_PATH + '/laws_20years.csv')

## Get general information about congress members of the last 20 years

In [40]:
# Download the Senate member list for every congress that appears in
# `all_laws` and cache it as CSV. The download only runs when the CSV is
# missing; delete the file to force a refresh.
members_csv = DATA_PATH + '/members_20years.csv'

if os.path.exists(members_csv):
    df = pd.read_csv(members_csv)
else:
    congresses_members = []
    for congress in all_laws['congress'].unique():
        url = 'https://api.propublica.org/congress/v1/{}/senate/members.json'.format(congress)
        # The request must actually be issued here: without it `req` is
        # undefined (or a stale response left over from another cell).
        req = requests.get(url, headers={'X-API-Key': API_KEY}).json()
        members = req['results'][0]['members']
        # One record per (congress, member) pair.
        congresses_members += [
            {
                'congress': congress,
                'name': m['first_name'] + ' ' + m['last_name'],
                'id': m['id'],
                'api_uri': m['api_uri'],
                'party': m['party'],
                'gender': m['gender'],
                'seniority': m['seniority'],
                'dw_nominate': m['dw_nominate'],
            }
            for m in members
        ]

    df = pd.DataFrame(congresses_members)
    df.to_csv(members_csv, index=False)

print(df.shape)

(1133, 8)


In [8]:
# Reload the per-congress member table saved as members_20years.csv.
all_members = pd.read_csv(filepath_or_buffer=DATA_PATH + '/members_20years.csv')

In [139]:
# Count distinct member ids in the table (one id can appear in several congresses).
print(all_members['id'].nunique())

231


## Build an adjacency matrix for every single law voted on in the last 20 years

In [5]:
def get_adjacency(members, votes):
    """Build the "voted the same way" adjacency matrix for a single vote.

    Parameters
    ----------
    members : iterable of member ids
        Fixes the row/column order of the returned matrix.
    votes : pandas.DataFrame
        Indexed by member id, with a string column ``'vote_position'``.
        Members absent from the index are treated as ``'not voting'``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, n)``. Entry ``[i, j]`` is 1 when members
        ``i`` and ``j`` (``i != j``) cast the same position (compared
        case-insensitively) and that position is not ``'not voting'``;
        every other entry, including the diagonal, is 0.
    """
    member_ids = list(members)
    n = len(member_ids)
    adjacency = np.zeros((n, n))

    # Look every member's position up once instead of once per pair.
    positions = [
        votes.loc[m_id]['vote_position'].lower() if m_id in votes.index else 'not voting'
        for m_id in member_ids
    ]

    # Group matrix positions by the vote cast; non-voters never get an edge.
    groups = {}
    for idx, position in enumerate(positions):
        if position != 'not voting':
            groups.setdefault(position, []).append(idx)

    for idxs in groups.values():
        # Connect everyone inside the group, then clear the self-loops.
        adjacency[np.ix_(idxs, idxs)] = 1
        adjacency[idxs, idxs] = 0

    return adjacency

In [35]:
# Compute one adjacency matrix per vote, keyed by an id derived from the vote URI.
# Matrices already present in `all_adjacency` are kept, so an interrupted run
# can be resumed by re-running this cell; the dict is only created when it
# does not exist yet (e.g. on a fresh kernel).
try:
    all_adjacency
except NameError:
    all_adjacency = {}

for i in range(len(all_laws)):
    row = all_laws.iloc[i]
    url = row['vote_uri']
    # Path segments after the host joined with '_', without the file extension.
    keyname = '_'.join(url.split('/')[4:]).split('.')[0]

    if keyname in all_adjacency:
        continue

    print(keyname)
    congress = row['congress']
    # Members of the congress in which the vote took place, ordered by party
    # then id so that every matrix of a congress shares the same layout.
    members = all_members[all_members['congress'] == congress].sort_values(['party', 'id'])
    members = members.set_index('id')
    req = requests.get(url, headers={'X-API-Key': API_KEY}).json()
    results = pd.DataFrame(req['results']['votes']['vote']['positions'])
    results = results.set_index('member_id')

    all_adjacency[keyname] = get_adjacency(members.index, results)

In [33]:
# Persist the adjacency matrices to disk so they can be reloaded later.
with open(DATA_PATH + '/all_adjacency.pickle', 'wb') as out_file:
    pickle.dump(all_adjacency, out_file, protocol=pickle.HIGHEST_PROTOCOL)

# del all_adjancency

In [206]:
# Reload the adjacency matrices from the pickle file.
# NOTE: pickle.load executes arbitrary code; only load files you produced yourself.
with open(DATA_PATH + '/all_adjacency.pickle', 'rb') as in_file:
    all_adjacency = pickle.load(in_file)

In [209]:
# Number of stored adjacency matrices (the name was previously misspelled
# `all_adjancency`, which the load cell never defines).
len(all_adjacency)

6834

In [43]:
# Fetch sponsor / subject details for the bill attached to each vote.
# Votes without a bill URI are skipped, and a failed lookup is reported and
# skipped rather than aborting the whole run.
all_bills = []
seen_keys = set()  # vote ids already handled (all_bills holds dicts, not ids)

for i in range(len(all_laws)):
    row = all_laws.iloc[i]
    url = row['bill_api_uri']
    # Path segments after the host joined with '_', without the file extension.
    keyname = '_'.join(row['vote_uri'].split('/')[4:]).split('.')[0]

    # pd.isna is a value check; `is not np.nan` only tests object identity.
    if keyname in seen_keys or pd.isna(url):
        continue
    seen_keys.add(keyname)

    print(keyname)
    try:
        req = requests.get(url, headers={'X-API-Key': API_KEY}).json()
        results = req['results'][0]
        all_bills.append({
            'law_id': keyname,
            'sponsor_party': results['sponsor_party'],
            'sponsor_id': results['sponsor_id'],
            'primary_subject': results['primary_subject'],
            'committees': results['committees'],
            'short_title': results['short_title'],
        })
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
        # Best effort: a missing or malformed bill record must not stop the loop,
        # but it should not disappear silently either.
        print('skipping {}: {!r}'.format(keyname, err))

In [42]:
# Save the collected bill details; the DataFrame index is written as the first column.
pd.DataFrame(all_bills).to_csv(path_or_buf=DATA_PATH + '/bills_20.years.csv')

In [25]:
# Download the individual member positions for every vote of the 115th
# congress and store them in one CSV, tagging each row with its vote URI.
congress115 = list(all_laws[all_laws.congress == 115]['vote_uri'])

# Collect the per-vote frames and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all rows on every pass.
frames = []

for i, url in enumerate(congress115):
    print(i)
    req = requests.get(url, headers={'X-API-Key': API_KEY}).json()
    d = pd.DataFrame(req['results']['votes']['vote']['positions'])
    d['vote_uri'] = url
    frames.append(d)

# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(frames) if frames else pd.DataFrame()

df.to_csv(DATA_PATH + '/votes_115.csv', index=False)

In [27]:
# Reload the 115th-congress member positions saved as votes_115.csv.
votes = pd.read_csv(filepath_or_buffer=DATA_PATH + '/votes_115.csv')

In [28]:
# (rows, columns) of the positions table loaded from votes_115.csv.
votes.shape

(59891, 7)

In [38]:
# Download the individual member positions for every vote, keyed by vote URI.
# Entries already present in `all_positions` are kept, so an interrupted run
# can be resumed by re-running this cell; the dict is only created when it
# does not exist yet (e.g. on a fresh kernel).
laws_ids = list(all_laws['vote_uri'])

try:
    all_positions
except NameError:
    all_positions = {}

for i, url in enumerate(laws_ids):
    if url in all_positions:
        continue

    print(i)
    req = requests.get(url, headers={'X-API-Key': API_KEY}).json()
    d = pd.DataFrame(req['results']['votes']['vote']['positions'])
    d['vote_uri'] = url  # tag every row with the vote it belongs to
    all_positions[url] = d

5689
5690
5691
5692
5693
5694
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709
5710
5711
5712
5713
5714
5715
5716
5717
5718
5719
5720
5721
5722
5723
5724
5725
5726
5727
5728
5729
5730
5731
5732
5733
5734
5735
5736
5737
5738
5739
5740
5741
5742
5743
5744
5745
5746
5747
5748
5749
5750
5751
5752
5753
5754
5755
5756
5757
5758
5759
5760
5761
5762
5763
5764
5765
5766
5767
5768
5769
5770
5771
5772
5773
5774
5775
5776
5777
5778
5779
5780
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792
5793
5794
5795
5796
5797
5798
5799
5800
5801
5802
5803
5804
5805
5806
5807
5808
5809
5810
5811
5812
5813
5814
5815
5816
5817
5818
5819
5820
5821
5822
5823
5824
5825
5826
5827
5828
5829
5830
5831
5832
5833
5834
5835
5836
5837
5838
5839
5840
5841
5842
5843
5844
5845
5846
5847
5848
5849
5850
5851
5852
5853
5854
5855
5856
5857
5858
5859
5860
5861
5862
5863
5864
5865
5866
5867
5868
5869
5870
5871
5872
5873
5874
5875
5876
5877
5878
5879
5880
5881
5882
5883
5884
5885
5886
5887
5888


In [42]:
# Stack the per-vote position tables into a single frame and save it as CSV.
df = pd.concat(list(all_positions.values()))
df.to_csv(path_or_buf=DATA_PATH + '/all_votes_positions.csv', index=False)

In [40]:
# (rows, columns) of the concatenated positions frame.
df.shape

(683034, 7)