# **How trustworthy is your local representative?**

#### Data Sources:
politifact api documentation: http://static.politifact.com/api/v2apidoc.html <br>
govtrack api documentation: https://www.govtrack.us/developers/api <br>
propublica: https://projects.propublica.org/api-docs/congress-api/ <br>
congressional misconduct database: https://www.govtrack.us/misconduct <br>
news api: https://newsapi.org/docs <br>
votesmart: https://github.com/votesmart/python-votesmart/tree/master

## To do: April 15

1. Crawl truth-o-meter data ✅
2. Join truth-o-meter, misconduct db, and propublica ✅
3. Structure bill vote **(in-progress)**
4. Incorporate news headlines
5. incorprate tweets (?)


In [1]:
import requests

import json

import pandas as pd
import numpy as np
import datetime as dt

from IPython.display import clear_output

import os
import time
import ast

import requests
from lxml import html

## Get Politifact Data
politifact api documentation: http://static.politifact.com/api/v2apidoc.html 

In [2]:
# Some Functions

def get_politifact(limit,offset):
    '''Crawls data from politifact API'''
    pf_url = 'http://www.politifact.com/api/v/2/statement/'

    params = {
        'format' : 'json',
        'edition__edition_slug':'truth-o-meter',
        'limit' : limit,
        'offset' : offset,
        'order_by' : 'ruling_date',
    }

    resp = requests.get(pf_url,params=params).json()
    pf = pd.DataFrame(resp['objects'])
    return pf

def restart_politifact(file_path):
    """Restart Politifact Crawl"""
    df = get_politifact(100,0)
    df.to_json(file_path)
    
def refresh_politifact(file_path):
    """Refresh Politifact CSV"""
    cols = get_politifact(1,0).columns
    df = pd.read_json(file_path)
    print('Pre-refresh length:',len(df))
    df = df.drop_duplicates('statement')[cols]
    print('Post-refreshed length:',len(df))
    df.to_json(file_path)
    
def preview_cols(df):
    for col in df.columns:
        display(df[col].head(3))

### get_politifact.py script (markdown)

Unix commands: <br>
pause: ctr + c <br>
resume: fg <br>
```python
import time
import pandas as pd
import requests
import json
import os

print('\nGet Politifact\n')

limit = 100
file_name = 'Politifact.json'
folder_name = 'Data'

file_name = '/' + file_name
folder_name = '/' + folder_name

json_path = os.getcwd()+folder_name+file_name

def get_politifact(limit,offset):
    """Crawl Politifact API"""
    pf_url = 'http://www.politifact.com/api/v/2/statement/'
    params = {
        'format' : 'json',
        'edition__edition_slug':'truth-o-meter',
        'limit' : limit,
        'offset' : offset,
        'order_by' : 'ruling_date'}
    resp = requests.get(pf_url,params=params).json()
    pf = pd.DataFrame(resp['objects'])
    return pf

if not os.path.isdir(os.getcwd()+folder_name):
    os.mkdir(os.getcwd()+folder_name)
    print('* New dir made: ',os.getcwd()+folder_name)

if not os.path.isfile(json_path):
    t1 = time.time()
    df = get_politifact(100,0)
    df.to_json(json_path)
    print('* New file made:',json_path,'\n')
    print('data_length:',len(df))
    timer = time.time() - t1
    print('runtime: {rt} seconds'.format(rt=round(timer,2)))
    time.sleep(5)
print('*'*20)

run = True
while run:
    try:
        t1 = time.time()
        old_df = pd.read_json(json_path)
        old_offset = len(old_df)+1

        print('data_length:',len(old_df))
        print('Offset:',old_offset)
        print('Getting data...')

        new_df = get_politifact(limit,old_offset)
        cols = new_df.columns

        df = old_df.append(new_df).drop_duplicates('statement').reset_index()[cols]
        df.to_json(json_path)
        
        print('---')
        print('data_length:',len(df))
        
        exp = old_offset+limit
        print('Expected offset:',exp)
        print('---')

        new_offset = len(df)+1
        print('New offset:',new_offset)

        timer = time.time() - t1
        print('runtime: {rt} seconds'.format(rt=round(timer,2)))

        if exp < new_offset:
            print('#'*10,'OFFSET ERROR','#'*10)
        elif new_offset > exp:
            print('OFFSET Diff:',new_offset-exp)

        if new_offset == old_offset:
            print('#'*10,'OFFSET STABLE','#'*10)
            break

        time.sleep(5)
    except:
        print('#'*10,'EXCEPT ACTIVATED','#'*10)
        run = False
    print('='*20)
```

# Politifact

In [3]:
json_path = os.getcwd()+'/Data/Politifact.json'
pf = pd.read_json(json_path)
pf.sort_values('statement_date',ascending=False,inplace=True)
pf.reset_index(inplace=True)

pf['statement_date'] = pd.to_datetime(pf['statement_date'])
pf['ruling_date'] = pd.to_datetime(pf['ruling_date'])
pf['statement'] = (pf['statement'].str.replace('<p>',"").str.replace('</p>','')
                   .str.replace('"','').str.replace('&quot;','')
                   .str.replace('\r','').str.replace('\n','').str.replace('&#39;','\''))
pf['ruling_comments'] = (pf['ruling_comments'].str.replace('<p>',"").str.replace('</p>','')
                   .str.replace('"','').str.replace('&quot;','')
                   .str.replace('\r','').str.replace('\n','').str.replace('&#39;','\''))

pf['ruling_slug'] = [ruling['ruling_slug'] for ruling in pf['ruling']]

pf['name_slug'] = [speaker['name_slug'] for speaker in pf['speaker']]
pf['home_state'] = [speaker['home_state'] for speaker in pf['speaker']]
pf['party'] = [speaker['party']['party_slug'] for speaker in pf['speaker']]
pf['first_name'] = [speaker['first_name'] for speaker in pf['speaker']]
pf['last_name'] = [speaker['last_name'] for speaker in pf['speaker']]

pf['statement_type'] = [statement['statement_type'] for statement in pf['statement_type']]

pf['subject_slug'] = [[subject['subject_slug'] for subject in subjects] for subjects in pf['subject']]



In [4]:
pf['speaker'][0]

{'barely_true_count': 1,
 'current_job': 'U.S. Senator',
 'false_count': 0,
 'first_name': 'Tina',
 'half_true_count': 0,
 'home_state': 'Minnesota',
 'id': 4282,
 'last_name': 'Smith',
 'mostly_true_count': 0,
 'name_slug': 'tina-smith',
 'pants_count': 0,
 'party': {'id': 1,
  'party': 'Democrat',
  'party_slug': 'democrat',
  'resource_uri': '/api/v/2/party/1/'},
 'photo': 'http://static.politifact.com.s3.amazonaws.com/politifact/mugs/Tina_Smith_mug.jpg',
 'primary_edition': {'edition': 'National',
  'edition_slug': 'truth-o-meter',
  'id': 1,
  'meter_name': 'The Truth-O-Meter<sup>TM</sup>',
  'resource_uri': '/api/v/2/edition/1/'},
 'promise_meter_cutout': None,
 'resource_uri': '/api/v/2/person/4282/',
 'total_count': 1,
 'true_count': 0,
 'website': 'https://www.smith.senate.gov/'}

In [5]:
# Test to see that the cleaning method worked

sub = 'last_name'
main = 'speaker'
test_num = 10

import random
test = [random.randint(0,len(pf)) for i in range(test_num)]
for n in test:
    print(pf[sub].iloc[n] == pf.iloc[n][main][sub],'\t',pf[sub].iloc[n],pf.iloc[n][main][sub])

True 	 Castro Castro
True 	 Carson Carson
True 	 Obama Obama
True 	 Nixon Nixon
True 	 Sestak Sestak
True 	 Cruz Cruz
True 	 Manafort Manafort
True 	 Goolsbee Goolsbee
True 	 Gingrich Gingrich
True 	 U.S. Chamber of Commerce U.S. Chamber of Commerce


In [6]:
pf.columns

Index(['index', 'art', 'author', 'canonical_url', 'edition', 'editor',
       'facebook_headline', 'id', 'in_future', 'is_pundit', 'make_public',
       'preview', 'researcher', 'resource_uri', 'ruling', 'ruling_comments',
       'ruling_comments_date', 'ruling_date', 'ruling_headline',
       'ruling_headline_slug', 'ruling_link_text', 'ruling_state',
       'source_documents', 'sources', 'speaker', 'statement',
       'statement_context', 'statement_date', 'statement_type', 'subject',
       'target', 'twitter_headline', 'ruling_slug', 'name_slug', 'home_state',
       'party', 'first_name', 'last_name', 'subject_slug'],
      dtype='object')

In [7]:
politifact = pf[['ruling_slug','first_name','last_name','name_slug',
                 'home_state','party','statement',
                 'subject_slug','ruling_comments','ruling_date',
                 'statement_context','statement_type','statement_date',
                 'twitter_headline','sources']]
politifact.head()

Unnamed: 0,ruling_slug,first_name,last_name,name_slug,home_state,party,statement,subject_slug,ruling_comments,ruling_date,statement_context,statement_type,statement_date,twitter_headline,sources
0,barely-true,Tina,Smith,tina-smith,Minnesota,democrat,"On average, American women only earn 80 cents ...","[diversity, jobs, women]",On April 10 -- Equal Pay Day -- Sen. Tina Smit...,2018-04-13 12:27:18,a tweet,Claim,2018-04-10,Do women get only 80 percent of the pay men do...,"<p>Tina Smith, <a href=""https://twitter.com/Se..."
1,mostly-true,Donald,Trump,donald-trump,New York,republican,When a car is sent to the United States from C...,"[china, trade]",President Donald Trump took to Twitter to bemo...,2018-04-09 17:06:15,in a tweet,Claim,2018-04-09,Does China slap a 25 percent tariff on America...,"<p><a href=""https://twitter.com/realDonaldTrum..."
2,barely-true,Donald,Trump,donald-trump,New York,republican,EPA administrator Scott Pruitt's short-term re...,[ethics],When Environmental Protection Agency administr...,2018-04-10 11:09:40,a tweet,Claim,2018-04-07,Did EPA's Pruitt pay below market rent?,"<p>Donald Trump, <a href=""https://twitter.com/..."
3,barely-true,Donald,Trump,donald-trump,New York,republican,Says Scott Pruitt&rsquo;s security spending wa...,"[environment, federal-budget, homeland-security]",President Donald Trump came out in defense of ...,2018-04-10 11:08:25,a tweet,Claim,2018-04-07,Comparing Scott Pruitt's security spending wit...,"<p>Donald Trump, <a href=""https://twitter.com/..."
4,barely-true,Donald,Trump,donald-trump,New York,republican,This will be the last time &mdash; April &mdas...,[taxes],If you are struggling to finish and file your ...,2018-04-09 08:00:00,an event in West Virginia,Claim,2018-04-05,Can taxpayers expect card-sized tax forms next...,"<p>Donald Trump, <a href=""https://www.whitehou..."


# Govtrack (Deprecated)
govtrack api documentation: https://www.govtrack.us/developers/api 

In [8]:
# Data on legislators

person = 'https://www.govtrack.us/api/v2/person'
role = 'https://www.govtrack.us/api/v2/role'

params = {
    'current' : 'true'
}

resp = requests.get(person).json()

legis = pd.DataFrame(resp['objects'])
hillary = legis[legis['lastname']=='Clinton']['bioguideid'].iloc[0]

# Propublica
propublica: https://projects.propublica.org/api-docs/congress-api/ 

In [9]:
from congress import Congress
propublica_key = 'wAxQ7sF8gcXCBRnY3lzegT23aljM4saALOb6JPlR'
congress = Congress(propublica_key)

cong = 80 # Earliest congress is 80
cong = 115 # Latest is 115

st = congress.members.filter('senate',congress = cong)
hs = congress.members.filter('house', congress = cong)

senate_cols = ['id','lis_id','state','state_rank','party',
               'first_name','last_name','middle_name',
               'gender','date_of_birth','in_office',
               'votes_with_party_pct','missed_votes_pct','missed_votes',
               'next_election','title','seniority', # Seniority = years served
               'leadership_role','dw_nominate', # look into dw_nominate
               'twitter_account']

house_cols = ['id','state','district','party',
              'first_name','last_name','middle_name',
              'gender','date_of_birth','in_office',
              'votes_with_party_pct','missed_votes_pct','missed_votes',
              'next_election','title','seniority',
              'leadership_role','dw_nominate',
              'twitter_account']

senate = pd.DataFrame(st[0]['members'])[senate_cols]
house = pd.DataFrame(hs[0]['members'])[house_cols]

### Creating Foreign Key for Propublica and Politifact

In [10]:
# Connecting Senate to Politifact with name slug
senate['name_slug'] = senate['first_name'].str.lower() + '-' + senate['last_name'].str.lower()

filter1 = senate[~senate['name_slug'].isin(politifact['name_slug'])]
filter2 = filter1[filter1['last_name'].isin(politifact['last_name'])]
sen_not_pf = filter1[~filter1['last_name'].isin(politifact['last_name'])]

new_slugs = list()

for i in range(len(filter2)):
    name = ['first_name','last_name']
    col = 'last_name'
    query = filter2['last_name'].iloc[i]
    result = politifact[politifact[col] == query]
    slug = result['name_slug'].iloc[0]
    
    new_slugs.append(slug)
    '''
    print(filter2[name].iloc[i])
    display(result[name])
    print(slug)
    print('='*30)
    '''
    
new_slugs = [slug if slug != 'joe-kennedy' else 'john-kennedy' for slug in new_slugs]
new_slugs = [slug if slug != 'mark-udall' else 'tom-udall' for slug in new_slugs]

# Change name slugs
filter2.loc[:,'name_slug'] = new_slugs

for index in filter2.index:
    condition = senate.loc[index]['name_slug'] == filter2.loc[index]['name_slug']
    if not condition:
        print('old:',senate.loc[index]['name_slug'],'\t', filter2.loc[index]['name_slug'])
        senate.loc[index,'name_slug'] = filter2.loc[index]['name_slug']
        print('new:',senate.loc[index]['name_slug'],'\t', filter2.loc[index]['name_slug'])
    print('='*20)

old: shelley-capito 	 shelley-moore-capito
new: shelley-moore-capito 	 shelley-moore-capito
old: benjamin-cardin 	 ben-cardin
new: ben-cardin 	 ben-cardin
old: christopher-coons 	 chris-coons
new: chris-coons 	 chris-coons
old: michael-enzi 	 mike-enzi
new: mike-enzi 	 mike-enzi
old: charles-grassley 	 chuck-grassley
new: chuck-grassley 	 chuck-grassley
old: christopher-murphy 	 chris-murphy
new: chris-murphy 	 chris-murphy
old: jim-risch 	 james-risch
new: james-risch 	 james-risch
old: bernard-sanders 	 bernie-s
new: bernie-s 	 bernie-s
old: patrick-toomey 	 pat-toomey
new: pat-toomey 	 pat-toomey
old: chris-van hollen 	 chris-van-hollen
new: chris-van-hollen 	 chris-van-hollen


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [11]:
# # Connecting house to Politifact with name_slug

house['name_slug'] = house['first_name'].str.lower() + '-' + house['last_name'].str.lower()

filter1 = house[~house['name_slug'].isin(politifact['name_slug'])]
filter2 = filter1[filter1['last_name'].isin(politifact['last_name'])]
rep_not_pf = filter1[~filter1['last_name'].isin(politifact['last_name'])]

new_slugs = list()
count = 0
for i in range(len(filter2)):
    name = ['first_name','last_name']
    col = 'last_name'
    
    match = filter2['last_name'].iloc[i]
    
    result = politifact[politifact[col] == match]
    
    nf = 2
    result = result[result['first_name'].str[:nf] == filter2['first_name'].str[:nf].iloc[i]]

    if len(result) > 0:
        slug = result['name_slug'].iloc[0]
        new_slugs.append(slug)
        count += 1
        '''
        print('#new slug appended#')
        print(filter2[name].iloc[i])
        display(result[name])
        print('slug:',slug)
        print('='*30)
        '''
    else:
        slug = filter2['name_slug'].iloc[i]
        new_slugs.append(slug)

# Change name slugs
filter2.loc[:,'name_slug'] = new_slugs

for index in filter2.index:
    condition = house.loc[index]['name_slug'] == filter2.loc[index]['name_slug']
    if not condition:
        print('old:',house.loc[index]['name_slug'],'\t', filter2.loc[index]['name_slug'])
        house.loc[index,'name_slug'] = filter2.loc[index]['name_slug']
        print('new:',house.loc[index]['name_slug'],'\t', filter2.loc[index]['name_slug'])
        print('='*20)

old: donald-beyer 	 don-beyer
new: don-beyer 	 don-beyer
old: david-brat 	 dave-brat
new: dave-brat 	 dave-brat
old: andré-carson 	 andre-carson
new: andre-carson 	 andre-carson
old: joaquín-castro 	 joaquin-castro
new: joaquin-castro 	 joaquin-castro
old: gerald-connolly 	 gerry-connolly
new: gerry-connolly 	 gerry-connolly
old: jeffrey-duncan 	 jeff-duncan
new: jeff-duncan 	 jeff-duncan
old: sheila-jackson lee 	 sheila-jackson-lee
new: sheila-jackson-lee 	 sheila-jackson-lee
old: eddie-johnson 	 eddie-bernice-johnson
new: eddie-bernice-johnson 	 eddie-bernice-johnson
old: joseph-kennedy 	 joe-kennedy
new: joe-kennedy 	 joe-kennedy
old: raúl-labrador 	 raul-labrador
new: raul-labrador 	 raul-labrador
old: cathy-mcmorris rodgers 	 cathy-mcmorris-rodgers
new: cathy-mcmorris-rodgers 	 cathy-mcmorris-rodgers
old: beto-o'rourke 	 beto-orourke
new: beto-orourke 	 beto-orourke
old: debbie-wasserman schultz 	 debbie-wasserman-schultz
new: debbie-wasserman-schultz 	 debbie-wasserman-schultz


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


### Connecting to Congressional Misconduct DB

Use Xie_PPDS_HW3.ipynb to scrape data from govtrack misconduct database

In [12]:
from sqlalchemy import create_engine

conn_string = 'mysql://{user}:{password}@{host}:{port}/?charset=utf8'.format(
    user     = 'root', 
    password = input('password'), 
    host     = '127.0.0.01', 
    port     = 3306, 
    encoding = 'utf-8'
)

clear_output()

pw = 'dwdstudent2015'
engine = create_engine(conn_string)

engine.execute('USE congressional_misconduct')

query = 'SELECT * FROM misconduct_entries'
misconduct = pd.read_sql(query,con=engine)

### Merging Politifact, Propublica, and Congressional Misconduct DB

In [13]:
# Merge misconduct with senate
pd.merge(misconduct,senate[senate_cols[:12]],
         how='inner',left_on='bioguide',right_on='id').head()

Unnamed: 0,entry_index,member_name,bioguide,party_x,district,member,member_page,paragraph,id,lis_id,state,state_rank,party_y,first_name,last_name,middle_name,gender,date_of_birth,in_office,votes_with_party_pct
0,370,Sen. Robert “Bob” Menéndez,M000639,Democrat,[D-NJ],Sen. Robert “Bob” Menéndez [D-NJ],https://www.govtrack.us/congress/members/rober...,Menendez is being investigated for unspecified...,M000639,S306,NJ,senior,D,Robert,Menendez,,M,1954-01-01,True,92.45
1,373,Sen. Alan “Al” Franken,F000457,Democrat,"[D-MN, 2009-2017]","Sen. Alan “Al” Franken [D-MN, 2009-2017]",https://www.govtrack.us/congress/members/alan_...,"On December 7, 2017, Senator Franken announced...",F000457,S332,MN,junior,D,Al,Franken,,M,1951-05-21,False,97.44


In [14]:
# Merge misconduct with house
pd.merge(misconduct,house[house_cols[:12]]
         ,how='inner',left_on='bioguide',right_on='id').sort_values('bioguide').head()

Unnamed: 0,entry_index,member_name,bioguide,party_x,district_x,member,member_page,paragraph,id,state,district_y,party_y,first_name,last_name,middle_name,gender,date_of_birth,in_office,votes_with_party_pct,missed_votes_pct
7,293,Rep. Robert Aderholt,A000055,Republican,[R-AL4],Rep. Robert Aderholt [R-AL4],https://www.govtrack.us/congress/members/rober...,In 2010 Aderholt was investigated for keeping ...,A000055,AL,4,R,Robert,Aderholt,B.,M,1965-07-22,True,96.72,3.4
58,380,Rep. Robert Brady,B001227,Democrat,[D-PA1],Rep. Robert Brady [D-PA1],https://www.govtrack.us/congress/members/rober...,Brady faced an allegation of leading a crimina...,B001227,PA,1,D,Robert,Brady,A.,M,1945-04-07,True,94.3,3.4
52,365,Rep. Madeleine Bordallo,B001245,Democrat,[D-GU0],Rep. Madeleine Bordallo [D-GU0],https://www.govtrack.us/congress/members/madel...,In 2017 Bardallo was investigated for receivin...,B001245,GU,At-Large,D,Madeleine,Bordallo,Z.,F,1933-05-31,True,,
10,297,Rep. George “G.K.” Butterfield,B001251,Democrat,[D-NC1],Rep. George “G.K.” Butterfield [D-NC1],https://www.govtrack.us/congress/members/georg...,In 2010 Butterfield was investigated for keepi...,B001251,NC,1,D,G.,Butterfield,K.,M,1947-04-27,True,94.4,5.86
19,308,Rep. Vern Buchanan,B001260,Republican,[R-FL16],Rep. Vern Buchanan [R-FL16],https://www.govtrack.us/congress/members/vern_...,In 2011 Buchanan was investigated for failing ...,B001260,FL,16,R,Vern,Buchanan,,M,1951-05-08,True,96.47,7.03


In [15]:
# Merge Senate and Misconduct
df = pd.merge(politifact,senate,
         how='inner',left_on='name_slug',right_on='name_slug')
cols = df.columns[:len(politifact.columns)+10]
df[cols].sort_values('name_slug').head()

Unnamed: 0,ruling_slug,first_name_x,last_name_x,name_slug,home_state,party_x,statement,subject_slug,ruling_comments,ruling_date,...,id,lis_id,state,state_rank,party_y,first_name_y,last_name_y,middle_name,gender,date_of_birth
315,mostly-true,Al,Franken,al-franken,Minnesota,democrat,President (Ronald) Reagan did a similar thing....,[immigration],"Sen. Al Franken, D-Minn., took to the airwaves...",2017-09-08 15:23:05,...,F000457,S332,MN,junior,D,Al,Franken,,M,1951-05-21
635,mostly-true,Amy,Klobuchar,amy-klobuchar,Minnesota,democrat,Children who witness domestic violence are twi...,"[crime, criminal-justice, women]",The release of a video showing NFL running bac...,2014-09-16 11:44:47,...,K000367,S311,MN,senior,D,Amy,Klobuchar,,F,1960-05-25
634,true,Amy,Klobuchar,amy-klobuchar,Minnesota,democrat,"<div dir=ltr style=color: rgb(0, 0, 0); font-f...",[energy],Considering the United States&rsquo; strength ...,2015-01-04 18:37:38,...,K000367,S311,MN,senior,D,Amy,Klobuchar,,F,1960-05-25
633,mostly-true,Amy,Klobuchar,amy-klobuchar,Minnesota,democrat,Human trafficking is the third-biggest crimina...,"[crime, human-rights]","Sen. Amy Klobuchar, D-Minn., told delegates at...",2016-07-26 22:45:27,...,K000367,S311,MN,senior,D,Amy,Klobuchar,,F,1960-05-25
78,mostly-true,Angus,King,angus-king,Maine,independent,"China, my understanding is, supplies 4 percent...","[china, foreign-policy, trade]",President Donald Trump said his administration...,2018-03-08 09:30:00,...,K000383,S363,ME,junior,I,Angus,King,,M,1944-03-31


In [16]:
# Merge House and Misconduct
df = pd.merge(politifact,house,
         how='inner',left_on='name_slug',right_on='name_slug')
cols = df.columns[:len(politifact.columns)+10]
df[cols].sort_values('name_slug').head()

Unnamed: 0,ruling_slug,first_name_x,last_name_x,name_slug,home_state,party_x,statement,subject_slug,ruling_comments,ruling_date,...,id,state,district,party_y,first_name_y,last_name_y,middle_name,gender,date_of_birth,in_office
179,false,Adam,Kinzinger,adam-kinzinger,Illinois,republican,The current debate over authorizing military a...,"[congress, history, military, terrorism]",Months after the Islamic State emerged as a th...,2015-02-19 14:29:29,...,K000378,IL,16,R,Adam,Kinzinger,,M,1978-02-27,True
181,half-true,Adam,Kinzinger,adam-kinzinger,Illinois,republican,"Since Debbie Halvorson has been in politics, I...","[economy, message-machine]",With unemployment rates near their post-World ...,2010-10-07 10:07:49,...,K000378,IL,16,R,Adam,Kinzinger,,M,1978-02-27,True
180,false,Adam,Kinzinger,adam-kinzinger,Illinois,republican,"<div style=color: rgb(0, 0, 0); font-family: a...","[federal-budget, military]",Midterm elections are less than two weeks away...,2014-10-26 17:41:57,...,K000378,IL,16,R,Adam,Kinzinger,,M,1978-02-27,True
107,true,Adam,Schiff,adam-schiff,California,democrat,I think what the authors of the (25th) Amendme...,"[congress, disability, legal-issues]",A Democratic lawmaker who disapproves of Presi...,2017-08-29 13:09:42,...,S001150,CA,28,D,Adam,Schiff,B.,M,1960-06-22,True
2,mostly-true,Alex,Mooney,alex-mooney,West Virginia,republican,"From 2000-10, U.S. manufacturing employment sh...","[economy, jobs]",There&rsquo;s no question that manufacturing e...,2018-04-04 14:36:24,...,M001195,WV,2,R,Alex,Mooney,,M,1971-06-05,True


# Bills and Votes

In [17]:
# Preview html
url = congress.votes.by_month('house')['votes'][0]['source']
response = requests.get(url)
print(response.text[:1000])

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE rollcall-vote PUBLIC "-//US Congress//DTDs/vote v1.0 20031119 //EN" "http://clerk.house.gov/evs/vote.dtd">
<?xml-stylesheet type="text/xsl" href="http://clerk.house.gov/evs/vote.xsl"?>
<rollcall-vote>
<vote-metadata>
<majority>R</majority>
<congress>115</congress>
<session>2nd</session>
<chamber>U.S. House of Representatives</chamber>
<rollcall-num>141</rollcall-num>
<legis-num>S 167</legis-num>
<vote-question>On Motion to Suspend the Rules and Pass</vote-question>
<vote-type>2/3 YEA-AND-NAY</vote-type>
<vote-result>Passed</vote-result>
<action-date>16-Apr-2018</action-date>
<action-time time-etz="19:03">7:03 PM</action-time>
<vote-desc>To designate a National Memorial to Fallen Educators at the National Teachers Hall of Fame in Emporia, Kansas</vote-desc>
<vote-totals>
<totals-by-party-header>
<party-header>Party</party-header>
<yea-header>Yeas</yea-header>
<nay-header>Nays</nay-header>
<present-header>Answered â

In [18]:
url = congress.votes.by_month('house')['votes'][0]['source']

response = requests.get(url)
doc = html.fromstring(response.content)

print(doc.xpath('//vote-desc')[0].text_content())
print(doc.xpath('//legis-num')[0].text_content())
print('*'*20)

n = 0
for record in doc.xpath('//vote-data/recorded-vote'):
    while n < 3:
        print(n)
        rep_id = record.xpath('./legislator')[0].get('name-id')
        party = record.xpath('./legislator')[0].get('party')
        state = record.xpath('./legislator')[0].get('state')
        name = record.xpath('./legislator')[0].text_content()
        vote = record.xpath('./vote')[0].text_content()

        print(name,party+'-'+state)
        print(rep_id)
        print(vote)
        print('-'*5)
        n += 1
    
    


To designate a National Memorial to Fallen Educators at the National Teachers Hall of Fame in Emporia, Kansas
S 167
********************
0
Abraham R-LA
A000374
Yea
-----
1
Abraham R-LA
A000374
Yea
-----
2
Abraham R-LA
A000374
Yea
-----


In [19]:
print(congress.votes.by_month('senate')['votes'][1]['url'])

https://www.senate.gov/legislative/LIS/roll_call_lists/roll_call_vote_cfm.cfm?congress=115&session=2&vote=00073


In [20]:
url = congress.votes.by_month('senate')['votes'][1]['source']

response = requests.get(url)
doc = html.fromstring(response.content)

print(doc.xpath('//vote_title')[0].text_content())
print('*'*20)

n = 0
for member in doc.xpath('//member'):
    while n < 2:
        print(n)
        print(member.xpath('./member_full')[0].text_content())
        print(member.xpath('./first_name')[0].text_content())
        print(member.xpath('./vote_cast')[0].text_content())
        print(member.xpath('./lis_member_id')[0].text_content())
        print('-'*10)
        n+=1

Motion to Invoke Cloture Re: Rebecca Grady Jennings, of Kentucky, to be U.S. District Judge for the Western District of Kentucky
********************
0
Alexander (R-TN)
Lamar
Yea
S289
----------
1
Alexander (R-TN)
Lamar
Yea
S289
----------


In [21]:
# compare
congress.members.compare('S000033','C001098','senate',congress=cong)

{'agree_percent': 16.4,
 'chamber': 'Senate',
 'common_votes': 378,
 'congress': '115',
 'disagree_percent': 83.6,
 'disagree_votes': 316,
 'first_member_api_uri': 'https://api.propublica.org/congress/v1/members/S000033.json',
 'first_member_id': 'S000033',
 'second_member_api_uri': 'https://api.propublica.org/congress/v1/members/C001098.json',
 'second_member_id': 'C001098'}

In [22]:
#bills
pd.DataFrame(congress.bills.by_member(senate['id'][0])['bills']).head(3)

Unnamed: 0,active,bill_id,bill_type,bill_uri,committees,congress,congressdotgov_url,cosponsors,cosponsors_by_party,enacted,...,sponsor_id,sponsor_name,sponsor_party,sponsor_state,sponsor_title,sponsor_uri,summary,summary_short,title,vetoed
0,False,s2680-115,s,https://api.propublica.org/congress/v1/115/bil...,"Senate Health, Education, Labor, and Pensions ...",115,https://www.congress.gov/bill/115th-congress/s...,1,{'D': 1},,...,A000360,Lamar Alexander,R,TN,Sen.,https://api.propublica.org/congress/v1/members...,,,A bill to address the opioid crisis.,
1,False,s2513-115,s,https://api.propublica.org/congress/v1/115/bil...,"Senate Health, Education, Labor, and Pensions ...",115,https://www.congress.gov/bill/115th-congress/s...,15,{'R': 15},,...,A000360,Lamar Alexander,R,TN,Sen.,https://api.propublica.org/congress/v1/members...,,,A bill to improve school safety and mental hea...,
2,False,s2509-115,s,https://api.propublica.org/congress/v1/115/bil...,Senate Energy and Natural Resources Committee,115,https://www.congress.gov/bill/115th-congress/s...,7,"{'R': 4, 'D': 2, 'I': 1}",,...,A000360,Lamar Alexander,R,TN,Sen.,https://api.propublica.org/congress/v1/members...,,,A bill to establish the National Park Restorat...,


In [23]:
# same thing, above is probably better
pd.DataFrame(congress.members.bills(senate['id'][0])['bills']).head(3)

Unnamed: 0,active,bill_id,bill_type,bill_uri,committees,congress,congressdotgov_url,cosponsors,cosponsors_by_party,enacted,...,sponsor_id,sponsor_name,sponsor_party,sponsor_state,sponsor_title,sponsor_uri,summary,summary_short,title,vetoed
0,False,s2680-115,s,https://api.propublica.org/congress/v1/115/bil...,"Senate Health, Education, Labor, and Pensions ...",115,https://www.congress.gov/bill/115th-congress/s...,1,{'D': 1},,...,A000360,Lamar Alexander,R,TN,Sen.,https://api.propublica.org/congress/v1/members...,,,A bill to address the opioid crisis.,
1,False,s2513-115,s,https://api.propublica.org/congress/v1/115/bil...,"Senate Health, Education, Labor, and Pensions ...",115,https://www.congress.gov/bill/115th-congress/s...,15,{'R': 15},,...,A000360,Lamar Alexander,R,TN,Sen.,https://api.propublica.org/congress/v1/members...,,,A bill to improve school safety and mental hea...,
2,False,s2509-115,s,https://api.propublica.org/congress/v1/115/bil...,Senate Energy and Natural Resources Committee,115,https://www.congress.gov/bill/115th-congress/s...,7,"{'R': 4, 'D': 2, 'I': 1}",,...,A000360,Lamar Alexander,R,TN,Sen.,https://api.propublica.org/congress/v1/members...,,,A bill to establish the National Park Restorat...,


In [24]:
print(dir(congress.bills))

['BASE_URI', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'amendments', 'apikey', 'by_member', 'cosponsors', 'fetch', 'get', 'http', 'introduced', 'major', 'passed', 'recent', 'related', 'subjects', 'upcoming', 'updated']


In [25]:
pd.DataFrame(congress.bills.passed('senate')['bills']).head()

Unnamed: 0,active,bill_id,bill_slug,bill_type,bill_uri,committee_codes,committees,congressdotgov_url,cosponsors,cosponsors_by_party,...,sponsor_name,sponsor_party,sponsor_state,sponsor_title,sponsor_uri,subcommittee_codes,summary,summary_short,title,vetoed
0,True,sres468-115,sres468,sres,https://api.propublica.org/congress/v1/115/bil...,[],,https://www.congress.gov/bill/115th-congress/s...,5,"{'R': 2, 'D': 3}",...,Ron Wyden,D,OR,Sen.,https://api.propublica.org/congress/v1/members...,[],,,"A resolution designating May 19, 2018, as ""Kid...",
1,True,sres467-115,sres467,sres,https://api.propublica.org/congress/v1/115/bil...,[],,https://www.congress.gov/bill/115th-congress/s...,0,{},...,Mitch McConnell,R,KY,Sen.,https://api.propublica.org/congress/v1/members...,[],,,A resolution notifying the House of Representa...,
2,True,sres466-115,sres466,sres,https://api.propublica.org/congress/v1/115/bil...,[],,https://www.congress.gov/bill/115th-congress/s...,0,{},...,Mitch McConnell,R,KY,Sen.,https://api.propublica.org/congress/v1/members...,[],,,A resolution notifying the President of the Un...,
3,True,sres465-115,sres465,sres,https://api.propublica.org/congress/v1/115/bil...,[],,https://www.congress.gov/bill/115th-congress/s...,0,{},...,Mitch McConnell,R,KY,Sen.,https://api.propublica.org/congress/v1/members...,[],,,A resolution electing Michael C. Stenger as Se...,
4,True,s167-115,s167,s,https://api.propublica.org/congress/v1/115/bil...,"[HSII, SSEG]",House Natural Resources Committee,https://www.congress.gov/bill/115th-congress/s...,4,"{'R': 1, 'D': 3}",...,Jerry Moran,R,KS,Sen.,https://api.propublica.org/congress/v1/members...,[HSII10],National Memorial to Fallen Educators Act (Sec...,National Memorial to Fallen Educators Act (Sec...,A bill to designate a National Memorial to Fal...,


In [26]:
pd.DataFrame(congress.bills.subjects('s2513',congress=cong))['subjects'].head()

0    {'name': 'Elementary and secondary education',...
1    {'name': 'Congressional oversight', 'url_name'...
2    {'name': 'Crime prevention', 'url_name': 'crim...
3    {'name': 'Child health', 'url_name': 'child-he...
4    {'name': 'Building construction', 'url_name': ...
Name: subjects, dtype: object

In [27]:
pd.DataFrame(congress.members.bills(hillary)['bills']).head()

Unnamed: 0,active,bill_id,bill_type,bill_uri,committees,congress,congressdotgov_url,cosponsors,cosponsors_by_party,enacted,...,sponsor_id,sponsor_name,sponsor_party,sponsor_state,sponsor_title,sponsor_uri,summary,summary_short,title,vetoed
0,,s211-111,s,https://api.propublica.org/congress/v1/111/bil...,Senate Finance,111,https://www.congress.gov/bill/111th-congress/s...,61,"{'R': 16, 'D': 43, 'ID': 1, 'I': 1}",,...,C001041,Hillary Rodham Clinton,D,NY,,https://api.propublica.org/congress/v1/members...,,,Calling for 2-1-1 Act of 2009,
1,,s182-111,s,https://api.propublica.org/congress/v1/111/bil...,Senate Finance,111,https://www.congress.gov/bill/111th-congress/s...,42,"{'D': 41, 'I': 1}",,...,C001041,Hillary Rodham Clinton,D,NY,,https://api.propublica.org/congress/v1/members...,,,Paycheck Fairness Act,
2,,sres727-110,sres,https://api.propublica.org/congress/v1/110/bil...,,110,https://www.congress.gov/bill/110th-congress/s...,3,{'D': 3},,...,C001041,Hillary Rodham Clinton,D,NY,,https://api.propublica.org/congress/v1/members...,,,A resolution honoring the victims of the bombi...,
3,,sres720-110,sres,https://api.propublica.org/congress/v1/110/bil...,"Senate Health, Education, Labor, and Pensions",110,https://www.congress.gov/bill/110th-congress/s...,2,"{'D': 1, 'R': 1}",,...,C001041,Hillary Rodham Clinton,D,NY,,https://api.propublica.org/congress/v1/members...,,,A resolution supporting the goals and ideals o...,
4,,s19-110,s,https://api.propublica.org/congress/v1/110/bil...,Senate Finance,110,https://www.congress.gov/bill/110th-congress/s...,0,{},,...,C001041,Hillary Rodham Clinton,D,NY,,https://api.propublica.org/congress/v1/members...,,,A bill to amend the Internal Revenue Code of 1...,
