# Congressional Votes
- Pull Voting data by House Rep
- Join with Bill data on Bill ID
- Understand relationship between Bill sponsorship with party lines, ...

In [2]:
from urllib.request import urlopen
import bs4
import requests
from lxml import html
from lxml.cssselect import CSSSelector
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## House Bill Data
- Identify bill_action range of values
- 

In [103]:
xml_range = range(1,5)
bill_rollnum = []
bill_chamber = []
bill_action = []
bill_date = []
bill_voteurl = []

In [104]:
for i in xml_range:
    bill_url = 'https://www.govinfo.gov/bulkdata/BILLSTATUS/115/hr/BILLSTATUS-115hr' + str(i) + '.xml'
    bill_source = requests.get(bill_url)
    bill_ntree = html.document_fromstring(bill_source.content)
    
    bill_rollnum_text = bill_ntree.xpath('//recordedvote[descendant::chamber/text()="House"]/rollnumber/text()')
    bill_rollnum.append(bill_rollnum_text)
    bill_chamber_text = bill_ntree.xpath('//recordedvote/chamber[text()="House"]/text()')
    bill_chamber.append(bill_chamber_text)
    bill_action_text = bill_ntree.xpath('//recordedvote[descendant::chamber/text()="House"]/fullactionname/text()')
    bill_action.append(bill_action_text)
    bill_date_text = bill_ntree.xpath('//recordedvote[descendant::chamber/text()="House"]/date/text()')
    bill_date.append(bill_date_text)
    bill_voteurl_text = bill_ntree.xpath('//recordedvote[descendant::chamber/text()="House"]/url/text()')
    bill_voteurl.append(bill_voteurl_text)

In [105]:
bill_rollnumf = [val for sublist in bill_rollnum for val in sublist]
bill_chamberf = [val for sublist in bill_chamber for val in sublist]
bill_actionf = [val for sublist in bill_action for val in sublist]
bill_datef = [val for sublist in bill_date for val in sublist]
bill_voteurlf = [val for sublist in bill_voteurl for val in sublist]

In [107]:
bill_voteurlf

['http://clerk.house.gov/evs/2017/roll699.xml',
 'http://clerk.house.gov/evs/2017/roll692.xml',
 'http://clerk.house.gov/evs/2017/roll691.xml',
 'http://clerk.house.gov/evs/2017/roll654.xml',
 'http://clerk.house.gov/evs/2017/roll653.xml',
 'http://clerk.house.gov/evs/2017/roll637.xml',
 'http://clerk.house.gov/evs/2018/roll165.xml',
 'http://clerk.house.gov/evs/2018/roll164.xml']

In [123]:
votingdf = pd.DataFrame({'bill_rollnum':bill_rollnumf, 'bill_chamber':bill_chamberf, 'bill_action':bill_actionf, 
                         'bill_date':bill_datef, 'bill_voteurl':bill_voteurlf})

In [124]:
col_seq = ['bill_date','bill_rollnum','bill_chamber','bill_action','bill_voteurl']
votingdf = votingdf.reindex(columns=col_seq)

In [121]:
votingdf = ['bill_date','bill_rollnum','bill_chamber','bill_action','bill_voteurl']
votingdf

['bill_date', 'bill_rollnum', 'bill_chamber', 'bill_action', 'bill_voteurl']

# House Vote Data - By Rep

In [149]:
voteurl_list = votingdf.bill_voteurl.tolist()
vote_legisnum = []
vote_nameid = []
vote_rollnum = []
vote_record = []

In [129]:
voteurl_list

['http://clerk.house.gov/evs/2017/roll699.xml',
 'http://clerk.house.gov/evs/2017/roll692.xml',
 'http://clerk.house.gov/evs/2017/roll691.xml',
 'http://clerk.house.gov/evs/2017/roll654.xml',
 'http://clerk.house.gov/evs/2017/roll653.xml',
 'http://clerk.house.gov/evs/2017/roll637.xml',
 'http://clerk.house.gov/evs/2018/roll165.xml',
 'http://clerk.house.gov/evs/2018/roll164.xml']

In [151]:
for i in range(1,len(voteurl_list)):
    vote_url = '{}'.format(voteurl_list[i])
    vote_source = requests.get(vote_url)
    vote_ntree = html.document_fromstring(vote_source.content)
    
    vote_legisnum_text = vote_ntree.xpath('//legis-num/text()')
    vote_legisnum.append(vote_legisnum_text)
    vote_rollnum_text = vote_ntree.xpath('//rollcall-num/text()')
    vote_rollnum.append(vote_rollnum_text)
    vote_nameid_text = vote_ntree.xpath('//recorded-vote/legislator/@name-id')
    vote_nameid.append(vote_nameid_text)
    vote_record_text = vote_ntree.xpath('//recorded-vote/vote/text()')
    vote_record.append(vote_record_text)

In [152]:
votedf = pd.DataFrame({'vote_legisnum':vote_legisnum, 'vote_rollnum':vote_rollnum, 
                       'vote_nameid':vote_nameid, 'vote_record':vote_record})

In [154]:
col_seq = ['vote_legisnum','vote_rollnum','vote_nameid','vote_record']
votedf = votedf.reindex(columns=col_seq)

In [155]:
votedf

Unnamed: 0,vote_legisnum,vote_rollnum,vote_nameid,vote_record
0,[H R 1],[692],"[A000374, A000370, A000055, A000371, A000372, ...","[Yea, Nay, Yea, Nay, Yea, Yea, Yea, Yea, Yea, ..."
1,[H R 1],[691],"[A000374, A000370, A000055, A000371, A000372, ...","[Nay, Yea, Nay, Yea, Nay, Nay, Nay, Nay, Nay, ..."
2,[H R 1],[654],"[A000374, A000370, A000055, A000371, A000372, ...","[Nay, Yea, Nay, Yea, Nay, Nay, Nay, Nay, Nay, ..."
3,[H R 1],[653],"[A000374, A000370, A000055, A000371, A000372, ...","[Yea, Nay, Yea, Nay, Yea, Nay, Yea, Yea, Yea, ..."
4,[H R 1],[637],"[A000374, A000370, A000055, A000371, A000372, ...","[Yea, Nay, Yea, Nay, Yea, Yea, Yea, Yea, Yea, ..."
5,[H R 4],[165],"[A000374, A000370, A000055, A000371, A000372, ...","[Yea, Yea, Yea, Yea, Yea, Nay, Yea, Yea, Yea, ..."
6,[H R 4],[164],"[A000374, A000370, A000055, A000371, A000372, ...","[No, Aye, No, Aye, No, No, No, No, No, No, No,..."


In [169]:
votedf_flat = pd.melt(votedf, id_vars=['vote_nameid','vote_record'], 
                        value_vars=['vote_legisnum','vote_rollnum'])


In [180]:
def extend_iloc(votedf):
    cols_to_flatten = [colname for colname in votedf.columns if isinstance(votedf.iloc[0][colname], list)]
    # Row numbers to repeat 
    lens = votedf[cols_to_flatten[0]].apply(len)
    vals = range(votedf.shape[0])
    ilocations = np.repeat(vals, lens)
    # Replicate rows and add flattened column of lists
    with_idxs = [(i, c) for (i, c) in enumerate(votedf.columns) if c not in cols_to_flatten]
    col_idxs = list(zip(*with_idxs)[0])
    new_votedf = votedf.iloc[ilocations, col_idxs].copy()

    # Flatten columns of lists
    for col_target in cols_to_flatten:
        col_flat = [item for sublist in votedf[col_target] for item in sublist]
        new_votedf[col_target] = col_flat

    return new_votedf

In [188]:
votedf.iloc[0]

vote_legisnum                                              [H R 1]
vote_rollnum                                                 [692]
vote_nameid      [A000374, A000370, A000055, A000371, A000372, ...
vote_record      [Yea, Nay, Yea, Nay, Yea, Yea, Yea, Yea, Yea, ...
Name: 0, dtype: object

In [191]:
np.repeat(vals, lens)

NameError: name 'vals' is not defined

In [179]:
extend_iloc(votedf)

TypeError: 'zip' object is not subscriptable

In [165]:
matrix = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])

In [160]:
type(matrix)

numpy.ndarray

In [166]:
matrix.flatten()

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
ntree.xpath('//recorded-vote/vote/text()')

['Yea',
 'Nay',
 'Yea',
 'Nay',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Nay',
 'Yea',
 'Nay',
 'Nay',
 'Nay',
 'Yea',
 'Nay',
 'Yea',
 'Yea',
 'Nay',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Nay',
 'Nay',
 'Nay',
 'Yea',
 'Nay',
 'Nay',
 'Yea',
 'Yea',
 'Yea',
 'Not Voting',
 'Yea',
 'Nay',
 'Nay',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Nay',
 'Nay',
 'Yea',
 'Yea',
 'Nay',
 'Nay',
 'Nay',
 'Nay',
 'Yea',
 'Yea',
 'Nay',
 'Nay',
 'Nay',
 'Yea',
 'Yea',
 'Nay',
 'Nay',
 'Nay',
 'Nay',
 'Nay',
 'Nay',
 'Nay',
 'Yea',
 'Nay',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Nay',
 'Yea',
 'Nay',
 'Nay',
 'Nay',
 'Yea',
 'Nay',
 'Yea',
 'Yea',
 'Nay',
 'Nay',
 'Nay',
 'Yea',
 'Nay',
 'Yea',
 'Yea',
 'Yea',
 'Nay',
 'Nay',
 'Yea',
 'Nay',
 'Nay',
 'Nay',
 'Nay',
 'Nay',
 'Nay',
 'Yea',
 'Yea',
 'Yea',
 'Nay',
 'Yea',
 'Nay',
 'Yea',
 'Nay',
 'Nay',
 'Nay',
 'Nay',
 'Yea',
 'Yea',
 'Yea',
 'Yea',
 'Nay',
 'Yea',
 'Nay',
 'Nay',
 'Nay',
 'Yea',
 'Nay',
 

## House Bios

In [13]:
https://xml.house.gov/MemberData/MemberData.xml

SyntaxError: invalid syntax (<ipython-input-13-0714b07055fd>, line 1)

In [181]:
party = ntree.xpath('//party/text()')
yea = ntree.xpath('//yea-total/text()')
nay = ntree.xpath('//nay-total/text()')
present = ntree.xpath('//present-total/text()')
not_voting = ntree.xpath('//not-voting-total/text()')

party = [*party, 'Total']

In [182]:
congress = ntree.xpath('//congress/text()')
rollnum = ntree.xpath('//rollcall-num/text()')

congress = [*congress, *congress, *congress, *congress]
rollnum = [*rollnum, *rollnum, *rollnum, *rollnum]

In [184]:
votingdf = pd.DataFrame({'congress':congress, 'rollnum':rollnum, 'party':party, 'yea':yea, 'nay':nay, 'present':present, 'not_voting':not_voting})
votingdf = votingdf[['congress', 'rollnum', 'party', 'yea', 'nay', 'present', 'not_voting']]

In [185]:
votingdf

Unnamed: 0,congress,rollnum,party,yea,nay,present,not_voting
0,115,699,Republican,224,12,0,3
1,115,699,Democratic,0,189,0,4
2,115,699,Independent,0,0,0,0
3,115,699,Total,224,201,0,7


In [174]:
votingdf_flat = pd.melt(votingdf, id_vars=['congress','rollnum','party'], 
                        value_vars=['yea','nay','present','not_voting'], 
                        var_name='vote_type', value_name='vote_count')

In [None]:
votingdf_flat

In [186]:
votingdf_699 = votingdf

In [179]:
votingdf_692

Unnamed: 0,congress,rollnum,party,yea,nay,present,not_voting
0,115,692,Republican,227,12,0,0
1,115,692,Democratic,0,191,0,2
2,115,692,Independent,0,0,0,0
3,115,692,Total,227,203,0,2


In [187]:
votingdf_699

Unnamed: 0,congress,rollnum,party,yea,nay,present,not_voting
0,115,699,Republican,224,12,0,3
1,115,699,Democratic,0,189,0,4
2,115,699,Independent,0,0,0,0
3,115,699,Total,224,201,0,7
