In [1]:
from itertools import islice
from pprint import pprint

import numpy as np
import pymongo

# Open a client with no arguments, i.e. using pymongo's default connection settings.
client = pymongo.MongoClient()

# This is the MongoDB database and collection where we've imported all the vote data.
votes = client.congress.votes

# Print a sample vote datum so the shape of a single vote record is visible
# before we start filtering on its fields.
v = votes.find_one()
pprint(v)

{u'_id': ObjectId('594efd8be3bdea33acd1e6f1'),
 u'bill': {u'congress': 60, u'number': 23464, u'type': u'hr'},
 u'category': u'unknown',
 u'chamber': u'h',
 u'congress': 60,
 u'date': u'1909-02-24',
 u'date_unparsed': u'FEB. 24, 1909',
 u'number': 303,
 u'presidents_position': {u'option': u'Not Voting',
                          u'voteview_votecode_extra': None},
 u'question': u'TO RECEDE FROM HOUSE DISAGREEMENT TO SENATE AMENDMENT AND AGREE TO SAME WITH AN AMENDMENT TO H. R. 23464 WHICH AMENDMENT PROVIDES TO INCREASE THE SALARY OF THE PRESIDENT OF THE U.S. FROM $50,000 TO $75,000 PER ANNUM WITHOUT TRAVELING EXPENSES INSTEAD OF AGREEING TO SENATE AMENDMENT WHICH WOULD INCREASE SAME TO $100,000.',
 u'requires': u'unknown',
 u'result': u'unknown',
 u'session': u'2',
 u'source_url': u'http://www.voteview.com',
 u'type': u'TO RECEDE FROM HOUSE DISAGREEMENT TO SENATE AMENDMENT AND AGREE TO SAME WITH AN AMENDMENT TO H. R. 23464 WHICH AMENDMENT PROVIDES TO INCREASE THE SALARY OF THE PRESIDENT 

In [2]:
# Note: for convenience we will use 'bills' to refer to both bills and joint resolutions.
# Some handy information:
# https://github.com/unitedstates/congress/wiki/votes
#
# Possible categories:
# passage, passage-suspension, amendment, cloture, nomination, treaty, recommit,
# quorum, leadership, conviction, veto-override, procedural, or unknown

# The bill types we treat as "bills" in this cell (bills and joint resolutions).
BILL_TYPES = ['hr', 'hjres', 's', 'sjres']

# How many votes reference a bill at all, followed by the size of the whole collection.
# Each print uses a single pre-formatted argument so the cell emits the same
# text on Python 2 and Python 3.
bill_votes = votes.find({'bill': {'$exists': True}})
print("Total votes with associated bills: %s" % bill_votes.count())
print(votes.count())

# Votes on passage of bills
h = votes.find({'bill.type': {'$in': BILL_TYPES}, 'category': 'passage'})
amendments = votes.find({'category': 'amendment'})
print("Number of votes on 'passage': %s" % h.count())
print("Number of amendment votes:  %s" % amendments.count())

# Every vote on a bill or joint resolution, regardless of category.
h = votes.find({'bill.type': {'$in': BILL_TYPES}})
print("Number of votes on bills or joint res: %s" % h.count())

Total votes with associated bills: 101845
104090
Number of votes on 'passage': 3351
Number of amendment votes:  9084
Number of votes on bills or joint res: 65954


In [3]:
# That's so few. What if we include the "unknown" category?
h = votes.find({'bill.type': {'$in': ['hr', 'hjres', 's', 'sjres']}, 'category': {'$in': ['passage', 'unknown']}})
# Single pre-formatted argument: prints the same text on Python 2 and Python 3.
print("Including unknown %s" % h.count())

# Hmm. What does it mean to be 'unknown'?
# Pull one 'hr' vote from a congress after the 111th that is labelled 'unknown'
# and look at the full record.
v = votes.find_one({'bill.type': 'hr', 'bill.congress': {'$gt': 111}, 'category': 'unknown'})
pprint(v)

Including unknown 50574
{u'_id': ObjectId('594efdf7e3bdea33acd299ea'),
 u'bill': {u'congress': 114, u'number': 1599, u'type': u'hr'},
 u'category': u'unknown',
 u'chamber': u'h',
 u'congress': 114,
 u'date': u'2015-07-23T14:07:00-04:00',
 u'number': 463,
 u'question': u'On agreeing to the Polis of Colorado Amendment to the title: H R 1599 Safe and Accurate Food Labeling Act of 2015',
 u'requires': u'1/2',
 u'result': u'Failed',
 u'result_text': u'Failed',
 u'session': u'2015',
 u'source_url': u'http://clerk.house.gov/evs/2015/roll463.xml',
 u'subject': u'Safe and Accurate Food Labeling Act of 2015',
 u'type': u'On agreeing to the Polis of Colorado Amendment to the title',
 u'updated_at': u'2016-12-25T10:04:17-05:00',
 u'vote_id': u'h463-114.2015',
 u'votes': {u'Nay': [{u'display_name': u'Abraham',
                      u'id': u'A000374',
                      u'party': u'R',
                      u'state': u'LA'},
                     {u'display_name': u'Adams',
                      u

In [5]:
# Okay, let's try this.

# Cursor over the 'passage' votes on bills and joint resolutions.
passage_query = {
    'bill.type': {'$in': ['hr', 'hjres', 's', 'sjres']},
    'category': 'passage',
}
x = votes.find(passage_query)
def bill_id(b):
    """Return a bracketed label for a bill: "[<type><congress>-<number>]".

    b is a mapping providing 'type', 'congress' and 'number'; the latter two
    are rendered with %d.
    """
    kind, congress, number = b['type'], b['congress'], b['number']
    return "[%s%d-%d]" % (kind, congress, number)

def normalize(decision):
    """Collapse the different spellings of a vote option onto one label.

    'Yea'/'Aye' become 'Aye', 'No'/'Nay' become 'Nay', 'Not Voting' and
    'Present' are kept as they are, and anything else is reported as
    'Unknown'.
    """
    synonym_groups = (
        ('Aye', ('Yea', 'Aye')),
        ('Nay', ('No', 'Nay')),
        ('Not Voting', ('Not Voting',)),
        ('Present', ('Present',)),
    )
    canonical_for = {}
    for canonical, spellings in synonym_groups:
        for spelling in spellings:
            canonical_for[spelling] = canonical

    try:
        return canonical_for[decision]
    except KeyError:
        # Option we have no mapping for.
        return 'Unknown'

def get_examples_for_decision(vote_data, vote_decision):
    """Build (member id, vote id, normalized decision) tuples for one vote option.

    vote_data is a single vote record; vote_decision is one of the keys of
    vote_data['votes'] (for example 'Yea' or 'No'). Returns a list with one
    tuple per member listed under that option, or an empty list when the
    option does not appear in the record. The decision in each tuple is the
    normalized label returned by normalize(), not the raw key.
    """
    members = vote_data['votes'].get(vote_decision, [])
    # Look the vote id up once and reuse it for every member.
    vote_id = vote_data['vote_id']
    label = normalize(vote_decision)
    # NOTE(review): every entry is assumed to be a mapping with an 'id' key.
    # The upstream data format reportedly records a Vice President tie-break
    # as the bare string "VP", which would fail here -- confirm for Senate data.
    return [(m['id'], vote_id, label) for m in members]
    
def vote_per_member(vote_data):
    """Yield one (member id, vote id, normalized decision) tuple per member.

    Walks every option present in vote_data['votes'] and flattens the
    per-option lists produced by get_examples_for_decision into one stream.
    """
    for option in vote_data['votes']:
        per_option = get_examples_for_decision(vote_data, option)
        for example in per_option:
            yield example
            
def get_votes(cursor, N=1):
    """Yield per-member vote tuples for up to N vote records from cursor.

    cursor is any iterable of vote records (e.g. a pymongo cursor); at most N
    records are pulled from it and each is expanded with vote_per_member.
    If the cursor holds fewer than N records the generator simply ends after
    the last one; a non-positive N yields nothing.
    """
    # islice stops cleanly when the cursor runs dry. Calling next(cursor)
    # directly inside a generator would let StopIteration escape, which
    # Python 3.7+ turns into a RuntimeError (PEP 479).
    for vote in islice(cursor, max(N, 0)):
        for example in vote_per_member(vote):
            yield example
            
def all_decisions(cursor):
    """Return the set of distinct vote options used across all records in cursor.

    Every record is read, so a one-shot iterable is exhausted afterwards.
    """
    return {option for record in cursor for option in record['votes'].keys()}

# print(all_decisions(x))
# NOTE: keep the line above disabled when running the loop below --
# all_decisions() reads the cursor to the end, which would leave nothing
# for get_votes() to pull from the same cursor.
for pt in get_votes(x):
    # Single-argument print(): same output on Python 2 and Python 3.
    print(pt)

(u'A000109', u'h313-104.1996', 'Aye')
(u'A000215', u'h313-104.1996', 'Aye')
(u'A000217', u'h313-104.1996', 'Aye')
(u'B000013', u'h313-104.1996', 'Aye')
(u'B000078', u'h313-104.1996', 'Aye')
(u'B000072', u'h313-104.1996', 'Aye')
(u'B000104', u'h313-104.1996', 'Aye')
(u'B000169', u'h313-104.1996', 'Aye')
(u'B000179', u'h313-104.1996', 'Aye')
(u'B000208', u'h313-104.1996', 'Aye')
(u'B000213', u'h313-104.1996', 'Aye')
(u'B000220', u'h313-104.1996', 'Aye')
(u'B000229', u'h313-104.1996', 'Aye')
(u'B000403', u'h313-104.1996', 'Aye')
(u'B000461', u'h313-104.1996', 'Aye')
(u'B000463', u'h313-104.1996', 'Aye')
(u'B000556', u'h313-104.1996', 'Aye')
(u'B000586', u'h313-104.1996', 'Aye')
(u'B000589', u'h313-104.1996', 'Aye')
(u'B000617', u'h313-104.1996', 'Aye')
(u'B000622', u'h313-104.1996', 'Aye')
(u'B000817', u'h313-104.1996', 'Aye')
(u'B000953', u'h313-104.1996', 'Aye')
(u'B000996', u'h313-104.1996', 'Aye')
(u'B001063', u'h313-104.1996', 'Aye')
(u'B001066', u'h313-104.1996', 'Aye')
(u'B001135',