## Prerequisite
Download http://parltrack.euwiki.org/dumps/ep_meps_current.json.xz and http://parltrack.euwiki.org/dumps/ep_votes.json.xz, unpack in `./tmp`.

##Extracting Votes information

Process the complete `ep_votes.20190501.json` file to extract the list of votes with the vote of each MEP.
This results in two files : 
 - `votes_summary.json` contains a map of `{ [vote_id: string]: {voteid, title, ts, issue_type, url, report} }`
 - `meps_votes.csv` contains a table with votes as columns and MEPs as rows (plus some MEPs details)

In [5]:
import ijson
import itertools
import json
import dateutil.parser
import datetime

EPOCH = datetime.datetime.utcfromtimestamp(0)
def unix_millis(dt):
    return (dt - EPOCH).total_seconds() * 1000.0

with open('meps_summary.json') as json_file:  
    meps = json.load(json_file)

with open('tmp/ep_votes.20190501.json') as votes_file:
    votes_iter = ijson.items(votes_file, 'item')

    # map of vote summary, i.e. { [vote_id:string] : {voteid, title, ts} }
    votes_list = {}
    
    # map of MEP votes, i.e. { [mep_id:string] : { [vote_id:string] : 0|1|-1 } }
    all_mep_votes = {}
    
    def gather_mep_votes(response_votes, response_value, ):
        # concatenate votes across groups
        all_votes = itertools.chain.from_iterable([group['votes'] for group in response_votes['groups']])
        for mep_vote in all_votes:
            # build all_mep_votes map as we go along - not all MEPs are mentioned in every vote
            mep_id = str(mep_vote['ep_id'])
            if mep_id not in all_mep_votes: 
                all_mep_votes[mep_id] = {}
            all_mep_votes[mep_id][vote['voteid']] = response_value      

    for idx, vote in enumerate(votes_iter):
        print ('\r > vote %s' % idx, end = '')

        votes_list[vote['voteid']] = {
            'voteid' : vote['voteid'],
            'title' : vote['title'],
            'issue_type' : vote.get('issue_type', None) ,
            'url' : vote['url'],
            'report' : vote.get('report', None),
            'ts' : unix_millis(dateutil.parser.parse(vote['ts']))
        }

        if 'Abstain' in vote: gather_mep_votes(vote['Abstain'], 0)
        if 'For' in vote: gather_mep_votes(vote['For'], 1)
        if 'Against' in vote: gather_mep_votes(vote['Against'], -1)
    
# Write votes_summary.json file 
with open('votes_summary2.json', 'w') as outfile:  
    json.dump(votes_list, outfile)
    
    
vote_ids = votes_list.keys()
# define CSV headers
all_mep_votes_table_headers = ['mep_id', 'group', 'country', 'active'] + vote_ids

mep_ids = meps.keys()

mep_votes_array = [all_mep_votes_table_headers] 
for mep_id in mep_ids:
    mep_row = [mep_id, meps[mep_id]['current_group'], meps[mep_id]['country'], meps[mep_id]['active']]
    mep_votes = [ (all_mep_votes[mep_id][vote_id] if (mep_id in all_mep_votes and vote_id in all_mep_votes[mep_id]) else None) for vote_id in vote_ids ] 
    mep_votes_array.append(mep_row + mep_votes)


# Write table of MEPs votes (votes as columns, one MEP per line) to meps_votes.csv file 
import csv
with open('meps_votes2.csv', 'w') as outfile:  
    wtr = csv.writer(outfile, delimiter=',')
    wtr.writerows (mep_votes_array)

TypeError: can only concatenate list (not "dict_keys") to list

In [32]:
# clean up memory
del meps
del votes_list
del all_mep_votes
del mep_votes_array