## Prerequisite
Download http://parltrack.euwiki.org/dumps/ep_meps_current.json.xz and http://parltrack.euwiki.org/dumps/ep_votes.json.xz, unpack in `./tmp`.


## Extracting MEPs information

Process the complete `./tmp/ep_meps_current.20190501.json` file to extract the list of MEPs with relevant details, mapped by mep_id, i.e. :
```
   MEP_summary = 
       {
         mep_id : string,
         name : string,
         surname : string,
         active : boolean,
         current_group : string,
         current_constituency : string,
         country : string,
         gender : string,
         picture : string,
         birthdate : number
       }
   result = { [mep_id: string]: MEP_summary; }
```

Resulting map is saved to `./computed/meps_summary.json` .

In [69]:
import ijson
import itertools
import json
import dateutil.parser
import datetime

EPOCH = datetime.datetime.utcfromtimestamp(0)
def unix_millis(dt):
    return (dt - EPOCH).total_seconds() * 1000.0

with open('tmp/ep_meps_current.20190501.json') as ep_meps_current_file:  
    meps_iter = ijson.items(ep_meps_current_file, 'item')
    
    # Convert to map of 
    #   {
    #     'mep_id' : string,
    #     'name' : string,
    #     'surname' : string,
    #     'active' : boolean,
    #     'current_group' : string,
    #     'current_constituency' : string,
    #     'country' : string,
    #     'gender' : string,
    #     'picture' : string,
    #     'birthdate' : number
    #   }

    meps = {}
    for idx, mep in enumerate(meps_iter):
            print '\r > processing mep ',idx,

            # sort groups by date, desc
            groups = ( sorted(mep['Groups'], key=lambda g: dateutil.parser.parse(g['end']), reverse=True) ) if ('Groups' in mep) else None 

            # sort constituencies by date, desc
            constituencies = ( sorted(mep['Constituencies'], key=lambda c: dateutil.parser.parse(c['end']) if (c and 'end' in c) else EPOCH,reverse=True)) if ('Constituencies' in mep) else None 
            
            mep_newobj = {
                'mep_id': mep['UserID'],
                'name': mep['Name']['family'],
                'surname': mep['Name']['sur'],
                'active': mep['active'],
                'current_group': groups and groups[0]['groupid'],
                'current_constituency' : constituencies and constituencies[0]['party'],
                'country' : groups and groups[0].get('country', None),
                'gender' : mep.get('Gender', None),
                'birthdate' : unix_millis(dateutil.parser.parse(mep['Birth']['date'])) if 'Birth' in mep else None,
                'picture' : mep['Photo'],
                'email' : mep.get('Mail', None),
                'eu_homepage' : mep['meta']['url'],   
            }
            meps[mep['UserID']] = mep_newobj
        
with open('computed/meps_summary.json', 'w') as outfile:  
    json.dump(meps, outfile)


 > processing mep  61

KeyboardInterrupt: 

Extracting Votes information
============================

Process the complete `ep_votes.20190501.json` file to extract the list of votes with the vote of each MEP.
```
   MEP_summary = 
       {
         mep_id : string,
         name : string,
         surname : string,
         active : boolean,
         current_group : string,
         current_constituency : string,
         country : string,
         gender : string,
         picture : string,
         birthdate : number
       }
   result = { [mep_id: string]: MEP_summary; }
```

Resulting map is saved to `meps_summary.json` .

In [83]:
with open('tmp/ep_votes.20190501.json') as votes_file:
    votes_iter = ijson.items(votes_file, 'item')

    # map of vote summary, i.e. { [vote_id:string] : {voteid, title, ts} }
    votes_list = {}
    
    # map of MEP votes, i.e. { [mep_id:string] : { [vote_id:string] : 0|1|-1 } }
    all_mep_votes = {}
    
    def gather_mep_votes(response_votes, response_value, ):
        # concatenate votes across groups
        all_votes = itertools.chain.from_iterable([group['votes'] for group in response_votes['groups']])
        for mep_vote in all_votes:
            # build all_mep_votes map as we go along - not all MEPs are mentioned in every vote
            if mep_vote['ep_id'] not in all_mep_votes: 
                all_mep_votes[mep_vote['ep_id']] = {}
            all_mep_votes[mep_vote['ep_id']][vote['voteid']] = response_value      

    for idx, vote in enumerate(votes_iter):
        print '\r > vote ',idx,

        votes_list[vote['voteid']] = { k: vote[k] for k in ['voteid', 'title', 'ts'] }

        if 'Abstain' in vote: gather_mep_votes(vote['Abstain'], 0)
        if 'For' in vote: gather_mep_votes(vote['For'], 1)
        if 'Against' in vote: gather_mep_votes(vote['Against'], -1)
        if idx > 10: break
    
    
# Write votes_summary.json file 
with open('computed/votes_summary.json', 'w') as outfile:  
    json.dump(votes_list, outfile)
    
    
vote_ids = votes_list.keys()
# define CSV headers
all_mep_votes_table_headers = ['mep_id', 'group', 'country'] + vote_ids

mep_ids = all_mep_votes.keys()
print meps.keys()
print [ meps[mep_id]['country'] for mep_id in mep_ids ]
mep_votes_array = [all_mep_votes_table_headers] + [ 
    ([mep_id, meps[mep_id]['current_group'], meps[mep_id]['country']] + 
     [ (all_mep_votes[mep_id][vote['voteid']] if vote['voteid'] in all_mep_votes[mep_id] else None) for vote in vote_ids ] )
    for mep_id in mep_ids]


# Write table of MEPs votes (votes as columns, one MEP per line) to meps_votes.csv file 
import csv
with open('computed/meps_votes.csv', 'w') as outfile:  
    wtr = csv.writer(outfile, delimiter=',')
    for x in arr : wtr.writerows (mep_votes_array)

 > vote  11 [2307, 28422, 110984, 23788, 4746, 110987, 28174, 941, 2195, 96750, 39321, 108570, 33775, 96732, 110365, 5537, 4388, 111017, 111018, 41003, 33964, 1965, 96814, 111496, 96946, 28214, 111033, 23866, 96702, 97344, 5956, 107977, 96846, 109649, 96850, 2131, 2202, 4537, 99416, 1380, 96986, 99419, 111068, 96862, 96992, 96738, 103132, 105192, 28137, 39916, 124782, 111026, 97009, 96659, 23413, 108329, 105849, 107386, 97021, 111019, 1941]


KeyError: 124928