In [19]:
import json
import os
import glob

import requests
import pandas as pd

In [6]:
# Year label used in the names of the cached raw files and of the exported CSV.
YEAR = 2018
# Page size: sent as the `limit` query parameter and used as the offset step.
LIMIT = 1000
# Paged export endpoint; `limit` and `offset` are filled in per request.
# NOTE(review): `budget_year=54` is hard-coded and is not derived from YEAR --
# presumably it is the site's internal id for the 2018 budget year; confirm,
# and change it together with YEAR when scraping another year.
BASE_URL = 'https://www.belpa.be/wsExportDataTable?limit={limit}&offset={offset}&lg=fr&budget_year=54&sort=none&&sortType=ASC&'

In [31]:
# Create the cache directory for the raw JSON pages. Done in pure Python
# (rather than a shell escape) so the cell also works on kernels without a
# POSIX `mkdir`; `exist_ok=True` keeps the cell safe to re-run.
os.makedirs('data', exist_ok=True)

In [18]:
# Download the listing page by page. Every page is cached as
# data/be_raw_<YEAR>_<offset>.json, and pages that are already on disk are
# skipped, so an interrupted run can simply be started again.
# NOTE(review): when every page is already cached, the loop still requests
# the first offset past the last cached file -- confirm the API answers that
# request with an empty page rather than an error.
offset = 0
while True:
    filename = 'data/be_raw_%s_%s.json' % (YEAR, offset)
    if os.path.exists(filename):
        # Cached by an earlier run: move on to the next page.
        offset += LIMIT
        continue
    print(offset)
    response = requests.get(
        BASE_URL.format(limit=LIMIT, offset=offset),
        # Without a timeout a stalled connection would hang the cell forever.
        timeout=60,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()
    # Pull everything we need out of the payload BEFORE opening the cache
    # file: if the response is malformed we must not leave behind an empty
    # file, which later runs would treat as a successfully cached page.
    records = data['data'][0]
    # Key spelling kept exactly as the code has always read it.
    has_next = data['pager']['nextAvalaible']
    with open(filename, 'w') as f:
        json.dump(records, f)
    if not has_next:
        break
    offset += LIMIT

31000
32000
33000
34000
35000
36000
37000
38000


In [29]:


def parse_data(data):
    """Yield one flat payment row per scheme amount of a raw record.

    Args:
        data: a mapping with an ``'amount'`` mapping (amount key -> value
            convertible with ``float``), an ``'organisation'`` mapping
            providing ``'label'``, ``'code_postal'``, ``'state'`` and ``'id'``,
            and a ``'year'`` value convertible with ``int``.

    Yields:
        dict: one row per amount key that does not end in ``'_total'`` or
        ``'_feaga'``. The scheme name is the amount key with ``'field_mnt_'``
        removed.

    The recipient id is built from the record's own year rather than from a
    notebook-level constant, so the ``year`` column and ``recipient_id`` can
    never disagree and the function does not depend on global state.
    """
    for amount_key, amount in data['amount'].items():
        if amount_key.endswith(('_total', '_feaga')):
            # ignore total and total of feaga (sub feaga amounts are present)
            continue
        organisation = data['organisation']
        year = int(data['year'])
        yield {
            'recipient_name': organisation['label'],
            'recipient_postcode': organisation['code_postal'],
            'recipient_location': organisation['state'],
            'year': year,
            'scheme': amount_key.replace('field_mnt_', ''),
            'amount': float(amount),
            'currency': 'EUR',
            'country': 'BE',
            'recipient_id': 'BE-{}-{}'.format(year, organisation['id'])
        }

def get_data(year):
    """Yield parsed rows from every cached raw page of ``year``.

    Reads each file matching ``data/be_raw_<year>_*``, loads it as JSON and
    passes every element of the loaded list through ``parse_data``.

    Files are visited in sorted (lexicographic) path order: ``glob.glob``
    makes no ordering promise, and a fixed order keeps the resulting rows --
    and therefore the exported CSV -- identical from run to run.
    """
    for filename in sorted(glob.glob('data/be_raw_{}_*'.format(year))):
        with open(filename) as f:
            records = json.load(f)
        # The file is closed before the rows are consumed downstream.
        for record in records:
            yield from parse_data(record)
            
# Build the frame from the cached pages of the configured YEAR (not a
# hard-coded literal), so the data loaded here always matches the year used
# in the raw file names and in the exported CSV name.
df = pd.DataFrame(get_data(YEAR))
df.head()

Unnamed: 0,amount,country,currency,recipient_id,recipient_location,recipient_name,recipient_postcode,scheme,year
0,-2505.23,BE,EUR,BE-2018-8045,Sombreffe,ELIARD ETIENNE - COULON SABINE EP.,5140,feader,2018
1,-2505.23,BE,EUR,BE-2018-8045,Sombreffe,ELIARD ETIENNE - COULON SABINE EP.,5140,vb1_6,2018
2,5840.87,BE,EUR,BE-2018-8045,Sombreffe,LOSSON MICHEL,5140,ii1,2018
3,241.41,BE,EUR,BE-2018-8045,Sombreffe,LOSSON MICHEL,5140,ii10,2018
4,3623.39,BE,EUR,BE-2018-8045,Sombreffe,LOSSON MICHEL,5140,ii3,2018


In [30]:
# Export the parsed rows as a gzip-compressed UTF-8 CSV named after YEAR,
# leaving out the DataFrame index column.
output_path = 'be_{}.csv.gz'.format(YEAR)
df.to_csv(output_path, compression='gzip', encoding='utf-8', index=False)