In [11]:
import json
import os
import glob

import requests
import pandas as pd

In [12]:
# Year of the dataset being scraped; used in the cache file names, the output
# CSV name and the generated recipient IDs.
YEAR = 2022
# Value sent as the `budget_year` query parameter. It is an opaque site-side
# identifier, not the calendar year, and has to be looked up for each YEAR.
BUDGET_YEAR = 300  # look up the value for YEAR on the website (presumably www.belpa.be) before running
# Page size: sent as `limit` and also used as the step between page offsets.
LIMIT = 1000
# Export endpoint template; {limit}, {offset} and {budget_year} are filled in per request.
BASE_URL = 'https://www.belpa.be/wsExportDataTable?limit={limit}&offset={offset}&lg=fr&budget_year={budget_year}&sort=none&&sortType=ASC&'

In [13]:
# Create the cache directory for the raw JSON pages. Pure Python instead of the
# `!mkdir -p` shell magic so the cell also works on platforms without a POSIX
# `mkdir`; `exist_ok=True` keeps it a no-op when the directory already exists.
os.makedirs('data', exist_ok=True)

In [14]:
# Download the export page by page, caching each page as
# data/be_raw_<YEAR>_<offset>.json. Pages already on disk are skipped, so an
# interrupted run can simply be re-executed to resume.
#
# NOTE(review): once every page is cached, a re-run skips all of them and then
# requests the first offset past the last cached page; what the API returns
# there is not visible from this notebook - confirm it is handled.
REQUEST_TIMEOUT = 60  # seconds; without a timeout a stalled connection hangs the cell

offset = 0
while True:
    filename = 'data/be_raw_%s_%s.json' % (YEAR, offset)
    if os.path.exists(filename):
        # Cached by an earlier run: advance to the next page without a request.
        offset += LIMIT
        continue
    print(offset)
    url = BASE_URL.format(
        limit=LIMIT,
        offset=offset,
        budget_year=BUDGET_YEAR
    )
    print(url)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()
    # Extract the records *before* touching the disk: if the response does not
    # have the expected shape, no cache file is created, so the page is fetched
    # again on the next run instead of being skipped.
    records = data['data'][0]
    # Write under a temporary name and rename into place, so an interrupted
    # write never leaves a truncated file that the resume logic would trust.
    # The temporary name deliberately does not start with `be_raw_`, so it is
    # not picked up as a cached page if it is ever left behind.
    tmp_filename = 'data/.partial_be_raw_%s_%s.json' % (YEAR, offset)
    with open(tmp_filename, 'w') as f:
        json.dump(records, f)
    os.replace(tmp_filename, filename)
    # Key spelling ('nextAvalaible') is kept exactly as the original code reads it.
    if not data['pager']['nextAvalaible']:
        break
    offset += LIMIT

38000
https://www.belpa.be/wsExportDataTable?limit=1000&offset=38000&lg=fr&budget_year=300&sort=none&&sortType=ASC&


In [15]:
def parse_data(data, year=None):
    """Flatten one raw beneficiary record into one row per payment scheme.

    Args:
        data: dict for a single beneficiary. Reads ``data['amount']`` (mapping
            of amount field name to value), ``data['year']`` and the ``label``,
            ``code_postal``, ``state`` and ``id`` keys of ``data['organisation']``.
        year: year to embed in ``recipient_id``. When omitted, the notebook-level
            ``YEAR`` constant is used, which is what this function always did
            before the parameter existed.

    Yields:
        One dict per entry of ``data['amount']`` whose key does not end in
        ``_total`` or ``_feaga``. ``scheme`` is the key with ``field_mnt_``
        removed, ``amount`` is converted with ``float`` and ``year`` with ``int``.
    """
    id_year = YEAR if year is None else year
    for amount_key, raw_amount in data['amount'].items():
        if amount_key.endswith(('_total', '_feaga')):
            # ignore total and total of feaga (sub feaga amounts are present)
            continue
        organisation = data['organisation']
        yield {
            'recipient_name': organisation['label'],
            'recipient_postcode': organisation['code_postal'],
            'recipient_location': organisation['state'],
            'year': int(data['year']),
            'scheme': amount_key.replace('field_mnt_', ''),
            'amount': float(raw_amount),
            'currency': 'EUR',
            'country': 'BE',
            'recipient_id': 'BE-{}-{}'.format(id_year, organisation['id'])
        }


def _page_sort_key(filename):
    """Sort key ordering cache files by their numeric offset suffix.

    Files whose name does not end in ``_<integer>`` (before the extension)
    sort after all numbered pages, alphabetically.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    try:
        return (0, int(stem.rsplit('_', 1)[-1]), filename)
    except ValueError:
        return (1, 0, filename)


def get_data(year):
    """Yield every parsed payment row from the cached raw pages of ``year``.

    Reads all files matching ``data/be_raw_<year>_*`` relative to the current
    working directory. ``glob.glob`` returns matches in arbitrary order, so the
    files are sorted by page offset to make the row order (and therefore the
    exported CSV) reproducible across runs and machines.

    Args:
        year: year used in the cache file names; also passed on to
            ``parse_data`` so the recipient IDs carry the same year as the
            files being read rather than the global ``YEAR``.

    Yields:
        The row dicts produced by ``parse_data`` for each record in each file.
    """
    filenames = sorted(glob.glob('data/be_raw_{}_*'.format(year)), key=_page_sort_key)
    for filename in filenames:
        with open(filename) as f:
            for x in json.load(f):
                yield from parse_data(x, year=year)
            
# Collect every parsed payment row for YEAR into a single DataFrame.
df = pd.DataFrame(data=get_data(YEAR))
# Preview the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,recipient_name,recipient_postcode,recipient_location,year,scheme,amount,currency,country,recipient_id
0,CORNET ALBERT,BE-6987,Rendeux,2022,feader,293.5,EUR,BE,BE-2022-8282
1,CORNET ALBERT,BE-6987,Rendeux,2022,ii1,476.99,EUR,BE,BE-2022-8282
2,CORNET ALBERT,BE-6987,Rendeux,2022,ii3,722.01,EUR,BE,BE-2022-8282
3,CORNET ALBERT,BE-6987,Rendeux,2022,ii4,454.09,EUR,BE,BE-2022-8282
4,CORNET ALBERT,BE-6987,Rendeux,2022,iv18,293.5,EUR,BE,BE-2022-8282


In [16]:
# Write the rows to a gzip-compressed UTF-8 CSV named after YEAR, without the index column.
output_path = 'be_{}.csv.gz'.format(YEAR)
df.to_csv(output_path, compression='gzip', encoding='utf-8', index=False)

In [17]:
# Number of exported rows (one per recipient and scheme).
df.shape[0]

171625