In [1]:
import requests
from lxml import html

import pandas as pd

In [2]:
YEARS = [2017]

In [3]:
BASE_URL = 'http://saturn.etat.lu/mafea/result.do'
ENCODING = 'ISO-8859-1'

In [4]:
response = requests.get(BASE_URL)
root = html.fromstring(response.text)
COMMUNES = root.xpath('//select[@name="commune"]/option/@value')
AMOUNTS = root.xpath('//select[@name="value"]/option/@value')

In [5]:
def get_queries():
    for year in YEARS:
        for c in COMMUNES:
            yield (year, c,)


def download_year(year, commune):
    print('Running with', year, commune)
    session = requests.Session()

    for amount_range in AMOUNTS:
        response = session.post(BASE_URL, data={
            'year': str(year),
            'fond': '',
            'commune': commune,
            'name': '',
            'value': amount_range,
            'action': 'Rechercher'
        })
        response.encoding = ENCODING
        if 'Votre recherche ne retourne aucun résultat' in response.text:
            continue
        try:
            yield from extract_recipients(year, commune, amount_range, html.fromstring(response.text))
        except Exception as e:
            print('Exception at', year, commune, amount_range)
            raise e
    print('Done with', year, commune)

def extract_recipients(year, commune, amount_range, dom):
    for div in dom.xpath('.//div[@class="fieldsetWrapper"]'):
        results = div.xpath('.//table[@class="results"]//tr[@class="results"]')
        name_row = results[0]
        name_tds = name_row.xpath('./td/text()')
        if len(name_tds) == 3:
            name = '%s %s' % (name_tds[1], name_tds[0])
            location = name_tds[2]
            recipient_id = None
        else:
            name = None
            recipient_id = name_tds[0]
            location = name_tds[1]

        if recipient_id is None:
            recipient_id = 'LU-%s-%s' % (location, name)
        else:
            recipient_id = 'LU-%s-%s' % (year, recipient_id)

        value_rows = results[1:-1]
        for value_row in value_rows:
            val = value_row.xpath('./td/text()')
            scheme = val[1]
            amount = float(val[2].replace('€', ''))
            year = int(val[0])
            yield {
                'recipient_id': recipient_id,
                'recipient_name': name,
                'recipient_postcode': None,
                'recipient_location': location,
                'scheme': scheme,
                'amount': amount,
                'currency': 'EUR',
                'country': 'LU',
                'year': year,
            }

In [6]:
df = pd.DataFrame()
for year, commune in get_queries():
    df = pd.concat([df, pd.DataFrame(download_year(year, commune))])
    print(len(df))
    df.to_csv('lu_%s.csv.gz' % year, compression='gzip', index=False)

Running with 2017 (B) Attert
Done with 2017 (B) Attert
1
Running with 2017 (B) ARLON
Done with 2017 (B) ARLON
6
Running with 2017 (B) Bastogne
Done with 2017 (B) Bastogne
8
Running with 2017 (B) Bertogne
Done with 2017 (B) Bertogne
10
Running with 2017 (B) Burg-Reuland
Done with 2017 (B) Burg-Reuland
15
Running with 2017 (B) BASTOGNE
Done with 2017 (B) BASTOGNE
17
Running with 2017 (B) Messancy
Done with 2017 (B) Messancy
22
Running with 2017 (B) NAMUR
Done with 2017 (B) NAMUR
24
Running with 2017 (D) Bollendorf
Done with 2017 (D) Bollendorf
26
Running with 2017 (D) BITBURG-PRÜM
Done with 2017 (D) BITBURG-PRÜM
26
Running with 2017 (D) Schleiden
Done with 2017 (D) Schleiden
29
Running with 2017 (D) Trier-Land
Done with 2017 (D) Trier-Land
35
Running with 2017 (D) TRIER-SAARBURG
Done with 2017 (D) TRIER-SAARBURG
43
Running with 2017 (F) Berg-sur-Moselle
Done with 2017 (F) Berg-sur-Moselle
45
Running with 2017 (F) DÉPARTEMENT MOSELLE
Done with 2017 (F) DÉPARTEMENT MOSELLE
45
Running with 

In [7]:
df.head()

Unnamed: 0,amount,country,currency,recipient_id,recipient_location,recipient_name,recipient_postcode,scheme,year
0,21246.32,LU,EUR,LU-(B) Attert-ASSISTANCE TECHNIQUE STEFFEN MAT...,(B) Attert,ASSISTANCE TECHNIQUE STEFFEN MATHIEU,,IV/A.25,2017
0,128.29,LU,EUR,LU-2017-B0025,(B) ARLON,,,II.1,2017
1,19.6,LU,EUR,LU-2017-B0025,(B) ARLON,,,II.4,2017
2,126.36,LU,EUR,LU-2017-B0025,(B) ARLON,,,II.6,2017
3,374.44,LU,EUR,LU-2017-B0163,(B) ARLON,,,II.1,2017
