In [192]:
import requests
from bs4 import BeautifulSoup

# Fetch the main results page, which lists every commune.
url_root = "https://elections.public.lu"
# Election year to scrape; it is part of the results-page URL below.
year = "2018"

# A timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of letting the
# parsing code fail later on an error page.
r = requests.get(url_root + "/fr/elections-legislatives/" + year + "/resultats.html", timeout=30)
r.raise_for_status()

In [193]:
# Parse the landing page and keep only the list items describing a commune.
soup = BeautifulSoup(r.text, 'html.parser')
coms = soup.find(id="communes").find_all("li", class_="town")

# Commune name -> {"url": absolute address of that commune's results page}.
# The href found on the page is appended to url_root as-is.
com_results = {}
for town in coms:
    anchor = town.a
    com_results[anchor.text] = {"url": url_root + anchor['href']}

In [194]:
# Every party name met while scraping, in first-seen order.
party_names_list = []


def _to_int(text):
    """Parse a count scraped from the page (e.g. '1 234') into an int.

    All whitespace is removed before conversion, so digit groups separated
    by regular or non-breaking spaces are both accepted.
    """
    return int(''.join(text.split()))


def _stat(stats, index):
    """Return the integer held in the <span> of the index-th <li> of a section."""
    return _to_int(stats[index].span.text)


# Go through each commune
for com_name in com_results:
    print(com_name)

    # Alias of the commune's entry: everything stored on `com` below is
    # stored in com_results[com_name].
    com = com_results[com_name]

    # Get the commune's page; fail early on a stalled connection or HTTP error.
    cr = requests.get(com['url'], timeout=30)
    cr.raise_for_status()
    csoup = BeautifulSoup(cr.text, 'html.parser')

    cresults = csoup.find("section", id="results")
    # Commune election results: one <tbody> per party.
    cer_subtable = cresults.find_all("tbody", class_="suffrages-parti")

    com["Results"] = {}

    for party_res in cer_subtable:
        party_name = party_res.find(class_="suffrages-parti-name").text

        if party_name not in party_names_list:
            party_names_list.append(party_name)

        # Only the first row is read: it contains the totals for the party,
        # not the individual breakdown per candidate
        # (see party_res.find_all("tr")[1] for the latter).
        p_values = party_res.find_all("tr")[0].find_all("td")
        com["Results"][party_name] = {
            "vot_list": _to_int(p_values[0].text),
            "vot_cand": _to_int(p_values[1].text),
            "vot_total": _to_int(p_values[2].text),
        }

    # Go through each section of the statistics; the <h3> text selects the
    # parser and is also the key the section is stored under.
    for data_section in csoup.find_all("div", class_="lux-number"):
        heading = data_section.h3.text
        stats = data_section.find_all("li")

        if heading == "Bureaux":
            com[heading] = {
                "ps_count": _stat(stats, 0),
                "ps_lv_only": _stat(stats, 1),
                # Fixed: this used to re-read entry 1 and so always equalled
                # ps_lv_only. Assumes the section lists three entries
                # (count, list-votes-only, complete) -- TODO confirm on the page.
                "ps_complete": _stat(stats, 2),
            }

        elif heading == "Candidats":
            # Entries are read as total, women, men (indices 0, 1, 2); the
            # key order below is kept because the CSV export follows it.
            com[heading] = {
                "cand_women": _stat(stats, 1),
                "cand_men": _stat(stats, 2),
                "cand_total": _stat(stats, 0),
            }

        elif heading == "Electeurs":
            com[heading] = {
                "el_registered": _stat(stats, 0),
                # The second entry is present in 2023, not 2018; -999 marks
                # its absence.
                "el_postal_ballots": _stat(stats, 1) if len(stats) > 1 else -999,
            }

        elif heading == "Bulletins":
            com[heading] = {
                "bl_in_box": _stat(stats, 0),
                "bl_valid": _stat(stats, 1),
                "bl_blank": _stat(stats, 2),
                "bl_invalid": _stat(stats, 3),
                "bl_postal": _stat(stats, 4),
            }

        elif heading == "Suffrages":
            com[heading] = {
                "vot_count": _stat(stats, 0),
                "vot_total_cast": _stat(stats, 1),
            }

        # end of sections

    # end of commune
    

Beaufort
Bech
Beckerich
Berdorf
Bertrange
Bettembourg
Bettendorf
Betzdorf
Bissen
Biwer
Boulaide
Bourscheid
Bous
Clervaux
Colmar-Berg
Consdorf
Contern
Dalheim
Diekirch
Differdange
Dippach
Dudelange
Echternach
Ell
Erpeldange-sur-Sûre
Esch-sur-Alzette
Esch-sur-Sûre
Ettelbruck
Feulen
Fischbach
Flaxweiler
Frisange
Garnich
Goesdorf
Grevenmacher
Grosbous
Habscht
Heffingen
Helperknapp
Hesperange
Junglinster
Käerjeng
Kayl
Kehlen
Kiischpelt
Koerich
Kopstal
Lac de la Haute-Sûre
Larochette
Lenningen
Leudelange
Lintgen
Lorentzweiler
Luxembourg
Mamer
Manternach
Mersch
Mertert
Mertzig
Mondercange
Mondorf-les-Bains
Niederanven
Nommern
Parc Hosingen
Pétange
Préizerdaul
Putscheid
Rambrouch
Reckange-sur-Mess
Redange/Attert
Reisdorf
Remich
Roeser
Rosport-Mompach
Rumelange
Saeul
Sandweiler
Sanem
Schengen
Schieren
Schifflange
Schuttrange
Stadtbredimus
Steinfort
Steinsel
Strassen
Tandel
Troisvierges
Useldange
Vallée de l'Ernz
Vianden
Vichten
Wahl
Waldbillig
Waldbredimus
Walferdange
Weiler-la-Tour
Weiswampach

In [195]:
# Save data in JSON format
import json
import os

# Create the output directory if needed so a fresh checkout does not fail
# in open() below.
os.makedirs("data", exist_ok=True)

json_object = json.dumps(com_results, indent=4)
# The encoding is stated explicitly so the file does not depend on the
# platform's default encoding.
with open("data/" + year + "_legislative_election_results.json", "w", encoding="utf-8") as fh:
    fh.write(json_object)

In [196]:
# Save data in CSV format
import csv
import os


def _csv_table(results, party_names):
    """Build the header and data rows of the CSV export.

    Parameters
    ----------
    results : dict
        Commune name -> {section name -> dict of values}. The "url" entry
        of a commune is skipped. Under "Results", each party maps to a dict
        from which only "vot_total" is exported.
    party_names : list
        Party names, one column each, in the order they should appear.

    Returns
    -------
    (header, rows) : tuple of list
        `header` is the list of column names and `rows` holds one list per
        commune. Both are derived from the same layout, so a commune that
        lacks a party or a section cannot shift the remaining columns:
        a missing party is written as 0, any other missing value as "".
    """
    # section name -> ordered column keys, collected over all communes so the
    # layout does not depend on which commune happens to come first.
    layout = {}
    for sections in results.values():
        for section, values in sections.items():
            if section == "url":
                continue
            if section == "Results":
                # Party columns always follow party_names, never one
                # commune's own (possibly shorter) party list.
                layout.setdefault(section, list(party_names))
                continue
            keys = layout.setdefault(section, [])
            for key in values:
                if key not in keys:
                    keys.append(key)

    header = ["com_name"]
    for keys in layout.values():
        header += keys

    rows = []
    for name, sections in results.items():
        row = [name]
        for section, keys in layout.items():
            values = sections.get(section, {})
            for key in keys:
                if section == "Results":
                    row.append(values[key]["vot_total"] if key in values else 0)
                else:
                    row.append(values.get(key, ""))
        rows.append(row)

    return header, rows


# The source URL is skipped while building the table rather than deleted from
# com_results, so this cell leaves the scraped data untouched and can be
# re-run (or run before the JSON export) without losing anything.
csv_header, csv_rows = _csv_table(com_results, party_names_list)

os.makedirs("data", exist_ok=True)

# newline="" lets the csv module control line endings; csv.writer quotes any
# field containing a comma, quote or newline.
with open("data/" + year + "_legislative_election_results.csv", "w", newline="", encoding="utf-8") as fh:
    writer = csv.writer(fh, lineterminator="\n")
    writer.writerow(csv_header)
    writer.writerows(csv_rows)
