In [1]:
import pandas as pd
from tqdm.notebook import tqdm
import requests
import re

In [2]:
base = 'https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2017/'

In [3]:
# Department index of the first round: one row per <Departement> element.
# dtype="object" keeps every column as text, so codes such as "01" are not
# turned into numbers.
departements = pd.read_xml(
    f'{base}resultatsT1/index.xml',
    xpath='.//Departement',
    dtype="object",
)

departements

Unnamed: 0,CodDpt,CodMinDpt,CodDpt3Car,LibDpt,DateDerMaj,HeureDerMaj,DateDerExtract,HeureDerExtract,Complet
0,01,01,001,Ain,12-06-2017,12:49:01,26-06-2017,10:04:10,O
1,02,02,002,Aisne,11-06-2017,21:40:40,26-06-2017,10:04:14,O
2,03,03,003,Allier,12-06-2017,10:17:04,26-06-2017,10:04:14,O
3,04,04,004,Alpes-de-Haute-Provence,12-06-2017,11:47:57,26-06-2017,10:04:16,O
4,05,05,005,Hautes-Alpes,12-06-2017,11:40:19,26-06-2017,10:04:19,O
...,...,...,...,...,...,...,...,...,...
102,ZX,977,977,Saint-Martin/Saint-Barthélemy,11-06-2017,17:24:44,26-06-2017,10:14:26,O
103,ZW,986,986,Wallis et Futuna,11-06-2017,15:10:40,26-06-2017,10:14:26,O
104,ZP,987,987,Polynésie française,07-06-2017,00:09:13,26-06-2017,10:14:41,O
105,ZN,988,988,Nouvelle-Calédonie,11-06-2017,18:24:49,26-06-2017,10:14:42,O


In [4]:
# For each department, read its constituency list ({code}CIR.xml), tag the
# rows with the department code they came from, and stack all the frames.
circonscriptions = pd.concat([
    pd.read_xml(
        f'{base}resultatsT1/{dept["CodDpt3Car"]}/{dept["CodDpt3Car"]}CIR.xml',
        xpath='.//Circonscription',
        dtype="object",
    ).assign(CodDpt3Car=dept['CodDpt3Car'])
    for _, dept in tqdm(list(departements.iterrows()))
])

circonscriptions

  0%|          | 0/107 [00:00<?, ?it/s]

Unnamed: 0,CodCirLg,Tours,CodDpt3Car
0,01,\n,001
1,02,\n,001
2,03,\n,001
3,04,\n,001
4,05,\n,001
...,...,...,...
6,07,\n,099
7,08,\n,099
8,09,\n,099
9,10,\n,099


In [5]:
def get_tour(c, tour):
    """Fetch the per-candidate results of one constituency for one round.

    Parameters
    ----------
    c : mapping-like row (e.g. a row of ``circonscriptions``)
        Must provide the string fields ``CodDpt3Car`` and ``CodCirLg``.
    tour : int
        Round number. It selects both the ``resultatsT{tour}`` directory and
        the ``<Tour>`` element whose ``NumTour`` equals it.

    Returns
    -------
    pandas.DataFrame or None
        One row per ``<Candidat>``, source columns kept as strings, plus:
        ``CodCirc`` (department code + constituency code), ``NbVoix`` cast to
        int, and ``RapportExprimes`` / ``RapportInscrits`` parsed as floats.
        ``None`` if the file cannot be downloaded or parsed; the URL and the
        error are printed in that case.
    """
    url = f'{base}resultatsT{tour}/{c["CodDpt3Car"]}/{c["CodDpt3Car"]}{c["CodCirLg"]}.xml'

    def _to_float(col):
        # Strip spaces and swap the decimal comma for a dot before casting.
        return col.str.replace(' ', '').str.replace(',', '.').astype(float)

    try:
        return (
            pd
            .read_xml(
                url,
                xpath=f'.//Tour[./NumTour = {tour}]//Candidat',
                dtype="object"
            )
            .assign(
                CodCirc=c['CodDpt3Car'] + c['CodCirLg'],
                NbVoix=lambda df: df.NbVoix.str.replace(' ', '').astype(int),
                RapportExprimes=lambda df: _to_float(df.RapportExprime),
                RapportInscrits=lambda df: _to_float(df.RapportInscrit),
            )
        )
    except Exception as err:
        # Best-effort download: report the failure and let the caller skip
        # this constituency. Catching Exception (not a bare except) keeps
        # KeyboardInterrupt working so a long download loop can be stopped.
        print('error', url)
        print(err)
        return None

# First-round candidate results for every constituency, stacked into a single
# frame. get_tour returns None on failure; pd.concat drops None entries.
t1_resultats = pd.concat([
    get_tour(circ, 1)
    for _, circ in tqdm(list(circonscriptions.iterrows()))
])

t1_resultats

  0%|          | 0/577 [00:00<?, ?it/s]

Unnamed: 0,NumPanneauCand,NomPsn,PrenomPsn,CivilitePsn,CodNua,LibNua,NbVoix,RapportExprime,RapportInscrit,Elu,CodCirc,RapportExprimes,RapportInscrits
0,1,MALLET,Laurent,M.,MDM,Modem,13534,3389,1637,Bal.,00101,33.89,16.37
1,2,BLATRIX-CONTAT,Florence,Mme,SOC,Parti socialiste,3687,923,446,non,00101,9.23,4.46
2,3,LÉPAGNOT,Maude,Mme,EXG,Extrême gauche,293,073,035,non,00101,0.73,0.35
3,4,MARTIN ZEMLIK,Fabrine,Mme,FI,La France insoumise,3874,970,468,non,00101,9.70,4.68
4,5,BRETON,Xavier,M.,LR,Les Républicains,10693,2678,1293,Bal.,00101,26.78,12.93
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,10,AGUIAR,Jennifer,Mme,DIV,Divers,192,076,021,non,09911,0.76,0.21
10,11,ALNET,Myriem,Mme,DIV,Divers,74,029,008,non,09911,0.29,0.08
11,12,NESENSHON,Frédéric,M.,DVD,Divers droite,92,036,010,non,09911,0.36,0.10
12,13,COCHARD,Sébastien,M.,FN,Front National,733,289,079,non,09911,2.89,0.79


In [6]:
# Persist the first-round results without the (non-unique) row index.
# Assumes the lg2017/ directory already exists.
t1_resultats.to_csv(
    'lg2017/t1_resultats.csv',
    index=False,
)

In [7]:
# Second-round candidate results, built exactly like the first-round frame.
# Constituencies whose T2 file cannot be read are reported by get_tour and
# contribute None, which pd.concat drops.
t2_resultats = pd.concat([
    get_tour(circ, 2)
    for _, circ in tqdm(list(circonscriptions.iterrows()))
])

t2_resultats

  0%|          | 0/577 [00:00<?, ?it/s]

error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2017/resultatsT2/056/05604.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2017/resultatsT2/075/07501.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2017/resultatsT2/080/08005.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2017/resultatsT2/986/98601.xml


Unnamed: 0,NumPanneauCand,NomPsn,PrenomPsn,CivilitePsn,CodNua,LibNua,NbVoix,RapportExprime,RapportInscrit,Elu,CodCirc,RapportExprimes,RapportInscrits
0,1,MALLET,Laurent,M.,MDM,Modem,15114,4625,1828,non,00101,46.25,18.28
1,5,BRETON,Xavier,M.,LR,Les Républicains,17564,5375,2125,oui,00101,53.75,21.25
0,6,BEGUET,Marie Jeanne,Mme,MDM,Modem,17319,4828,1852,non,00102,48.28,18.52
1,8,DE LA VERPILLIÈRE,Charles,M.,LR,Les Républicains,18556,5172,1984,oui,00102,51.72,19.84
0,4,GIVERNET,Olga,Mme,REM,La République en marche,16552,6186,2191,oui,00103,61.86,21.91
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,25,AÏCHI,Leila,Mme,DVD,Divers droite,4491,4034,417,non,09909,40.34,4.17
0,6,LAKRAFI,Amal Amélia,Mme,REM,La République en marche,12397,7125,1240,oui,09910,71.25,12.40
1,9,MARSAUD,Alain,M.,LR,Les Républicains,5002,2875,500,non,09910,28.75,5.00
0,9,MARIANI,Thierry,M.,LR,Les Républicains,5676,2828,612,non,09911,28.28,6.12


In [8]:
# Persist the second-round results without the (non-unique) row index.
# Assumes the lg2017/ directory already exists.
t2_resultats.to_csv(
    'lg2017/t2_resultats.csv',
    index=False,
)

In [9]:
def get_inscrits(c, tour):
    """Fetch the registered-voter count of one constituency for one round.

    Parameters
    ----------
    c : mapping-like row (e.g. a row of ``circonscriptions``)
        Must provide the string fields ``CodDpt3Car`` and ``CodCirLg``.
    tour : int
        Round number. It selects both the ``resultatsT{tour}`` directory and
        the ``<Tour>`` element whose ``NumTour`` equals it.

    Returns
    -------
    pandas.DataFrame or None
        The ``Mentions/Inscrits`` element as a frame (values kept as strings)
        with ``Nombre`` renamed to ``Inscrits`` and a ``CodCirc`` column
        (department code + constituency code). ``None`` if the file cannot be
        downloaded or parsed; the URL and the error are printed in that case.
    """
    url = f'{base}resultatsT{tour}/{c["CodDpt3Car"]}/{c["CodDpt3Car"]}{c["CodCirLg"]}.xml'
    try:
        return (
            pd
            .read_xml(
                url,
                xpath=f'.//Tour[./NumTour = {tour}]/Mentions/Inscrits',
                # NOTE(review): get_tour reads the same files without an
                # encoding override - confirm which setting is intended.
                encoding='latin1',
                dtype="object"
            )
            .assign(CodCirc=c['CodDpt3Car'] + c['CodCirLg'])
            .rename({'Nombre': 'Inscrits'}, axis=1)
        )
    except Exception as e:
        # The original `except e:` referenced an unbound name, so any failure
        # surfaced as a NameError instead of being reported here.
        print('error', url)
        print(e)
        return None

# Registered voters per constituency for the first round, one row each,
# stacked into a single frame.
t1_inscrits = pd.concat([
    get_inscrits(circ, 1)
    for _, circ in tqdm(list(circonscriptions.iterrows()))
])

t1_inscrits

  0%|          | 0/577 [00:00<?, ?it/s]

Unnamed: 0,Inscrits,CodCirc
0,82694,00101
0,93520,00102
0,75614,00103
0,89390,00104
0,75359,00105
...,...,...
0,105955,09907
0,121399,09908
0,107796,09909
0,99374,09910


In [10]:
# Persist the first-round registered-voter counts without the row index.
# Assumes the lg2017/ directory already exists.
t1_inscrits.to_csv(
    'lg2017/t1_inscrits.csv',
    index=False,
)