In [1]:
import pandas as pd
from tqdm.notebook import tqdm
import requests
import re

In [2]:
# Root URL that every download URL built in this notebook is prefixed with
# (the round-specific folders `resultatsT1/`, `resultatsT2/` are appended to it).
base = 'https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/'

In [3]:
# Department index: one row per <Dpt> element of the first-round index file.
# Every column is kept as text (dtype="object") so zero-padded codes such as
# "001" are not turned into numbers.
departements = pd.read_xml(
    f'{base}resultatsT1/index.xml',
    xpath='.//Dpt',
    encoding='latin1',
    dtype="object",
)

departements

Unnamed: 0,CodReg,CodReg3Car,CodDpt,CodDpt3Car,CodMinDpt,LibDpt,DateClotureDpt,HeureClotureDpt,Clos
0,82,082,01,001,01,AIN,12-06-2012,10:35:28,CLOS
1,22,022,02,002,02,AISNE,12-06-2012,10:35:31,CLOS
2,83,083,03,003,03,ALLIER,12-06-2012,10:35:32,CLOS
3,93,093,04,004,04,ALPES DE HAUTE PROVENCE,12-06-2012,10:35:33,CLOS
4,93,093,05,005,05,HAUTES ALPES,12-06-2012,10:35:34,CLOS
...,...,...,...,...,...,...,...,...,...
102,000,000,ZP,987,987,POLYNESIE FRANCAISE,12-06-2012,10:35:25,CLOS
103,000,000,ZS,975,975,SAINT PIERRE ET MIQUELON,12-06-2012,10:35:25,CLOS
104,000,000,ZW,986,986,WALLIS-ET-FUTUNA,12-06-2012,10:35:25,CLOS
105,000,000,ZX,977,977,SAINT-MARTIN/SAINT-BARTHELEMY,12-06-2012,10:35:26,CLOS


In [4]:
# Constituency list: download the `<dpt>cir.xml` file of every department,
# keep one row per <Circonscription> element and tag each row with the
# 3-character department code it was fetched for.
circonscriptions = pd.concat(
    [
        pd.read_xml(
            f'{base}resultatsT1/{code_dpt}/{code_dpt}cir.xml',
            xpath='.//Circonscription',
            encoding='latin1',
            dtype="object",
        ).assign(CodDpt3Car=code_dpt)
        for code_dpt in tqdm(list(departements['CodDpt3Car']))
    ]
)

circonscriptions

  0%|          | 0/107 [00:00<?, ?it/s]

Unnamed: 0,CodCirLg,Tour,CodDpt3Car
0,01,,001
1,02,,001
2,03,,001
3,04,,001
4,05,,001
...,...,...,...
6,07,,099
7,08,,099
8,09,,099
9,10,,099


In [5]:
def get_tour(c, tour):
    """Fetch the per-candidate results of one constituency for one round.

    Parameters
    ----------
    c : mapping-like row (e.g. one row of ``circonscriptions``)
        Must provide the string fields ``CodDpt3Car`` and ``CodCirLg``.
    tour : int
        Round number. It selects both the ``resultatsT{tour}`` folder and the
        ``<Tour>`` element whose ``NumTour`` equals it.

    Returns
    -------
    pandas.DataFrame or None
        One row per ``<Candidat>`` element, with ``NbVoix`` converted to int
        and three added columns: ``CodCirc`` (department code followed by
        constituency code) and the float columns ``RapportExprimes`` /
        ``RapportInscrits`` parsed from the comma-decimal text columns
        ``RapportExprime`` / ``RapportInscrit``.
        ``None`` when the file cannot be fetched or parsed; the URL and the
        error are printed in that case.
    """
    results = None
    url = f'{base}resultatsT{tour}/{c["CodDpt3Car"]}/{c["CodDpt3Car"]}{c["CodCirLg"]}.xml'
    try:
        results = (
            pd
            .read_xml(
                url,
                xpath=f'.//Tour[./NumTour = {tour}]//Candidat',
                encoding='latin1',
                dtype="object"
            )
            .assign(
                CodCirc = c['CodDpt3Car'] + c['CodCirLg'],
                # Numbers arrive as text with space separators and decimal commas.
                NbVoix = lambda df: df.NbVoix.str.replace(' ','').astype(int),
                RapportExprimes = lambda df: df.RapportExprime.str.replace(' ','').str.replace(',','.').astype(float),
                RapportInscrits = lambda df: df.RapportInscrit.str.replace(' ','').str.replace(',','.').astype(float)
            )
        )
    except Exception as e:
        # Best effort: report and skip this constituency. `Exception` (not a
        # bare except) so Ctrl-C / kernel interrupt still stops the whole loop.
        print('error', url)
        print(e)

    return results

# First-round candidate results: one download per constituency, stacked into
# a single table. get_tour returns None for a constituency it could not load.
t1_resultats = pd.concat(
    [
        get_tour(circo, 1)
        for _, circo in tqdm(list(circonscriptions.iterrows()))
    ]
)

t1_resultats

  0%|          | 0/577 [00:00<?, ?it/s]

Unnamed: 0,NumPanneauCand,NomPsn,PrenomPsn,CivilitePsn,CodNua,LibNua,NbVoix,RapportExprime,RapportInscrit,Elu,CodCirc,RapportExprimes,RapportInscrits
0,1,ALLOUACHE,Nadia,Mme,VEC,Europe-Ecologie-Les Verts,1161,247,146,non,00101,2.47,1.46
1,2,LEPAGNOT,Maude,Mme,EXG,Extrême gauche,141,030,017,non,00101,0.30,0.17
2,3,BAUDOUIN,Grégory,M.,DVD,Divers droite,296,063,037,non,00101,0.63,0.37
3,4,FLECHON,Jacques,M.,DVD,Divers droite,220,047,027,non,00101,0.47,0.27
4,5,PERRIN,Clément,M.,FN,Front National,7290,1549,922,non,00101,15.49,9.22
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15,16,VILLARD,Marc,M.,SOC,Socialiste,5819,2665,734,Bal.,09911,26.65,7.34
16,17,BRETEAU,Sébastien,M.,DVD,Divers droite,1070,490,135,non,09911,4.90,1.35
17,18,BALLOUHEY,Claude,M.,FG,Front de gauche,403,185,050,non,09911,1.85,0.50
18,19,ARCIZET,Romain,M.,AUT,Autres,217,099,027,non,09911,0.99,0.27


In [6]:
# Save the first-round candidate table as CSV, without the index column.
# NOTE(review): assumes the lg2012/ output directory already exists — confirm.
t1_resultats.to_csv('lg2012/t1_resultats.csv', index=False)

In [7]:
# Second-round candidate results: same download loop as for round one.
# get_tour prints the URL and returns None for every constituency whose
# round-two file could not be loaded.
t2_resultats = pd.concat(
    [
        get_tour(circo, 2)
        for _, circo in tqdm(list(circonscriptions.iterrows()))
    ]
)

t2_resultats

  0%|          | 0/577 [00:00<?, ?it/s]

error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/resultatsT2/006/00606.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/resultatsT2/006/00607.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/resultatsT2/008/00803.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/resultatsT2/009/00901.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/resultatsT2/015/01502.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/resultatsT2/016/01601.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/resultatsT2/016/01603.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/resultatsT2/019/01901.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/

Unnamed: 0,NumPanneauCand,NomPsn,PrenomPsn,CivilitePsn,CodNua,LibNua,NbVoix,RapportExprime,RapportInscrit,Elu,CodCirc,RapportExprimes,RapportInscrits
0,1,DEBAT,Jean-François,M.,SOC,Socialiste,22743,4841,2876,non,00101,48.41,28.76
1,2,BRETON,Xavier,M.,UMP,Union pour un Mouvement Populaire,24233,5159,3064,oui,00101,51.59,30.64
0,1,RAYMOND,Michel,M.,DVG,Divers gauche,19529,3876,2246,non,00102,38.76,22.46
1,2,DE LA VERPILLIERE,Charles,M.,UMP,Union pour un Mouvement Populaire,22327,4431,2568,oui,00102,44.31,25.68
2,3,EYRAUD,Olivier,M.,FN,Front National,8530,1693,981,non,00102,16.93,9.81
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,2,AMIRSHAHI,Pouria,M.,SOC,Socialiste,10851,6239,1117,oui,09909,62.39,11.17
0,1,CHAOUI,Jean-Daniel,M.,SOC,Socialiste,9631,4687,1054,non,09910,46.87,10.54
1,2,MARSAUD,Alain,M.,UMP,Union pour un Mouvement Populaire,10919,5313,1195,oui,09910,53.13,11.95
0,1,MARIANI,Thierry,M.,UMP,Union pour un Mouvement Populaire,10390,5215,1316,oui,09911,52.15,13.16


In [8]:
# Save the second-round candidate table as CSV, without the index column.
# NOTE(review): assumes the lg2012/ output directory already exists — confirm.
t2_resultats.to_csv('lg2012/t2_resultats.csv', index=False)

In [9]:
def get_inscrits(c, tour):
    """Fetch the registered-voter entry of one constituency for one round.

    Parameters
    ----------
    c : mapping-like row (e.g. one row of ``circonscriptions``)
        Must provide the string fields ``CodDpt3Car`` and ``CodCirLg``.
    tour : int
        Round number. It selects both the ``resultatsT{tour}`` folder and the
        ``<Tour>`` element whose ``NumTour`` equals it.

    Returns
    -------
    pandas.DataFrame or None
        The ``Mentions/Inscrits`` element(s) of that round, with the
        ``Nombre`` column renamed to ``Inscrits`` (left as text) and a
        ``CodCirc`` column (department code followed by constituency code).
        ``None`` when the file cannot be fetched or parsed; the URL and the
        error are printed in that case.
    """
    results = None
    url = f'{base}resultatsT{tour}/{c["CodDpt3Car"]}/{c["CodDpt3Car"]}{c["CodCirLg"]}.xml'
    try:
        results = (
            pd
            .read_xml(
                url,
                xpath=f'.//Tour[./NumTour = {tour}]/Mentions/Inscrits',
                encoding='latin1',
                dtype="object"
            )
            .assign(
                CodCirc = c['CodDpt3Car'] + c['CodCirLg'],
            )
            .rename({'Nombre': 'Inscrits'}, axis=1)
        )
    except Exception as e:
        # `except e:` would look up an undefined name and raise NameError on
        # any failure; bind the exception instead so it can be reported.
        print('error', url)
        print(e)

    return results

# First-round registered-voter counts: one download per constituency,
# stacked into a single table.
t1_inscrits = pd.concat(
    [
        get_inscrits(circo, 1)
        for _, circo in tqdm(list(circonscriptions.iterrows()))
    ]
)

t1_inscrits

  0%|          | 0/577 [00:00<?, ?it/s]

Unnamed: 0,Inscrits,CodCirc
0,79066,00101
0,86982,00102
0,69837,00103
0,84591,00104
0,73718,00105
...,...,...
0,89033,09907
0,109411,09908
0,96769,09909
0,91600,09910


In [10]:
# Save the first-round registered-voter table as CSV, without the index column.
# NOTE(review): assumes the lg2012/ output directory already exists — confirm.
t1_inscrits.to_csv('lg2012/t1_inscrits.csv', index=False)