In [1]:
import re
from pathlib import Path

import pandas as pd
import requests
from tqdm.notebook import tqdm

In [2]:
# Root URL of the results tree; every download below is built from it.
base = (
    'https://www.archives-resultats-elections.interieur.gouv.fr'
    '/telechargements/LG2007/'
)

In [3]:
# Round-1 index file: one row per <Dpt> element.
# dtype="object" keeps every field as text, so codes such as "01" keep
# their leading zeros.
departements = pd.read_xml(
    f'{base}resultatsT1/index.xml',
    xpath='.//Dpt',
    encoding='latin1',
    dtype="object",
)

departements

Unnamed: 0,CodReg,CodReg3Car,CodDpt,CodDpt3Car,CodMinDpt,LibDpt,DateClotureDpt,HeureClotureDpt,Clos
0,82,082,01,001,01,AIN,05-03-2012,14:41:02,CLOS
1,22,022,02,002,02,AISNE,05-03-2012,14:41:02,CLOS
2,83,083,03,003,03,ALLIER,05-03-2012,14:41:02,CLOS
3,93,093,04,004,04,ALPES DE HAUTE PROVENCE,05-03-2012,14:41:02,CLOS
4,93,093,05,005,05,HAUTES ALPES,05-03-2012,14:41:02,CLOS
...,...,...,...,...,...,...,...,...,...
100,000,000,ZM,976,976,MAYOTTE,05-03-2012,14:41:02,CLOS
101,000,000,ZN,988,988,NOUVELLE CALEDONIE,05-03-2012,14:41:02,CLOS
102,000,000,ZP,987,987,POLYNESIE FRANCAISE,05-03-2012,14:41:02,CLOS
103,000,000,ZS,975,975,SAINT PIERRE ET MIQUELON,05-03-2012,14:41:02,CLOS


In [4]:
# One "<code>cir.xml" file per department lists its constituencies.
# Each per-department frame is tagged with the department code it came from
# before all frames are stacked together.
circonscriptions = pd.concat(
    [
        pd.read_xml(
            f'{base}resultatsT1/{dept_code}/{dept_code}cir.xml',
            xpath='.//Circonscription',
            encoding='latin1',
            dtype="object",
        ).assign(CodDpt3Car=dept_code)
        for dept_code in tqdm(departements['CodDpt3Car'].tolist())
    ]
)

circonscriptions

  0%|          | 0/105 [00:00<?, ?it/s]

Unnamed: 0,CodCirLg,Tour,CodDpt3Car
0,01,,001
1,02,,001
2,03,,001
3,04,,001
0,01,,002
...,...,...,...
1,02,,988
0,01,,987
1,02,,987
0,01,,975


In [5]:
def get_tour(c, tour, base_url=None):
    """Download and parse the candidate results of one constituency for one round.

    Parameters
    ----------
    c : mapping-like
        A row of ``circonscriptions`` (or any object supporting ``c[key]``)
        providing the string fields ``CodDpt3Car`` and ``CodCirLg``.
    tour : int
        Round number. It selects both the ``resultatsT{tour}`` folder and the
        ``<Tour>`` element whose ``NumTour`` equals it.
    base_url : str, optional
        Root of the results tree, ending with ``/``. Defaults to the
        notebook-level ``base``.

    Returns
    -------
    pandas.DataFrame or None
        One row per ``<Candidat>`` with these changes to the raw columns:
        ``CodCirc`` (department code + constituency code) is added,
        ``NbVoix`` is converted to int, and ``RapportExprimes`` /
        ``RapportInscrits`` are added as float versions of
        ``RapportExprime`` / ``RapportInscrit``.
        ``None`` when the file cannot be fetched or parsed; the URL and the
        reason are printed so the caller's loop can carry on.
    """
    root = base if base_url is None else base_url
    dept, circ = c['CodDpt3Car'], c['CodCirLg']
    url = f'{root}resultatsT{tour}/{dept}/{dept}{circ}.xml'

    results = None
    try:
        results = (
            pd
            .read_xml(
                url,
                xpath=f'.//Tour[./NumTour = {tour}]//Candidat',
                encoding='latin1',
                dtype="object"
            )
            .assign(
                CodCirc = dept + circ,
                # Numbers arrive as text: strip spaces, and for the ratios
                # swap the decimal comma for a point before converting.
                NbVoix = lambda df: df.NbVoix.str.replace(' ','').astype(int),
                RapportExprimes = lambda df: df.RapportExprime.str.replace(' ','').str.replace(',','.').astype(float),
                RapportInscrits = lambda df: df.RapportInscrit.str.replace(' ','').str.replace(',','.').astype(float)
            )
        )
    except Exception as exc:
        # Best effort: a missing file must not abort the whole loop.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print('error', url)
        print(exc)

    return results

# Round 1: fetch every constituency and stack the per-constituency frames.
t1_resultats = pd.concat(
    [
        get_tour(row, 1)
        for _, row in tqdm(circonscriptions.iterrows(), total=len(circonscriptions))
    ]
)

t1_resultats

  0%|          | 0/577 [00:00<?, ?it/s]

Unnamed: 0,NomPsn,PrenomPsn,CivilitePsn,CodNua,LibNua,NbVoix,RapportExprime,RapportInscrit,Elu,CodCirc,RapportExprimes,RapportInscrits
0,DUTHU,Monique,Mme,VEC,Les Verts,1411,275,162,non,00101,2.75,1.62
1,CATHERIN,Marie-Lucienne,Mme,FN,Front national,2086,406,240,non,00101,4.06,2.40
2,DEBAT,Jean-François,M.,SOC,Socialiste,16415,3198,1895,Bal.,00101,31.98,18.95
3,HURET,Odile,Mme,DIV,Divers,448,087,051,non,00101,0.87,0.51
4,GUENARD-GERBAUD,Carole,Mme,EXG,Extrême gauche,1162,226,134,non,00101,2.26,1.34
...,...,...,...,...,...,...,...,...,...,...,...,...
0,LIKUVALU,Albert,M.,SOC,Socialiste,2424,3114,2172,Bal.,98601,31.14,21.72
1,BRIAL,Victor,M.,UMP,Union pour un Mouvement Populaire,2625,3372,2352,Bal.,98601,33.72,23.52
2,ILALIO,Atonio,M.,UDFD,UDF-Mouvement Démocrate,973,1250,871,non,98601,12.50,8.71
3,TAPUTAI,Pesamino,M.,UDFD,UDF-Mouvement Démocrate,661,849,592,non,98601,8.49,5.92


In [6]:
# to_csv raises OSError when the parent folder is missing, so create it first.
Path('lg2007').mkdir(parents=True, exist_ok=True)
t1_resultats.to_csv('lg2007/t1_resultats.csv', index=False)

In [None]:
# Round 2: same walk over all constituencies.
# get_tour returns None when a file cannot be read, and pd.concat skips
# None entries.
t2_resultats = pd.concat(
    [
        get_tour(row, 2)
        for _, row in tqdm(circonscriptions.iterrows(), total=len(circonscriptions))
    ]
)

t2_resultats

  0%|          | 0/577 [00:00<?, ?it/s]

error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2007/resultatsT2/001/00102.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2007/resultatsT2/001/00104.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2007/resultatsT2/002/00202.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2007/resultatsT2/006/00602.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2007/resultatsT2/006/00603.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2007/resultatsT2/006/00604.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2007/resultatsT2/006/00605.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2007/resultatsT2/006/00606.xml
error https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2007/

In [None]:
# to_csv raises OSError when the parent folder is missing, so create it first.
Path('lg2007').mkdir(parents=True, exist_ok=True)
t2_resultats.to_csv('lg2007/t2_resultats.csv', index=False)

In [None]:
def get_inscrits(c, tour, base_url=None):
    """Download and parse the ``Inscrits`` mention of one constituency for one round.

    Parameters
    ----------
    c : mapping-like
        A row of ``circonscriptions`` (or any object supporting ``c[key]``)
        providing the string fields ``CodDpt3Car`` and ``CodCirLg``.
    tour : int
        Round number. It selects both the ``resultatsT{tour}`` folder and the
        ``<Tour>`` element whose ``NumTour`` equals it.
    base_url : str, optional
        Root of the results tree, ending with ``/``. Defaults to the
        notebook-level ``base``.

    Returns
    -------
    pandas.DataFrame or None
        The ``Tour/Mentions/Inscrits`` element(s) as rows, with the column
        ``Nombre`` renamed to ``Inscrits`` and a ``CodCirc`` column
        (department code + constituency code) added.
        ``None`` when the file cannot be fetched or parsed; the URL and the
        reason are printed so the caller's loop can carry on.
    """
    root = base if base_url is None else base_url
    dept, circ = c['CodDpt3Car'], c['CodCirLg']
    url = f'{root}resultatsT{tour}/{dept}/{dept}{circ}.xml'

    results = None
    try:
        results = (
            pd
            .read_xml(
                url,
                xpath=f'.//Tour[./NumTour = {tour}]/Mentions/Inscrits',
                encoding='latin1',
                dtype="object"
            )
            .assign(
                CodCirc = dept + circ,
            )
            .rename({'Nombre': 'Inscrits'}, axis=1)
        )
    except Exception as exc:
        # `except e:` evaluated the undefined name `e` and raised NameError on
        # the first failure; bind the exception properly instead.
        print('error', url)
        print(exc)

    return results

# Round 1 registered-voter rows: one fetch per constituency, stacked.
t1_inscrits = pd.concat(
    [
        get_inscrits(row, 1)
        for _, row in tqdm(circonscriptions.iterrows(), total=len(circonscriptions))
    ]
)

t1_inscrits

In [None]:
# to_csv raises OSError when the parent folder is missing, so create it first.
Path('lg2007').mkdir(parents=True, exist_ok=True)
t1_inscrits.to_csv('lg2007/t1_inscrits.csv', index=False)