In [1]:
import pandas as pd
from tqdm.notebook import tqdm
import requests
import re

In [2]:
# Root of the downloadable result files used throughout this notebook.
# It ends with a slash because later cells append relative paths to it directly.
base = (
    'https://www.archives-resultats-elections.interieur.gouv.fr/telechargements/LG2012/'
)

In [3]:
# Read every <Dpt> element of the first-round index file into one row each.
# dtype="object" keeps all fields as strings, so zero-padded codes such as
# '01' / '001' are not turned into integers.
departements = pd.read_xml(
    f'{base}resultatsT1/index.xml',
    xpath='.//Dpt',
    encoding='latin1',
    dtype="object",
)

departements

Unnamed: 0,CodReg,CodReg3Car,CodDpt,CodDpt3Car,CodMinDpt,LibDpt,DateClotureDpt,HeureClotureDpt,Clos
0,82,082,01,001,01,AIN,12-06-2012,10:35:28,CLOS
1,22,022,02,002,02,AISNE,12-06-2012,10:35:31,CLOS
2,83,083,03,003,03,ALLIER,12-06-2012,10:35:32,CLOS
3,93,093,04,004,04,ALPES DE HAUTE PROVENCE,12-06-2012,10:35:33,CLOS
4,93,093,05,005,05,HAUTES ALPES,12-06-2012,10:35:34,CLOS
...,...,...,...,...,...,...,...,...,...
102,000,000,ZP,987,987,POLYNESIE FRANCAISE,12-06-2012,10:35:25,CLOS
103,000,000,ZS,975,975,SAINT PIERRE ET MIQUELON,12-06-2012,10:35:25,CLOS
104,000,000,ZW,986,986,WALLIS-ET-FUTUNA,12-06-2012,10:35:25,CLOS
105,000,000,ZX,977,977,SAINT-MARTIN/SAINT-BARTHELEMY,12-06-2012,10:35:26,CLOS


In [None]:
# For each department code, download its '<code>cir.xml' file, read every
# <Circonscription> element as a row, tag the rows with the department code,
# and stack all per-department frames into a single frame.
circonscriptions = pd.concat(
    [
        pd.read_xml(
            f'{base}resultatsT1/{code_dpt}/{code_dpt}cir.xml',
            xpath='.//Circonscription',
            encoding='latin1',
            dtype="object",
        ).assign(CodDpt3Car=code_dpt)
        for code_dpt in tqdm(departements['CodDpt3Car'].tolist())
    ]
)

circonscriptions

  0%|          | 0/107 [00:00<?, ?it/s]

In [None]:
def get_tour(c, tour):
    """Download the candidate results of one constituency for one round.

    Parameters
    ----------
    c : row of ``circonscriptions`` (anything supporting ``c['CodDpt3Car']``
        and ``c['CodCirLg']``, both strings).
    tour : round number; used both in the URL (``resultatsT{tour}``) and in
        the XPath filter on ``NumTour``.

    Returns
    -------
    pandas.DataFrame or None
        One row per ``Candidat`` element, with these columns added or replaced:
        ``CodCirc`` (department code + constituency code), ``NbVoix`` as int,
        and ``RapportExprimes`` / ``RapportInscrits`` as float. The source
        columns ``RapportExprime`` / ``RapportInscrit`` are kept as strings.
        ``None`` is returned when the file cannot be fetched or parsed; the
        failing URL and the exception are printed so the caller's loop can
        carry on (``pd.concat`` drops ``None`` entries).
    """
    results = None
    url = f'{base}resultatsT{tour}/{c["CodDpt3Car"]}/{c["CodDpt3Car"]}{c["CodCirLg"]}.xml'

    def _strip_spaces(col):
        # Numbers in the feed may contain spaces (e.g. thousands separators).
        return col.str.replace(' ', '', regex=False)

    def _to_float(col):
        # Decimal comma -> decimal point before casting.
        return _strip_spaces(col).str.replace(',', '.', regex=False).astype(float)

    try:
        results = (
            pd
            .read_xml(
                url,
                xpath=f'.//Tour[./NumTour = {tour}]//Candidat',
                encoding='latin1',
                dtype="object"
            )
            .assign(
                CodCirc = c['CodDpt3Car'] + c['CodCirLg'],
                NbVoix = lambda df: _strip_spaces(df.NbVoix).astype(int),
                RapportExprimes = lambda df: _to_float(df.RapportExprime),
                RapportInscrits = lambda df: _to_float(df.RapportInscrit)
            )
        )
    except Exception as exc:
        # Best-effort: skip this constituency but report why. Unlike a bare
        # `except:`, this lets KeyboardInterrupt / SystemExit propagate so a
        # long download loop can still be interrupted.
        print('error', url, repr(exc))

    return results

# Collect round-1 candidate results for every constituency and stack them
# into a single frame.
t1_resultats = pd.concat(
    [
        get_tour(circo, 1)
        for _, circo in tqdm(list(circonscriptions.iterrows()))
    ]
)

t1_resultats

In [None]:
# Save the round-1 candidate results, leaving out the row index.
t1_resultats.to_csv(
    'lg2012/t1_resultats.csv',
    index=False,
)

In [None]:
# Collect round-2 candidate results for every constituency and stack them
# into a single frame.
t2_resultats = pd.concat(
    [
        get_tour(circo, 2)
        for _, circo in tqdm(list(circonscriptions.iterrows()))
    ]
)

t2_resultats

In [None]:
# Save the round-2 candidate results, leaving out the row index.
t2_resultats.to_csv(
    'lg2012/t2_resultats.csv',
    index=False,
)

In [None]:
def get_inscrits(c, tour):
    """Download the registered-voters entry of one constituency for one round.

    Parameters
    ----------
    c : row of ``circonscriptions`` (anything supporting ``c['CodDpt3Car']``
        and ``c['CodCirLg']``, both strings).
    tour : round number; used both in the URL (``resultatsT{tour}``) and in
        the XPath filter on ``NumTour``.

    Returns
    -------
    pandas.DataFrame or None
        The ``Mentions/Inscrits`` element(s) of the requested round, with a
        ``CodCirc`` column added (department code + constituency code) and
        the ``Nombre`` column renamed to ``Inscrits``. Values are left as
        strings (``dtype="object"``). ``None`` is returned when the file
        cannot be fetched or parsed; the failing URL and the exception are
        printed so the caller's loop can carry on (``pd.concat`` drops
        ``None`` entries).
    """
    results = None
    url = f'{base}resultatsT{tour}/{c["CodDpt3Car"]}/{c["CodDpt3Car"]}{c["CodCirLg"]}.xml'
    try:
        results = (
            pd
            .read_xml(
                url,
                xpath=f'.//Tour[./NumTour = {tour}]/Mentions/Inscrits',
                encoding='latin1',
                dtype="object"
            )
            .assign(
                CodCirc = c['CodDpt3Car'] + c['CodCirLg'],
            )
            .rename(columns={'Nombre': 'Inscrits'})
        )
    except Exception as exc:
        # Best-effort: skip this constituency but report why. Unlike a bare
        # `except:`, this lets KeyboardInterrupt / SystemExit propagate so a
        # long download loop can still be interrupted.
        print('error', url, repr(exc))

    return results

# Collect the round-1 registered-voter rows for every constituency and stack
# them into a single frame.
t1_inscrits = pd.concat(
    [
        get_inscrits(circo, 1)
        for _, circo in tqdm(list(circonscriptions.iterrows()))
    ]
)

t1_inscrits

In [None]:
# Save the round-1 registered-voter rows, leaving out the row index.
t1_inscrits.to_csv(
    'lg2012/t1_inscrits.csv',
    index=False,
)

In [None]:
# Collect the round-2 registered-voter rows for every constituency and stack
# them into a single frame.
t2_inscrits = pd.concat(
    [
        get_inscrits(circo, 2)
        for _, circo in tqdm(list(circonscriptions.iterrows()))
    ]
)

t2_inscrits

In [None]:
# Save the round-2 registered-voter rows, leaving out the row index.
t2_inscrits.to_csv(
    'lg2012/t2_inscrits.csv',
    index=False,
)