In [1]:
import pandas as pd
import numpy as np
import requests
from tqdm.notebook import tqdm
from io import StringIO

## prépare un mapping de l'index des circonscriptions de 2024 pour l'aligner avec celui de 2022

In [2]:
# Lookup from the 3-character department code (CodDpt3Car) to the department
# code (CodDpt) of the 2022 file, restricted to CodDpt values starting with "Z".
dept_pairs = (
    pd.read_csv('lg2022_circonscriptions.csv')
    .groupby(['CodDpt3Car', 'CodDpt'])
    .size()
    .reset_index(name='n_rows')
)

# Keep only the "Z"-prefixed codes and turn the two columns into a plain dict.
z_coded = dept_pairs.loc[dept_pairs['CodDpt'].str.startswith('Z')]
mapping_dept = dict(zip(z_coded['CodDpt3Car'], z_coded['CodDpt']))

mapping_dept

{'099': 'ZZ',
 '971': 'ZA',
 '972': 'ZB',
 '973': 'ZC',
 '974': 'ZD',
 '975': 'ZS',
 '976': 'ZM',
 '977': 'ZX',
 '986': 'ZW',
 '987': 'ZP',
 '988': 'ZN'}

In [3]:
# NOTE(review): disabled cell, kept for reference only. It would load a candidate
# list from data.gouv.fr and derive NumCirc / Dept / CodCirc from the
# 'Code circonscription' column, remapping Dept through mapping_dept.
# The Dept expression applies the same .str.pad(2, fillchar="0") twice; the
# second call is redundant.
# candidats_dgfr = (
#     pd
#     .read_csv('https://www.data.gouv.fr/fr/datasets/r/9efe7b76-8257-4db5-9e9f-37abb81ce65d')
#     .assign(
#         NumCirc = lambda df: df['Code circonscription'].str[-2:].str.pad(3, fillchar="0"),
#         Dept = lambda df: df['Code circonscription'].str[:-2].str.pad(2, fillchar="0").str.pad(2, fillchar="0"),
#         CodCirc = lambda df: df.Dept.replace(mapping_dept) + df.NumCirc
#     )
# )

# candidats_dgfr

## récupère la liste des circonscriptions

In [4]:
# Root URL of the LG2024 downloads; the territory file and the first-round
# result URLs are built by appending a path to it.
base = 'https://www.resultats-elections.interieur.gouv.fr/telechargements/LG2024'

In [5]:
# Download the territory description once and keep its text for the XML
# parsing cells.
# raise_for_status() makes an HTTP error fail here instead of surfacing later
# as an XML parsing error; the timeout keeps a stalled connection from
# blocking the kernel indefinitely.
territoires_response = requests.get(f'{base}/territoires/territoires.xml', timeout=30)
territoires_response.raise_for_status()
territoire_xml = territoires_response.text

In [6]:
# NOTE(review): this variable is not referenced by any other cell shown here;
# the download cell builds the same URL string inline.
territoires_url = f'{base}/territoires/territoires.xml'

In [7]:
# One row per <Region> element of the territory file; dtype="object" is
# requested for every column.
regions_buffer = StringIO(territoire_xml)
regions = pd.read_xml(regions_buffer, xpath='//Region', dtype="object")

regions

Unnamed: 0,CodReg,LibReg,Departements
0,0,Non renseigné,\n
1,1,Guadeloupe,\n
2,2,Martinique,\n
3,3,Guyane,\n
4,4,La Réunion,\n
5,6,Mayotte,\n
6,11,Île-de-France,\n
7,24,Centre-Val de Loire,\n
8,27,Bourgogne-Franche-Comté,\n
9,28,Normandie,\n


In [34]:
# One row per <Departement> element found under any <Region>.
departements = pd.read_xml(
    StringIO(territoire_xml),
    xpath='.//Region//Departement',
    dtype="object",
)

departements

Unnamed: 0,CodDpt,LibDpt,Circonscriptions
0,975,Saint-Pierre-et-Miquelon,\n
1,986,Wallis et Futuna,\n
2,987,Polynésie française,\n
3,988,Nouvelle-Calédonie,\n
4,ZX,Saint-Martin/Saint-Barthélemy,\n
...,...,...,...
102,13,Bouches-du-Rhône,\n
103,83,Var,\n
104,84,Vaucluse,\n
105,2A,Corse-du-Sud,\n


In [35]:
# Save the department table to CSV without writing the DataFrame index.
departements.to_csv('lg2024_departements.csv', index=False)

In [8]:
def flat_codes(CodReg):
    """Return the circonscriptions of one region, indexed by ``CodCirElec``.

    Parameters
    ----------
    CodReg
        Region code, matched against the ``<CodReg>`` child of each
        ``<Region>`` element of the module-level ``territoire_xml`` text.

    Returns
    -------
    pandas.DataFrame
        One row per ``<Circonscription>`` found under the region's
        departments, indexed by ``CodCirElec``, without the ``Communes``
        column, and with two added columns: ``CodReg`` (the argument) and
        ``CodDpt`` (``CodCirElec`` minus its last two characters).
    """
    # Codes of the departments declared under the requested region.
    dept_codes = pd.read_xml(
        StringIO(territoire_xml),
        xpath=f'.//Region[./CodReg = "{CodReg}"]//Departement',
        dtype="object",
    ).CodDpt.to_list()

    # One frame per department; the XML text is re-read for each XPath query.
    per_dept = []
    for dept_code in dept_codes:
        dept_frame = pd.read_xml(
            StringIO(territoire_xml),
            xpath=f'.//Departement[./CodDpt = "{dept_code}"]//Circonscription',
            dtype="object",
        )
        per_dept.append(dept_frame.assign(CodReg=CodReg, CodDpt=dept_code))

    stacked = pd.concat(per_dept)
    # CodDpt is finally taken from the circonscription code itself, replacing
    # the per-department value assigned above.
    stacked = stacked.assign(CodDpt=stacked.CodCirElec.str[:-2])

    return stacked.set_index('CodCirElec').drop(['Communes'], axis=1)

# Stack the per-region tables, in the order the regions appear in `regions`.
region_frames = [flat_codes(region_code) for region_code in regions.CodReg.to_list()]
circonscriptions = pd.concat(region_frames)

circonscriptions

Unnamed: 0_level_0,LibCirElec,NbSap,CodReg,CodDpt
CodCirElec,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
97501,Saint-Pierre-et-Miquelon,1,00,975
98601,1ère circonscription,1,00,986
98701,1ere circonscription,1,00,987
98702,2ème circonscription,1,00,987
98703,3ème circonscription,1,00,987
...,...,...,...,...
8405,5ème circonscription,1,93,84
2A01,1ère circonscription,1,94,2A
2A02,2ème circonscription,1,94,2A
2B01,1ère circonscription,1,94,2B


In [9]:
# NOTE(review): disabled cell, kept for reference only. It would read every
# <Circonscription> element in a single read_xml call and remap CodDpt through
# mapping_dept; unlike flat_codes it adds no CodReg column, and it passes the
# XML text to read_xml directly instead of wrapping it in StringIO.
# circonscriptions = (
#     pd
#     .read_xml(
#         territoire_xml,
#         xpath='.//Circonscription',
#         dtype="object"
#     )
#     .assign(
#         CodDpt = lambda df: df.CodCirElec.str[:-2].replace(mapping_dept)
#     )
#     .set_index('CodCirElec')
#     .drop(['Communes'], axis=1)
# )

# circonscriptions

In [10]:
# Save the circonscription table; the CodCirElec index is written as the first column.
circonscriptions.to_csv('lg2024_circonscriptions.csv')

## récupère la liste des candidat·e·s du premier tour

In [11]:
def get_candidats_t1(circonscription):
    """Fetch the first-round candidate list of one circonscription.

    Parameters
    ----------
    circonscription : str
        Circonscription code. Everything but its last two characters is used
        as the department folder (and file-name prefix) of the download URL.

    Returns
    -------
    pandas.DataFrame or None
        One row per ``<Candidat>`` element, with an added ``CodCirc`` column
        holding ``circonscription``; ``None`` when the file could not be
        fetched or parsed (the failure is printed).
    """
    dept = circonscription[:-2]

    url = f'https://www.resultats-elections.interieur.gouv.fr/telechargements/LG2024/candidatsT1/{dept}/C1{dept}{circonscription}.xml'

    try:
        df = (
            pd
            .read_xml(url, xpath='.//Candidat')
            .assign(CodCirc=circonscription)
        )
    except Exception as exc:
        # Best effort: report the failure and skip this circonscription.
        # Catching Exception rather than using a bare `except` lets
        # KeyboardInterrupt propagate, so the download loop can be stopped.
        print(f'error: {circonscription} ({exc!r})')
        df = None

    return df
    
    
# Fetch the candidate list of every circonscription, in index order, and stack
# the results into one long table.
candidats_frames = []
for circ_code in circonscriptions.index.to_list():
    candidats_frames.append(get_candidats_t1(circ_code))

candidats_2024_t1 = pd.concat(candidats_frames)

candidats_2024_t1

Unnamed: 0,NumPanneauCand,NomPsn,PrenomPsn,CivilitePsn,CodNuaCand,LibNuaCand,CodCirc
0,1,LENORMAND,Stéphane,M.,DVD,Divers droite,97501
1,2,BEAUMONT,Frédéric,M.,SOC,Parti socialiste,97501
2,3,CHAGNON,Patricia,Mme,RN,Rassemblement National,97501
3,4,LETOURNEL,Marion,Mme,FI,La France insoumise,97501
4,5,LEBAILLY,Patrick,M.,DVG,Divers gauche,97501
...,...,...,...,...,...,...,...
3,4,RONGIONE,Viviane,Mme,EXG,Extrême gauche,2B02
4,5,GIACOMI,Jean-Antoine,M.,REG,Régionaliste,2B02
5,6,JOUART,Sylvie,Mme,RN,Rassemblement National,2B02
6,7,CARLI,Antò,M.,REG,Régionaliste,2B02


In [12]:
# Add CodCirc2: the department part of CodCirc (all but the last two
# characters) remapped through mapping_dept, then "0", then the last two
# characters of CodCirc.
candidats_2024_t1_2 = candidats_2024_t1.assign(
    CodCirc2=lambda frame: (
        frame['CodCirc'].str.slice(stop=-2).replace(mapping_dept)
        + "0"
        + frame['CodCirc'].str.slice(start=-2)
    )
)

candidats_2024_t1_2

Unnamed: 0,NumPanneauCand,NomPsn,PrenomPsn,CivilitePsn,CodNuaCand,LibNuaCand,CodCirc,CodCirc2
0,1,LENORMAND,Stéphane,M.,DVD,Divers droite,97501,ZS001
1,2,BEAUMONT,Frédéric,M.,SOC,Parti socialiste,97501,ZS001
2,3,CHAGNON,Patricia,Mme,RN,Rassemblement National,97501,ZS001
3,4,LETOURNEL,Marion,Mme,FI,La France insoumise,97501,ZS001
4,5,LEBAILLY,Patrick,M.,DVG,Divers gauche,97501,ZS001
...,...,...,...,...,...,...,...,...
3,4,RONGIONE,Viviane,Mme,EXG,Extrême gauche,2B02,2B002
4,5,GIACOMI,Jean-Antoine,M.,REG,Régionaliste,2B02,2B002
5,6,JOUART,Sylvie,Mme,RN,Rassemblement National,2B02,2B002
6,7,CARLI,Antò,M.,REG,Régionaliste,2B02,2B002


In [13]:
# Save the long-format candidate table (including CodCirc2) without the index.
candidats_2024_t1_2.to_csv('lg2024_t1_candidats.csv', index=False)

### transpose la liste des candidat·e·s dans un format *wide*

In [14]:
# Wide layout: one row per CodCirc2, one column per nuance code; each cell
# holds the "NomPsn PrenomPsn" strings of that nuance joined by ", ", or an
# empty string when the nuance has no candidate there.
candidats_named = candidats_2024_t1_2.assign(
    Nom_Prenom=lambda frame: frame['NomPsn'] + " " + frame['PrenomPsn']
)

candidats_2024_t1_wide = pd.pivot_table(
    candidats_named,
    index='CodCirc2',
    columns='CodNuaCand',
    values='Nom_Prenom',
    aggfunc=lambda names: ', '.join(names),
).replace(np.nan, '')

candidats_2024_t1_wide

CodNuaCand,COM,DIV,DSV,DVC,DVD,DVG,ECO,ENS,EXD,EXG,...,LR,RDG,REC,REG,RN,SOC,UDI,UG,UXD,VEC
CodCirc2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01001,,,"VINCENT Cyril, MENDES Michael",,,,,GUILLERMIN Vincent,,LAHY Éric,...,BRETON Xavier,,,,MAÎTRE Christophe,,,GUERAUD Sébastien,,
01002,,,,,,,,DAUBIÉ Romain,EYRAUD Olivier,GOUTAGNY Vincent,...,NANCHI Alexandre,,,,KOTARAC Andréa,,,MEYER Maxime,,
01003,,"TONIZZO Sofia, KOUASSI Fulgence",VEILLEROT Annick,,,,,GIVERNET Olga,,MAISONNETTE Cécile,...,UNAL Khadija,,,,DUBARRY Karine,,,JOLIE Christian,,
01004,,"BRESSON Yannick, NICAUD Jérémy",,,,,,COQUELET Christophe,,COUSSON Sylvain,...,BILLOUDET Guy,,,,BUISSON Jérôme,,,LIOTIER Charline,,
01005,,,,,ABAD Damien,,CHATELARD Thomas,DESCOURS Nathalie,,CROZET Sylvie,...,BOURDIN Fabrice,,PATRU Maria Cristina,,,,,PISANI Florence,CHAVENT Marc,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZZ007,,"GEFFRAY Fanny, CHAMBON Jérôme",,HUQUET Isabelle,,RICHARD Cécile,,PETIT Frédéric,,,...,MIER-GARRIGOU Dominique,,ALEXANDRE Julie,,NAVEYS--DUMAS Mathilde,,,RHARMAOUI-CLAQUIN Asma,,
ZZ008,,"SPITALAS Nicolas, BIZET David, SIGOURA Benjamin",,NEFFATI Gilles,HABABOU SOLOMON Philippe,CHARTRAIN Valérie,,YADAN Caroline,,,...,"HABIB Meyer, ASSOULINE Aurelie",,BENSOUSSAN Guillaume,,,,,LERER Yaël,,
ZZ009,,"OUDRHIRI Hassan, SIDIBÉ Gabriel Marie, DIANIFA...",FADILI Hachim,"TAHIRI Rachid, DUCELLIER Régina","SACKHO Kourtoum, DAVOUX Erwan Borhan","BOUDJEKADA Ismaël, TINAUGUS Edouard, KHALFI Se...",,DJOUADI Samira,,,...,BADREDDINE Jihad,,DREVON Pierre,,CHARRON Elodie,,,BEN CHEÏKH Karim,,
ZZ010,,"MOJON-CHEMINADE Odile, MARIE-LOUISE Hugues Mic...",,"MABASI Marie Josée, HOJEIJ Ali Camille",,MAZOT Nathalie,,LAKRAFI Amélia,,,...,LAMAH Lucas,,CASTELLAN Philippe,,,,,DI MEO Elsa,DE VERON Jean,


In [15]:
# Save the wide candidate table; the CodCirc2 index is written as the first column.
candidats_2024_t1_wide.to_csv('lg2024_t1_candidats.wide.csv')

## précalcule un tableau sur la présence des nuances dans chacune des circonscriptions

In [16]:
# Presence table: one row per CodCirc2, one column per nuance code, True when
# at least one candidate of that nuance runs in the circonscription.
# The pivot leaves NaN for every (circonscription, nuance) pair without a
# candidate, so `.notna()` yields a real boolean frame directly. It replaces
# `.fillna(False)`, which on the object-dtype pivot result depends on pandas'
# deprecated silent downcasting to get boolean columns.
nuances2024 = (
    candidats_2024_t1_2
    .pivot_table(
        index='CodCirc2',
        columns='CodNuaCand',
        values='NomPsn',
        aggfunc=lambda noms: len(noms) >= 1,
    )
    .notna()
)

nuances2024

CodNuaCand,COM,DIV,DSV,DVC,DVD,DVG,ECO,ENS,EXD,EXG,...,LR,RDG,REC,REG,RN,SOC,UDI,UG,UXD,VEC
CodCirc2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01001,False,False,True,False,False,False,False,True,False,True,...,True,False,False,False,True,False,False,True,False,False
01002,False,False,False,False,False,False,False,True,True,True,...,True,False,False,False,True,False,False,True,False,False
01003,False,True,True,False,False,False,False,True,False,True,...,True,False,False,False,True,False,False,True,False,False
01004,False,True,False,False,False,False,False,True,False,True,...,True,False,False,False,True,False,False,True,False,False
01005,False,False,False,False,True,False,True,True,False,True,...,True,False,True,False,False,False,False,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZZ007,False,True,False,True,False,True,False,True,False,False,...,True,False,True,False,True,False,False,True,False,False
ZZ008,False,True,False,True,True,True,False,True,False,False,...,True,False,True,False,False,False,False,True,False,False
ZZ009,False,True,True,True,True,True,False,True,False,False,...,True,False,True,False,True,False,False,True,False,False
ZZ010,False,True,False,True,False,True,False,True,False,False,...,True,False,True,False,False,False,False,True,True,False


In [17]:
# Save the nuance presence table; the CodCirc2 index is written as the first column.
nuances2024.to_csv('lg2024_t1_nuances.csv')

## récupère la liste des résultats du premier tour

In [18]:
def get_resultats_t1(CodCirc, CodDept, CodeReg):
    """Fetch the first-round per-candidate results of one circonscription.

    Parameters
    ----------
    CodCirc
        Circonscription code, used in the file name (``R1{CodCirc}.xml``).
    CodDept
        Department code, used as the folder of the download URL.
    CodeReg
        Region code; only copied into the returned frame.

    Returns
    -------
    pandas.DataFrame or None
        One row per ``<Candidat>`` element, read with ``dtype='object'``, plus
        the three arguments as ``CodCirc``, ``CodDept`` and ``CodeReg``
        columns; ``None`` when the file could not be fetched or parsed (the
        failure is printed).
    """
    url = f'{base}/resultatsT1/{CodDept}/R1{CodCirc}.xml'

    try:
        df = (
            pd
            .read_xml(url, xpath='.//Candidat', dtype='object')
            .assign(CodCirc=CodCirc, CodDept=CodDept, CodeReg=CodeReg)
        )
    except Exception as exc:
        # Best effort: report the failure and skip this circonscription
        # instead of swallowing it silently. Catching Exception rather than
        # using a bare `except` lets KeyboardInterrupt propagate, so the
        # download loop can be stopped.
        print(f'error: {CodCirc} ({exc!r})')
        df = None

    return df
    
    
# Download the results of every circonscription (with a progress bar) and
# stack them into one long table.
resultats_keys = circonscriptions.reset_index()
resultats_args = list(zip(
    resultats_keys['CodCirElec'],
    resultats_keys['CodDpt'],
    resultats_keys['CodReg'],
))

resultats_t1 = pd.concat(
    [
        get_resultats_t1(cod_circ, cod_dpt, cod_reg)
        for cod_circ, cod_dpt, cod_reg in tqdm(resultats_args)
    ]
)
resultats_t1

  0%|          | 0/577 [00:00<?, ?it/s]

Unnamed: 0,NumPanneauCand,NomPsn,PrenomPsn,CivilitePsn,CodNuaCand,LibNuaCand,NbVoix,RapportExprimes,RapportInscrits,Elu,CodCirc,CodDept,CodeReg
0,1,LENORMAND,Stéphane,M.,DVD,Divers droite,1184,4309,2336,QUALIF T2,97501,975,00
1,2,BEAUMONT,Frédéric,M.,SOC,Parti socialiste,464,1689,915,QUALIF T2,97501,975,00
2,3,CHAGNON,Patricia,Mme,RN,Rassemblement National,291,1059,574,NON,97501,975,00
3,4,LETOURNEL,Marion,Mme,FI,La France insoumise,409,1488,807,NON,97501,975,00
4,5,LEBAILLY,Patrick,M.,DVG,Divers gauche,400,1456,789,NON,97501,975,00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,4,RONGIONE,Viviane,Mme,EXG,Extrême gauche,280,063,041,NON,2B02,2B,94
4,5,GIACOMI,Jean-Antoine,M.,REG,Régionaliste,0,000,000,NON,2B02,2B,94
5,6,JOUART,Sylvie,Mme,RN,Rassemblement National,11275,2542,1660,QUALIF T2,2B02,2B,94
6,7,CARLI,Antò,M.,REG,Régionaliste,2277,513,335,NON,2B02,2B,94


In [19]:
# Circonscription attributes keyed by CodCirElec, with CodCirc2 added:
# CodDpt remapped through mapping_dept, followed by the last two characters of
# CodCirElec left-padded with "0" to width 3.
circ_attrs_t1 = (
    circonscriptions
    .reset_index()
    .assign(
        CodCirc2=lambda frame: (
            frame['CodDpt'].replace(mapping_dept)
            + frame['CodCirElec'].astype(str).str[-2:].str.rjust(3, fillchar='0')
        )
    )
    .set_index('CodCirElec')
)

# Attach all of those attributes to each result row through its CodCirc.
resultats_t1_2 = resultats_t1.join(circ_attrs_t1, on='CodCirc')

resultats_t1_2

Unnamed: 0,NumPanneauCand,NomPsn,PrenomPsn,CivilitePsn,CodNuaCand,LibNuaCand,NbVoix,RapportExprimes,RapportInscrits,Elu,CodCirc,CodDept,CodeReg,LibCirElec,NbSap,CodReg,CodDpt,CodCirc2
0,1,LENORMAND,Stéphane,M.,DVD,Divers droite,1184,4309,2336,QUALIF T2,97501,975,00,Saint-Pierre-et-Miquelon,1,00,975,ZS001
1,2,BEAUMONT,Frédéric,M.,SOC,Parti socialiste,464,1689,915,QUALIF T2,97501,975,00,Saint-Pierre-et-Miquelon,1,00,975,ZS001
2,3,CHAGNON,Patricia,Mme,RN,Rassemblement National,291,1059,574,NON,97501,975,00,Saint-Pierre-et-Miquelon,1,00,975,ZS001
3,4,LETOURNEL,Marion,Mme,FI,La France insoumise,409,1488,807,NON,97501,975,00,Saint-Pierre-et-Miquelon,1,00,975,ZS001
4,5,LEBAILLY,Patrick,M.,DVG,Divers gauche,400,1456,789,NON,97501,975,00,Saint-Pierre-et-Miquelon,1,00,975,ZS001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,4,RONGIONE,Viviane,Mme,EXG,Extrême gauche,280,063,041,NON,2B02,2B,94,2ème circonscription,1,94,2B,2B002
4,5,GIACOMI,Jean-Antoine,M.,REG,Régionaliste,0,000,000,NON,2B02,2B,94,2ème circonscription,1,94,2B,2B002
5,6,JOUART,Sylvie,Mme,RN,Rassemblement National,11275,2542,1660,QUALIF T2,2B02,2B,94,2ème circonscription,1,94,2B,2B002
6,7,CARLI,Antò,M.,REG,Régionaliste,2277,513,335,NON,2B02,2B,94,2ème circonscription,1,94,2B,2B002


In [20]:
# Number of distinct circonscription codes present in the results.
resultats_t1_2.CodCirc.nunique(dropna=False)

577

In [21]:
# Circonscriptions whose code (the CodCirElec index) has no row in the results.
circonscriptions.loc[~circonscriptions.index.isin(resultats_t1_2.CodCirc)]

Unnamed: 0_level_0,LibCirElec,NbSap,CodReg,CodDpt
CodCirElec,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [22]:
# Save the enriched first-round results without the index.
resultats_t1_2.to_csv('lg2024_t1_resultats.csv', index=False)

In [26]:
def get_inscrits_t1(CodCirc, CodDept, CodeReg):
    """Fetch the ``<Inscrits>`` element(s) of one first-round result file.

    Parameters
    ----------
    CodCirc
        Circonscription code, used in the file name (``R1{CodCirc}.xml``).
    CodDept
        Department code, used as the folder of the download URL.
    CodeReg
        Region code; only copied into the returned frame.

    Returns
    -------
    pandas.DataFrame or None
        One row per element matched by ``.//Inscrits``, read with
        ``dtype='object'``, plus the three arguments as ``CodCirc``,
        ``CodDept`` and ``CodeReg`` columns; ``None`` when the file could not
        be fetched or parsed (the failure is printed).
    """
    url = f'{base}/resultatsT1/{CodDept}/R1{CodCirc}.xml'

    try:
        df = (
            pd
            .read_xml(url, xpath='.//Inscrits', dtype='object')
            .assign(CodCirc=CodCirc, CodDept=CodDept, CodeReg=CodeReg)
        )
    except Exception as exc:
        # Best effort: report the failure and skip this circonscription
        # instead of swallowing it silently. Catching Exception rather than
        # using a bare `except` lets KeyboardInterrupt propagate, so the
        # download loop can be stopped.
        print(f'error: {CodCirc} ({exc!r})')
        df = None

    return df
    
    
# Download the <Inscrits> figures of every circonscription (with a progress
# bar) and stack them into one table.
inscrits_keys = circonscriptions.reset_index()
inscrits_args = list(zip(
    inscrits_keys['CodCirElec'],
    inscrits_keys['CodDpt'],
    inscrits_keys['CodReg'],
))

inscrits_t1 = pd.concat(
    [
        get_inscrits_t1(cod_circ, cod_dpt, cod_reg)
        for cod_circ, cod_dpt, cod_reg in tqdm(inscrits_args)
    ]
)
inscrits_t1

  0%|          | 0/577 [00:00<?, ?it/s]

Unnamed: 0,Nombre,CodCirc,CodDept,CodeReg
0,5069,97501,975,00
0,9031,98601,986,00
0,73776,98701,987,00
0,69245,98702,987,00
0,69028,98703,987,00
...,...,...,...,...
0,84946,8405,84,93
0,52490,2A01,2A,94
0,62495,2A02,2A,94
0,62114,2B01,2B,94


In [31]:
# CodCirc2 for each CodCirElec: CodDpt remapped through mapping_dept, followed
# by the last two characters of CodCirElec left-padded with "0" to width 3.
codcirc2_by_circ = (
    circonscriptions
    .reset_index()
    .assign(
        CodCirc2=lambda frame: (
            frame['CodDpt'].replace(mapping_dept)
            + frame['CodCirElec'].astype(str).str[-2:].str.rjust(3, fillchar='0')
        )
    )
    .set_index('CodCirElec')
    .loc[:, ['CodCirc2']]
)

# Attach CodCirc2, rename the count column, then index and sort by CodCirc2.
inscrits_t1_2 = (
    inscrits_t1
    .join(codcirc2_by_circ, on='CodCirc')
    .rename(columns={'Nombre': 'Inscrits'})
    .set_index('CodCirc2')
    .sort_index()
)

inscrits_t1_2

Unnamed: 0_level_0,Inscrits,CodCirc,CodDept,CodeReg
CodCirc2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
01001,86843,0101,01,84
01002,101874,0102,01,84
01003,84130,0103,01,84
01004,96119,0104,01,84
01005,77900,0105,01,84
...,...,...,...,...
ZZ007,130824,ZZ07,ZZ,00
ZZ008,148957,ZZ08,ZZ,00
ZZ009,130387,ZZ09,ZZ,00
ZZ010,113855,ZZ10,ZZ,00


In [32]:
# Save the registered-voter table; the CodCirc2 index is written as the first column.
inscrits_t1_2.to_csv('lg2024_t1_inscrits.csv')