In [1]:
import pandas as pd

In [25]:
# Labels applied, in order, to the positional fields of each record.
column_names = [
    "departement",
    "commune",
    "lieu_de_vote",
    "numero_bureau",
    "nombre_inscrits",
]

# Only the 'data' entry of the JSON file is used; each of its elements
# becomes one row of the frame.
data = pd.read_json('./election.json')['data']
df = pd.DataFrame(data.tolist())
df.columns = column_names

In [26]:
# Quick sanity check of the freshly loaded frame. The output reports
# count/unique/top/freq for every column, nombre_inscrits included, because
# that column is only converted to a numeric dtype in a later cell.
df.describe()

Unnamed: 0,departement,commune,lieu_de_vote,numero_bureau,nombre_inscrits
count,15633,15633,15633,15633,15633
unique,46,553,6341,95,580
top,DAKAR,TOUBA MOSQUEE,UNIVERSITE TOUBA DAROU KHOUDOSS,1,600
freq,1275,610,95,6681,323


In [27]:
# Convert nombre_inscrits to numeric.
# errors='coerce' replaces unparseable values with NaN, and the .sum()
# aggregations further down skip NaN by default. Report how many values were
# coerced so that they do not silently drop out of the totals.
missing_before = df['nombre_inscrits'].isna().sum()
inscrits_numeric = pd.to_numeric(df['nombre_inscrits'], errors='coerce')
coerced_count = int(inscrits_numeric.isna().sum() - missing_before)
if coerced_count:
    print(f"WARNING: {coerced_count} nombre_inscrits value(s) could not be parsed and were set to NaN")
df['nombre_inscrits'] = inscrits_numeric

In [28]:
# Region name -> names of the departements belonging to that region.
# Used to attach a parent region to every departement (14 regions, 46 departements).
regions = {
    "DAKAR": ["DAKAR", "GUEDIAWAYE", "KEUR MASSAR", "PIKINE", "RUFISQUE"],
    "DIOURBEL": ["BAMBEY", "DIOURBEL", "MBACKE"],
    "FATICK": ["FATICK", "FOUNDIOUGNE", "GOSSAS"],
    "KAFFRINE": ["BIRKILANE", "KAFFRINE", "KOUNGHEUL", "MALEM HODAR"],
    "KAOLACK": ["GUINGUINEO", "KAOLACK", "NIORO DU RIP"],
    "KEDOUGOU": ["KEDOUGOU", "SALEMATA", "SARAYA"],
    "KOLDA": ["KOLDA", "MEDINA YORO FOULAH", "VELINGARA"],
    "LOUGA": ["KEBEMER", "LINGUERE", "LOUGA"],
    "MATAM": ["KANEL", "MATAM", "RANEROU FERLO"],
    "SAINT LOUIS": ["DAGANA", "PODOR", "SAINT LOUIS"],
    "SEDHIOU": ["BOUNKILING", "GOUDOMP", "SEDHIOU"],
    "TAMBACOUNDA": ["BAKEL", "GOUDIRY", "KOUMPENTOUM", "TAMBACOUNDA"],
    "THIES": ["MBOUR", "THIES", "TIVAOUANE"],
    "ZIGUINCHOR": ["BIGNONA", "OUSSOUYE", "ZIGUINCHOR"],
}



In [29]:
# Aggregating by departement
departement_agg = df.groupby('departement')['nombre_inscrits'].sum().reset_index()

# Invert the region -> [departements] mapping once, so each departement is
# resolved with a single dict lookup instead of scanning every region per row.
# setdefault keeps the first region listed if a departement appears twice.
departement_to_region = {}
for region_name, departement_names in regions.items():
    for departement_name in departement_names:
        departement_to_region.setdefault(departement_name, region_name)

departement_agg["region"] = departement_agg["departement"].map(departement_to_region)

# Fail with an explicit message rather than an opaque IndexError when the data
# contains a departement that the regions mapping does not know about.
unmapped_departements = departement_agg.loc[departement_agg["region"].isna(), "departement"].tolist()
if unmapped_departements:
    raise ValueError(f"No region defined for departement(s): {unmapped_departements}")

# Roll the departement totals up to region level
region_agg = departement_agg.groupby('region')['nombre_inscrits'].sum().reset_index()

In [30]:

# Put the region totals into the common locality layout:
# every region row has the country "Senegal" as its parent.
region_agg = (
    region_agg
    .rename(columns={"region": "locality_name"})
    .assign(
        locality_type='region',
        locality_parent_name="Senegal",
        locality_parent_type="country",
    )
    # fixed column order shared by all aggregation levels
    .loc[:, ["locality_name", "locality_type", "locality_parent_name", "locality_parent_type", "nombre_inscrits"]]
)
print(region_agg)

   locality_name locality_type locality_parent_name locality_parent_type  \
0          DAKAR        region              Senegal              country   
1       DIOURBEL        region              Senegal              country   
2         FATICK        region              Senegal              country   
3       KAFFRINE        region              Senegal              country   
4        KAOLACK        region              Senegal              country   
5       KEDOUGOU        region              Senegal              country   
6          KOLDA        region              Senegal              country   
7          LOUGA        region              Senegal              country   
8          MATAM        region              Senegal              country   
9    SAINT LOUIS        region              Senegal              country   
10       SEDHIOU        region              Senegal              country   
11   TAMBACOUNDA        region              Senegal              country   
12         T

In [31]:
# Put the departement totals into the common locality layout:
# the region column computed earlier becomes the parent of each departement.
departement_agg = (
    departement_agg
    .rename(columns={"departement": "locality_name", "region": "locality_parent_name"})
    .assign(
        locality_type='departement',
        locality_parent_type="region",
    )
    # fixed column order shared by all aggregation levels
    .loc[:, ["locality_name", "locality_type", "locality_parent_name", "locality_parent_type", "nombre_inscrits"]]
)
print(departement_agg)

         locality_name locality_type locality_parent_name  \
0                BAKEL   departement          TAMBACOUNDA   
1               BAMBEY   departement             DIOURBEL   
2              BIGNONA   departement           ZIGUINCHOR   
3            BIRKILANE   departement             KAFFRINE   
4           BOUNKILING   departement              SEDHIOU   
5               DAGANA   departement          SAINT LOUIS   
6                DAKAR   departement                DAKAR   
7             DIOURBEL   departement             DIOURBEL   
8               FATICK   departement               FATICK   
9          FOUNDIOUGNE   departement               FATICK   
10              GOSSAS   departement               FATICK   
11             GOUDIRY   departement          TAMBACOUNDA   
12             GOUDOMP   departement              SEDHIOU   
13          GUEDIAWAYE   departement                DAKAR   
14          GUINGUINEO   departement              KAOLACK   
15            KAFFRINE  

In [32]:
# Aggregating by commune
commune_agg = df.groupby(['departement', 'commune'])['nombre_inscrits'].sum().reset_index()
commune_agg.rename(columns={'departement': 'locality_parent_name', 'commune': 'locality_name'}, inplace=True)
commune_agg["locality_type"] = "commune"    
commune_agg["locality_parent_type"] = "departement"

# reorder columns
commune_agg = commune_agg[['locality_name', 'locality_type', 'locality_parent_name', 'locality_parent_type', 'nombre_inscrits']]

print(commune_agg)

            locality_name locality_type locality_parent_name  \
0                   BAKEL       commune                BAKEL   
1                  BALLOU       commune                BAKEL   
2                    BELE       commune                BAKEL   
3                 DIAWARA       commune                BAKEL   
4                   GABOU       commune                BAKEL   
..                    ...           ...                  ...   
548  BOUTOUPA CAMARACOUND       commune           ZIGUINCHOR   
549               ENAMPOR       commune           ZIGUINCHOR   
550               NIAGUIS       commune           ZIGUINCHOR   
551               NIASSIA       commune           ZIGUINCHOR   
552            ZIGUINCHOR       commune           ZIGUINCHOR   

    locality_parent_type  nombre_inscrits  
0            departement             9624  
1            departement             9846  
2            departement             7641  
3            departement             4802  
4          

In [33]:
# Aggregating by lieu_de_vote
lieu_de_vote_agg = df.groupby(['commune', 'lieu_de_vote'])['nombre_inscrits'].sum().reset_index()
lieu_de_vote_agg.rename(columns={'commune': 'locality_parent_name', 'lieu_de_vote': 'locality_name'}, inplace=True)
lieu_de_vote_agg["locality_type"] = "lieu_de_vote"
lieu_de_vote_agg["locality_parent_type"] = "commune"

# reorder columns
lieu_de_vote_agg = lieu_de_vote_agg[['locality_name', 'locality_type', 'locality_parent_name', 'locality_parent_type', 'nombre_inscrits']]

print(lieu_de_vote_agg)

                          locality_name locality_type locality_parent_name  \
0                              BAGHAGHA  lieu_de_vote               ADEANE   
1                               DIAGNON  lieu_de_vote               ADEANE   
2            ECOLE ELEMENTAIRE D ADEANE  lieu_de_vote               ADEANE   
3     ECOLE ELEMENTAIRE DE AGNACK PETIT  lieu_de_vote               ADEANE   
4           ECOLE ELEMENTAIRE DE GONOUM  lieu_de_vote               ADEANE   
...                                 ...           ...                  ...   
6676                  ECOLE MATAR DIEME  lieu_de_vote           ZIGUINCHOR   
6677   ECOLE MISSION CATHOLIQUE DE NEMA  lieu_de_vote           ZIGUINCHOR   
6678                 ECOLE MOUNORY MANE  lieu_de_vote           ZIGUINCHOR   
6679                  ECOLE SEYDOU KANE  lieu_de_vote           ZIGUINCHOR   
6680                     EFI ZIGUINCHOR  lieu_de_vote           ZIGUINCHOR   

     locality_parent_type  nombre_inscrits  
0                 

In [34]:
# Grand total of nombre_inscrits over every row of df, as a reference figure
# for the per-level aggregates.
total_inscrits = df['nombre_inscrits'].sum()
print(f"Total nombre_inscrits: {total_inscrits}")


Total nombre_inscrits: 7033854


In [35]:
# Aggregation levels, ordered from coarsest to finest; this is the order in
# which they are stacked when the frames are concatenated.
dfs = [
    region_agg,
    departement_agg,
    commune_agg,
    lieu_de_vote_agg,
]

In [36]:
# Stack every aggregation level into one table with a fresh 0..n-1 index,
# then export it without writing that index as a column.
merged_df = pd.concat(objs=dfs, ignore_index=True)
merged_df.to_csv(path_or_buf='election_agg.csv', index=False)