This notebook formats results for 2008 and 2014 city council elections, as well as presidential and legislative 2017 elections, for the 6 main parties. We can then use these data to model election results in Paris at the district level ("arrondissement") - we'll do that in another notebook.

In [1]:
%load_ext lab_black
%load_ext watermark

import numpy as np
import pandas as pd

from pathlib import Path

In [41]:
basepath = Path("../../../Downloads/db_iris_all/activite_residents/")
files_in_path = basepath.glob("*.xls")

actifs = pd.DataFrame()
for file in files_in_path:
    df = pd.read_excel(
        file,
        header=5,
        sheet_name="IRIS",
        usecols=["DEP", "LIBCOM", f"C{file.stem[-2:]}_ACTOCC1564"],
        nrows=40_500,
    )
    df = df[df.DEP == "75"].reset_index(drop=True)
    actifs = pd.concat([actifs, df], axis=1)
actifs

Unnamed: 0,DEP,LIBCOM,C09_ACTOCC1564,DEP.1,LIBCOM.1,C08_ACTOCC1564,DEP.2,LIBCOM.2,C06_ACTOCC1564,DEP.3,...,C10_ACTOCC1564,DEP.4,LIBCOM.3,C14_ACTOCC1564,DEP.5,LIBCOM.4,C15_ACTOCC1564,DEP.6,LIBCOM.5,C16_ACTOCC1564
0,75,Paris 1er Arrondissement,512.231253,75,Paris 1er Arrondissement,492.067504,75,Paris,568.818813,75,...,475.765881,75,Paris 1er Arrondissement,492.410086,75,Paris 1er Arrondissement,492.721087,75,Paris 1er Arrondissement,470.779059
1,75,Paris 1er Arrondissement,93.447193,75,Paris 1er Arrondissement,121.552421,75,Paris,215.520153,75,...,79.686039,75,Paris 1er Arrondissement,43.660221,75,Paris 1er Arrondissement,43.098351,75,Paris 1er Arrondissement,61.093670
2,75,Paris 1er Arrondissement,244.062618,75,Paris 1er Arrondissement,217.473616,75,Paris,182.793737,75,...,217.851776,75,Paris 1er Arrondissement,118.832762,75,Paris 1er Arrondissement,119.288375,75,Paris 1er Arrondissement,94.173716
3,75,Paris 1er Arrondissement,9.519256,75,Paris 1er Arrondissement,0.999984,75,Paris,1.000000,75,...,9.393106,75,Paris 1er Arrondissement,0.000000,75,Paris 1er Arrondissement,0.000000,75,Paris 1er Arrondissement,0.000000
4,75,Paris 1er Arrondissement,0.000000,75,Paris 1er Arrondissement,0.999984,75,Paris,1.000000,75,...,0.000000,75,Paris 1er Arrondissement,0.000000,75,Paris 1er Arrondissement,0.000000,75,Paris 1er Arrondissement,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
987,75,Paris 20e Arrondissement,679.219728,75,Paris 20e Arrondissement,692.119966,75,Paris,590.729538,75,...,671.118071,75,Paris 20e Arrondissement,582.387194,75,Paris 20e Arrondissement,567.237004,75,Paris 20e Arrondissement,631.768882
988,75,Paris 20e Arrondissement,1105.467996,75,Paris 20e Arrondissement,1077.351930,75,Paris,1024.827900,75,...,1085.717660,75,Paris 20e Arrondissement,1002.405294,75,Paris 20e Arrondissement,1004.222564,75,Paris 20e Arrondissement,926.924019
989,75,Paris 20e Arrondissement,1472.805741,75,Paris 20e Arrondissement,1457.509164,75,Paris,1452.365613,75,...,1297.373335,75,Paris 20e Arrondissement,1299.170304,75,Paris 20e Arrondissement,1397.191531,75,Paris 20e Arrondissement,1391.058764
990,75,Paris 20e Arrondissement,1197.804270,75,Paris 20e Arrondissement,1229.714239,75,Paris,1245.690660,75,...,1254.314004,75,Paris 20e Arrondissement,1384.132182,75,Paris 20e Arrondissement,1408.585263,75,Paris 20e Arrondissement,1387.621686


In [43]:
actifs = pd.read_excel(
    "../../../Downloads/db_iris_all/activite_residents/base-ic-activite-residents-2016.xls",
    header=5,
    sheet_name="IRIS",
    usecols=["DEP", "LIBCOM", f"C{16}_ACTOCC1564"],
    skipfooter=10_600,
)
actifs = actifs[actifs.DEP == "75"].reset_index(drop=True)

In [44]:
actifs

Unnamed: 0,DEP,LIBCOM,C16_ACTOCC1564
0,75,Paris 1er Arrondissement,470.779059
1,75,Paris 1er Arrondissement,61.093670
2,75,Paris 1er Arrondissement,94.173716
3,75,Paris 1er Arrondissement,0.000000
4,75,Paris 1er Arrondissement,0.000000
...,...,...,...
987,75,Paris 20e Arrondissement,631.768882
988,75,Paris 20e Arrondissement,926.924019
989,75,Paris 20e Arrondissement,1391.058764
990,75,Paris 20e Arrondissement,1387.621686


In [20]:
# extract district number:
actifs["LIBCOM"] = actifs.LIBCOM.str.extract("(\d+)").astype(int)
actifs = actifs.rename(columns={"LIBCOM": "arrondissement"})
actifs.groupby("arrondissement").sum()

Unnamed: 0_level_0,C16_ACTOCC1564
arrondissement,Unnamed: 1_level_1
1,8061.414072
2,11953.850833
3,18880.704683
4,13794.282329
5,26531.098045
6,16819.451985
7,23397.139726
8,17298.062602
9,32442.70487
10,48166.862564


In [22]:
actifs.groupby("arrondissement").sum().info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20 entries, 1 to 20
Data columns (total 1 columns):
C16_ACTOCC1564    20 non-null float64
dtypes: float64(1)
memory usage: 320.0 bytes


In [17]:
%watermark -a AlexAndorra -n -u -v -iv

numpy  1.17.3
pandas 0.25.2
AlexAndorra 
last updated: Mon Nov 18 2019 

CPython 3.7.5
IPython 7.9.0
