In [None]:
import pandas as pd
import altair as alt

In [None]:
# Load the directory export; fields in this CSV are separated by ';'.
df = pd.read_csv("annuaire_genre.csv", sep=";")

In [None]:
# Preview the first rows of the freshly loaded table.
df.head()

In [None]:
def traitementHierarchie (chaine) :
    """Split a '>'-separated hierarchy path into its distinct levels.

    Each segment is stripped of surrounding whitespace, empty segments are
    dropped, and repeated segments are kept only once, at the position of
    their first occurrence (so the top-down order of the path is preserved
    and the result is deterministic from run to run).

    Parameters
    ----------
    chaine : str
        Hierarchy path such as ``"A > B > C"``. Any non-string value
        (e.g. ``None`` or the NaN pandas uses for an empty CSV field) is
        treated as an empty path.

    Returns
    -------
    tuple[list[str], int]
        The ordered list of distinct levels and its length (the depth).
    """
    # Missing values have no levels; avoid crashing on `.split`.
    if not isinstance(chaine, str):
        return [], 0

    segments = (e.strip() for e in chaine.split('>'))
    # dict.fromkeys de-duplicates while keeping first-seen order,
    # unlike set(), whose iteration order is arbitrary for strings.
    niveaux = list(dict.fromkeys(e for e in segments if e != ''))

    return niveaux, len(niveaux)

# Parse every 'hierarchie' value once, then unzip the resulting
# (levels, depth) pairs into the two new columns.
df['chaine'], df['profondeur'] = zip(*df['hierarchie'].map(traitementHierarchie))

# Preview of the columns used in the rest of the notebook.
df[['genre', 'prenom', 'chaine', 'profondeur']].head()

In [None]:
# (rows, columns) of the four-column view used below.
df[['genre', 'prenom', 'chaine', 'profondeur']].shape

In [None]:
# Write the selected columns to disk as a JSON array of records; the chart
# built afterwards is created from this file name (alt.Chart(filename)).
# NOTE(review): 'prenom' and 'chaine' are exported too, although the chart in
# this notebook only reads 'genre' and 'profondeur' — confirm they are needed
# before sharing the file.
filename = "data.json"
df[['genre', 'prenom', 'chaine', 'profondeur']].to_json(filename, orient='records')

In [None]:
# Population pyramid: head-count per hierarchy depth, women on the left,
# men on the right, depth labels in the middle.
base = alt.Chart(filename)

x_scale = alt.Scale(domain=[0, 2000])
# NOTE(review): y_scale is not used by any chart in this cell; it is kept only
# in case a later cell refers to it.
y_scale = alt.Scale(domain=[1,6])
# Hex codes without stray whitespace so they are valid colour strings.
color_scale = alt.Scale(domain=["F", "M"], range=["#FF851B", "#39CCCC"])

left = base.transform_filter(
    alt.datum.genre == "F"
).encode(
    y = alt.Y("profondeur:O", axis=None),
    x = alt.X('count():Q',
              title="population",
              sort=alt.SortOrder('descending'),
              scale=x_scale
             ),
    color = alt.Color('genre:N', scale=color_scale, legend=None)
).mark_bar().properties(title="Femmes")

# Collapse the raw records to one row per depth first; otherwise one text
# mark is drawn per record and the labels are overplotted many times.
middle = base.transform_aggregate(
    effectif='count()',
    groupby=['profondeur']
).encode(
    y = alt.Y('profondeur:O', axis=None),
    text = alt.Text('profondeur:N')
).mark_text()

right = base.transform_filter(
    alt.datum.genre == "M"
).encode(
    y = alt.Y("profondeur:O", axis=None),
    x = alt.X('count():Q', title="population", scale=x_scale),
    color = alt.Color('genre:N', scale=color_scale, legend=None)
).mark_bar().properties(title="Hommes")

# Share the y scale so the three panels stay row-aligned even when a depth
# has records for only one gender.
(left | middle | right).resolve_scale(y='shared')

In [None]:
def compute_ratio(df, col):
    """Count rows per (genre, col) pair and express each count as a share of its `col` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'genre' column and the column named by `col`.
    col : str
        Name of the column to break the genders down by.

    Returns
    -------
    pandas.DataFrame
        One row per observed (genre, col) combination with columns
        ``['genre', col, 'nombre', 'proportion']``. 'nombre' is the row count
        and 'proportion' is 'nombre' divided by the total count of all genres
        for the same `col` value.
    """
    ratio = df.groupby(['genre', col]).size().reset_index(name='nombre')

    # Total head-count for each `col` value, broadcast back onto every row.
    # This replaces a per-row re-filter of the whole frame (quadratic).
    totaux = ratio.groupby(col)['nombre'].transform('sum')
    ratio['proportion'] = ratio['nombre'] / totaux

    return ratio

# Gender share per depth, restricted to rows with rang == 0
# (presumably the head of each service — TODO confirm the meaning of 'rang').
compute_ratio(df[ df["rang"] == 0 ], 'profondeur')

In [None]:
def pyramide(df, col, domain=[0,1], title="Répartition par genre du chef / de la cheffe de service"):
    """Build a back-to-back bar chart of the gender share for each value of `col`.

    The shares come from ``compute_ratio(df, col)``: women ("F") are drawn on
    the left panel with a reversed x axis, men ("M") on the right panel, and
    the values of `col` are printed in a narrow panel between the two.

    Parameters
    ----------
    df : pandas.DataFrame
        Data handed to ``compute_ratio``; needs 'genre' and `col` columns.
    col : str
        Column whose values form the rows of the pyramid.
    domain : sequence of two numbers, optional
        x-axis domain applied to both bar panels (bars are clipped to it).
    title : str, optional
        Title of the whole figure; defaults to the original hard-coded text.

    Returns
    -------
    altair.HConcatChart
        The three concatenated panels with figure-level configuration.
    """
    base = alt.Chart(compute_ratio(df, col))

    y_field = "{}:O".format(col)
    # Copy the domain so the shared default list is never handed to Altair.
    x_scale = alt.Scale(domain=list(domain))
    # Hex codes without stray whitespace so they are valid colour strings.
    color_scale = alt.Scale(domain=["F", "M"], range=["#FF851B", "#39CCCC"])

    left = base.transform_filter(
        alt.datum.genre == "F"
    ).encode(
        y = alt.Y(y_field, axis=None),
        x = alt.X('proportion:Q',
                  title="",
                  sort=alt.SortOrder('descending'),
                  scale=x_scale,
                  axis=alt.Axis(format='.0%')
                 ),
        color = alt.Color('genre:N', scale=color_scale, legend=None)
    ).mark_bar(clip=True).properties(title={"text": "👩", "anchor": "end"})

    # One row per `col` value, so each label is drawn once instead of once
    # per gender row.
    # NOTE(review): with axis=None the y title below is presumably not rendered.
    middle = base.transform_aggregate(
        occurrences='count()',
        groupby=[col]
    ).encode(
        y = alt.Y(y_field, axis=None, title="Niveau hiérarchique du service"),
        text = alt.Text('{}:N'.format(col)),
    ).mark_text().properties(width=10)

    right = base.transform_filter(
        alt.datum.genre == "M"
    ).encode(
        y = alt.Y(y_field, axis=None),
        x = alt.X('proportion:Q', title="", scale=x_scale, axis=alt.Axis(format='.0%')),
        color = alt.Color('genre:N', scale=color_scale, legend=None)
    ).mark_bar(clip=True).properties(title={"text": "👨", "anchor": "start"})

    # Share the y scale so the three panels stay row-aligned even when a
    # value of `col` exists for only one gender.
    return alt.hconcat(
        left, middle, right
    ).resolve_scale(
        y='shared'
    ).properties(
        background="#ffffff",
        title={"text": title, "anchor": "middle"},
        padding=20
    ).configure_title(
        fontSize=20,
    ).configure_axisY(
        titleAngle=0,
        titleAlign="right"
    )

# Draw the pyramid for rows with rang == 0, keeping only depths below 6.
# NOTE(review): the cutoff 6 is not explained in this notebook — confirm why
# deeper levels are excluded.
pyramide(df[ (df["rang"] == 0) & (df['profondeur'] <  6 ) ], "profondeur")