In [1]:
import pandas as pd
import altair as alt
import numpy as np
import random

In [2]:
file = "dpt2020.csv"

df = pd.read_csv(file, sep=';')
df_clean = df.loc[df['annais']!='XXXX', :]
df_clean = df_clean.loc[df_clean['preusuel']!='_PRENOMS_RARES']
df_Q1 = df_clean.groupby(['annais', 'preusuel', 'sexe']).agg({'nombre':'sum'}).reset_index()
df_Q1 = df_Q1.sort_values(['preusuel', 'annais', 'sexe'], ascending=[True,True,True])
df_Q1['shift'] = df_Q1.groupby(['preusuel', 'sexe'])['nombre'].shift()
df_Q1['diff'] = (df_Q1['nombre'] - df_Q1['shift']) / df_Q1['shift']
df_Q1['sexe'] = df_Q1["sexe"].map({1:"M",2:"F"})
df_Q1['new_name'] = df_Q1["preusuel"] + " - " + df_Q1["sexe"]
df_Q1

Unnamed: 0,annais,preusuel,sexe,nombre,shift,diff,new_name
126584,1983,AADIL,M,3,,,AADIL - M
148799,1992,AADIL,M,3,3.0,0.000000,AADIL - M
235138,2016,AAHIL,M,3,,,AAHIL - M
239659,2017,AALIYA,F,3,,,AALIYA - F
174558,2001,AALIYAH,F,9,,,AALIYAH - F
...,...,...,...,...,...,...,...
239658,2016,ÖMER,M,18,10.0,0.800000,ÖMER - M
244144,2017,ÖMER,M,30,18.0,0.666667,ÖMER - M
248614,2018,ÖMER,M,31,30.0,0.033333,ÖMER - M
253023,2019,ÖMER,M,37,31.0,0.193548,ÖMER - M


In [3]:
# names = random.sample(list(df_Q1['preusuel'].unique()), k=10)
names = ['Guillaume - M', 'Bastien - M', 'Clément - M', 'Julien - M', 'Lucas - M']
names = [name.upper() for name in names]


df_chart_1 = df_Q1.loc[df_Q1['new_name'].isin(names), :]

selection_1 = alt.selection_multi(fields=['new_name'])
input_dropdown_2 = alt.binding_select(options=names, name='new_name')
selection_2 = alt.selection_single(fields=['new_name'], bind=input_dropdown_2)
color_1 = alt.condition(selection_1,
                      alt.Color('new_name:N', legend=None),
                      alt.value('lightgray'))
annee_selection = alt.selection_interval(encodings=['x'])



chart_1 = alt.Chart(df_chart_1, title='Évolution temporelle par prénom').mark_line().encode(
    x=alt.X('annais:T', axis=alt.Axis(title='Année')), 
    y=alt.Y('nombre:Q', axis=alt.Axis(title='Nombre de naissances')),
    color=color_1).properties(width=700
                             ).add_selection(annee_selection
               )

legend_1 = alt.Chart(df_chart_1).mark_point().encode(
    y=alt.Y('new_name:N', axis=alt.Axis(orient='right', title='Prénom - sexe')),
    color=color_1
).add_selection(
    selection_1
)


chart_2 = alt.Chart(df_chart_1, title="Évolution temporelle de la variation relative d'un prénom sélectionné dans la légende"
                   ).mark_bar().encode(
    x=alt.X("annais:T", axis=alt.Axis(title='Année')),
    y=alt.Y("diff:Q", axis=alt.Axis(title="Variation relative entre l'année N et (N-1)")),
    color=alt.condition(
        alt.datum.diff > 0,
        alt.value("steelblue"), 
        alt.value("orange") 
    )
).properties(width=500).add_selection(
    selection_1
).transform_filter(
    selection_1
).add_selection(
    annee_selection
)


chart_3 = alt.Chart(df_chart_1, title='Total par prénom').mark_bar().encode(
    x=alt.X('new_name:N', axis=alt.Axis(title='Prénom - sexe', labelAngle=-45)),
    y=alt.Y('sum(nombre):Q', axis=alt.Axis(title='Total des naissances'), stack='zero'),
    color=color_1
).transform_filter(
    annee_selection | selection_1
)


(chart_1 |legend_1) & (chart_2 | (chart_3)) 