In [35]:
import pandas as pd
import altair as alt
alt.data_transformers.enable('json') # Let Altair/Vega-Lite work with large data sets
import ipywidgets as widgets
from IPython.display import display, clear_output

DataTransformerRegistry.enable('json')

In [67]:
# Load the semicolon-separated first-name records and, in one chained step,
# discard the rows tagged '_PRENOMS_RARES' (presumably the aggregate bucket for
# rare names -- confirm against the data dictionary) and the rows whose
# département code is the placeholder 'XX'. The original row index is kept.
names = (
    pd.read_csv("dpt2020.csv", sep=";")
    .loc[lambda frame: frame["preusuel"].ne("_PRENOMS_RARES") & frame["dpt"].ne("XX")]
)

# Show a handful of random rows as a sanity check of the cleaned frame.
names.sample(5)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
2955986,2,LYDIE,1973,74,12
2005516,2,BÉATRICE,1982,45,3
3012805,2,MANON,2012,72,49
74043,1,ALEXANDRE,1921,972,20
1615059,1,TRISTAN,1990,64,7


In [37]:
# Number of records (rows of `names`) for every (first name, sex) pair.
name_counts = (
    names
    .groupby(['preusuel', 'sexe'])
    .size()
    .rename('count')
    .reset_index()
)

# One row per first name, one column per sex code (1 and 2); a missing
# combination becomes 0.
name_pivot = name_counts.pivot(index='preusuel', columns='sexe', values='count').fillna(0)

# Keep only the names that have at least one record under both sex codes.
has_both_sexes = name_pivot[1].gt(0) & name_pivot[2].gt(0)
common_names = name_pivot.loc[has_both_sexes].reset_index()

In [38]:
# Preview the names that have records under both sex codes; columns 1 and 2
# hold the number of rows of `names` per sex code, not the number of people.
common_names

sexe,preusuel,1,2
0,ABDON,99.0,11.0
1,ABEL,2979.0,17.0
2,ACHILLE,1564.0,20.0
3,ADAM,2357.0,1.0
4,ADAMA,273.0,112.0
...,...,...,...
863,ZACHARIE,580.0,21.0
864,ZAYANE,1.0,2.0
865,ZOÉ,32.0,2876.0
866,ÉDEN,170.0,25.0


In [39]:
# Records with sex code 1 (labelled 'Male' in the chart cell).
names_sex1 = names.loc[names['sexe'].eq(1)]

In [41]:
# Records with sex code 2 (labelled 'Female' in the chart cell).
names_sex2 = names.loc[names['sexe'].eq(2)]

In [70]:
# Human-readable labels for the numeric codes stored in the `sexe` column.
sexe_mapping = {
    1: 'Male',
    2: 'Female',
}

In [91]:
# Build the name selector. The options are the distinct first names found in
# the sex-code-1 records, in order of first appearance.
unique_names = names_sex1['preusuel'].drop_duplicates().tolist()
dropdown = widgets.Dropdown(description='Name:', options=unique_names)

# Define the function to update the chart
# Edges of the 5-year birth-year bins: 1900, 1905, ..., 2020, 2025.
# Bins are left-closed ([lo, hi)), so the final edge must lie beyond the last
# year to be counted; stopping at 2020 would silently drop annais == 2020.
_YEAR_BIN_EDGES = list(range(1900, 2026, 5))
_YEAR_BIN_LABELS = [f'{lo}-{hi}' for lo, hi in zip(_YEAR_BIN_EDGES, _YEAR_BIN_EDGES[1:])]


def _prepare_name_records(sex_frame, name):
    """Select one name's records from a single-sex frame and total them per bin.

    Parameters
    ----------
    sex_frame : pandas.DataFrame
        Frame with the columns 'preusuel', 'annais', 'sexe' and 'nombre'.
    name : str
        First name to select (exact match on 'preusuel').

    Returns
    -------
    (records, totals) : tuple of pandas.DataFrame
        `records` is a copy of the matching rows with a numeric 'annais', a
        categorical 'year_bin' and 'sexe' replaced by its label from
        `sexe_mapping`. `totals` is the sum of 'nombre' per ('year_bin', 'sexe').
    """
    records = sex_frame[sex_frame['preusuel'] == name].copy()

    # Plain column assignment (rather than .loc[:, col] = ...) so the column
    # really takes the numeric dtype; unparsable years become NaN.
    records['annais'] = pd.to_numeric(records['annais'], errors='coerce')
    records['year_bin'] = pd.cut(
        records['annais'], bins=_YEAR_BIN_EDGES, labels=_YEAR_BIN_LABELS, right=False
    )
    records['sexe'] = records['sexe'].map(sexe_mapping)

    # observed=False keeps empty bins in the result (with a total of 0).
    totals = (
        records
        .groupby(['year_bin', 'sexe'], observed=False)
        .agg({'nombre': 'sum'})
        .reset_index()
    )
    return records, totals


def _period_bars(totals, color, opacity):
    """Bar chart of the per-bin totals for one sex."""
    return alt.Chart(totals).mark_bar(opacity=opacity, color=color).encode(
        x=alt.X('year_bin:O', title='Year Bin'),
        y=alt.Y('nombre:Q', title='Number of People'),
        # Explicit types so an empty frame does not trip type inference.
        tooltip=['year_bin:O', 'nombre:Q', 'sexe:N'],
    )


def _average_rule(records, color, title):
    """Horizontal rule at the mean of 'nombre' over the given records."""
    # NOTE(review): this is the mean of the individual records, whereas the
    # bars show 5-year sums -- confirm this is the intended comparison.
    return alt.Chart(records[['nombre']]).mark_rule(color=color).encode(
        y='average(nombre):Q',
        tooltip=alt.Tooltip('average(nombre):Q', title=title),
    )


def update_chart(name):
    """Render the layered male/female bar chart for `name` into `output`.

    Reads the notebook-level frames `names_sex1` and `names_sex2`, the
    `sexe_mapping` labels and the `output` widget; returns nothing.

    Parameters
    ----------
    name : str
        First name to plot (exact match on the 'preusuel' column).
    """
    male_records, male_totals = _prepare_name_records(names_sex1, name)
    female_records, female_totals = _prepare_name_records(names_sex2, name)

    bar1 = _period_bars(male_totals, color='blue', opacity=0.8)
    bar2 = _period_bars(female_totals, color='red', opacity=0.75)

    # Horizontal rules marking the average record count for each sex.
    avg_line_male = _average_rule(
        male_records, color='lightblue', title='Average Number of Males'
    )
    # 'lightred' is not a CSS named colour; 'lightcoral' is the closest valid one.
    avg_line_female = _average_rule(
        female_records, color='lightcoral', title='Average Number of Females'
    )

    # Layer the bar charts and the horizontal lines on a shared y scale.
    layered_chart = alt.layer(bar1, bar2, avg_line_male, avg_line_female).resolve_scale(
        y='shared'
    ).properties(
        title=f"Layered Bar Chart of the number of people with name '{name}' in both genders through time"
    )

    # Replace whatever the output area currently shows with the new chart.
    with output:
        clear_output(wait=True)
        display(layered_chart)

# Output area that update_chart renders into.
output = widgets.Output()


def _on_name_change(change):
    """Redraw the chart for the newly selected dropdown value."""
    update_chart(change['new'])


# Redraw whenever the selected value changes, then show the controls.
dropdown.observe(_on_name_change, names='value')
display(dropdown, output)

# Draw the chart once for the first option so the output is not empty.
update_chart(unique_names[0])

Dropdown(description='Name:', options=('AADIL', 'AAHIL', 'AARON', 'AARONN', 'AARUSH', 'AAYAN', 'AB', 'AB-DEL',…

Output()