In [50]:
# Third-party libraries used by the notebook.
import altair as alt
import geopandas as gpd
import ipywidgets as widgets
import pandas as pd
from IPython.display import HTML, clear_output, display

# Select Altair's 'json' data transformer for every chart built below.
alt.data_transformers.enable('json')

pass  # trailing statement, so the cell does not echo the value of the call above

In [51]:
# Load the semicolon-separated births file. 'annais' is parsed as a number;
# rows where it cannot be parsed are dropped and the column is stored as int.
df = (
    pd.read_csv("data/dpt2020.csv", sep=";")
    .assign(annais=lambda raw: pd.to_numeric(raw['annais'], errors='coerce'))
    .dropna(subset=['annais'])
    .astype({'annais': int})
)

In [52]:

# Sum 'nombre' over every row sharing the same name, sex and year.
# The result has exactly one row per (preusuel, sexe, annais) combination.
grouped = (
    df.groupby(['preusuel', 'sexe', 'annais'], as_index=False)['nombre']
    .sum()
)

In [53]:
# Preview the first five aggregated rows (head() defaults to 5).
grouped.head()

Unnamed: 0,preusuel,sexe,annais,nombre
0,AADIL,1,1983,3
1,AADIL,1,1992,3
2,AAHIL,1,2016,3
3,AALIYA,2,2017,3
4,AALIYAH,2,2001,9


In [54]:
# To spot unisex names, count how many distinct sexes (1 or 2) appear for each
# (name, year) pair.
sexes_per_name_year = grouped.groupby(['preusuel', 'annais'])['sexe'].nunique()
name_sex_count = sexes_per_name_year.reset_index()

name_sex_count


Unnamed: 0,preusuel,annais,sexe
0,AADIL,1983,1
1,AADIL,1992,1
2,AAHIL,2016,1
3,AALIYA,2017,1
4,AALIYAH,2001,1
...,...,...,...
249242,ÖMER,2016,1
249243,ÖMER,2017,1
249244,ÖMER,2018,1
249245,ÖMER,2019,1


In [55]:
# Keep only the (name, year) pairs where both sexes are present
# (two distinct values in 'sexe').
has_both_sexes = name_sex_count['sexe'].eq(2)
unisex_name = name_sex_count.loc[has_both_sexes]


In [56]:
# Restrict the aggregated counts to the unisex (name, year) pairs by joining
# on both keys (default inner join).
unisex_keys = unisex_name[['preusuel', 'annais']]
df_unisex = pd.merge(grouped, unisex_keys, on=['preusuel', 'annais'])


In [57]:

# One row per (name, year) with the counts of each sex side by side.
# The two pivoted 'sexe' columns are relabelled positionally as men / women
# (assumes they come out ordered 1, 2 - TODO confirm), then summed into 'total'.
df_plot = (
    df_unisex
    .pivot(index=['preusuel', 'annais'], columns='sexe', values='nombre')
    .fillna(0)
    .reset_index()
    .set_axis(['preusuel', 'annais', 'men', 'women'], axis=1)
    .assign(total=lambda wide: wide['men'] + wide['women'])
)


In [58]:

# Reshape to long format: the 'men' and 'women' columns become rows, labelled
# in 'sex' with their value in 'count'; name, year and total are kept as ids.
df_plot = pd.melt(
    df_plot,
    id_vars=['preusuel', 'annais', 'total'],
    value_vars=['men', 'women'],
    var_name='sex',
    value_name='count',
)

df_plot.head()

Unnamed: 0,preusuel,annais,total,sex,count
0,ABDON,1918,8,men,5
1,ABDON,1922,19,men,15
2,ABDON,1927,20,men,11
3,ABDON,1935,14,men,11
4,ABDON,1937,6,men,3


In [59]:
# Share of each sex within its (name, year) total, as a fraction between 0 and 1.
df_plot['pct'] = df_plot['count'].div(df_plot['total'])
df_plot

Unnamed: 0,preusuel,annais,total,sex,count,pct
0,ABDON,1918,8,men,5,0.625000
1,ABDON,1922,19,men,15,0.789474
2,ABDON,1927,20,men,11,0.550000
3,ABDON,1935,14,men,11,0.785714
4,ABDON,1937,6,men,3,0.500000
...,...,...,...,...,...,...
16677,ÉDEN,2020,228,women,13,0.057018
16678,ÉLIE,2015,73,women,4,0.054795
16679,ÉLIE,2017,100,women,3,0.030000
16680,ÉLIE,2019,107,women,6,0.056075


In [60]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [61]:
# One-time environment setup; uncomment and run only if the widgets below do not render.
# !pip install ipywidgets
# !jupyter nbextension enable --py widgetsnbextension

In [62]:
# Counts per sex for every (name, year) pair of `grouped`, plus the male share.
# The two pivoted columns are relabelled positionally (sexe 1 = men, 2 = women).
pivot_year = (
    grouped
    .pivot(index=['preusuel', 'annais'], columns='sexe', values='nombre')
    .fillna(0)
    .set_axis(['men', 'women'], axis=1)
    .assign(pct_male=lambda counts: counts['men'] / (counts['men'] + counts['women']))
)

# Flat copy with 'preusuel' and 'annais' back as ordinary columns.
df_pct = pivot_year.reset_index()


In [63]:


# Remove the '_PRENOMS_RARES' entry from the frame behind the bar chart.
# NOTE(review): df_pct is not filtered here, so the picker below still offers
# '_PRENOMS_RARES' - confirm whether that is intended.
df_plot = df_plot.loc[df_plot['preusuel'].ne('_PRENOMS_RARES')]

# Picker options: every distinct name found in df_pct, in sorted order.
names = sorted(set(df_pct['preusuel']))
# Names currently chosen through the picker; mutated by the callbacks.
selected_names = []

name_input = widgets.Combobox(
    options=names,
    description='Name:',
    placeholder='Choose a name',
    ensure_option=True,
)

# Separate output areas for the tag row and for the chart.
output_tags, output_chart = widgets.Output(), widgets.Output()

# Styling for the small round 'x' buttons carrying the "x-button" class.
display(HTML("""
<style>
.widget-button.x-button {
    border-radius: 15px !important;
    background-color: lightgray !important;
    color: black;
    min-width: 20px;
    min-height: 20px;
    padding: 0;
    border: none;
}

.widget-button.x-button:hover {
    background-color: darkgray !important;
    color: white;
}
</style>
"""))

def update_tag_display():
    """Redraw the row of name tags inside ``output_tags``.

    The output area is cleared, then a wrapping ``HBox`` is displayed holding
    one tag per entry of ``selected_names``: a label with the name followed by
    an 'x' button that calls ``remove_name`` for that name when clicked.
    """

    def build_tag(tag_name):
        # tag_name is bound per call, so every button removes its own name.
        close_button = widgets.Button(description='x', layout=widgets.Layout(width='30px'))
        close_button.add_class("x-button")
        close_button.on_click(lambda _button: remove_name(tag_name))
        return widgets.HBox(
            [widgets.Label(tag_name), close_button],
            layout=widgets.Layout(margin='0 5px 0 0'),
        )

    with output_tags:
        clear_output()
        tags = [build_tag(chosen) for chosen in selected_names]
        display(widgets.HBox(tags, layout=widgets.Layout(flex_wrap='wrap')))


def remove_name(name):
    """Remove ``name`` from ``selected_names`` and refresh the tags and chart.

    Does nothing when ``name`` is not currently selected.
    """
    if name not in selected_names:
        return
    selected_names.remove(name)
    update_tag_display()
    update_chart()

def on_name_selected(change):
    """Observer for the picker's value: add a newly chosen name to the selection.

    Args:
        change: change dictionary whose ``'new'`` entry is the picker's new value.

    Empty values and names already in ``selected_names`` are ignored.
    Otherwise the name is appended, the picker is reset to an empty string and
    both the tag row and the chart are refreshed.
    """
    picked = change['new']
    if not picked or picked in selected_names:
        return
    selected_names.append(picked)
    name_input.value = ''
    update_tag_display()
    update_chart()

# Route every change of the picker's value through on_name_selected.
name_input.observe(on_name_selected, names='value')

# Point selection on the name of a clicked bar; update_chart() replaces it
# with a fresh selection each time it runs.
click_name = alt.selection_point(
    name="click_name",
    fields=["preusuel"],
    on="click",
)

# Slider spanning the years present in df_plot, bound to a selection on 'annais'.
year_slider = alt.binding_range(
    name='Year:',
    min=int(df_plot['annais'].min()),
    max=int(df_plot['annais'].max()),
    step=1,
)

year_select = alt.selection_point(
    name="Select",
    fields=['annais'],
    bind=year_slider,
    value=2000,
)

# Left panel: men/women split of the highest-total unisex names for the year
# chosen on the slider.
chart_left = alt.Chart(df_plot).transform_filter(
    year_select
).transform_window(
    rank='rank()',
    sort=[alt.SortField('total', order='descending')],
    groupby=['annais']
).transform_filter(
    # Each name contributes two rows (men, women) sharing the same 'total',
    # so a rank threshold of 40 keeps about 20 names.
    (alt.datum.rank <= 40)
).mark_bar().encode(
    x=alt.X('pct:Q', stack='normalize', axis=alt.Axis(format='%'), title='Percentage'),
    y=alt.Y('preusuel:N', sort='-x'),
    color=alt.Color('sex:N', scale=alt.Scale(domain=['men', 'women'], range=['purple', 'red'])),
    tooltip=['preusuel', 'sex', alt.Tooltip('pct:Q', format='.2%'), 'total'],
    opacity=alt.condition(click_name, alt.value(1), alt.value(0.4))  # dim the bars outside the selection
).add_params(
    year_select,
    click_name
).properties(
    width=300,
    height=400,
    title='Top 20 unisex names for selected year'
)

# Tick positions of the year axis in the right-hand chart: every 5 years, 1900-2020.
years_to_show = list(range(1900, 2021, 5))

# Selection of a single (name, year) point, used by create_right_charts().
# An earlier definition of `click` (empty=True, clear='dblclick') used to sit
# above chart_left; it was overwritten here before anything read it, so only
# this definition is kept.
click = alt.selection_point(
    fields=['preusuel', 'annais'],
    on='click',
    empty=False
)

# NOTE(review): this is the string 'None', not the None object. Nothing in this
# cell reads the variable (get_click_name_selection takes its own parameter of
# the same name) - confirm whether it is still needed.
clicked_name_state = 'None'

def get_click_name_selection(clicked_name_state=None):
    """Create a new ``click_name`` point selection on the ``preusuel`` field.

    Args:
        clicked_name_state: name to pre-select. Any falsy value (the default
            ``None``, an empty string, ...) falls back to ``"CAMILLE"``.

    Returns:
        The result of ``alt.selection_point`` configured for click selection,
        with toggling enabled and double-click as the clear event.
    """
    initial_name = clicked_name_state or "CAMILLE"
    return alt.selection_point(
        name="click_name",
        fields=["preusuel"],
        on="click",
        toggle=True,  # allows multi-selection
        clear='dblclick',
        value=[{"preusuel": initial_name}],
    )
    
def create_right_charts():
    """Assemble the right-hand panel: male-share lines over time plus a ratio bar.

    Reads the module-level ``selected_names``, ``click_name``, ``click``,
    ``df_pct`` and ``years_to_show``.

    Returns:
        A horizontal concatenation, with independent colour scales, of

        * a layered chart: ``pct_male`` lines for the names in
          ``selected_names``, lines for the names selected through
          ``click_name``, a red marker on the point selected by ``click``,
          a grey rule at 0.5 and 'Women' / 'Men' text labels;
        * a stacked bar splitting the ``click``-selected row into ``pct_male``
          and ``1 - pct_male``.
    """
    # NOTE(review): click_name is an Altair parameter object, so `not click_name`
    # is presumably always False and this placeholder is never returned - confirm.
    if not selected_names and not click_name:
        return alt.Chart().mark_text(text='No names selected.').properties(width=600, height=400)

    # Shared encodings of the line layers.
    base = (
        alt.Chart(df_pct)
        .encode(
            x=alt.X(
                'annais:Q',
                title='Year',
                axis=alt.Axis(
                    tickMinStep=5,
                    values=years_to_show,
                    labelAngle=-45,
                    format='d',
                    orient='bottom',
                ),
                scale=alt.Scale(domain=[1900, 2020]),
            ),
            y=alt.Y('pct_male:Q', scale=alt.Scale(domain=[0, 1])),
            color='preusuel:N',
            tooltip=['preusuel', 'annais', alt.Tooltip('pct_male:Q', format='.2f')],
        )
        .properties(
            width=500,
            height=400,
            title='Gender proportion over time for selected names',
        )
    )

    # Lines for the names chosen in the picker.
    picker_lines = base.mark_line(point=True).transform_filter(
        alt.FieldOneOfPredicate(field='preusuel', oneOf=selected_names)
    )

    # Lines for the names selected by clicking bars of the left chart.
    clicked_lines = (
        base.mark_line(point=True)
        .transform_filter(click_name)
        .add_params(click_name)
    )

    trend_lines = picker_lines + clicked_lines

    # Large red marker on the (name, year) point selected through `click`.
    selected_point = (
        alt.Chart(df_pct)
        .mark_point(filled=True, size=150, color='red')
        .encode(
            x=alt.X('annais:Q'),
            y='pct_male:Q',
            tooltip=['preusuel', 'annais', alt.Tooltip('pct_male:Q', format='.4f')],
        )
        .transform_filter(click)
        .add_params(click)
    )

    # Horizontal reference line at a male share of 0.5.
    parity_rule = (
        alt.Chart(pd.DataFrame({'y': [0.5]}))
        .mark_rule(color='grey')
        .encode(y='y:Q')
    )

    # Text labels at y = 0 ('Women') and y = 1 ('Men').
    end_labels = (
        alt.Chart(pd.DataFrame({'y': [0, 1], 'label': ['Women', 'Men']}))
        .mark_text(align='left', dx=-300, dy=-5)
        .encode(y='y:Q', text='label:N')
    )

    # Stacked bar for the clicked point only: pct_male and its complement.
    ratio_bar = (
        alt.Chart(df_pct)
        .transform_filter(click)
        .transform_calculate(prop_female='1 - datum.pct_male')
        .transform_fold(['prop_female', 'pct_male'], as_=['gender', 'value'])
        .encode(
            y=alt.Y('value:Q', stack='zero', scale=alt.Scale(domain=[0, 1])),
            x=alt.value(20),
            color=alt.Color(
                'gender:N',
                scale=alt.Scale(domain=['pct_male', 'prop_female'], range=['purple', 'red']),
                legend=None,
            ),
            order=alt.Order('gender:N', sort='ascending'),
        )
        .mark_bar()
        .properties(width=30, height=400, title='Ratio (W/M)')
        .add_params(click)
    )

    layered = trend_lines + selected_point + parity_rule + end_labels
    return alt.hconcat(layered, ratio_bar).resolve_scale(color='independent')

def update_chart():
    """Rebuild both panels and render them inside ``output_chart``.

    Replaces the module-level ``click_name`` with a fresh selection from
    ``get_click_name_selection()`` and rebuilds the module-level ``chart_left``
    around it. The result is concatenated with ``create_right_charts()`` and
    displayed. Any exception raised on the way is caught and printed as an
    error message in the output area.
    """
    global click_name, chart_left
    with output_chart:
        clear_output()
        try:
            click_name = get_click_name_selection()

            # Rows of the slider-selected year, ranked by descending total;
            # a rank threshold of 40 is applied.
            ranked_rows = (
                alt.Chart(df_plot)
                .transform_filter(year_select)
                .transform_window(
                    rank='rank()',
                    sort=[alt.SortField('total', order='descending')],
                    groupby=['annais'],
                )
                .transform_filter(alt.datum.rank <= 40)
            )

            chart_left = (
                ranked_rows
                .mark_bar()
                .encode(
                    x=alt.X('pct:Q', stack='normalize', axis=alt.Axis(format='%'), title='Percentage'),
                    y=alt.Y('preusuel:N', sort='-x'),
                    color=alt.Color(
                        'sex:N',
                        scale=alt.Scale(domain=['men', 'women'], range=['purple', 'red']),
                    ),
                    tooltip=['preusuel', 'sex', alt.Tooltip('pct:Q', format='.2%'), 'total'],
                    opacity=alt.condition(click_name, alt.value(1), alt.value(0.4)),
                )
                .add_params(year_select, click_name)
                .properties(
                    width=300,
                    height=400,
                    title='Top 20 unisex names for selected year',
                )
            )

            dashboard = alt.hconcat(chart_left, create_right_charts()).resolve_scale(color='independent')
            display(dashboard)
        except Exception as err:
            print("Error displaying chart:", err)

# Widen range inputs on the page (this includes the year slider).
display(HTML('''
<style>
input[type="range"] {
    width: 400px !important; 
    height: 25px !important; 
}
</style>
'''))

# Show the picker, the tag row and the chart area, in that order.
for ui_element in (name_input, output_tags, output_chart):
    display(ui_element)

# First render of the chart.
update_chart()


Combobox(value='', description='Name:', ensure_option=True, options=('AADIL', 'AAHIL', 'AALIYA', 'AALIYAH', 'A…

Output()

Output()

In [65]:
# Rebuild the combined chart from the current chart_left and write it to an
# HTML file.
# NOTE(review): the 'json' data transformer is enabled in the first cell, so the
# saved page presumably references external data files - confirm it opens standalone.
right_panel = create_right_charts()
final = alt.hconcat(chart_left, right_panel).resolve_scale(color='independent')

final.save('Vis3.html')
