In [1]:
# French Baby Names Map Visualization
import altair as alt
import pandas as pd
import geopandas as gpd

alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [2]:
# loading data
names = pd.read_csv('dpt2020.csv', sep=';')
names.drop(names[names.preusuel == '_PRENOMS_RARES'].index, inplace=True)
names.drop(names[names.dpt == 'XX'].index, inplace=True)
names.sample(5)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
2328100,2,ELISABETH,2005,94,4
3203324,2,MELINA,2014,7,3
2616559,2,IRÈNE,1938,40,3
974119,1,LÉANDRE,2009,24,4
2743446,2,KAREN,1983,77,4


In [3]:
#loading map data
depts = gpd.read_file('departements-version-simplifiee.geojson')
depts.sample(5)

Unnamed: 0,code,nom,geometry
21,23,Creuse,"POLYGON ((2.16779 46.42407, 2.19757 46.4283, 2..."
30,30,Gard,"POLYGON ((3.37365 44.17076, 3.43083 44.148, 3...."
11,12,Aveyron,"POLYGON ((2.20748 44.61553, 2.20841 44.64384, ..."
87,87,Haute-Vienne,"POLYGON ((0.82343 46.12858, 0.83345 46.16655, ..."
43,43,Haute-Loire,"POLYGON ((3.89741 45.35708, 3.91694 45.3394, 3..."


In [4]:
# keep a copy of just the names
just_names = names
# merge names with map data
names = depts.merge(names, how='right', left_on='code', right_on='dpt')
names.sample(5)

Unnamed: 0,code,nom,geometry,sexe,preusuel,annais,dpt,nombre
3548575,59,Nord,"MULTIPOLYGON (((3.0404 50.15971, 3.06301 50.17...",2,SUZANNE,2000,59,12
1601610,59,Nord,"MULTIPOLYGON (((3.0404 50.15971, 3.06301 50.17...",1,VALERY,1980,59,8
63095,17,Charente-Maritime,"MULTIPOLYGON (((-1.4809 46.21003, -1.4528 46.2...",1,ALEXANDRE,1935,17,3
3233227,93,Seine-Saint-Denis,"POLYGON ((2.55306 49.00982, 2.58031 48.99159, ...",2,NAELA,2008,93,3
3442182,77,Seine-et-Marne,"POLYGON ((2.57166 48.69201, 2.5688 48.70722, 2...",2,SAFIYA,2013,77,7


In [5]:
# Group by dpt, preusuel, sexe to get total numbers
grouped = names.groupby(['dpt', 'preusuel', 'sexe'], as_index=False).agg({'nombre': 'sum'})

# For each department, find the name with max and min count
def get_extremes(df):
    max_row = df.loc[df['nombre'].idxmax()]
    min_row = df.loc[df['nombre'].idxmin()]
    return pd.Series({
        'max_name': max_row['preusuel'],
        'max_nombre': max_row['nombre'],
        'min_name': min_row['preusuel'],
        'min_nombre': min_row['nombre'],
    })

extremes = grouped.groupby('dpt').apply(get_extremes).reset_index()

# Merge extremes with department geometry
merged = depts.merge(extremes, how='left', left_on='code', right_on='dpt')

  extremes = grouped.groupby('dpt').apply(get_extremes).reset_index()


In [None]:
# Plot
alt.Chart(merged).mark_geoshape(stroke='white').encode(
    tooltip=['nom:N', 'max_name:N', 'max_nombre:Q', 'min_name:N', 'min_nombre:Q'],
    color='max_nombre:Q'
).properties(width=800, height=600)