In [38]:
# French Baby Names Map Visualization
import altair as alt
import pandas as pd
import geopandas as gpd

alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [39]:
# loading data
names = pd.read_csv('dpt2020.csv', sep=';')
names.drop(names[names.preusuel == '_PRENOMS_RARES'].index, inplace=True)
names.drop(names[names.dpt == 'XX'].index, inplace=True)
names.sample(5)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
443121,1,EMILE,1918,3,17
466020,1,ERIC,1974,29,154
2154016,2,CHRISTINE,1949,24,9
1444553,1,RODRIGUE,1975,92,3
479878,1,ESTEBAN,2019,6,15


In [40]:
#loading map data
depts = gpd.read_file('departements-version-simplifiee.geojson')
depts.sample(5)

Unnamed: 0,code,nom,geometry
41,41,Loir-et-Cher,"POLYGON ((0.84122 48.10306, 0.87589 48.10944, ..."
9,10,Aube,"POLYGON ((3.41479 48.39027, 3.42208 48.41334, ..."
81,81,Tarn,"POLYGON ((1.99017 44.14945, 2.0297 44.15704, 2..."
22,24,Dordogne,"POLYGON ((0.62974 45.71457, 0.65423 45.6887, 0..."
83,83,Var,"MULTIPOLYGON (((6.4348 43.01554, 6.4552 43.026..."


In [41]:
# keep a copy of just the names
just_names = names
# merge names with map data
names = depts.merge(names, how='right', left_on='code', right_on='dpt')
names.sample(5)

Unnamed: 0,code,nom,geometry,sexe,preusuel,annais,dpt,nombre
1723906,35,Ille-et-Vilaine,"MULTIPOLYGON (((-2.12371 48.60441, -2.14142 48...",2,AGNÈS,1950,35,21
292880,58,Nièvre,"POLYGON ((2.87463 47.52042, 2.8489 47.53754, 2...",1,CHRISTOPHE,1972,58,137
440037,13,Bouches-du-Rhône,"POLYGON ((4.73906 43.92406, 4.82174 43.91283, ...",1,EMMANUEL,1976,13,71
1072361,92,Hauts-de-Seine,"POLYGON ((2.29097 48.95097, 2.32697 48.94536, ...",1,MARC-ANTOINE,1997,92,4
2090478,59,Nord,"MULTIPOLYGON (((3.0404 50.15971, 3.06301 50.17...",2,CHARLYNE,1996,59,3


In [53]:
# Group by dpt, preusuel, sexe to get total numbers
grouped = names.groupby(['dpt', 'preusuel', 'sexe'], as_index=False).agg({'nombre': 'sum'})

# For each department, find the name with max and min count
def get_extremes(df):
    max_row = df.loc[df['nombre'].idxmax()]
    min_row = df.loc[df['nombre'].idxmin()]
    return pd.Series({
        'max_name': max_row['preusuel'],
        'max_nombre': max_row['nombre'],
        'min_name': min_row['preusuel'],
        'min_nombre': min_row['nombre'],
    })

extremes = grouped.groupby('dpt').apply(get_extremes).reset_index()

# Merge extremes with department geometry
merged = depts.merge(extremes, how='left', left_on='code', right_on='dpt')

  extremes = grouped.groupby('dpt').apply(get_extremes).reset_index()


In [54]:
# Plot
alt.Chart(merged).mark_geoshape(stroke='white').encode(
    tooltip=['nom:N', 'code:N', 'max_name:N', 'max_nombre:Q', 'min_name:N', 'min_nombre:Q'],
    color='max_nombre:Q'
).properties(width=800, height=600)