In [61]:
# French Baby Names Map Visualization
import altair as alt
import pandas as pd
import geopandas as gpd

alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [None]:
# TODO
# - Better score calculation (and more meaningful, like  name/1000 births)
# - Maybe a second chart where you can select a range of years, one year is not always enough and pertinent
# - Cleaner code
# - Keep the original scores

In [62]:
# loading data
names = pd.read_csv('dpt2020.csv', sep=';')
names.drop(names[names.preusuel == '_PRENOMS_RARES'].index, inplace=True)
names.drop(names[names.dpt == 'XX'].index, inplace=True)
names.sample(5)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
705731,1,HICHEM,2013,13,4
1694195,1,YLIANO,2016,59,6
2913483,2,LOUNA,2007,57,19
1033215,1,LOUIS,2013,43,12
2581173,2,HENRIETTE,1909,10,32


In [63]:
# Grab min year, max year from annais column for later
min_year = names.annais.min()
max_year = names.annais.max()
min_year = int(min_year)
max_year = int(max_year)

# Grab all departments
departments = names.dpt.unique()

In [64]:
#loading map data
depts = gpd.read_file('departements-version-simplifiee.geojson')
depts.sample(5)

Unnamed: 0,code,nom,geometry
90,90,Territoire de Belfort,"POLYGON ((6.82354 47.81305, 6.84618 47.82295, ..."
94,94,Val-de-Marne,"POLYGON ((2.3319 48.81701, 2.36395 48.81632, 2..."
73,73,Savoie,"POLYGON ((6.80252 45.77837, 6.80842 45.72515, ..."
2,3,Allier,"POLYGON ((3.03207 46.79491, 3.04907 46.75808, ..."
8,9,Ariège,"POLYGON ((1.68842 43.27355, 1.70075 43.26054, ..."


In [None]:
# keep a copy of just the names
just_names = names
# merge names with map data
names = depts.merge(names, how='right', left_on='code', right_on='dpt')
names.sample(5)

Unnamed: 0,code,nom,geometry,sexe,preusuel,annais,dpt,nombre
305328,71,Saône-et-Loire,"POLYGON ((4.11597 47.12334, 4.15377 47.11456, ...",1,CLAUDIUS,1913,71,50
2252393,14,Calvados,"POLYGON ((-1.11962 49.35557, -1.07822 49.38849...",2,EGLANTINE,1989,14,3
2673058,16,Charente,"POLYGON ((-0.10294 45.96966, -0.04143 45.99348...",2,JULIE,1983,16,32
161667,17,Charente-Maritime,"MULTIPOLYGON (((-1.4809 46.21003, -1.4528 46.2...",1,ARTHUR,1987,17,6
65539,26,Drôme,"POLYGON ((4.80049 45.29836, 4.8588 45.30895, 4...",1,ALEXANDRE,1982,26,55


In [76]:
# For each department, find the number of birth for each year
dep_year = names.groupby(['dpt', 'annais']).agg({'nombre': 'sum'}).reset_index()       
dep_year['annais'] = dep_year['annais'].astype(int)

In [None]:
# Select a specific name to visualize
selected_name = "Marie"

# Filter data for the selected name and convert year to numeric
name_data = just_names[just_names['preusuel'] == selected_name].copy()
name_data['annais'] = pd.to_numeric(name_data['annais'], errors='coerce')
name_data = name_data.dropna(subset=['annais'])
name_data['annais'] = name_data['annais'].astype(int)

# Group by department and year to get total counts
name_by_year_dept = name_data.groupby(['dpt', 'annais'], as_index=False).agg({'nombre': 'sum'})

# Dropna values in 'nombre' column
name_by_year_dept = name_by_year_dept.dropna(subset=['nombre'])

# Loop over each year, over each department, and add missing combinations to the dataframe
for year in range(min_year, max_year + 1):
    for dpt in departments:
        # If no entry for this year and department, add a row with count 0
        if not ((name_by_year_dept['dpt'] == dpt) & (name_by_year_dept['annais'] == year)).any():
            new_row = pd.DataFrame({'dpt': [dpt], 'annais': [year], 'nombre': [0]})
            name_by_year_dept = pd.concat([name_by_year_dept, new_row], ignore_index=True)
name_by_year_dept['nombre'] = pd.to_numeric(name_by_year_dept['nombre'], errors='coerce').fillna(0)

# Add total counts for each department across all years
name_by_year_dept = name_by_year_dept.merge(dep_year, on=['dpt', 'annais'], how='left', suffixes=('', 'total'))
print(f"Columns: {name_by_year_dept.columns.tolist()}")
# Update the 'nombre' column to be the relative to the total
name_by_year_dept['nombre'] = name_by_year_dept['nombre'] / name_by_year_dept['nombretotal']

# Merge with department geometry
map_data = depts.merge(name_by_year_dept, how='left', left_on='code', right_on='dpt')
map_data['nombre'] = map_data['nombre'].fillna(0)

print(f"Data for name '{selected_name}':")
print(f"Available years: {sorted(name_by_year_dept['annais'].unique())}")
print(f"Total records: {len(map_data)}")

Columns: ['dpt', 'annais', 'nombre', 'nombretotal']
Data for name 'Marie':
Available years: [np.int64(1900), np.int64(1901), np.int64(1902), np.int64(1903), np.int64(1904), np.int64(1905), np.int64(1906), np.int64(1907), np.int64(1908), np.int64(1909), np.int64(1910), np.int64(1911), np.int64(1912), np.int64(1913), np.int64(1914), np.int64(1915), np.int64(1916), np.int64(1917), np.int64(1918), np.int64(1919), np.int64(1920), np.int64(1921), np.int64(1922), np.int64(1923), np.int64(1924), np.int64(1925), np.int64(1926), np.int64(1927), np.int64(1928), np.int64(1929), np.int64(1930), np.int64(1931), np.int64(1932), np.int64(1933), np.int64(1934), np.int64(1935), np.int64(1936), np.int64(1937), np.int64(1938), np.int64(1939), np.int64(1940), np.int64(1941), np.int64(1942), np.int64(1943), np.int64(1944), np.int64(1945), np.int64(1946), np.int64(1947), np.int64(1948), np.int64(1949), np.int64(1950), np.int64(1951), np.int64(1952), np.int64(1953), np.int64(1954), np.int64(1955), np.int64(19

In [87]:
# Create year slider
year_slider = alt.binding_range(min=int(map_data['annais'].min()), 
                               max=int(map_data['annais'].max()), 
                               step=1, 
                               name='Year: ')
year_select = alt.param(bind=year_slider, value=2000)

# Create the map with dynamic scale
base_map = alt.Chart(map_data).add_params(
    year_select
).transform_filter(
    alt.datum.annais == year_select    
).mark_geoshape(
    stroke='white',
    strokeWidth=0.5
).encode(
    color=alt.Color('nombre:Q', 
                   scale=alt.Scale(scheme='blues'),
                   title=f'Number of babies named {selected_name}'),
    tooltip=['nom:N', 'code:N', 'annais:O', 'nombre:Q']
).properties(
    width=800,
    height=600,
    title=f'Popularity of the name "{selected_name}" across French departments'
).resolve_scale(
    color='independent'
)

base_map