# Imports

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

import requests
import json

import altair as alt

In [2]:
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
PROJECT_DIR = '../'
DATA_DIR = PROJECT_DIR + 'data/'
REPORTS_DIR = PROJECT_DIR + 'reports/'

# Data exploration

### A) Create the map

In [4]:
json_file = DATA_DIR + 'departements.geojson'
df_map = gpd.read_file(json_file)
df_map.columns = ['department_id', 'department_name', 'geometry']
df_map

Unnamed: 0,department_id,department_name,geometry
0,02,Aisne,"POLYGON ((3.17270 50.01200, 3.18220 50.01234, ..."
1,10,Aube,"POLYGON ((3.41479 48.39027, 3.42208 48.41334, ..."
2,14,Calvados,"POLYGON ((-1.11962 49.35557, -1.11503 49.36240..."
3,15,Cantal,"POLYGON ((2.50841 45.47850, 2.52444 45.48070, ..."
4,28,Eure-et-Loir,"POLYGON ((0.81482 48.67016, 0.82767 48.68072, ..."
...,...,...,...
91,41,Loir-et-Cher,"POLYGON ((0.84122 48.10306, 0.87588 48.10943, ..."
92,53,Mayenne,"POLYGON ((-0.86036 48.50146, -0.84611 48.49828..."
93,54,Meurthe-et-Moselle,"MULTIPOLYGON (((5.45556 49.48093, 5.44953 49.4..."
94,79,Deux-Sèvres,"POLYGON ((-0.89196 46.97582, -0.87973 46.97580..."


### B) Load baby names information

In [5]:
data_file = DATA_DIR + 'dpt2020.csv'
df_w_sex = pd.read_csv(data_file, delimiter=';')
df_w_sex.columns = ['sex', 'name', 'year', 'department_id', 'count']
df_w_sex  

Unnamed: 0,sex,name,year,department_id,count
0,1,_PRENOMS_RARES,1900,02,7
1,1,_PRENOMS_RARES,1900,04,9
2,1,_PRENOMS_RARES,1900,05,8
3,1,_PRENOMS_RARES,1900,06,23
4,1,_PRENOMS_RARES,1900,07,9
...,...,...,...,...,...
3727548,2,ZYA,2018,59,3
3727549,2,ZYA,XXXX,XX,264
3727550,2,ZYNA,2013,93,3
3727551,2,ZYNA,XXXX,XX,59


In [6]:
df_w_sex['name'] = df_w_sex['name'].replace('_PRENOMS_RARES', np.nan)
df_w_sex['year'] = df_w_sex['year'].replace('XXXX', np.nan)
df_w_sex = df_w_sex.dropna()
df_w_sex

Unnamed: 0,sex,name,year,department_id,count
10885,1,AADIL,1983,84,3
10886,1,AADIL,1992,92,3
10888,1,AAHIL,2016,95,3
10892,1,AARON,1962,75,3
10893,1,AARON,1976,75,3
...,...,...,...,...,...
3727545,2,ZYA,2013,44,4
3727546,2,ZYA,2013,59,3
3727547,2,ZYA,2017,974,3
3727548,2,ZYA,2018,59,3


In [7]:
df_wo_sex = df_w_sex.groupby(by=['name', 'year', 'department_id']).sum().reset_index()
df_wo_sex

Unnamed: 0,name,year,department_id,sex,count
0,AADIL,1983,84,1,3
1,AADIL,1992,92,1,3
2,AAHIL,2016,95,1,3
3,AALIYA,2017,75,2,3
4,AALIYAH,2001,92,2,4
...,...,...,...,...,...
3637581,ÖMER,2020,45,1,3
3637582,ÖMER,2020,60,1,4
3637583,ÖMER,2020,67,1,3
3637584,ÖMER,2020,68,1,3


In [8]:
display(df_w_sex['name'].value_counts())
print()
display(df_wo_sex['name'].value_counts())

CAMILLE        13820
MARIE          13300
PIERRE         11388
PAUL           10711
JEAN           10694
               ...  
FELINE             1
ROBESPIERRE        1
MOMINA             1
LEONCINE           1
ZYNA               1
Name: name, Length: 15270, dtype: int64




MARIE     11353
PIERRE    11278
PAUL      10706
JEAN      10639
LOUIS     10119
          ...  
AÏRON         1
AÏTOR         1
MATTEI        1
BADICE        1
BARTHA        1
Name: name, Length: 15270, dtype: int64

### C) Create the final dataframe

# Data visualization

### A) Visualization 1

##### Top names over time

In [9]:
top_n_names_by_year = 3

df_wo_sex_top_n = df_wo_sex[['name', 'year', 'count', 'sex']].copy()
df_wo_sex_top_n = df_wo_sex_top_n.groupby(['year', 'name']).sum().reset_index()
df_wo_sex_top_n = df_wo_sex_top_n.groupby('year').apply(lambda x: x.sort_values('count', ascending=False)).reset_index(drop=True)
df_wo_sex_top_n = df_wo_sex_top_n.groupby('year').head(top_n_names_by_year)

valid_names = df_wo_sex_top_n['name'].unique().tolist()
print(len(valid_names))

df_wo_sex_top_n

46


Unnamed: 0,year,name,count,sex
0,1900,MARIE,49752,263
1,1900,JEAN,14100,96
2,1900,JEANNE,13981,188
997,1901,MARIE,53177,260
998,1901,JEAN,15638,96
...,...,...,...,...
240536,2019,LÉO,4654,98
240537,2019,RAPHAËL,4458,97
244876,2020,LÉO,4491,96
244877,2020,GABRIEL,4410,96


In [10]:
df_w_sex_by_years = df_w_sex[['year', 'name', 'count', 'sex']].copy()
df_w_sex_by_years = df_w_sex_by_years[df_w_sex_by_years['name'].isin(valid_names)]
df_w_sex_by_years = df_w_sex_by_years.groupby(['year', 'name', 'sex']).sum().reset_index()

# Create the global rank and the sex rank
s_rank_global = df_w_sex_by_years.groupby(by=['name']).sum()['count'].rank(ascending=False, method='min').astype(int)
df_w_sex_by_years['rank'] = df_w_sex_by_years['name'].apply(lambda x: s_rank_global.loc[x])

# Sort by year and rank
df_w_sex_by_years = df_w_sex_by_years.sort_values(by=['year', 'rank']).reset_index(drop=True)

# Preprocess data
df_w_sex_by_years['year'] = df_w_sex_by_years['year'].astype(int)
df_w_sex_by_years['sex']= df_w_sex_by_years['sex'].map({1:'male', 2:'female'})
years_to_plot = df_w_sex_by_years['year'].unique().tolist()

df_w_sex_by_years

  s_rank_global = df_w_sex_by_years.groupby(by=['name']).sum()['count'].rank(ascending=False, method='min').astype(int)


Unnamed: 0,year,name,sex,count,rank
0,1900,MARIE,male,1039,1
1,1900,MARIE,female,48713,1
2,1900,JEAN,male,14097,2
3,1900,JEAN,female,3,2
4,1900,MICHEL,male,729,3
...,...,...,...,...,...
5906,2020,RAPHAËL,male,3965,42
5907,2020,THÉO,male,1918,43
5908,2020,ENZO,male,1583,44
5909,2020,NATHAN,male,2399,45


In [67]:
df_plot = df_w_sex_by_years.copy()

# Define the top_n and years_to_plot variables
top_n = 10

# Create slider
slider_year = alt.binding_range(min=df_plot['year'].min()+1, max=df_plot['year'].max(), step=1, name='Year:')
selector_year = alt.selection_point(name='year', fields=['year'], bind=slider_year, value={'year': df_plot['year'].min()})

# 
brush = alt.selection_point(name='name_selector', fields=['name'], value=df_plot[df_plot['year']==df_plot['year'].min()]['name'][0])
# brush = alt.selection_interval(encodings=['y'], resolve='global')

# Bar chart of top names
bar_ranking = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X('count:Q', scale=alt.Scale(domain=(0, df_plot['count'].max()))),
    y=alt.Y('name:N', sort=alt.EncodingSortField(field='count', op='sum', order='descending')),
    tooltip=[
        alt.Tooltip('year:N', title='Year'),
        alt.Tooltip('name:N', title='Name'),
        alt.Tooltip('rank:Q', title='Rank'),
        alt.Tooltip('count:Q', title='Count'),
        alt.Tooltip('sex:N', title='Sex'),
    ],
    color=alt.Color('sex:N', scale=alt.Scale(domain=['male', 'female'], range=['#01A6EA', '#FFB1CB'])),
    opacity=alt.condition(brush, alt.value(1), alt.value(0.2)),
).add_params(
    selector_year, brush
).transform_filter(
    selector_year
).properties(
    title=alt.TitleParams(text=f"Baby names ranking over time", fontSize=16),
    width=400,
    height=350
)

# Display the bar chart
bar_ranking

In [69]:
df_plot.dtypes

year      int32
name     object
sex      object
count     int64
rank      int32
dtype: object

In [17]:
df_plot = df_w_sex_by_years.copy()
df_plot = df_plot[['name', 'year', 'count', 'sex']].groupby(['name', 'year', 'sex']).sum().reset_index()
df_plot['year'] = df_plot['year'].astype(int)
df_plot['year'] = df_plot['year'].astype(str)

# Create names list widget
input_dropdown = alt.binding_select(options=[None]+df_plot['name'].unique().tolist())
selector_name = alt.selection_point(fields=['name'], bind=input_dropdown, name='Name to select for comparison:', value=df_plot['name'].values[0])

# # Line chart of top names
# line_with_selector_name = alt.Chart(df_plot).mark_line(point=True).encode(
#     x=alt.X(
#         'year:N', 
#         scale=alt.Scale(zero=False, domain=df_plot['year'].unique().tolist()), 
#         axis=alt.Axis(tickCount=10, labelExpr='parseInt(datum.value) % 5 === 0 ? datum.label : ""')
#     ),
#     y=alt.Y('count:Q', scale=alt.Scale(zero=False)),
#     tooltip=[
#         alt.Tooltip('year:N', title='Year'),
#         alt.Tooltip('name:N', title='Name'),
#         # alt.Tooltip('rank:Q', title='Rank'),
#         alt.Tooltip('count:Q', title='Count'),
#         # alt.Tooltip('sex:N', title='Sex'),
#     ],
#     color=alt.Color('sex:N'),
#     # color=alt.condition(selector_name, alt.Color('name:N'), alt.value('lightgrey')),
#     # color=alt.condition(brush, alt.ColorValue("steelblue"), alt.ColorValue("grey")),
#     # color=alt.ColorValue('green'),
#     # color=alt.Color('count:Q', legend=alt.Legend(title='Name')),

#     # opacity=alt.condition(brush, alt.value(1), alt.value(0.2)),
# ).add_params(
#     selector_name
# # ).add_params(
# #     brush
# ).transform_filter(
#     selector_name
# # ).transform_filter(
# #     brush
# ).properties(
#     title=alt.TitleParams(text=f"Baby names evolution comparison over time", fontSize=20),
#     width=400,
#     height=300
# )

line_from_bar = alt.Chart(df_plot).mark_line(point=True).encode(
    x=alt.X(
        'year:N', 
        scale=alt.Scale(zero=False, domain=df_plot['year'].unique().tolist()), 
        axis=alt.Axis(tickCount=10, labelExpr='parseInt(datum.value) % 5 === 0 ? datum.label : ""')
    ),
    y=alt.Y('count:Q', scale=alt.Scale(zero=False)),
    tooltip=[
        alt.Tooltip('year:N', title='Year'),
        alt.Tooltip('name:N', title='Name'),
        # alt.Tooltip('rank:Q', title='Rank'),
        alt.Tooltip('count:Q', title='Count'),
        # alt.Tooltip('sex:N', title='Sex'),
    ],
    # color=alt.Color(legend=alt.Legend(title='Name')),
    # color=alt.ColorValue('orange'),
    color=alt.Color('sex:N'),
    # color=alt.ColorValue('green'),
    # color=alt.condition(brush, alt.Color('name:N'), alt.value('lightgrey')),
    # color=alt.condition(brush, alt.ColorValue("steelblue"), alt.ColorValue("grey")),
    # opacity=alt.condition(brush, alt.value(1), alt.value(0.2)),
# ).add_params(
#     selector_name
).add_params(
    brush
# ).transform_filter(
#     selector_name
).transform_filter(
    brush
).properties(
    title=alt.TitleParams(text=f"Baby name gender comparison over time", fontSize=16),
    width=400,
    height=300
)

# Create a selection that chooses the nearest point & selects based on x-value
nearest = alt.selection_point(nearest=True, on='mouseover', fields=['year'], empty=False)

# Transparent selectors across the chart. This is what tells us the x-value of the cursor
selectors = alt.Chart(df_plot).mark_point().encode(
    x='year:N',
    opacity=alt.value(0),
).add_params(
    nearest
)

# line = (line_from_bar + line_with_selector_name)
# line = (line_with_selector_name + line_from_bar)
line = line_from_bar

# Draw points on the line, and highlight based on selection
points1 = line_from_bar.mark_point().encode(opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
# points2 = line_with_selector_name.mark_point().encode(opacity=alt.condition(nearest, alt.value(1), alt.value(0)))

# Draw text labels near the points, and highlight based on selection
text1 = line_from_bar.mark_text(align='left', dx=5, dy=-5).encode(text=alt.condition(nearest, 'count:Q', alt.value(' ')))
# text2 = line_with_selector_name.mark_text(align='left', dx=5, dy=-5).encode(text=alt.condition(nearest, 'count:Q', alt.value(' ')))

# Draw a rule at the location of the selection
rules = alt.Chart(df_plot).mark_rule(color='gray').encode(
    x='year:N',
    color=alt.ColorValue('black')
).transform_filter(
    nearest
)

# Display the line chart
line = alt.layer(
    line, selectors, rules, 
    points1, #points2,
    text1, #text2
)


# Display the line chart
fig = alt.hconcat(
    bar_ranking, 
    line,
).resolve_legend(
    color="independent",
    size="independent"
)
fig

In [65]:
df_wo_sex_by_location = df_wo_sex[['year', 'count', 'department_id', 'name']].copy()

df_wo_sex_by_location = df_wo_sex_by_location.groupby(['year', 'department_id', 'name']).sum().reset_index()
df_wo_sex_by_location = df_wo_sex_by_location.groupby(['year', 'department_id']).apply(lambda x: x.sort_values('count', ascending=False)).reset_index(drop=True)
df_wo_sex_by_location = df_wo_sex_by_location.groupby(['year', 'department_id']).head(1)
df_wo_sex_by_location

df_w_sex_by_location = df_w_sex.copy()
df_w_sex_by_location = df_w_sex_by_location.merge(df_wo_sex_by_location[['name', 'year', 'department_id']], on=['name', 'year', 'department_id'], how='inner')
df_w_sex_by_location['department_id'] = df_w_sex_by_location['department_id'].apply(lambda x: '0'+x if len(x)==1 else x)
df_w_sex_by_location['sex'] = df_w_sex_by_location['sex'].map({1:'male', 2:'female'})
df_w_sex_by_years['year'] = df_w_sex_by_years['year'].astype(int)

df_w_sex_by_location

Unnamed: 0,sex,name,year,department_id,count
0,male,ADAM,2009,92,233
1,male,ADAM,2010,92,237
2,male,ADAM,2011,34,101
3,male,ADAM,2011,84,69
4,male,ADAM,2011,92,284
...,...,...,...,...,...
13333,female,VANESSA,1985,971,77
13334,female,VANESSA,1985,972,83
13335,female,VANESSA,1986,971,77
13336,female,VANESSA,1986,972,89


In [66]:
df_plot = df_w_sex_by_location
# df_plot['department_id'] = df_plot['department_id'].astype(str)
# df_plot['department_id'] = df_plot['department_id'].astype(int)

bar_location = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X(
        'department_id:N', 
        sort=alt.EncodingSortField(field='department_id', order='ascending'),
        scale=alt.Scale(zero=False, domain=sorted(df_plot['department_id'].unique().tolist())), 
        # axis=alt.Axis(tickCount=10)
    ),
    y=alt.Y('count:Q', scale=alt.Scale(zero=False, domain=[0, df_plot['count'].max()])),
    tooltip=[
        alt.Tooltip('department_id:N', title='Department code'),
        alt.Tooltip('count:Q', title='Count'),
        alt.Tooltip('name:N', title='Name'),
        alt.Tooltip('year:N', title='Year'),
        alt.Tooltip('sex:N', title='Sex'),
    ],
    color=alt.Color('sex:N', scale=alt.Scale(domain=['male', 'female'], range=['#01A6EA', '#FFB1CB'])),
    # color=alt.Color('sex:N'),
    opacity=alt.condition(selector_year, alt.value(1), alt.value(0.2)),
).add_params(
    selector_year
).transform_filter(
    selector_year
# ).add_params(
#     brush
# ).transform_filter(
#     brush
).properties(
    title=alt.TitleParams(text=f"Baby name location evolution over time", fontSize=16),
    width=1000,
    height=300
)

# Display bar location
bar_location

# top names for each dept (en haut)
# name selected with the bar chart by dept (en bas)

# fig2 = alt.vconcat(
#     fig, 
#     bar_location,
# ).resolve_legend(
#     color="independent",
#     size="independent"
# )
# fig2

In [38]:
df_plot

Unnamed: 0,sex,name,year,department_id,count
0,1,ADAM,2009,92,233
1,1,ADAM,2010,92,237
2,1,ADAM,2011,34,101
3,1,ADAM,2011,84,69
4,1,ADAM,2011,92,284
...,...,...,...,...,...
13333,2,VANESSA,1985,971,77
13334,2,VANESSA,1985,972,83
13335,2,VANESSA,1986,971,77
13336,2,VANESSA,1986,972,89


In [31]:
df_plot.sort_values('department_id')

Unnamed: 0,sex,name,year,department_id,count
7636,2,MARIE,1920,1,434
2255,1,JEAN,1934,1,202
9269,1,NICOLAS,1984,1,73
3590,1,JEAN,1953,1,276
5415,1,LUCAS,2019,1,28
...,...,...,...,...,...
8388,2,MARIE,1952,974,3367
8396,2,MARIE,1953,974,3437
8407,1,MARIE,1954,974,68
8359,1,MARIE,1948,974,49


In [16]:
stop()

NameError: name 'stop' is not defined

In [None]:
# df_wo_sex_france = df_wo_sex.groupby(by=['name', 'sex', 'year']).sum().reset_index()
# df_wo_sex_france['sex'] = df_wo_sex_france['sex'].map({1:'man', 2:'woman', 3:'both'})
# df_wo_sex_france

In [None]:
# # Filter dataframe
# df_w_sex_filtered = df_w_sex[df_w_sex['name'].isin(top_names[:10])]
# df_w_sex_filtered = df_w_sex_filtered.groupby(by=['name', 'sex', 'year']).sum().reset_index(['name', 'sex', 'year'])
# df_w_sex_filtered['sex'] = df_w_sex_filtered['sex'].map({1:'man', 2:'woman'})
# df_w_sex_filtered

In [None]:
# # Line chart for men
# line_men = alt.Chart(df_w_sex_filtered).mark_line().encode(
#     alt.X('year:N', scale=alt.Scale(zero=False), axis=alt.Axis(tickCount=10, labelExpr='parseInt(datum.value) % 5 === 0 ? datum.label : ""')),
#     alt.Y('count:Q', scale=alt.Scale(zero=False)),
#     alt.Color('name:N', legend=alt.Legend(title='Name')),
#     alt.Shape('sex:N', scale=alt.Scale(domain=['woman', 'man'], range=['triangle', 'square']), legend=alt.Legend(title='Sex')),
#     tooltip=[alt.Tooltip('year:N', title='Year'),
#              alt.Tooltip('name:N', title='Name'),
#              alt.Tooltip('sex:N', title='Sex'),
#              alt.Tooltip('count:Q', title='Count')
#     ],
# ).transform_filter(
#     alt.datum.sex == 'man'
# ).properties(
#     title=alt.TitleParams(text=f"Evolution of the baby names in France over time", fontSize=20),
#     width=800,
#     height=400
# )

# # Line chart for women
# line_women = alt.Chart(df_w_sex_filtered).mark_line().encode(
#     alt.X('year:N', scale=alt.Scale(zero=False), axis=alt.Axis(tickCount=10, labelExpr='parseInt(datum.value) % 5 === 0 ? datum.label : ""')),
#     alt.Y('count:Q', scale=alt.Scale(zero=False)),
#     alt.Color('name:N', legend=alt.Legend(title='Name')),
#     alt.Shape('sex:N', scale=alt.Scale(domain=['woman', 'man'], range=['triangle', 'square']), legend=alt.Legend(title='Sex')),
#     tooltip=[alt.Tooltip('year:N', title='Year'),
#              alt.Tooltip('name:N', title='Name'),
#              alt.Tooltip('sex:N', title='Sex'),
#              alt.Tooltip('count:Q', title='Count')
#     ],
# ).transform_filter(
#     alt.datum.sex == 'woman'
# ).properties(
#     title=alt.TitleParams(text=f"Evolution of the baby names in France over time", fontSize=20),
#     width=800,
#     height=400
# )

# # Combine the line charts
# line = alt.layer(line_men, line_women).resolve_scale(shape='independent').properties(
#     title=alt.TitleParams(text=f"Evolution of the baby names in France over time", fontSize=20),
#     width=1000,
#     height=400
# )

# ## Create a selection that chooses the nearest point & selects based on x-value
# # nearest = alt.selection_point(nearest=True, on='mouseover', fields=['year'], empty=False)

# # # Transparent selectors across the chart. This is what tells us the x-value of the cursor
# # selectors = alt.Chart(df_w_sex_filtered).mark_point().encode(
# #     x='year:N',
# #     opacity=alt.value(0),
# # ).add_params(
# #     nearest
# # )

# # # Draw points on the line, and highlight based on selection
# # points_men = line_men.mark_point().encode(opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
# # points_women = line_women.mark_point().encode(opacity=alt.condition(nearest, alt.value(1), alt.value(0)))

# # # Draw text labels near the points, and highlight based on selection
# # text_men = line_men.mark_text(align='left', dx=5, dy=-5).encode(text=alt.condition(nearest, 'count:Q', alt.value(' ')))
# # text_women = line_women.mark_text(align='left', dx=5, dy=-5).encode(text=alt.condition(nearest, 'count:Q', alt.value(' ')))

# # # Draw a rule at the location of the selection
# # rules = alt.Chart(df_w_sex_filtered).mark_rule(color='gray').encode(
# #     x='year:N',
# # ).transform_filter(
# #     nearest
# # )

# # # Display the line chart
# # line_chart = alt.layer(
# #     line, selectors, points_men, points_women, rules, text_men, text_women
# # )
# # line_chart

# line_men_women = line_men + line_women
# line_men_women = line_men_women#.add_params(brush).transform_filter(brush)
# line_men_women

In [None]:
# # line_chart = line_men.resolve_scale(y='independent')
# # line_chart = line_men.properties(height=200)

# # Combine the charts using layer()
# # combined_chart = alt.layer(top_names_bar_chart, line_chart)
# # combined_chart = alt.vconcat(alt.layer(top_names_bar_chart), line_chart)

# combined_chart = (top_names_bar_chart & line_men_women)
# combined_chart

### B) Visualization 2

##### Evolution of baby names over time (map)

In [None]:
df_count_per_year_and_dpt = df_wo_sex.copy()
df_count_per_year_and_dpt['year'] = df_count_per_year_and_dpt['year'].astype(str)
df_count_per_year_and_dpt = df_count_per_year_and_dpt[['year', 'department_id', 'count']].groupby(['year', 'department_id']).sum().reset_index()

max_count = df_count_per_year_and_dpt['count'].max()//2
df_count_per_year_and_dpt = df_count_per_year_and_dpt.pivot(index=['department_id'], columns='year', values='count').reset_index()
years_to_plot = list(df_count_per_year_and_dpt.columns[2:])

df_count_per_year_and_dpt

In [None]:
df_plot = df_count_per_year_and_dpt

# # Create slider
# min_year = int(min(years_to_plot))
# max_year = int(max(years_to_plot))
# slider_year = alt.binding_range(min=min_year, max=max_year, step=1, name='Year:')
# selector_year = alt.selection_point(name='year_selector', fields=['year'], bind=slider_year, value=1900)

# Create map chart
map = alt.Chart(df_map).mark_geoshape(
    stroke='white',
    strokeWidth=0.05
).transform_lookup(
    lookup='department_id',
    from_=alt.LookupData(df_plot, key='department_id', fields=years_to_plot)
).transform_fold(
    years_to_plot, as_=['year', 'count']
).transform_calculate(
    year='parseInt(datum.year)',
    count='isValid(datum.count) ? datum.count : -0',
    # name=alt.condition(alt.datum.name == default_name, alt.datum.name, alt.value(None))
    # name='datum.name'

).encode(
    color=alt.condition(
        predicate='datum.count > 0',
        if_true=alt.Color('count:Q', scale=alt.Scale(scheme='viridis')),
        if_false=alt.value('#dfdbf6')
    ),
    tooltip=[
        alt.Tooltip('department_id:N', title='Department code'),
        alt.Tooltip('department_name:N', title='Department name'),
        # alt.Tooltip('name:N', title='Name'),
        alt.Tooltip('count:Q', title='Count'),
        alt.Tooltip('year:Q', title='Year')
    ],
    # opacity=alt.condition(brush, alt.value(1), alt.value(0.2)),
).add_params(
    selector_year,
    # brush,
    # selector_name
# ).add_params(
#     brush
# ).transform_filter(
#     selector_name
).transform_filter(
    selector_year
).project(
    type='conicConformal',
    center=[2.454071, 46.279229],
    scale=3000,
    translate=[300, 300]
).properties(
    title=alt.TitleParams(text=f"Evolution of a baby name in France over time", fontSize=16),
    width=600,
    height=600
)

# Display the map
fig2 = alt.hconcat(
    fig, 
    map,
).resolve_legend(
    color="independent",
    size="independent"
)
fig2
# map

In [None]:
stop()

##### Evolution of baby names over time (bar chart)

In [None]:
df_w_sex_target_name = df_w_sex[df_w_sex['name']==default_name]
df_w_sex_target_name = df_w_sex_target_name.groupby(by=['sex', 'name', 'year']).sum().reset_index()
df_w_sex_target_name['sex'] = df_w_sex_target_name['sex'].map({1:'man', 2:'woman'})
df_w_sex_target_name

In [None]:
df_wo_sex_target_name = df_wo_sex[df_wo_sex['name']==default_name]
df_wo_sex_target_name = df_wo_sex_target_name.groupby(by=['name', 'year']).sum().reset_index()
df_wo_sex_target_name['sex'] = 'man + woman'
df_wo_sex_target_name

In [None]:
### Code adapated from https://altair-viz.github.io/gallery/multiline_tooltip.html


# Create a selection that chooses the nearest point & selects based on x-value
nearest = alt.selection_point(nearest=True, on='mouseover', fields=['year'], empty=False)

# Create line chart (men, women)
line_men_women = alt.Chart(df_w_sex_target_name).mark_line(interpolate='basis').encode(
    alt.X('year:N', scale=alt.Scale(zero=False), axis=alt.Axis(tickCount=10, labelExpr='parseInt(datum.value) % 5 === 0 ? datum.label : ""')),
    alt.Y('count:Q', scale=alt.Scale(zero=False)),
    alt.Color('sex:N', scale=alt.Scale(domain=['woman', 'man'], range=['red', 'blue'])),
    tooltip = [alt.Tooltip('year:N', title='Year'),
               alt.Tooltip('name:N', title='Name'),
               alt.Tooltip('sex:N', title='Sex'),
               alt.Tooltip('count:Q', title='Count')
    ],
)

# Create line chart (both men and women)
line_both = alt.Chart(df_wo_sex_target_name).mark_line(interpolate='basis').encode(
    alt.X('year:N', scale=alt.Scale(zero=False), axis=alt.Axis(tickCount=10, labelExpr='parseInt(datum.value) % 5 === 0 ? datum.label : ""')),
    alt.Y('count:Q', scale=alt.Scale(zero=False)),
    alt.Color('sex:N', scale=alt.Scale(domain=['man + woman'], range=['green'])),
    tooltip = [alt.Tooltip('year:N', title='Year'),
               alt.Tooltip('name:N', title='Name'),
               alt.Tooltip('sex:N', title='Sex'),
               alt.Tooltip('count:Q', title='Count')
    ],
)

# Combine the charts using layer
# line = alt.layer(
#     line_men_women, line_both
# ).properties(
#     title=alt.TitleParams(text=f"Evolution of the baby name '{target_name}' in France over time", fontSize=20),
#     width=1000,
#     height=200
# )

line = line_men_women.properties(
    title=alt.TitleParams(text=f"Evolution of the baby name '{default_name}' in France over time", fontSize=20),
    width=1000,
    height=200
)

# Transparent selectors across the chart. This is what tells us the x-value of the cursor
selectors = alt.Chart(df_w_sex_target_name).mark_point().encode(
    x='year:N',
    opacity=alt.value(0),
).add_params(
    nearest
)

# Draw points on the line, and highlight based on selection
points = line.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)

# Draw text labels near the points, and highlight based on selection
text = line.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'count:Q', alt.value(' '))
)

# Draw a rule at the location of the selection
rules = alt.Chart(df_w_sex_target_name).mark_rule(color='gray').encode(
    x='year:N',
).transform_filter(
    nearest
)

# Display the line chart
line_chart = alt.layer(
    line, selectors, points, rules, text
)
line_chart

# Final dashboard (stacked visualizations)

In [None]:
top_names_bar_chart & 