# SSA Popular Baby Names

### Import Python tools and configure Jupyter

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import datetime as dt
import glob
import os

In [3]:
import altair as alt
import altair_stiles as altstiles

In [4]:
# Register the theme provided by altair_stiles under the name "stiles" and
# make it the active Altair theme for the charts rendered in this notebook.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

In [5]:
# Let Altair accept data frames of any length instead of raising on large inputs.
alt.data_transformers.disable_max_rows()

# Widen pandas' notebook display: more columns and rows, and untruncated
# cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [6]:
# Current local date as an ISO "YYYY-MM-DD" string.
today = dt.date.today().isoformat()

---

In [7]:
# First name to analyse. It is matched exactly against the "name" column and
# also appears in the chart title and the exported file name.
name_string = "Taylor"

In [8]:
# Sex code to analyse; compared against the data's "sex" column, which holds
# "F" or "M".
gender = "F"
# Human-readable label derived from the code so the two can never disagree.
# Used in the chart's axis title and the exported file name. An unknown code
# raises KeyError here rather than silently mislabelling the output.
gender_name = {"F": "female", "M": "male"}[gender]

---

In [20]:
# Load the processed names/births table. "year" is read as a string so the
# filters below can compare it against quoted years such as "2023".
df = pd.read_csv("data/processed/names_births.csv", dtype={"year": str})

In [21]:
# Sanity check of the loaded data: the five 2023 rows with the highest
# name_percentage.
(
    df.query('year == "2023"')
    .sort_values(by="name_percentage", ascending=False)
    .head(5)
)

Unnamed: 0,name,sex,count,year,male,female,total,name_percentage,name_per_100k
1931131,Liam,M,20802,2023,1831150,1749200,3580350,1.14,1136.01
1931132,Noah,M,18995,2023,1831150,1749200,3580350,1.04,1037.33
1913598,Olivia,F,15270,2023,1831150,1749200,3580350,0.87,872.97
1931133,Oliver,M,14741,2023,1831150,1749200,3580350,0.81,805.01
1913599,Emma,F,13527,2023,1831150,1749200,3580350,0.77,773.32


In [22]:
# Keep only the rows for the chosen name and sex, ordered by year, with a
# fresh 0..n-1 index.
name = (
    df.loc[df["name"].eq(name_string) & df["sex"].eq(gender)]
    .sort_values(by="year")
    .reset_index(drop=True)
)

In [23]:
# Spot-check the filtered frame for a single year (1989).
name.loc[name["year"] == "1989"].sort_values(by="year", ascending=True).head(5)

Unnamed: 0,name,sex,count,year,male,female,total,name_percentage,name_per_100k
33,Taylor,F,4069,1989,2095982,1992183,4088165,0.2,204.25


In [24]:
# Year span actually present in the plotted frame. The y-axis title is built
# from it so the label stays correct when the source data gains a year or a
# different name is selected. "year" holds 4-digit strings as loaded, so
# min/max order chronologically.
first_year, last_year = name["year"].min(), name["year"].max()

# Line chart of the name's yearly share (name_percentage) over time.
line_chart = (
    alt.Chart(name)
    # NOTE(review): the `color` encoding below presumably takes precedence
    # over this mark-level colour — confirm which one is intended.
    .mark_line(color="#cc0000", size=2)
    .encode(
        x=alt.X(
            "year:T",  # year strings are parsed as dates for a temporal axis
            title="",
            axis=alt.Axis(tickCount=4, grid=False),
        ),
        y=alt.Y(
            "name_percentage",
            title=(
                f"Percentage of all {gender_name} names in the US, "
                f"{first_year}-{last_year}"
            ),
            stack=None,
            axis=alt.Axis(
                tickSize=0,
                domainOpacity=0,
                tickCount=6,
                offset=4,
                gridWidth=0.6,
                gridColor="#dddddd",
            ),
        ),
        # Legend is suppressed: only a single name is plotted.
        color=alt.Color("name", title=" ", legend=None),
    )
)


# Final sized and titled chart; the legend configuration only takes effect
# if a legend is re-enabled on the colour encoding above.
chart = (
    (line_chart)
    .properties(
        width=320,
        height=500,
        title=f"Popularity of the name {name_string}",
    )
    .configure_legend(
        orient="right", symbolType="stroke", labelFont="Roboto", labelFontSize=13
    )
)

chart

In [25]:
# Write the filtered rows, ordered by year, as a pretty-printed JSON array of
# records. The file name encodes the lower-cased name and the gender label.
output_path = f"data/processed/babynames_{name_string.lower()}_{gender_name}.json"
name.sort_values(by="year").to_json(output_path, orient="records", indent=4)