# Get births data

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt

In [3]:
# Load the processed daily births table from disk.
src = pd.read_csv(filepath_or_buffer="data/processed/daily_births_1994-2014.csv")

In [4]:
# Work on a deep copy so the frame loaded from disk (`src`) stays untouched.
df = src.copy(deep=True)

In [5]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,year,month,date_of_month,day_of_week,births
0,1994,1,1,6,8096
1,1994,1,2,7,7772
2,1994,1,3,1,10142
3,1994,1,4,2,11248
4,1994,1,5,3,11053


In [6]:
# Collapse the daily rows to one row per calendar day (month, day of month):
# mean births, number of records, and total births, rounded to whole numbers.
# NOTE(review): the sorted preview below shows 25 records for most dates and
# 6 for 2/29, while 1994-2014 spans 21 years with 5 leap years. Check whether
# the input contains overlapping source years before trusting mean/sum.
_per_calendar_day = df.groupby(by=["month", "date_of_month"]).agg(
    {"births": ["mean", "count", "sum"]}
)
birthdays = _per_calendar_day.round(decimals=0).reset_index(drop=False)

In [7]:
# Flatten the two-level column labels by concatenating the level values,
# e.g. ("births", "mean") -> "birthsmean" and ("month", "") -> "month".
birthdays.columns = list(map("".join, birthdays.columns.to_flat_index()))

In [8]:
# Human-readable "month/day" label (e.g. "2/29") used in chart tooltips.
_month_text = birthdays["month"].astype(str)
_day_text = birthdays["date_of_month"].astype(str)
birthdays["display"] = _month_text + "/" + _day_text

In [9]:
# Show the calendar days backed by the fewest records first.
birthdays.sort_values(by="birthscount", ascending=True).head(n=5)

Unnamed: 0,month,date_of_month,birthsmean,birthscount,birthssum,display
59,2,29,10701.0,6,64206,2/29
0,1,1,7849.0,25,196215,1/1
248,9,5,11769.0,25,294232,9/5
247,9,4,11391.0,25,284778,9/4
246,9,3,11076.0,25,276898,9/3


### Ranks

In [10]:
_mean_births = birthdays["birthsmean"]

# Ascending rank; tied days share the average of their positions.
birthdays["default_rank"] = _mean_births.rank(method="average", ascending=True)
# Descending rank (1 = most births); tied days all take the largest rank in the tie.
birthdays["max_rank"] = _mean_births.rank(ascending=False, method="max")
# Ascending rank expressed as a fraction of all days.
birthdays["pct_rank"] = _mean_births.rank(method="average", ascending=True, pct=True)

In [36]:
# Preview the first five rows, now including the rank columns.
birthdays.head(n=5)

Unnamed: 0,month,date_of_month,birthsmean,birthscount,birthssum,display,default_rank,max_rank,pct_rank
0,1,1,7849.0,25,196215,1/1,2.0,365.0,0.005464
1,1,2,9437.0,25,235926,1/2,5.0,362.0,0.013661
2,1,3,11024.0,25,275594,1/3,148.0,219.0,0.404372
3,1,4,11132.0,25,278293,1/4,182.0,185.0,0.497268
4,1,5,10865.0,25,271618,1/5,92.5,275.0,0.252732


In [None]:
import jenkspy

In [46]:
# Jenks natural breaks over the rounded per-day average births, using 9 classes.
# The result displayed below has 10 values and its first two entries are equal.
jenkspy.jenks_breaks(birthdays["birthsmean"], n_classes=9)

[6596.0,
 6596.0,
 8915.0,
 10286.0,
 10837.0,
 11097.0,
 11353.0,
 11653.0,
 11984.0,
 12468.0]

### Simple heatmap

In [47]:
# Class breaks from Jenks natural breaks (values copied from the
# jenkspy.jenks_breaks(..., n_classes=9) result shown above).
class_breaks = [
    6596.0,
    6596.0,
    8915.0,
    10286.0,
    10837.0,
    11097.0,
    11353.0,
    11653.0,
    11984.0,
    12468.0,
]

# The Jenks result repeats its lowest break. A piecewise scale pairs the i-th
# domain value with the i-th range colour and expects both lists to have the
# same length, so feeding the raw list (10 values) against 9 colours shifts
# every colour by one break and leaves the top break (12468.0) without a colour.
# Drop repeated values while keeping the ascending order.
color_stops = list(dict.fromkeys(class_breaks))

# Define corresponding colors for the color stops (light = fewer births).
# color_scheme = ["#eff3ff", "#bdd7e7", "#6baed6", "#2171b5", "#084594"]
color_scheme = [
    "#fff5f0",
    "#fee0d2",
    "#fcbba1",
    "#fc9272",
    "#fb6a4a",
    "#ef3b2c",
    "#cb181d",
    "#a50f15",
    "#67000d",
]

# Fail loudly if the breaks or the palette are edited out of step.
assert len(color_stops) == len(color_scheme), (
    f"{len(color_stops)} color stops but {len(color_scheme)} colors; "
    "a piecewise scale needs one color per stop"
)

# Create the heatmap: one cell per calendar day, months as rows, days as columns.
chart = (
    alt.Chart(birthdays, title="Average daily births: 1994-2014")
    .mark_rect()
    .encode(
        x="date_of_month:O",
        y="month:O",
        color=alt.Color(
            "birthsmean:Q",
            scale=alt.Scale(
                domain=color_stops, range=color_scheme, type="linear", nice=True
            ),
            title="< Fewer births  |  More births >",
        ),
        tooltip=[
            alt.Tooltip("display:O", title="Birthday"),
            alt.Tooltip("birthsmean:Q", title="Average births"),
            alt.Tooltip("max_rank", title="Rank"),
        ],
    )
    .configure_legend(orient="top")
)

# Display the chart
chart.display()

In [32]:
# Transposed heatmap (months across, days down) coloured with Altair's
# built-in "orangered" scheme instead of hand-picked colour stops.
(
    alt.Chart(birthdays, title="Average daily births: 1994-2014")
    .mark_rect()
    .encode(
        x="month:O",
        y="date_of_month:O",
        color=alt.Color(
            "birthsmean",
            title="< Fewer births  |  More births >",
            scale=alt.Scale(scheme="orangered"),
        ),
        tooltip=[
            alt.Tooltip("display:O", title="Birthday"),
            alt.Tooltip("birthsmean:Q", title="Average births"),
            alt.Tooltip("max_rank", title="Rank"),
        ],
    )
    .properties(width=300, height=800)
    .configure_legend(orient="top")
)

### Export

In [13]:
# Write the per-calendar-day summary to CSV without the positional index.
birthdays.to_csv(
    path_or_buf="data/processed/birthdays_counts_sum_mean.csv",
    index=False,
)

In [40]:
# Write the same summary as a single pretty-printed JSON array of row objects.
birthdays.to_json(
    path_or_buf="data/processed/birthdays_counts_sum_mean.json",
    orient="records",
    lines=False,
    indent=4,
)