In [1]:
import pandas as pd
import altair as alt
import ephem
import calendar

In [2]:
# Daily US birth counts (SSA, 2000-2014). The URL is pinned to a specific
# commit so the input data cannot change between runs.
birth_url = "https://raw.githubusercontent.com/fivethirtyeight/data/3d4cab795fa601867cdd9f8f6bce566696e3a118/births/US_births_2000-2014_SSA.csv"
df = pd.read_csv(birth_url)

# Build each row's date from that row's own year/month/day columns instead of
# assigning a separately generated date range by position: a positional
# assignment raises if the row count differs and silently mislabels rows if
# the file is reordered or has a gap.
# This must run before `month` is replaced by its name below.
iso_date = pd.to_datetime(
    df[["year", "month", "date_of_month"]].rename(columns={"date_of_month": "day"})
)
df["iso_date"] = iso_date

# Moon phase as reported by ephem for each date.
df["moon_phase"] = df["iso_date"].apply(lambda x: ephem.Moon(x).moon_phase)

# Replace numeric codes with names. `calendar.month_name` is 1-based, while
# `calendar.day_name` is 0-based (0 -> Monday), hence the `- 1`.
df["month"] = df["month"].apply(lambda x: calendar.month_name[x])
df["day_of_week"] = df["day_of_week"].apply(lambda x: calendar.day_name[x - 1])

df.head()

Unnamed: 0,year,month,date_of_month,day_of_week,births,iso_date,moon_phase
0,2000,January,1,Saturday,9083,2000-01-01,0.272007
1,2000,January,2,Sunday,8006,2000-01-02,0.191606
2,2000,January,3,Monday,11363,2000-01-03,0.12297
3,2000,January,4,Tuesday,13032,2000-01-04,0.067989
4,2000,January,5,Wednesday,12558,2000-01-05,0.028386


In [3]:
# The dataset has more than 5000 rows, so we must override Altair's soft row limit.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [4]:
# Visualization 1: date vs birth rate
# `zoom` is a two-dimensional brush; `selection` toggles weekdays from the legend.
zoom = alt.selection_interval(encodings=["x", "y"])
selection = alt.selection_multi(fields=["day_of_week"], bind="legend")

# Small overview scatter. Points outside the brush are drawn light gray, and
# points for de-selected weekdays are faded.
minimap = (
    alt.Chart(df)
    .mark_point()
    .properties(
        title="Minimap -- click and drag to zoom",
        width=300,
        height=250,
    )
    .encode(
        alt.X("iso_date:T", title="Date"),
        alt.Y("births:Q", title="Daily Births"),
        color=alt.condition(zoom, "day_of_week", alt.value("lightgray")),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
        tooltip=["iso_date"],
    )
    .add_selection(zoom, selection)
)

# Legend / color order for weekdays, Monday first.
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Large scatter whose x and y scale domains are taken from the `zoom` brush,
# with weekday filtering driven by the legend-bound `selection`.
detail = (
    alt.Chart(df)
    .mark_point()
    .add_selection(selection)
    .properties(title="detail view", width=650, height=500)
    .encode(
        alt.X(
            "iso_date:T",
            title="Date",
            scale=alt.Scale(domain={"selection": zoom.name, "encoding": "x"}),
        ),
        alt.Y(
            "births:Q",
            title="Daily Births",
            scale=alt.Scale(domain={"selection": zoom.name, "encoding": "y"}),
        ),
        alt.Color(
            "day_of_week",
            sort=weekday_order,
            legend=alt.Legend(title="Click to filter"),
        ),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.1)),
        tooltip=["iso_date", "day_of_week"],
    )
)

# Ridgeline layout constants: `step` is the pixel height of each month's facet
# row; `overlap` scales the negative end of the y range (see the Y encoding).
step = 20
overlap = 1

# Ridgeline plot: one row per calendar month showing the distribution of daily
# birth counts, filled by that month's mean daily births.
ridge_month = alt.Chart(df, height=step, width=350).transform_timeunit(
    # Month of each date; used as the grouping and facet key.
    Month='month(iso_date)'
).transform_joinaggregate(
    # Attach the per-month mean to every row without collapsing rows.
    mean_births='mean(births)', groupby=['Month']
).transform_bin(
    # Bin daily births into 250-wide bins.
    # NOTE(review): the output names are listed as ['bin_max', 'bin_min'];
    # verify against Vega-Lite's bin `as` ordering which name actually holds
    # the lower bin edge.
    ['bin_max', 'bin_min'],
    bin=alt.Bin(step=250),
    field='births'
).transform_aggregate(
    # Count the days that fall in each (month, bin) combination.
    value='count()', groupby=['Month', 'mean_births', 'bin_min', 'bin_max']
).transform_impute(
    # Fill bins that have no rows for a given month with a count of 0.
    impute='value', groupby=['Month', 'mean_births'], key='bin_min', value=0
).mark_area(
    interpolate='basis',
    fillOpacity=0.8,
    stroke='lightgray',
    strokeWidth=0.5
).encode(
    # The data is already binned above, so tell the x channel not to bin again.
    alt.X('bin_min:Q', bin='binned', title='Daily Births'),
    alt.Y(
        'value:Q',
        # The range runs from `step` to a negative pixel value, so an area can
        # extend beyond the top of its own row.
        scale=alt.Scale(range=[step, -step * overlap]),
        axis=None
    ),
    alt.Fill(
        'mean_births:Q',
        legend=alt.Legend(orient="right", title="mean daily births"),
        # Fixed color domain for the per-month mean.
        scale=alt.Scale(domain=[11000, 12500], scheme='yellowgreenblue')
    )
).facet(
    row=alt.Row(
        'Month:T',
        title=None,
        # '%B' formats the row header as the full month name.
        header=alt.Header(labelAngle=0, labelAlign='left', format='%B')
    )
).properties(
    bounds='flush'
)

# Stack the minimap above the ridgeline plot ...
right_side = alt.vconcat(minimap, ridge_month)

# ... and place the detail chart to their left. Facet spacing is set to 0 and
# the view stroke is removed for the whole figure.
alt.hconcat(detail, right_side).configure_facet(spacing=0).configure_view(stroke=None)

In [5]:
# Visualization 2: moon phase vs birth rate
# `phase_brush` selects along the x encoding of whichever chart it is added to;
# `day_selection` toggles weekdays from the legend.
phase_brush = alt.selection_multi(encodings=["x"])
day_selection = alt.selection_multi(fields=["day_of_week"], bind="legend")

# Date vs. daily births, colored by moon phase. Points not matched by
# `phase_brush` are drawn almost transparent.
phase_scatter = (
    alt.Chart(df)
    .mark_point()
    .properties(title="Overview", width=600, height=600)
    .encode(
        alt.X("iso_date", title="Date"),
        alt.Y(
            "births",
            title="Daily Births",
            scale=alt.Scale(domain=(5000, 17000)),
        ),
        alt.Color(
            "moon_phase",
            scale=alt.Scale(scheme="purpleorange", reverse=False),
            legend=None,
        ),
        opacity=alt.condition(phase_brush, alt.value(1), alt.value(0.04)),
        tooltip=["iso_date", "day_of_week", "moon_phase"],
    )
    # NOTE(review): this anonymous selection is not referenced by any encoding
    # in this cell -- confirm it is intentional.
    .add_selection(alt.selection_multi())
)

# Histogram of moon phase in 0.05-wide bins over [0, 1]. `phase_brush` is
# registered on this chart, so clicking bars here populates the selection.
phase_hist = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X(
            "moon_phase",
            bin=alt.Bin(extent=[0, 1], step=.05),
            # Fixed: the closing parenthesis was missing from this axis title.
            title="Moon Phase (0 = New Moon, 1 = Full Moon)",
        ),
        y='count()',
        color=alt.Color(
            'moon_phase',
            scale=alt.Scale(scheme='purpleorange', reverse=False),
            legend=None,
        ),
    )
    .add_selection(phase_brush)
    .properties(title="Select bin(s) to filter scatterplot")
)

# Moon phase vs. daily births, colored by weekday; the legend-bound
# `day_selection` fades the weekdays that are not selected.
phase_vs_births = (
    alt.Chart(df)
    .mark_point()
    .add_selection(day_selection)
    .encode(
        alt.X("moon_phase", title="Moon Phase (0 = New Moon, 1 = Full Moon)"),
        alt.Y("births", title="Daily Births"),
        alt.Color(
            "day_of_week",
            legend=alt.Legend(title="filter by day(s)"),
            sort=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
        ),
        opacity=alt.condition(day_selection, alt.value(1), alt.value(0.1)),
        tooltip=["iso_date", "moon_phase", "day_of_week"],
    )
)


# `&` binds tighter than `|`: the histogram is stacked above this scatter, and
# that column sits to the right of the overview scatter.
phase_scatter | (phase_hist & phase_vs_births)