In [None]:
import altair as alt
import pandas as pd
import numpy as np

In [None]:
# The example data comes from the vega_datasets package. If the import
# below fails, install it into this kernel's environment first:
# %pip install vega_datasets
from vega_datasets import data

cars = data.cars()
# Show five randomly chosen rows as a quick look at the table.
cars.sample(n=5)

In [None]:
# `alt` is already imported in the notebook's first cell, so no import is
# repeated here. No encodings are supplied yet: the chart has a point mark
# but no columns of `cars` mapped to x, y, or any other channel.
alt.Chart(cars).mark_point()

In [None]:
# Scatter plot: Miles_per_Gallon on the x channel, Horsepower on the y channel.
alt.Chart(cars).mark_point().encode(
    x=alt.X("Miles_per_Gallon"),
    y=alt.Y("Horsepower"),
)


In [None]:
# URL of the example CSV (adjacent string literals are concatenated).
wdi_data = (
    "https://raw.githubusercontent.com/"
    "nickeubank/practicaldatascience/"
    "master/Example_Data/wdi_plotting.csv"
)
# Read the CSV straight from the URL into a DataFrame.
world = pd.read_csv(filepath_or_buffer=wdi_data)
# Preview five randomly chosen rows.
world.sample(n=5)

In [None]:
# List every column label of `world`, one per line.
for column_name in world.columns:
    print(column_name)

In [None]:
# Number of distinct values in the "Country Name" column
# (missing values are not counted).
world["Country Name"].nunique(dropna=True)

In [None]:
# Keep only the rows whose Year is 2018. The explicit .copy() makes `world`
# an independent DataFrame rather than a slice of the full table, so the
# columns added to it in later cells do not trigger SettingWithCopyWarning.
world = world[world.Year == 2018].copy()

In [None]:
# Scatter of the two untransformed indicator columns.
alt.Chart(world).mark_point().encode(
    x=alt.X("GDP per capita (constant 2010 US$)"),
    y=alt.Y("Mortality rate, under-5 (per 1,000 live births)"),
)


In [None]:
# Add natural-log versions of the two plotted indicators.
# `assign` returns a new DataFrame instead of writing columns into the
# filtered frame in place, which avoids pandas' SettingWithCopyWarning and
# makes this cell safe to re-run.
# NOTE(review): np.log gives -inf for zeros and NaN for negative or missing
# values -- confirm both source columns are strictly positive.
world = world.assign(
    log_gdp_per_cap=np.log(world["GDP per capita (constant 2010 US$)"]),
    log_under5_mortality_rate=np.log(
        world["Mortality rate, under-5 (per 1,000 live births)"]
    ),
)

In [None]:
# Same scatter as before, now using the log-transformed columns.
alt.Chart(world).mark_point().encode(
    x=alt.X("log_gdp_per_cap"),
    y=alt.Y("log_under5_mortality_rate"),
)

In [None]:
# zero=False tells the x scale it does not have to include zero.
alt.Chart(world).mark_point().encode(
    x=alt.X("log_gdp_per_cap", scale=alt.Scale(zero=False)),
    y=alt.Y("log_under5_mortality_rate"),
)


In [None]:
# Add a third channel: point size is driven by the "Population, total" column.
alt.Chart(world).mark_point().encode(
    x=alt.X("log_gdp_per_cap", scale=alt.Scale(zero=False)),
    y=alt.Y("log_under5_mortality_rate"),
    size=alt.Size("Population, total"),
)


In [None]:
# Add the natural log of total population. `assign` returns a new DataFrame
# rather than writing a column into the filtered frame in place, which
# avoids pandas' SettingWithCopyWarning and keeps the cell re-runnable.
world = world.assign(log_population=np.log(world["Population, total"]))

# Log population on x, log under-5 mortality on y, and point size driven by
# log GDP per capita; neither the x scale nor the size scale is forced to
# include zero.
alt.Chart(world).mark_point().encode(
    x=alt.X("log_population", scale=alt.Scale(zero=False)),
    y="log_under5_mortality_rate",
    size=alt.Size("log_gdp_per_cap", scale=alt.Scale(zero=False)),
)

In [None]:
# Reusable scatter layer that later cells build on: log GDP per capita on x
# (scale not forced to include zero), log under-5 mortality on y, and point
# size from "Population, total".
base = alt.Chart(world).mark_point().encode(
    x=alt.X("log_gdp_per_cap", scale=alt.Scale(zero=False)),
    y=alt.Y("log_under5_mortality_rate"),
    size=alt.Size("Population, total"),
)

In [None]:
# Display the chart stored in `base`.
base

In [None]:
# Derive a regression layer from `base`: regress log_under5_mortality_rate
# on log_gdp_per_cap and draw the result with a line mark.
fit = base.transform_regression(
    on="log_gdp_per_cap",
    regression="log_under5_mortality_rate",
).mark_line()
fit

In [None]:
# Layer the regression line on top of the scatter (same as `base + fit`).
alt.layer(base, fit)

In [None]:
# LOESS-smoothed version of the same relationship, drawn as a red line.
loess = base.transform_loess(
    on="log_gdp_per_cap",
    loess="log_under5_mortality_rate",
).mark_line(color="red")

# Scatter, regression line, and LOESS curve layered together.
base + fit + loess


In [None]:
# Two charts side by side (same as `left | right`): the original scatter,
# and a copy of it whose y channel is swapped to life expectancy with a
# scale that is not forced to include zero.
alt.hconcat(
    base,
    base.encode(
        y=alt.Y(
            "Life expectancy at birth, total (years)",
            scale=alt.Scale(zero=False),
        )
    ),
)


In [None]:
# Scatter + regression line + a text layer that writes each row's
# "Country Code" at the same x/y position as its point.
(
    base
    + fit
    + alt.Chart(world)
    .mark_text(size=10)
    .encode(
        x=alt.X("log_gdp_per_cap", scale=alt.Scale(zero=False)),
        y=alt.Y("log_under5_mortality_rate"),
        text=alt.Text("Country Code"),
    )
)

In [None]:
# Rebuild `base` with a chart title and human-readable axis/legend titles.
base = (
    alt.Chart(world, title="GDP per Capita and Child Mortality")
    .mark_point()
    .encode(
        x=alt.X(
            "log_gdp_per_cap", scale=alt.Scale(zero=False), title="Log GDP per Capita"
        ),
        y=alt.Y("log_under5_mortality_rate", title="Log Under-5 Mortality Rate"),
        size=alt.Size("Population, total", title="Population"),
    )
)

# Rebuild the regression layer from the re-titled `base`. The `fit` created
# in an earlier cell was derived from the previous, untitled `base`, so its
# encodings still carry the default column-name titles; layering that stale
# object here would combine a titled layer with an untitled one.
fit = base.transform_regression(
    "log_gdp_per_cap", "log_under5_mortality_rate"
).mark_line()

base + fit

In [None]:
# Layer scatter and regression line, then display a copy of the layered
# chart with its title property set.
c = alt.layer(base, fit)
c.properties(title="A New Title!")

In [None]:
# Recreate the regression layer from the current `base`, this time drawn as
# a red dashed line (strokeDash gives the dash/gap lengths).
fit = (
    base.transform_regression(
        on="log_gdp_per_cap",
        regression="log_under5_mortality_rate",
    )
    .mark_line(color="red", strokeDash=[15, 15])
)
alt.layer(base, fit)