In [2]:
import altair as alt
import pandas as pd
import numpy as np

In [3]:
from vega_datasets import data
# Load the cars example dataset from vega_datasets.
cars = data.cars()
# Preview five randomly drawn rows (no seed is set, so rows vary per run).
cars.sample(n=5)

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
267,dodge aspen,18.6,6,225.0,110.0,3620,18.7,1978-01-01,USA
171,buick skyhawk,21.0,6,231.0,110.0,3039,15.0,1975-01-01,USA
159,plymouth valiant custom,19.0,6,225.0,95.0,3264,16.0,1975-01-01,USA
84,volkswagen 411 (sw),22.0,4,121.0,76.0,2511,18.0,1972-01-01,Europe
249,bmw 320i,21.5,4,121.0,110.0,2600,12.8,1977-01-01,Europe


In [4]:
# A chart with a point mark but no encoding channels yet.
# `alt` is already imported in the notebook's first cell, so it is not
# re-imported here.
alt.Chart(cars).mark_point()

In [5]:
# Scatter plot of the cars data: fuel economy on x, horsepower on y.
alt.Chart(cars).mark_point().encode(
    x=alt.X("Miles_per_Gallon"),
    y=alt.Y("Horsepower"),
)


In [6]:
# Remote CSV holding the country-year indicator data used in the rest of
# the notebook (the URL is split across two literals only for line length).
wdi_data = (
    "https://raw.githubusercontent.com/nickeubank/"
    "practicaldatascience/master/Example_Data/wdi_plotting.csv"
)

# Download and parse the CSV, then preview five randomly drawn rows
# (no seed is set, so the preview differs between runs).
world = pd.read_csv(wdi_data)
world.sample(n=5)

Unnamed: 0,Year,Country Name,Country Code,GDP per capita (constant 2010 US$),"Population, total",CO2 emissions (metric tons per capita),"Mortality rate attributed to household and ambient air pollution, age-standardized (per 100,000 population)","PM2.5 air pollution, population exposed to levels exceeding WHO guideline value (% of total)","Life expectancy at birth, total (years)","Mortality rate, under-5 (per 1,000 live births)","Literacy rate, youth female (% of females ages 15-24)"
4604,1992,Cote d'Ivoire,CIV,1385.000815,12812428.0,0.216196,,,52.779,152.8,
9666,2015,Madagascar,MDG,469.942522,24234080.0,0.136997,,100.0,65.539,57.9,
10415,2018,Zimbabwe,ZWE,1289.146499,14438812.0,0.849793,,,61.195,55.9,
2280,1981,Lebanon,LBN,,2594299.0,2.448154,,,68.113,48.2,
4202,1990,Guam,GUM,,130480.0,,,100.0,71.925,,99.847618


In [7]:
# List every column name on its own line.
print(*world.columns, sep="\n")

Year
Country Name
Country Code
GDP per capita (constant 2010 US$)
Population, total
CO2 emissions (metric tons per capita)
Mortality rate attributed to household and ambient air pollution, age-standardized (per 100,000 population)
PM2.5 air pollution, population exposed to levels exceeding WHO guideline value (% of total)
Life expectancy at birth, total (years)
Mortality rate, under-5 (per 1,000 live births)
Literacy rate, youth female (% of females ages 15-24)


In [8]:
# Number of distinct (non-missing) country names in the data.
world["Country Name"].nunique(dropna=True)

217

In [9]:
# Keep only the 2018 observations. The explicit .copy() makes the result an
# independent DataFrame, so columns added to `world` in later cells do not
# trigger pandas' SettingWithCopyWarning.
world = world[world["Year"] == 2018].copy()

In [10]:
# Raw (unlogged) GDP per capita against under-5 mortality.
alt.Chart(world).mark_point().encode(
    x=alt.X("GDP per capita (constant 2010 US$)"),
    y=alt.Y("Mortality rate, under-5 (per 1,000 live births)"),
)


In [11]:
# Add natural-log versions of GDP per capita and under-5 mortality.
# .assign() returns a new DataFrame instead of writing into the frame
# produced by the 2018 filter, which avoids SettingWithCopyWarning and keeps
# the cell safe to re-run.
world = world.assign(
    log_gdp_per_cap=np.log(world["GDP per capita (constant 2010 US$)"]),
    log_under5_mortality_rate=np.log(
        world["Mortality rate, under-5 (per 1,000 live births)"]
    ),
)

In [12]:
# Same relationship, now on the logged columns.
alt.Chart(world).mark_point().encode(
    x=alt.X("log_gdp_per_cap"),
    y=alt.Y("log_under5_mortality_rate"),
)

In [13]:
# Logged scatter again; zero=False tells the x scale it need not include 0.
alt.Chart(world).mark_point().encode(
    x=alt.X(
        "log_gdp_per_cap",
        scale=alt.Scale(zero=False),
    ),
    y=alt.Y("log_under5_mortality_rate"),
)


In [14]:
# Logged scatter with point size driven by total population.
alt.Chart(world).mark_point().encode(
    x=alt.X(
        "log_gdp_per_cap",
        scale=alt.Scale(zero=False),
    ),
    y=alt.Y("log_under5_mortality_rate"),
    size=alt.Size("Population, total"),
)


In [15]:
# Natural log of total population, stored as a new column on `world`.
world["log_population"] = np.log(world["Population, total"])

# Log population on x, log mortality on y, point size from log GDP per capita.
# Neither the x scale nor the size scale is forced to include zero.
alt.Chart(world).mark_point().encode(
    x=alt.X(
        "log_population",
        scale=alt.Scale(zero=False),
    ),
    y=alt.Y("log_under5_mortality_rate"),
    size=alt.Size(
        "log_gdp_per_cap",
        scale=alt.Scale(zero=False),
    ),
)

In [16]:
# Reusable scatter layer: log GDP per capita vs. log under-5 mortality,
# with point size taken from total population. Later cells layer on top of it.
base = alt.Chart(world).mark_point().encode(
    x=alt.X(
        "log_gdp_per_cap",
        scale=alt.Scale(zero=False),
    ),
    y=alt.Y("log_under5_mortality_rate"),
    size=alt.Size("Population, total"),
)

In [17]:
# Render the base scatter chart defined in the previous cell.
base

In [18]:
# Regress log_under5_mortality_rate on log_gdp_per_cap (Altair's default
# regression method) and draw the fitted values as a line.
fit = (
    base
    .transform_regression(
        on="log_gdp_per_cap",
        regression="log_under5_mortality_rate",
    )
    .mark_line()
)
fit

In [19]:
# Overlay the regression line on the scatter (same as `base + fit`).
alt.layer(base, fit)

In [20]:
# LOESS smoother over the same x/y pair, drawn in red so it can be told
# apart from the regression line.
loess = (
    base
    .transform_loess(
        on="log_gdp_per_cap",
        loess="log_under5_mortality_rate",
    )
    .mark_line(color="red")
)

# Scatter, regression line, and LOESS curve layered in one chart.
base + fit + loess


In [21]:
# Two panels side by side (same as `left | right`): the base scatter, and a
# copy of it whose y channel is swapped to life expectancy.
alt.hconcat(
    base,
    base.encode(
        y=alt.Y(
            "Life expectancy at birth, total (years)",
            scale=alt.Scale(zero=False),
        )
    ),
)


In [24]:
# Scatter + regression line + a text layer that labels each point with its
# country code. The text layer is built from `world` directly rather than
# from `base`, so it does not pick up base's size encoding.
(
    base
    + fit
    + alt.Chart(world)
    .mark_text(size=10)
    .encode(
        x=alt.X(
            "log_gdp_per_cap",
            scale=alt.Scale(zero=False),
        ),
        y=alt.Y("log_under5_mortality_rate"),
        text=alt.Text("Country Code"),
    )
)

In [25]:
# Same labelled, layered chart as the previous cell, with .interactive()
# applied to the combined chart.
(
    base
    + fit
    + alt.Chart(world)
    .mark_text(size=10)
    .encode(
        x=alt.X(
            "log_gdp_per_cap",
            scale=alt.Scale(zero=False),
        ),
        y=alt.Y("log_under5_mortality_rate"),
        text=alt.Text("Country Code"),
    )
).interactive()

In [26]:
# Rebuild `base` with human-readable titles for the chart, both axes, and
# the size legend.
base = alt.Chart(world, title="GDP per Capita and Child Mortality").mark_point().encode(
    x=alt.X(
        "log_gdp_per_cap",
        scale=alt.Scale(zero=False),
        title="Log GDP per Capita",
    ),
    y=alt.Y(
        "log_under5_mortality_rate",
        title="Log Under-5 Mortality Rate",
    ),
    size=alt.Size(
        "Population, total",
        title="Population",
    ),
)

# Layer the existing `fit` line (created in an earlier cell) over the
# newly titled scatter.
alt.layer(base, fit)


In [27]:
# Keep the layered chart in a variable, then display it with its title
# overridden via .properties().
c = alt.layer(base, fit)
c.properties(title="A New Title!")

In [28]:
# Recompute the regression line from the titled `base`, this time styled as
# a red dashed line (strokeDash=[15, 15]).
fit = (
    base
    .transform_regression(
        on="log_gdp_per_cap",
        regression="log_under5_mortality_rate",
    )
    .mark_line(color="red", strokeDash=[15, 15])
)
alt.layer(base, fit)