> The greatest value of a picture is when it forces us to notice what we never expected to see.
     — John Tukey

# Layers

在 `ggplot()` 中定义的映射（`mapping`）是全局的，而在图层（`layer`）中定义的映射是局部的。也就是说，如果你在 `ggplot()` 中定义了一个 `mapping`，它会被应用到所有的 `layer`；如果你在某个 `layer` 中定义了一个 `mapping`，它只会被应用到这一个 `layer`。这里的 `layer` 指的是 `geom` 或者 `stat`，也就是各种以 `geom_` 开头的函数，例如 `geom_point`、`geom_line` 等等。

In [2]:
import pandas as pd
from lets_plot import *
from lets_plot.geo_data import *
from lets_plot.mapping import as_discrete

# Configure lets-plot's HTML output for this notebook.
# NOTE(review): presumably this has to run once before the first plot is
# displayed — confirm against the lets-plot documentation.
LetsPlot.setup_html()

The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).


In [7]:
# Load the `mpg` dataset from the Rdatasets mirror; the first CSV column is
# used as the row index.
mpg = pd.read_csv(
    "https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/mpg.csv",
    index_col=0,
)

# Give every column an explicit dtype.
mpg = mpg.astype(
    {
        # categorical columns
        "manufacturer": "category",
        "model": "category",
        "trans": "category",
        "drv": "category",
        "fl": "category",
        "class": "category",
        # floating-point columns ("float64" is the same dtype as "double")
        "displ": "float64",
        "cty": "float64",
        "hwy": "float64",
        # integer columns
        "year": "int64",
        "cyl": "int64",
    }
)

# Preview the first rows.
mpg.head()

rownames,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class
1,audi,a4,1.8,1999,4,auto(l5),f,18.0,29.0,p,compact
2,audi,a4,1.8,1999,4,manual(m5),f,21.0,29.0,p,compact
3,audi,a4,2.0,2008,4,manual(m6),f,20.0,31.0,p,compact
4,audi,a4,2.0,2008,4,auto(av),f,21.0,30.0,p,compact
5,audi,a4,2.8,1999,6,auto(l5),f,16.0,26.0,p,compact


In [6]:
# Scatter plot of `hwy` against `displ`, with `class` mapped to point colour.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", color="class"))
    + geom_point()
)

In [7]:
# Same scatter plot, with `class` mapped to point shape instead of colour.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", shape="class"))
    + geom_point()
)

In [9]:
# Same scatter plot, with `class` mapped to point size.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", size="class"))
    + geom_point()
)

In [10]:
# Same scatter plot, with `class` mapped to point transparency (alpha).
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", alpha="class"))
    + geom_point()
)

In [11]:
# `class` is still mapped to alpha, while the colour is set to a constant
# ("blue") directly on the point layer rather than through a mapping.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", alpha="class"))
    + geom_point(color="blue")
)

In [17]:
# Here the mapping is defined locally on the point layer; `ggplot()` only
# receives the data.
(
    ggplot(data=mpg)
    + geom_point(mapping=aes(x="displ", y="hwy", color="class"))
)

# Geometric objects

In [18]:
# Point layer with a constant size of 4.
ggplot(data=mpg, mapping=aes(x="displ", y="hwy")) + geom_point(size=4)

In [19]:
# Smoothing layer using method="loess", drawn with a constant size of 2.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy"))
    + geom_smooth(method="loess", size=2)
)

`lets-plot` 可以从全局或者局部获取 `mapping` 参数，但这并不意味着每个参数对所有的 `geom` 都有效。例如可以设置 `point` 的 `shape`，但是无法设置 `line` 的 `shape`。

In [20]:
# Setting a shape for a line fails; only `linetype` can be set for lines.
# NOTE(review): the mapping below uses the aesthetic name `line`, not
# `shape` — confirm this is the intended way to demonstrate the failure.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", line="drv"))
    + geom_smooth(method="loess")
)

In [21]:
# `drv` values: 4 = four-wheel drive, f = front-wheel drive,
# r = rear-wheel drive.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", linetype="drv"))
    + geom_smooth(method="loess")
)

In [22]:
# Add multiple layers: points plus a smoother. `color="drv"` is a global
# mapping, while `linetype="drv"` is mapped on the smoothing layer only.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", color="drv"))
    + geom_point()
    + geom_smooth(mapping=aes(linetype="drv"), method="loess")
)

In [5]:
# `class` is mapped to colour on the point layer only; the smoothing layer
# uses just the global x/y mapping.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy"))
    + geom_point(mapping=aes(color="class"))
    + geom_smooth()
)

In [12]:
# Draw all points, then draw the rows whose `class` is "2seater" again in two
# extra layers, each with its own `data`: a red point (size 2) and a larger
# blue point with shape=1 (size 5).
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy"))
    + geom_point()
    + geom_point(data=mpg[mpg["class"] == "2seater"], color="red", size=2)
    + geom_point(
        data=mpg[mpg["class"] == "2seater"],
        shape=1,
        size=5,
        color="blue",
    )
)

# Facets
根据分类变量将图表分割成多个子图表，每个子图表显示一个数据子集。

In [13]:
# Split the scatter plot into sub-plots by `cyl`.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy"))
    + geom_point()
    + facet_wrap(facets="cyl")
)

In [14]:
# Facet the scatter plot on a grid defined by two variables: `drv` is passed
# as the grid's `x` variable and `cyl` as its `y` variable.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy"))
    + geom_point()
    + facet_grid(x="drv", y="cyl")
)

In [15]:
# Same `drv` x `cyl` facet grid, this time with scales="free_y".
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy"))
    + geom_point()
    + facet_grid(x="drv", y="cyl", scales="free_y")
)

In [16]:
# Sub-plots by `class`, arranged in two rows; the x/y mapping is defined on
# the point layer.
(
    ggplot(data=mpg)
    + geom_point(mapping=aes(x="displ", y="hwy"))
    + facet_wrap(facets="class", nrow=2)
)

# Statistical transformations

In [3]:
# Load the `diamonds` dataset from the Rdatasets mirror; the first CSV column
# is used as the row index.
diamonds = pd.read_csv(
    "https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/diamonds.csv",
    index_col=0,
)

# Turn `cut` into an ordered categorical with the listed category order.
diamonds_cut_order = ["Fair", "Good", "Very Good", "Premium", "Ideal"]
diamonds = diamonds.astype(
    {"cut": pd.CategoricalDtype(categories=diamonds_cut_order, ordered=True)}
)

# Preview the first rows.
diamonds.head()

rownames,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [4]:
# Bar chart over `cut`; only x is mapped, no y is supplied.
ggplot(data=diamonds, mapping=aes(x="cut")) + geom_bar()

- Bar charts, histograms, and frequency polygons bin your data and then plot bin counts, the number of points that fall in each bin.

- Smoothers fit a model to your data and then plot predictions from the model.

- Boxplots compute the five-number summary of the distribution and then display that summary as a specially formatted box.

![stat](./pic/visualization-stat-bar.png)

In [5]:
# Explicitly specify the stat: the per-`cut` counts are pre-computed with
# pandas into a `counts` column, and the bars are drawn with stat="identity".
(
    ggplot(
        data=diamonds.value_counts("cut").reset_index(name="counts"),
        mapping=aes(x="cut", y="counts"),
    )
    + geom_bar(stat="identity")
)

# Position adjustments

In [8]:
# Bar chart over `drv`, with `drv` also mapped to the `color` aesthetic.
ggplot(data=mpg, mapping=aes(x="drv", color="drv")) + geom_bar()

In [9]:
# Bar chart over `drv`, with `drv` mapped to the `fill` aesthetic instead.
ggplot(data=mpg, mapping=aes(x="drv", fill="drv")) + geom_bar()

In [10]:
# With `fill` mapped to a different variable (`class`), the bars are
# stacked automatically.
ggplot(data=mpg, mapping=aes(x="drv", fill="class")) + geom_bar()

In [11]:
# Same bars, using the "fill" position adjustment.
(
    ggplot(data=mpg, mapping=aes(x="drv", fill="class"))
    + geom_bar(position="fill")
)

In [12]:
# Same bars, using the "dodge" position adjustment.
(
    ggplot(data=mpg, mapping=aes(x="drv", fill="class"))
    + geom_bar(position="dodge")
)

In [13]:
# Avoid overplotting by using the "jitter" position adjustment on the points.
(
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy"))
    + geom_point(position="jitter")
)

# Coordinate systems

In [14]:
# Geocode at the "state" level within the scope "United Kingdom" and fetch
# the boundaries at resolution 6.
uk = (
    geocode(level="state")
    .scope("United Kingdom")
    .get_boundaries(resolution=6)
)

# Display the result.
uk

Unnamed: 0,state,found name,geometry
0,Wales,Wales,"MULTIPOLYGON (((-5.35304 51.86591, -5.33064 51..."
1,Scotland,Scotland,"MULTIPOLYGON (((-8.61762 57.82752, -8.56173 57..."
2,England,England,"MULTIPOLYGON (((-6.33758 49.94591, -6.34859 49..."
3,Northern Ireland,Northern Ireland,"MULTIPOLYGON (((-8.00601 54.54594, -7.85066 54..."


In [15]:
# Draw the `uk` boundaries as a map (gray fill, white outlines) and restrict
# the view with coord_map's x/y limits.
(
    ggplot()
    + geom_map(map=uk, fill="gray", color="white")
    + coord_map(xlim=(-10, 6), ylim=(50, 59))
)

In [16]:
# City names and the state each one is looked up in (paired by position).
names = ["London", "Edinburgh", "Belfast", "Cardiff"]
states = ["England", "Scotland", "Northern Ireland", "Wales"]

# Geocode the cities, ignoring any that are not found, and take their
# centroids.
cities = (
    geocode(names=names, states=states)
    .ignore_not_found()
    .get_centroids()
)

# Display the result.
cities

Unnamed: 0,city,found name,state,geometry
0,London,London,England,POINT (-0.14406 51.48933)
1,Edinburgh,Edinburgh,Scotland,POINT (-3.18837 55.95335)
2,Belfast,Belfast,Northern Ireland,POINT (-5.93028 54.59644)
3,Cardiff,Cardiff,Wales,POINT (-3.17919 51.48165)


In [17]:
# Map of `uk` with the fill mapped to the "found name" column, the `cities`
# drawn on top as points of size 5, and the legend hidden.
(
    ggplot()
    + geom_map(mapping=aes(fill="found name"), map=uk, color="white")
    + coord_map(xlim=(-10, 6), ylim=(50, 59))
    + geom_point(data=cities, size=5)
    + theme(legend_position="none")
)