In [3]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import altair as alt
import pandas as pd
import numpy as np

## Specifying Data
### pandas DataFrame

In [4]:
# Seeded generator so the random walk below is identical on every run.
rand = np.random.RandomState(0)

# 100 consecutive daily timestamps starting at 2018-01-01; used as the (unnamed) index.
days = pd.date_range(start='2018', periods=100, freq='D')

# Random walk: running total of 100 standard-normal draws, stored in a single 'value' column.
data = pd.Series(np.cumsum(rand.randn(100)), index=days, name='value').to_frame()
data.head()

Unnamed: 0,value
2018-01-01,1.764052
2018-01-02,2.16421
2018-01-03,3.142948
2018-01-04,5.383841
2018-01-05,7.251399


In [9]:
# reset_index() is applied before handing the frame to Altair because:
# 1. The DataFrame index: here `data` is indexed by datetimes, and that index is not automatically
#    turned into an ordinary column when a pandas DataFrame is used directly as Altair's data source.
# 2. How Altair reads data: every field used for plotting (x and y included) must exist as a column
#    of the DataFrame. data.reset_index() moves the old index into a new column, usually named
#    'index', so the dates can be referenced directly in the encoding.
chart = (
    alt.Chart(data.reset_index())
    .mark_line()
    .encode(
        alt.X('index:T'),
        alt.Y('value:Q'),
    )
)
chart

#### Long-form vs. Wide-form Data
There are two common conventions for storing data in a dataframe, sometimes called long-form and wide-form. Both are sensible patterns for storing data in a tabular format; briefly, the difference is this:
+ wide-form data has one row per independent variable, with metadata recorded in the row and column labels.
+ long-form data has one row per observation, with metadata recorded within the table as values.

Altair’s grammar works best with long-form data, in which each row corresponds to a single observation along with its metadata.

In [10]:
# Wide form: one row per date, one price column per company.
wide_form = pd.DataFrame(
    [
        ('2007-10-01', 189.95, 89.15, 707.00),
        ('2007-11-01', 182.22, 90.56, 693.00),
        ('2007-12-01', 198.08, 92.64, 691.48),
    ],
    columns=['Date', 'AAPL', 'AMZN', 'GOOG'],
)
wide_form

Unnamed: 0,Date,AAPL,AMZN,GOOG
0,2007-10-01,189.95,89.15,707.0
1,2007-11-01,182.22,90.56,693.0
2,2007-12-01,198.08,92.64,691.48


In [11]:
# Long form: one row per (date, company) observation, with the company stored as a value.
quote_dates = ['2007-10-01', '2007-11-01', '2007-12-01']
prices_by_company = {
    'AAPL': [189.95, 182.22, 198.08],
    'AMZN': [89.15, 90.56, 92.64],
    'GOOG': [707.00, 693.00, 691.48],
}
long_form = pd.DataFrame(
    [
        (date, company, price)
        for company, prices in prices_by_company.items()
        for date, price in zip(quote_dates, prices)
    ],
    columns=['Date', 'company', 'price'],
)
long_form

Unnamed: 0,Date,company,price
0,2007-10-01,AAPL,189.95
1,2007-11-01,AAPL,182.22
2,2007-12-01,AAPL,198.08
3,2007-10-01,AMZN,89.15
4,2007-11-01,AMZN,90.56
5,2007-12-01,AMZN,92.64
6,2007-10-01,GOOG,707.0
7,2007-11-01,GOOG,693.0
8,2007-12-01,GOOG,691.48


In [12]:
# One line per company: the 'company' column of the long-form table drives the color channel.
chart = (
    alt.Chart(long_form)
    .mark_line()
    .encode(
        alt.X('Date:T'),
        alt.Y('price:Q'),
        alt.Color('company:N'),
    )
)
chart

In [13]:
# Wide-form data can be converted to the long-form layout Altair prefers with pandas' melt:
# 'Date' is kept as the identifier, every other column becomes a (company, price) row.
pd.melt(wide_form, id_vars='Date', var_name='company', value_name='price')

Unnamed: 0,Date,company,price
0,2007-10-01,AAPL,189.95
1,2007-11-01,AAPL,182.22
2,2007-12-01,AAPL,198.08
3,2007-10-01,AMZN,89.15
4,2007-11-01,AMZN,90.56
5,2007-12-01,AMZN,92.64
6,2007-10-01,GOOG,707.0
7,2007-11-01,GOOG,693.0
8,2007-12-01,GOOG,691.48


In [14]:
# To undo the melt and convert from long-form back to wide-form, pivot on the company labels.
# pivot() leaves the name 'company' on the column axis; rename_axis(columns=None) clears it so
# the result has the same layout as the original wide-form table.
(
    long_form
    .pivot(index='Date', columns='company', values='price')
    .rename_axis(columns=None)
    .reset_index()
)

company,Date,AAPL,AMZN,GOOG
0,2007-10-01,189.95,89.15,707.0
1,2007-11-01,182.22,90.56,693.0
2,2007-12-01,198.08,92.64,691.48


In [15]:
# To skip the pandas preprocessing, Altair's fold transform can reshape the wide-form table
# inside the chart itself: the listed columns are folded into (company, price) pairs.
chart = (
    alt.Chart(wide_form)
    .transform_fold(
        ['AAPL', 'AMZN', 'GOOG'],
        as_=['company', 'price'],
    )
    .mark_line()
    .encode(
        alt.X('Date:T'),
        alt.Y('price:Q'),
        alt.Color('company:N'),
    )
)
chart

### Generated Data

In [16]:
# Example of using sequence() to generate the x data inside the chart, with a calculate
# transform to compute the y data.

# The generator gets its own name so this cell no longer overwrites the `data` DataFrame
# built earlier in the notebook.
# Note that the following generator is functionally similar to
# pd.DataFrame({'x': np.arange(0, 10, 0.1)})
x_values = alt.sequence(0, 10, 0.1, as_='x')

chart = alt.Chart(x_values).transform_calculate(
    y='sin(datum.x)'
).mark_line().encode(
    x='x:Q',
    y='y:Q',
)
chart

In [17]:
# A graticule (the latitude/longitude grid lines of a geographic visualization) is another kind
# of data that is convenient to generate in the chart itself.
data = alt.graticule(step=[15, 15])

chart = (
    alt.Chart(data)
    .mark_geoshape(stroke='black')
    .project(type='orthographic', rotate=[0, -45, 0])
)
chart

## Encodings