In [2]:
from pandas_datareader import data as web
from pandas import Series,DataFrame
import pandas as pd
import numpy as np

### pandas有一个Panel数据结构,可以将其看作一个三维版的DF,pandas的大部分开发工作都集中在表格型数据的操作上,
### 因为这些数据更常见,而且层次化索引也使得多数情况下没有必要使用真正的N维数组
### 使用一个由DF对象组成的字典或一个三维ndarray来创建Panel对象

In [3]:
# Fetch Yahoo price data for each ticker and collect the resulting
# DataFrames in a dict keyed by ticker symbol.
all_data = {
    symbol: web.get_data_yahoo(symbol,'1/1/2000','1/1/2018')
    for symbol in ['AAPL','IBM','MSFT','GOOG']
}
all_data

{'AAPL':                   Open        High         Low       Close   Adj Close  \
 Date                                                                     
 1999-12-31    3.604911    3.674107    3.553571    3.671875    3.291592   
 2000-01-03    3.745536    4.017857    3.631696    3.997768    3.583733   
 2000-01-04    3.866071    3.950893    3.613839    3.660714    3.281587   
 2000-01-05    3.705357    3.948661    3.678571    3.714286    3.329610   
 2000-01-06    3.790179    3.821429    3.392857    3.392857    3.041471   
 2000-01-07    3.446429    3.607143    3.410714    3.553571    3.185540   
 2000-01-10    3.642857    3.651786    3.383929    3.491071    3.129513   
 2000-01-11    3.426339    3.549107    3.232143    3.312500    2.969436   
 2000-01-12    3.392857    3.410714    3.089286    3.113839    2.791350   
 2000-01-13    3.374439    3.526786    3.303571    3.455357    3.097498   
 2000-01-14    3.571429    3.651786    3.549107    3.587054    3.215554   
 2000-01-18    3.

In [4]:
# Build a 3-D Panel from the dict of per-ticker DataFrames; the dict keys
# (ticker symbols) become the items axis.
# NOTE(review): pd.Panel was deprecated in pandas 0.20 and removed in 0.25 —
# confirm the environment pins an older pandas before re-running.
pdata = pd.Panel(all_data)

In [5]:
# Show the Panel's summary repr (dimensions and the range of each axis).
pdata

<class 'pandas.core.panel.Panel'>
Dimensions: 4 (items) x 4529 (major_axis) x 6 (minor_axis)
Items axis: AAPL to MSFT
Major_axis axis: 1999-12-31 00:00:00 to 2017-12-29 00:00:00
Minor_axis axis: Open to Volume

In [6]:
# Rebuild the Panel from all_data before swapping so this cell is idempotent:
# reassigning pdata from itself would swap the axes back on every re-run and
# break the later pdata['Adj Close'] lookup.
# After the swap the items axis holds the price fields (Open ... Volume) and
# the minor axis holds the ticker symbols.
pdata = pd.Panel(all_data).swapaxes('items','minor')
pdata

<class 'pandas.core.panel.Panel'>
Dimensions: 6 (items) x 4529 (major_axis) x 4 (minor_axis)
Items axis: Open to Volume
Major_axis axis: 1999-12-31 00:00:00 to 2017-12-29 00:00:00
Minor_axis axis: AAPL to MSFT

In [8]:
# Selecting a single item returns a DataFrame indexed by date with one
# column per ticker.
pdata['Adj Close']

Unnamed: 0_level_0,AAPL,GOOG,IBM,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1999-12-31,3.291592,,79.187996,38.771053
2000-01-03,3.583733,,85.152336,38.708794
2000-01-04,3.281587,,82.261917,37.401215
2000-01-05,3.329610,,85.152336,37.795563
2000-01-06,3.041471,,83.684181,36.529484
2000-01-07,3.185540,,83.317116,37.006855
2000-01-10,3.129513,,86.620468,37.276661
2000-01-11,2.969436,,87.354546,36.321934
2000-01-12,2.791350,,87.721558,35.138874
2000-01-13,3.097498,,86.803993,35.803043


### 基于ix的标签索引被推广到了三个维度,因此可以选取指定日期或日期范围内的所有数据(注意:ix自pandas 0.20起已弃用,建议改用loc)

In [9]:
# Label-based selection across all three axes: every item and every ticker
# for a single date on the major axis.
# .loc is used instead of .ix, which is deprecated since pandas 0.20.
pdata.loc[:,'6/1/2012',:]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
AAPL,81.308571,81.807144,80.074287,80.141426,71.841476,130246900.0
GOOG,284.047546,284.474762,282.338654,283.645172,283.645172,6155600.0
IBM,190.119995,191.720001,188.600006,189.080002,161.216751,5206400.0
MSFT,28.76,28.959999,28.440001,28.450001,24.545383,56634300.0


In [10]:
# One item ('Adj Close') on one date, across all tickers; the result is a
# Series indexed by ticker.
# .loc is used instead of .ix, which is deprecated since pandas 0.20.
pdata.loc['Adj Close','5/22/2012',:]

AAPL     71.326653
GOOG    298.458801
IBM     167.816193
MSFT     25.675592
Name: 2012-05-22 00:00:00, dtype: float64

### 另一个用于呈现面板数据(尤其是对拟合统计模型)的办法是"堆积式的"DF形式

In [11]:
# Slice the major (date) axis from 2012-05-22 onward, then flatten the Panel
# into a "stacked" DataFrame indexed by (Date, minor) with one column per item.
# .loc is used instead of .ix, which is deprecated since pandas 0.20.
stacked = pdata.loc[:,'5/22/2012':,:].to_frame()
stacked

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Adj Close,Volume
Date,minor,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2012-05-22,AAPL,81.364288,81.982857,78.940002,79.567146,71.326653,173717600.0
2012-05-22,GOOG,304.737976,304.921783,296.074310,298.458801,298.458801,6143400.0
2012-05-22,IBM,198.039993,198.259995,196.059998,196.820007,167.816193,3567100.0
2012-05-22,MSFT,29.690001,29.879999,29.500000,29.760000,25.675592,39504900.0
2012-05-23,AAPL,79.642860,81.828575,79.032860,81.508568,73.067009,146224400.0
2012-05-23,GOOG,298.881073,302.830383,296.630707,302.760834,302.760834,6397400.0
2012-05-23,IBM,195.529999,196.490005,193.199997,196.119995,167.219315,4100000.0
2012-05-23,MSFT,29.350000,29.400000,28.639999,29.110001,25.114807,65171000.0
2012-05-24,AAPL,82.267143,82.357140,80.175713,80.760002,72.395958,124057500.0
2012-05-24,GOOG,302.611816,303.982880,297.500061,299.879578,299.879578,3807000.0


### DF有一个相应的to_panel方法,它是to_frame的逆运算

In [12]:
# Convert the stacked (Date, minor)-indexed DataFrame back into a Panel,
# i.e. the inverse of to_frame.
stacked.to_panel()

<class 'pandas.core.panel.Panel'>
Dimensions: 6 (items) x 1412 (major_axis) x 4 (minor_axis)
Items axis: Open to Volume
Major_axis axis: 2012-05-22 00:00:00 to 2017-12-29 00:00:00
Minor_axis axis: AAPL to MSFT