In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the macro-economic dataset from the CSV file (path is relative to the
# notebook's working directory) and preview its first five rows.
data = pd.read_csv('data/macrodata.csv')
data.head(5)

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [3]:
# Combine the year and quarter columns into a quarterly PeriodIndex named
# 'date'. Labels such as '1959Q1' are parsed instead of passing the
# year=/quarter= keywords to the PeriodIndex constructor, because pandas 2.2
# deprecated those keywords; the resulting index is the same.
quarter_labels = ['%dQ%d' % (year, quarter)
                  for year, quarter in zip(data.year, data.quarter)]
periods = pd.PeriodIndex(quarter_labels, freq='Q', name='date')

# Keep only the three series of interest and name the column axis 'item'.
columns = pd.Index(['realgdp', 'infl', 'unemp'], name='item')
data = data.reindex(columns=columns)

# Index each row by the timestamp marking the end of its quarter.
data.index = periods.to_timestamp('D', 'end')

# Reshape wide -> long: one row per (date, item) pair; the stacked values
# arrive in a column labelled 0, which is renamed to 'value'.
ldata = data.stack().reset_index().rename(columns={0: 'value'})

ldata[:5]

Unnamed: 0,date,item,value
0,1959-03-31 23:59:59.999999999,realgdp,2710.349
1,1959-03-31 23:59:59.999999999,infl,0.0
2,1959-03-31 23:59:59.999999999,unemp,5.8
3,1959-06-30 23:59:59.999999999,realgdp,2778.801
4,1959-06-30 23:59:59.999999999,infl,2.34


In [4]:
# Reshape long -> wide. The three arguments are the column used as the row
# index, the column whose values become the column labels, and the column
# that fills the cells. They are passed by keyword because pandas 2.0 removed
# positional arguments from DataFrame.pivot.
pivoted = ldata.pivot(index='date', columns='item', values='value')
pivoted[:5]

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31 23:59:59.999999999,0.0,2710.349,5.8
1959-06-30 23:59:59.999999999,2.34,2778.801,5.1
1959-09-30 23:59:59.999999999,2.74,2775.488,5.3
1959-12-31 23:59:59.999999999,0.27,2785.204,5.6
1960-03-31 23:59:59.999999999,2.31,2847.699,5.2


In [5]:
# Add a second value column of standard-normal noise to ldata.
# A dedicated, seeded generator makes the column reproducible on every run
# and leaves NumPy's global random state untouched.
rng = np.random.RandomState(12345)
ldata['value2'] = rng.randn(len(ldata))
ldata[:5]

Unnamed: 0,date,item,value,value2
0,1959-03-31 23:59:59.999999999,realgdp,2710.349,0.87896
1,1959-03-31 23:59:59.999999999,infl,0.0,-1.136693
2,1959-03-31 23:59:59.999999999,unemp,5.8,1.449492
3,1959-06-30 23:59:59.999999999,realgdp,2778.801,-0.309249
4,1959-06-30 23:59:59.999999999,infl,2.34,1.531796


In [6]:
# When `values` is omitted, every remaining column is used as a value column,
# so the result has hierarchical columns (value column on the outer level,
# item on the inner level). Arguments are passed by keyword because pandas 2.0
# removed positional arguments from DataFrame.pivot.
pivoted = ldata.pivot(index='date', columns='item')
pivoted[:5]

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31 23:59:59.999999999,0.0,2710.349,5.8,-1.136693,0.87896,1.449492
1959-06-30 23:59:59.999999999,2.34,2778.801,5.1,1.531796,-0.309249,0.506283
1959-09-30 23:59:59.999999999,2.74,2775.488,5.3,-1.471957,1.501241,2.056933
1959-12-31 23:59:59.999999999,0.27,2785.204,5.6,-1.418251,0.368283,-0.882004
1960-03-31 23:59:59.999999999,2.31,2847.699,5.2,-0.050105,-0.776721,0.08539


In [7]:
# Selecting the outer column label 'value' returns the sub-frame holding one
# column per item; show its first five rows.
pivoted['value'].head(5)

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31 23:59:59.999999999,0.0,2710.349,5.8
1959-06-30 23:59:59.999999999,2.34,2778.801,5.1
1959-09-30 23:59:59.999999999,2.74,2775.488,5.3
1959-12-31 23:59:59.999999999,0.27,2785.204,5.6
1960-03-31 23:59:59.999999999,2.31,2847.699,5.2


In [8]:
# pivot() is equivalent to building a hierarchical index with set_index()
# and then calling unstack() on the level that should become the columns.
unstacked = (
    ldata
    .set_index(['date', 'item'])
    .unstack(level='item')
)
unstacked.head(5)

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31 23:59:59.999999999,0.0,2710.349,5.8,-1.136693,0.87896,1.449492
1959-06-30 23:59:59.999999999,2.34,2778.801,5.1,1.531796,-0.309249,0.506283
1959-09-30 23:59:59.999999999,2.74,2775.488,5.3,-1.471957,1.501241,2.056933
1959-12-31 23:59:59.999999999,0.27,2785.204,5.6,-1.418251,0.368283,-0.882004
1960-03-31 23:59:59.999999999,2.31,2847.699,5.2,-0.050105,-0.776721,0.08539
