In [75]:
import pandas as pd
import numpy as np

import requests
import io
from tempfile import TemporaryFile

# Pivoting "long" to "wide" format

First we download the example macroeconomic dataset (`macrodata.csv`) from the pydata-book repository.

In [76]:
# Download the example CSV. The timeout keeps the cell from hanging forever on
# a stalled connection, and raise_for_status() stops here on an HTTP error
# instead of letting an error page be parsed as CSV further down.
r = requests.get(
    'https://raw.githubusercontent.com/wesm/pydata-book/2nd-edition/examples/macrodata.csv',
    timeout=30,
)
r.raise_for_status()

In [77]:
# Wrap the downloaded text in an in-memory text buffer so it can be read like a file.
raw_data = io.StringIO(initial_value=r.text)

In [78]:
# Rewind first: read_csv consumes the buffer, so re-running this cell without
# seek(0) would read from an already-exhausted StringIO and fail.
raw_data.seek(0)
data = pd.read_csv(raw_data)

In [79]:
# Preview the first five rows of the freshly parsed frame.
data.head(n=5)

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [80]:
# Combine the 'year' and 'quarter' columns into one PeriodIndex named 'date'.
periods = pd.PeriodIndex(year=data['year'], quarter=data['quarter'], name='date')

In [81]:
# The three indicators to keep, held in an Index named 'item'.
columns = pd.Index(name='item', data=['realgdp', 'infl', 'unemp'])

In [82]:
# Keep only the three indicator columns listed in `columns`.
# NOTE: this rebinds `data`, so the 'year' and 'quarter' columns used to build
# `periods` are no longer present afterwards -- re-run the read_csv cell before
# re-running the PeriodIndex cell.
data = data.reindex(columns=columns)

In [83]:
# Replace the integer row index with timestamps at the end of each quarter
# (daily frequency), derived from the quarterly periods.
data.index = periods.to_timestamp(freq='D', how='end')

In [84]:
# Wide -> long: stack the item columns into rows, then turn the (date, item)
# index back into ordinary columns with the stacked numbers labelled 'value'.
ldata = (
    data
    .stack()
    .reset_index(name='value')
)

In [87]:
# Show the first six long-format rows (two dates x three items).
ldata.head(6)

Unnamed: 0,date,item,value
0,1959-03-31,realgdp,2710.349
1,1959-03-31,infl,0.0
2,1959-03-31,unemp,5.8
3,1959-06-30,realgdp,2778.801
4,1959-06-30,infl,2.34
5,1959-06-30,unemp,5.1


## Pivoting

In [89]:
# DataFrame.pivot(index=None, columns=None, values=None)
# Pass the arguments by keyword: positional arguments to pivot are deprecated
# and rejected by pandas 2.0+, while keywords work on every pandas version.

pivoted = ldata.pivot(index='date', columns='item', values='value')
pivoted.head()

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31,0.0,2710.349,5.8
1959-06-30,2.34,2778.801,5.1
1959-09-30,2.74,2775.488,5.3
1959-12-31,0.27,2785.204,5.6
1960-03-31,2.31,2847.699,5.2


Oh wow. That is cool. That is really, really cool.

But what if there are two value columns to reshape at the same time?

In [90]:
# Add a second value column of standard-normal noise. A dedicated, seeded
# generator makes the numbers identical on every run (Restart & Run All)
# without touching NumPy's global random state.
ldata['value2'] = np.random.RandomState(12345).randn(len(ldata))
ldata[:10]

Unnamed: 0,date,item,value,value2
0,1959-03-31,realgdp,2710.349,0.64783
1,1959-03-31,infl,0.0,-0.114846
2,1959-03-31,unemp,5.8,-0.162241
3,1959-06-30,realgdp,2778.801,-0.475013
4,1959-06-30,infl,2.34,0.111732
5,1959-06-30,unemp,5.1,0.208461
6,1959-09-30,realgdp,2775.488,0.53809
7,1959-09-30,infl,2.74,-1.537131
8,1959-09-30,unemp,5.3,-1.568731
9,1959-12-31,realgdp,2785.204,0.345913


In [92]:
# With `values` omitted, every remaining column ('value' and 'value2') is
# pivoted, giving hierarchical columns. Keyword arguments are used because
# pandas 2.0+ no longer accepts positional arguments to pivot.
pivoted = ldata.pivot(index='date', columns='item')
pivoted[:5]

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31,0.0,2710.349,5.8,-0.114846,0.64783,-0.162241
1959-06-30,2.34,2778.801,5.1,0.111732,-0.475013,0.208461
1959-09-30,2.74,2775.488,5.3,-1.537131,0.53809,-1.568731
1959-12-31,0.27,2785.204,5.6,0.254781,0.345913,1.147604
1960-03-31,2.31,2847.699,5.2,1.174483,-0.351373,0.033583


In [93]:
# Select the 'value' block from the top column level and preview five rows.
pivoted['value'].head(5)

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31,0.0,2710.349,5.8
1959-06-30,2.34,2778.801,5.1
1959-09-30,2.74,2775.488,5.3
1959-12-31,0.27,2785.204,5.6
1960-03-31,2.31,2847.699,5.2


Yeah that's really cool. I really should learn all of this better.

Calling `pivot` this way is equivalent to creating a hierarchical index with `set_index` and then reshaping it with `unstack`.

In [94]:
# Same reshape without pivot: index by (date, item), then move the 'item'
# level from the rows into the columns.
unstacked = (
    ldata
    .set_index(keys=['date', 'item'])
    .unstack(level='item')
)

In [95]:
# Preview the first seven rows of the unstacked frame.
unstacked.head(7)

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31,0.0,2710.349,5.8,-0.114846,0.64783,-0.162241
1959-06-30,2.34,2778.801,5.1,0.111732,-0.475013,0.208461
1959-09-30,2.74,2775.488,5.3,-1.537131,0.53809,-1.568731
1959-12-31,0.27,2785.204,5.6,0.254781,0.345913,1.147604
1960-03-31,2.31,2847.699,5.2,1.174483,-0.351373,0.033583
1960-06-30,0.14,2834.39,5.2,-1.084217,-0.990765,-0.16801
1960-09-30,2.7,2839.022,5.6,-0.069453,-0.632133,-0.288371


## Melting

In [96]:
# Small wide-format example: one identifier column ('key') and three
# measurement columns ('A', 'B', 'C').
df = pd.DataFrame(
    dict(
        key=['foo', 'bar', 'baz'],
        A=[1, 2, 3],
        B=[4, 5, 6],
        C=[7, 8, 9],
    )
)

In [97]:
# Display the wide-format example frame before it is melted.
df

Unnamed: 0,A,B,C,key
0,1,4,7,foo
1,2,5,8,bar
2,3,6,9,baz


In [100]:
# Wide -> long: keep 'key' as the identifier and fold the remaining columns
# into (spam, value) pairs, where 'spam' holds the original column name.
melted = pd.melt(
    df,
    id_vars=['key'],
    var_name='spam',
)
melted

Unnamed: 0,key,spam,value
0,foo,A,1
1,bar,A,2
2,baz,A,3
3,foo,B,4
4,bar,B,5
5,baz,B,6
6,foo,C,7
7,bar,C,8
8,baz,C,9


Lol, why did they call that melting?

In [101]:
# Pivot the melted frame back to wide form: one row per 'key', one column per
# 'spam' label. Keyword arguments are used because pandas 2.0+ no longer
# accepts positional arguments to pivot.
melted.pivot(index='key', columns='spam', values='value')

spam,A,B,C
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,2,5,8
baz,3,6,9
foo,1,4,7
