In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the wide-format fruit table: one row per state, one column per fruit.
fruit = pd.read_csv(filepath_or_buffer='data/fruit.csv')
fruit

Unnamed: 0,State,Apple,Orange,Banana
0,Texas,12,10,40
1,Arizona,9,7,12
2,Florida,0,14,190


## Stack <-> (Unstack)

In [3]:
# Promote 'State' from a regular column to the row index so that stack()
# only touches the fruit columns.
# NOTE: this rebinds `fruit`; once 'State' is the index it is no longer a
# column, so reload the CSV before re-running this cell.
fruit = fruit.set_index(keys='State')
fruit

Unnamed: 0_level_0,Apple,Orange,Banana
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Texas,12,10,40
Arizona,9,7,12
Florida,0,14,190


In [5]:
# stack() pivots the column labels (Apple / Orange / Banana) into an inner
# index level, giving one value per (State, fruit) pair.
# reset_index() then turns both index levels back into ordinary columns; the
# unnamed level and the values receive the placeholder labels 'level_1' and 0.
fruit_tidy = (
    fruit
    .stack()
    .reset_index()
)
fruit_tidy

Unnamed: 0,State,level_1,0
0,Texas,Apple,12
1,Texas,Orange,10
2,Texas,Banana,40
3,Arizona,Apple,9
4,Arizona,Orange,7
5,Arizona,Banana,12
6,Florida,Apple,0
7,Florida,Orange,14
8,Florida,Banana,190


In [6]:
# Replace the placeholder labels ('level_1', 0) with meaningful column names.
# Labels are assigned positionally, so the order must match the existing columns.
fruit_tidy.columns = pd.Index(['State', 'fruit', 'weight'])
fruit_tidy

Unnamed: 0,State,fruit,weight
0,Texas,Apple,12
1,Texas,Orange,10
2,Texas,Banana,40
3,Arizona,Apple,9
4,Arizona,Orange,7
5,Arizona,Banana,12
6,Florida,Apple,0
7,Florida,Orange,14
8,Florida,Banana,190


## Melt <-> (Pivot)

In [7]:
# Start again from the raw CSV (the earlier `fruit` had 'State' as its index).
# set_index followed by reset_index moves 'State' into the index and straight
# back out as the first column, so melt() below sees 'State' as a regular column.
fruit = (
    pd.read_csv('data/fruit.csv')
    .set_index('State')
    .reset_index()
)
fruit

Unnamed: 0,State,Apple,Orange,Banana
0,Texas,12,10,40
1,Arizona,9,7,12
2,Florida,0,14,190


In [8]:
# Wide -> long with melt: 'State' is kept as the identifier, while each fruit
# column is unpivoted into a ('fruit', 'weight') pair of columns.
# Rows come out grouped by fruit column rather than by state.
pd.melt(
    fruit,
    id_vars=['State'],
    value_vars=['Apple', 'Orange', 'Banana'],
    var_name='fruit',
    value_name='weight',
)

Unnamed: 0,State,fruit,weight
0,Texas,Apple,12
1,Arizona,Apple,9
2,Florida,Apple,0
3,Texas,Orange,10
4,Arizona,Orange,7
5,Florida,Orange,14
6,Texas,Banana,40
7,Arizona,Banana,12
8,Florida,Banana,190


## Unstack / Pivot

In [13]:
# Long-format Series with a two-level index (State, fruit name): the starting
# point for the unstack examples below.
(
    fruit
    .set_index('State')
    .stack()
)

State          
Texas    Apple      12
         Orange     10
         Banana     40
Arizona  Apple       9
         Orange      7
         Banana     12
Florida  Apple       0
         Orange     14
         Banana    190
dtype: int64

In [11]:
# By default unstack() pivots the innermost (last) index level: each distinct
# value in that level becomes a column label. level=-1 spells out that default.
(
    fruit
    .set_index('State')
    .stack()
    .unstack(level=-1)
)

Unnamed: 0_level_0,Apple,Orange,Banana
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Texas,12,10,40
Arizona,9,7,12
Florida,0,14,190


In [12]:
# To turn the outermost (first) index level into the columns instead, pass its
# position: level 0 is 'State', so the states become the column labels.
(
    fruit
    .set_index('State')
    .stack()
    .unstack(level=0)
)

State,Texas,Arizona,Florida
Apple,12,9,0
Orange,10,7,14
Banana,40,12,190


In [14]:
# pivot() is the inverse of melt: rebuild the wide table from the tidy frame.
# Unlike the stack/unstack round trip, the row and column labels come back sorted.
fruit_tidy.pivot(index='State', columns='fruit', values='weight')

fruit,Apple,Banana,Orange
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Arizona,9,12,7
Florida,0,190,14
Texas,12,40,10


## Example

In [15]:
# Load the flights table (one row per flight) and preview the first rows.
flights = pd.read_csv(filepath_or_buffer='data/flights.csv')
flights.head()

Unnamed: 0,MONTH,DAY,WEEKDAY,AIRLINE,ORG_AIR,DEST_AIR,SCHED_DEP,DEP_DELAY,AIR_TIME,DIST,SCHED_ARR,ARR_DELAY,DIVERTED,CANCELLED
0,1,1,4,WN,LAX,SLC,1625,58.0,94.0,590,1905,65.0,0,0
1,1,1,4,UA,DEN,IAD,823,7.0,154.0,1452,1333,-13.0,0,0
2,1,1,4,MQ,DFW,VPS,1305,36.0,85.0,641,1453,35.0,0,0
3,1,1,4,AA,DFW,DCA,1555,7.0,126.0,1192,1935,-7.0,0,0
4,1,1,4,WN,LAX,MCI,1720,48.0,166.0,1363,2225,39.0,0,0


In [16]:
# Mean of the 0/1 CANCELLED flag per (AIRLINE, WEEKDAY) group, i.e. the
# cancellation rate. The result is a Series with a two-level index.
agg = (
    flights
    .groupby(['AIRLINE', 'WEEKDAY'])['CANCELLED']
    .mean()
)
agg

AIRLINE  WEEKDAY
AA       1          0.032106
         2          0.007341
         3          0.011949
         4          0.015004
         5          0.014151
                      ...   
WN       3          0.014118
         4          0.007911
         5          0.005828
         6          0.010132
         7          0.006066
Name: CANCELLED, Length: 98, dtype: float64

In [17]:
# Pivot the innermost index level (WEEKDAY) into columns: airlines as rows,
# weekdays as columns. Naming the level is equivalent to the default here.
agg.unstack(level='WEEKDAY')

WEEKDAY,1,2,3,4,5,6,7
AIRLINE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AA,0.032106,0.007341,0.011949,0.015004,0.014151,0.018667,0.021837
AS,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B6,0.0,0.012658,0.0,0.0,0.0,0.0,0.0
DL,0.006068,0.005208,0.005131,0.00194,0.001982,0.003195,0.001294
EV,0.03413,0.023918,0.02291,0.026895,0.013111,0.022504,0.030233
F9,0.016129,0.005376,0.0,0.0,0.005155,0.01105,0.015625
HA,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MQ,0.086785,0.032819,0.025145,0.039146,0.028,0.038356,0.055777
NK,0.035354,0.013158,0.013953,0.013216,0.012821,0.0199,0.00939
OO,0.030581,0.011156,0.014478,0.013627,0.026399,0.024125,0.031385


In [18]:
# Pivot the outermost index level (AIRLINE, position 0) into columns instead:
# weekdays as rows, airlines as columns.
agg.unstack(level='AIRLINE')

AIRLINE,AA,AS,B6,DL,EV,F9,HA,MQ,NK,OO,UA,US,VX,WN
WEEKDAY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,0.032106,0.0,0.0,0.006068,0.03413,0.016129,0.0,0.086785,0.035354,0.030581,0.018914,0.026906,0.007194,0.012708
2,0.007341,0.0,0.012658,0.005208,0.023918,0.005376,0.0,0.032819,0.013158,0.011156,0.017498,0.018018,0.0,0.019562
3,0.011949,0.0,0.0,0.005131,0.02291,0.0,0.0,0.025145,0.013953,0.014478,0.007799,0.004545,0.0,0.014118
4,0.015004,0.0,0.0,0.00194,0.026895,0.0,0.0,0.039146,0.013216,0.013627,0.007719,0.003953,0.014184,0.007911
5,0.014151,0.0,0.0,0.001982,0.013111,0.005155,0.0,0.028,0.012821,0.026399,0.010195,0.00905,0.006667,0.005828
6,0.018667,0.0,0.0,0.003195,0.022504,0.01105,0.0,0.038356,0.0199,0.024125,0.006682,0.004695,0.0,0.010132
7,0.021837,0.0,0.0,0.001294,0.030233,0.015625,0.0,0.055777,0.00939,0.031385,0.013487,0.022814,0.014815,0.006066


## Pivot Table

In [19]:
# Quick reminder of the raw columns before building the pivot table.
flights.head(n=5)

Unnamed: 0,MONTH,DAY,WEEKDAY,AIRLINE,ORG_AIR,DEST_AIR,SCHED_DEP,DEP_DELAY,AIR_TIME,DIST,SCHED_ARR,ARR_DELAY,DIVERTED,CANCELLED
0,1,1,4,WN,LAX,SLC,1625,58.0,94.0,590,1905,65.0,0,0
1,1,1,4,UA,DEN,IAD,823,7.0,154.0,1452,1333,-13.0,0,0
2,1,1,4,MQ,DFW,VPS,1305,36.0,85.0,641,1453,35.0,0,0
3,1,1,4,AA,DFW,DCA,1555,7.0,126.0,1192,1935,-7.0,0,0
4,1,1,4,WN,LAX,MCI,1720,48.0,166.0,1363,2225,39.0,0,0


In [20]:
# pivot_table does the groupby + unstack from the previous section in one call:
# mean CANCELLED per airline (rows) and weekday (columns).
# fill_value=0 would replace any airline/weekday combination with no flights by 0.
flights.pivot_table(
    index='AIRLINE',
    columns='WEEKDAY',
    values='CANCELLED',
    aggfunc='mean',
    fill_value=0,
)

WEEKDAY,1,2,3,4,5,6,7
AIRLINE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AA,0.032106,0.007341,0.011949,0.015004,0.014151,0.018667,0.021837
AS,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B6,0.0,0.012658,0.0,0.0,0.0,0.0,0.0
DL,0.006068,0.005208,0.005131,0.00194,0.001982,0.003195,0.001294
EV,0.03413,0.023918,0.02291,0.026895,0.013111,0.022504,0.030233
F9,0.016129,0.005376,0.0,0.0,0.005155,0.01105,0.015625
HA,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MQ,0.086785,0.032819,0.025145,0.039146,0.028,0.038356,0.055777
NK,0.035354,0.013158,0.013953,0.013216,0.012821,0.0199,0.00939
OO,0.030581,0.011156,0.014478,0.013627,0.026399,0.024125,0.031385
