In [1]:
import numpy as np
import pandas as pd

In [2]:
# Small student roster used by the examples below: one row per student.
# Fred's first test score is given as None, which pandas stores as NaN
# (and which makes the whole test1 column floating point).
df = pd.DataFrame(
    dict(
        name=['Adam', 'Bob', 'Dave', 'Fred'],
        age=[15, 16, 16, 15],
        test1=[95, 81, 89, None],
        test2=[80, 82, 84, 88],
        teacher=['Ashby', 'Ashby', 'Jones', 'Jones'],
    )
)

In [3]:
# Render the student frame to inspect it; Fred has no test1 score, so that cell is missing (NaN).
df

Unnamed: 0,age,name,teacher,test1,test2
0,15,Adam,Ashby,95.0,80
1,16,Bob,Ashby,81.0,82
2,16,Dave,Jones,89.0,84
3,15,Fred,Jones,,88


## Pivoting

In [4]:
# Toy purchase log: one row per (item, store) price observation on a given day.
# Bacon is listed twice for day 'M' (once per store), so the (item, day) pair
# is not unique in this frame.
costs = pd.DataFrame(
    dict(
        item=['bacon', 'cheese', 'bacon'],
        cost=[2.5, 4.5, 3.2],
        store=['SuperS', 'DollarM', 'DollarM'],
        day=['M', 'M', 'M'],
    )
)

In [5]:
# Render the purchases frame: bacon appears twice on day 'M' (once per store).
costs

Unnamed: 0,cost,day,item,store
0,2.5,M,bacon,SuperS
1,4.5,M,cheese,DollarM
2,3.2,M,bacon,DollarM


In [6]:
# pivot() only reshapes; it cannot combine rows. Bacon occurs twice for day
# 'M', so the (item, day) pair is not unique and the call raises ValueError.
# The error is the point of this cell, so catch and show it instead of
# letting it abort a top-to-bottom run of the notebook.
try:
    costs.pivot(index='item', columns='day')
except ValueError as err:
    print('ValueError: {}'.format(err))

ValueError: Index contains duplicate entries, cannot reshape

In [7]:
# Re-display the student frame for reference; every (teacher, name) pair in it is unique.
df

Unnamed: 0,age,name,teacher,test1,test2
0,15,Adam,Ashby,95.0,80
1,16,Bob,Ashby,81.0,82
2,16,Dave,Jones,89.0,84
3,15,Fred,Jones,,88


In [8]:
# Reshape so each teacher is a row and each student name a column. No `values`
# is given, so every remaining column (age, test1, test2) becomes its own
# top-level column group; students not taught by a teacher show up as NaN.
df.pivot(
    columns='name',
    index='teacher',
)

Unnamed: 0_level_0,age,age,age,age,test1,test1,test1,test1,test2,test2,test2,test2
name,Adam,Bob,Dave,Fred,Adam,Bob,Dave,Fred,Adam,Bob,Dave,Fred
teacher,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Ashby,15.0,16.0,,,95.0,81.0,,,80.0,82.0,,
Jones,,,16.0,15.0,,,89.0,,,,84.0,88.0


In [9]:
# Mark rows whose (day, item) combination already occurred in an earlier row.
# The first occurrence is kept as False; only later repeats are flagged True.
costs.duplicated(subset=['day', 'item'], keep='first')

0    False
1    False
2     True
dtype: bool

In [10]:
# Re-display the purchases frame; rows 0 and 2 share the same (day, item) pair.
costs

Unnamed: 0,cost,day,item,store
0,2.5,M,bacon,SuperS
1,4.5,M,cheese,DollarM
2,3.2,M,bacon,DollarM


In [11]:
# Unlike pivot(), pivot_table() aggregates duplicate (item, day) pairs, using
# the mean by default, so the two bacon prices collapse to their average.
# `values=['cost']` limits the aggregation to the numeric column: the text
# `store` column cannot be averaged, and recent pandas versions raise on it
# instead of silently dropping it. Passing a list keeps the 'cost' column level.
costs.pivot_table(index='item', columns='day', values=['cost'])

Unnamed: 0_level_0,cost
day,M
item,Unnamed: 1_level_2
bacon,2.85
cheese,4.5


In [12]:
# Aggregate every (item, day) group two ways: the largest value and the number
# of rows, applied to both remaining columns (cost and store).
# 'max' is passed by name rather than as the Python builtin, because pandas
# deprecates builtins such as max/min/sum as aggregation callables; `len` is
# kept as-is so the resulting column label stays 'len'.
costs.pivot_table(index='item', columns='day', aggfunc=['max', len])

Unnamed: 0_level_0,max,max,len,len
Unnamed: 0_level_1,cost,store,cost,store
day,M,M,M,M
item,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
bacon,3.2,SuperS,2.0,2.0
cheese,4.5,DollarM,1.0,1.0


In [13]:
# margins=True appends an 'All' row and an 'All' column holding the same
# aggregate (here the default mean) computed over every item / every day.
# `values=['cost']` restricts the mean to the numeric column; the text `store`
# column cannot be averaged and recent pandas versions raise on it.
costs.pivot_table(index='item', columns='day', values=['cost'], margins=True)

Unnamed: 0_level_0,cost,cost
day,M,All
item,Unnamed: 1_level_2,Unnamed: 2_level_2
bacon,2.85,2.85
cheese,4.5,4.5
All,3.4,3.4


In [14]:
# Combine several aggregations with margins: each of max and len gets its own
# 'All' row and 'All' column summarising the whole frame.
# 'max' is passed by name rather than as the Python builtin, because pandas
# deprecates builtins such as max/min/sum as aggregation callables; `len` is
# kept as-is so the resulting column label stays 'len'.
costs.pivot_table(index='item', columns='day', aggfunc=['max', len], margins=True)

Unnamed: 0_level_0,max,max,max,max,len,len,len,len
Unnamed: 0_level_1,cost,cost,store,store,cost,cost,store,store
day,M,All,M,All,M,All,M,All
item,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
bacon,3.2,3.2,SuperS,SuperS,2.0,2.0,2.0,2.0
cheese,4.5,4.5,DollarM,DollarM,1.0,1.0,1.0,1.0
All,4.5,4.5,SuperS,SuperS,3.0,3.0,3.0,3.0
