# This info is entirely from the following source: http://pbpython.com/pandas-pivot-table-explained.html

In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_excel("/home/enumtheworld/Downloads/sales-funnel.xlsx")
df.head()

Unnamed: 0,Account,Name,Rep,Manager,Product,Quantity,Price,Status
0,714466,Trantow-Barrows,Craig Booker,Debra Henley,CPU,1,30000,presented
1,714466,Trantow-Barrows,Craig Booker,Debra Henley,Software,1,10000,presented
2,714466,Trantow-Barrows,Craig Booker,Debra Henley,Maintenance,2,5000,pending
3,737550,"Fritsch, Russel and Anderson",Craig Booker,Debra Henley,CPU,1,35000,declined
4,146832,Kiehn-Spinka,Daniel Hilton,Debra Henley,CPU,2,65000,won


## Define status column as a category and set the order we want to view

In [6]:
df["Status"] = df["Status"].astype("category")
df["Status"].cat.set_categories(["won", "pending", "presented", "declined"], inplace=True)

In [20]:
pd.pivot_table(df, index=["Manager", "Rep"], values=["Price"], aggfunc=[np.sum, 'count', 'max'])

Unnamed: 0_level_0,Unnamed: 1_level_0,sum,count,max
Unnamed: 0_level_1,Unnamed: 1_level_1,Price,Price,Price
Manager,Rep,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Debra Henley,Craig Booker,80000,4,35000
Debra Henley,Daniel Hilton,115000,3,65000
Debra Henley,John Smith,40000,2,35000
Fred Anderson,Cedric Moss,110000,4,65000
Fred Anderson,Wendy Yule,177000,4,100000


## same pivot with columns

In [7]:
pd.pivot_table(df,index=["Manager","Rep"],values=["Price"],
               columns=["Product"],aggfunc=[np.sum])

Unnamed: 0_level_0,Unnamed: 1_level_0,sum,sum,sum,sum
Unnamed: 0_level_1,Unnamed: 1_level_1,Price,Price,Price,Price
Unnamed: 0_level_2,Product,CPU,Maintenance,Monitor,Software
Manager,Rep,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
Debra Henley,Craig Booker,65000.0,5000.0,,10000.0
Debra Henley,Daniel Hilton,105000.0,,,10000.0
Debra Henley,John Smith,35000.0,5000.0,,
Fred Anderson,Cedric Moss,95000.0,5000.0,,10000.0
Fred Anderson,Wendy Yule,165000.0,7000.0,5000.0,


In [10]:
pd.pivot_table(df, index=["Manager", "Rep"], values=["Price"], 
aggfunc=['count'], columns=["Product"], fill_value=0, margins=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,count,count,count,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Price,Price,Price,Price,Price
Unnamed: 0_level_2,Product,CPU,Maintenance,Monitor,Software,All
Manager,Rep,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
Debra Henley,Craig Booker,2,1,0,1,4
Debra Henley,Daniel Hilton,2,0,0,1,3
Debra Henley,John Smith,1,1,0,0,2
Fred Anderson,Cedric Moss,2,1,0,1,4
Fred Anderson,Wendy Yule,2,1,1,0,4
All,,9,4,1,3,17


## Note how status is ordered based on our earlier category definition

In [9]:
pd.pivot_table(df, index=["Manager", "Status"], values=["Price"], 
               aggfunc=[np.sum], fill_value=0, margins=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,sum
Unnamed: 0_level_1,Unnamed: 1_level_1,Price
Manager,Status,Unnamed: 2_level_2
Debra Henley,declined,70000
Debra Henley,pending,50000
Debra Henley,presented,50000
Debra Henley,won,65000
Fred Anderson,declined,65000
Fred Anderson,pending,5000
Fred Anderson,presented,45000
Fred Anderson,won,172000
All,,522000


## Filter the data using standard DataFrame functions

In [46]:
_.query('Status == ["pending", "won"]')

Unnamed: 0,Account,Name,Rep,Manager,Product,Quantity,Price,Status
2,714466,Trantow-Barrows,Craig Booker,Debra Henley,Maintenance,2,5000,pending
4,146832,Kiehn-Spinka,Daniel Hilton,Debra Henley,CPU,2,65000,won
5,218895,Kulas Inc,Daniel Hilton,Debra Henley,CPU,2,40000,pending
7,412290,Jerde-Hilpert,John Smith,Debra Henley,Maintenance,2,5000,pending
