In [3]:
import pandas as pd
import numpy as np

In [4]:
# Load the sales records from sales.csv into a DataFrame.
# The path is relative, so the file has to sit in the notebook's
# working directory for this cell to run.
df = pd.read_csv(filepath_or_buffer='sales.csv')
df

IOError: File sales.csv does not exist

In [None]:
# A pivot table needs at least two things: the DataFrame to summarise
# and an index to group by. Let's start with Name as the index.
# NOTE(review): no `values` are given, so every remaining column goes
# through the default mean aggregation; newer pandas releases may raise
# on non-numeric columns instead of dropping them -- confirm against
# the columns in sales.csv.
pd.pivot_table(data=df, index=["Name"])

In [None]:
# How about indexing on multiple values? Passing several column names
# builds a hierarchical index -- here Manager first, then Rep.
# NOTE(review): as with any call that omits `values`, all remaining
# columns are averaged; confirm they are numeric on the pandas version
# in use.
pd.pivot_table(data=df, index=["Manager", "Rep"])

In [None]:
# Price is what we actually want to look at next, which makes the
# Account and Quantity columns noise. Drop them by naming 'Price'
# explicitly through the values argument.
pd.pivot_table(
    data=df,
    index=["Manager", "Rep"],
    values=["Price"],
)

In [None]:
# When no aggfunc is given, pivot_table averages the 'Price' column
# (the default aggregation is the mean). Pass aggfunc to get a sum or
# a count instead.
# The aggregation is named with the string "sum" rather than np.sum:
# the result is the same, but recent pandas releases emit a
# FutureWarning when NumPy reducers are passed as callables.
pd.pivot_table(df, index=["Manager", "Rep"], values=["Price"], aggfunc="sum")

In [None]:
# Columns are optional. They are an additional way to segment the
# values you care about. But remember, the aggregation functions
# are applied to the values you list.
# aggfunc is a list, so the result gets an extra top column level
# labelled 'sum'. The string "sum" is used instead of np.sum because
# recent pandas releases warn when NumPy reducers are passed as
# callables; the output and its labels are unchanged.
pd.pivot_table(df, index=["Manager", "Rep"], values=["Price"],
               columns=["Product"], aggfunc=["sum"])

In [None]:
# Quantity might be useful too, so add it to the values list.
# Both Price and Quantity are summed and split out by Product.
# "sum" (a string) replaces np.sum: same result, without the
# FutureWarning recent pandas releases raise for NumPy callables.
pd.pivot_table(df, index=["Manager", "Rep"], values=["Price", "Quantity"],
               columns=["Product"], aggfunc=["sum"])

In [None]:
# To get a different take on things, remove Product from the columns
# and add it to the index instead, giving one row per
# Manager / Rep / Product combination.
# "sum" (a string) replaces np.sum: same result, without the
# FutureWarning recent pandas releases raise for NumPy callables.
pd.pivot_table(df, index=["Manager", "Rep", "Product"],
               values=["Price", "Quantity"], aggfunc=["sum"])

In [None]:
# The margins argument lets us see totals: with margins=True an extra
# 'All' row is appended to the table.
# Two aggregations are requested, so each value column appears once
# under 'sum' and once under 'mean'. String names replace np.sum and
# np.mean: same result, without the FutureWarning recent pandas
# releases raise for NumPy callables.
pd.pivot_table(df, index=["Manager", "Rep", "Product"],
               values=["Price", "Quantity"],
               aggfunc=["sum", "mean"], margins=True)

### Independent practice

In [None]:
# Raw practice data: twelve soldiers, four per regiment, each with a
# company, a name and a pre-/post-test score. Every list has 12 entries.
raw_data = {
    # Three regiments, four consecutive rows each.
    'regiment': ['Nighthawks'] * 4 + ['Dragoons'] * 4 + ['Scouts'] * 4,
    # Within each regiment: two rows in the 1st company, two in the 2nd.
    'company': ['1st', '1st', '2nd', '2nd'] * 3,
    'name': [
        'Miller', 'Jacobson', 'Ali', 'Milner',
        'Cooze', 'Jacon', 'Ryaner', 'Sone',
        'Sloan', 'Piger', 'Riani', 'Ali',
    ],
    'preTestScore': [
        4, 24, 31, 2,
        3, 4, 24, 31,
        2, 3, 2, 3,
    ],
    'postTestScore': [
        25, 94, 57, 62,
        70, 25, 94, 57,
        62, 70, 62, 70,
    ],
}

In [None]:
# Build a DataFrame from raw_data, spelling out the column order.
# Note that this rebinds df, replacing the sales DataFrame loaded
# earlier in the notebook.
df = pd.DataFrame(
    data=raw_data,
    columns=['regiment', 'company', 'name', 'preTestScore', 'postTestScore'],
)
df

In [None]:
# Convert df into a pivot table indexed on 'regiment' and 'company',
# showing the mean pre- and post-test score for each group.
# The score columns are listed explicitly because 'name' holds strings:
# the default mean aggregation cannot average it, and pandas 2.x raises
# a TypeError there instead of silently dropping the column as older
# versions did.
pd.pivot_table(df, index=["regiment", "company"],
               values=["preTestScore", "postTestScore"])

In [None]:
# Bonus: what are the totals of postTestScore and preTestScore?
# Only the two score columns are summed; without `values`, the string
# column 'name' would be aggregated as well. margins=True appends an
# 'All' row holding the grand totals.
# "sum" (a string) replaces np.sum, which recent pandas releases warn
# about when passed as a callable; the 'sum' column label is unchanged.
df.pivot_table(index=['regiment', 'company'],
               values=['preTestScore', 'postTestScore'],
               aggfunc=['sum'], margins=True)