In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read rock.csv into a DataFrame (one row per song) and preview
# only the first ten rows instead of rendering the whole table.

df = pd.read_csv('rock.csv')
df.head(10)

Unnamed: 0,Song Clean,ARTIST CLEAN,Release Year,COMBINED,First?,Year?,PlayCount,F*G
0,Caught Up in You,.38 Special,1982,Caught Up in You by .38 Special,1,1,82,82
1,Fantasy Girl,.38 Special,,Fantasy Girl by .38 Special,1,0,3,0
2,Hold On Loosely,.38 Special,1981,Hold On Loosely by .38 Special,1,1,85,85
3,Rockin' Into the Night,.38 Special,1980,Rockin' Into the Night by .38 Special,1,1,18,18
4,Art For Arts Sake,10cc,1975,Art For Arts Sake by 10cc,1,1,1,1
5,Kryptonite,3 Doors Down,2000,Kryptonite by 3 Doors Down,1,1,13,13
6,Loser,3 Doors Down,2000,Loser by 3 Doors Down,1,1,1,1
7,When I'm Gone,3 Doors Down,2002,When I'm Gone by 3 Doors Down,1,1,6,6
8,What's Up?,4 Non Blondes,1992,What's Up? by 4 Non Blondes,1,1,3,3
9,Take On Me,a-ha,1985,Take On Me by a-ha,1,1,1,1


In [3]:
# The simplest pivot table needs only a DataFrame and an index.
# Here ARTIST CLEAN is the index, so the result has one row per artist.
# No aggfunc is given, so every value column is averaged (the default).
#
# The numeric columns are listed explicitly in `values`: text columns
# such as Song Clean and COMBINED cannot be averaged, and pandas >= 2.0
# raises a TypeError instead of silently dropping them.
# NOTE(review): Release Year is left out as well, because the original
# output omitted it -- presumably it is stored as text; confirm dtype.

pd.pivot_table(
    df,
    index=["ARTIST CLEAN"],
    values=["F*G", "First?", "PlayCount", "Year?"],
)

Unnamed: 0_level_0,F*G,First?,PlayCount,Year?
ARTIST CLEAN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
.38 Special,46.250000,1,47.000000,0.750000
10cc,1.000000,1,1.000000,1.000000
3 Doors Down,6.666667,1,6.666667,1.000000
4 Non Blondes,3.000000,1,3.000000,1.000000
AC/DC,26.275862,1,29.862069,0.689655
Ace,1.000000,1,1.000000,1.000000
Adelitas Way,4.000000,1,4.000000,1.000000
Aerosmith,23.645161,1,26.225806,0.806452
Alanis Morissette,3.500000,1,3.500000,1.000000
Alannah Myles,1.000000,1,1.000000,1.000000


In [4]:
# A pivot table can be indexed on more than one column. Here the data
# is grouped by ARTIST CLEAN and then by Release Year, producing a
# two-level row index.
#
# As before, the numeric value columns are named explicitly so the
# default mean aggregation never touches the text columns
# (Song Clean, COMBINED), which would raise a TypeError on pandas >= 2.0.

pd.pivot_table(
    df,
    index=["ARTIST CLEAN", "Release Year"],
    values=["F*G", "First?", "PlayCount", "Year?"],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,F*G,First?,PlayCount,Year?
ARTIST CLEAN,Release Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
.38 Special,1980,18.000000,1,18.000000,1
.38 Special,1981,85.000000,1,85.000000,1
.38 Special,1982,82.000000,1,82.000000,1
10cc,1975,1.000000,1,1.000000,1
3 Doors Down,2000,7.000000,1,7.000000,1
3 Doors Down,2002,6.000000,1,6.000000,1
4 Non Blondes,1992,3.000000,1,3.000000,1
AC/DC,1975,52.500000,1,52.500000,1
AC/DC,1976,85.000000,1,85.000000,1
AC/DC,1977,3.000000,1,3.000000,1


In [5]:
# PlayCount is the column of interest; F*G, First? and Year? add
# nothing here. Restrict the pivot to PlayCount alone through the
# `values` argument, keeping the same two-level index.

df.pivot_table(
    values=["PlayCount"],
    index=["ARTIST CLEAN", "Release Year"],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,PlayCount
ARTIST CLEAN,Release Year,Unnamed: 2_level_1
.38 Special,1980,18.000000
.38 Special,1981,85.000000
.38 Special,1982,82.000000
10cc,1975,1.000000
3 Doors Down,2000,7.000000
3 Doors Down,2002,6.000000
4 Non Blondes,1992,3.000000
AC/DC,1975,52.500000
AC/DC,1976,85.000000
AC/DC,1977,3.000000


In [6]:
# By default pivot_table averages the 'PlayCount' values in each group.
# Pass `aggfunc` to pick a different aggregation: "sum" totals the play
# counts (and "count" would count the rows instead).
#
# The string name "sum" is used rather than np.sum: recent pandas
# versions emit a FutureWarning when a NumPy callable is passed here.

pd.pivot_table(
    df,
    index=["ARTIST CLEAN", "Release Year"],
    values=["PlayCount"],
    aggfunc="sum",
)

Unnamed: 0_level_0,Unnamed: 1_level_0,PlayCount
ARTIST CLEAN,Release Year,Unnamed: 2_level_1
.38 Special,1980,18
.38 Special,1981,85
.38 Special,1982,82
10cc,1975,1
3 Doors Down,2000,14
3 Doors Down,2002,6
4 Non Blondes,1992,3
AC/DC,1975,105
AC/DC,1976,85
AC/DC,1977,6
