In [17]:
# Load necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Render inline figures in 'retina' format to avoid blurry images
%config InlineBackend.figure_format = 'retina'
# Use seaborn's 'talk' context: larger scale for plots in notebooks
sns.set_context('talk')

# Ignore warnings
# NOTE(review): filterwarnings('ignore') silences every warning category,
# including library deprecation warnings -- consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

# Enable multiple cell outputs: display the value of every top-level
# expression in a cell instead of only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

* `pivot_table` is an alternative to `groupby`
* `groupby` on a single column with a single aggregation returns a Series
* `pivot_table` returns a DataFrame

In [18]:
# Load seaborn's 'tips' dataset into a DataFrame and preview the first rows
tips = sns.load_dataset('tips')
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [28]:
# Total number of missing values in the whole DataFrame:
# the first sum() counts NaNs per column, the second adds those counts up
tips.isna().sum().sum()

0

In [19]:
# Using groupby
result = tips.groupby('sex')['total_bill'].sum()
result

sex
Male      3256.82
Female    1570.95
Name: total_bill, dtype: float64

In [20]:
# Confirm the container type produced by the groupby aggregation
type(result)

pandas.core.series.Series

In [21]:
# pivot_table approach: total bill per sex, returned as a DataFrame.
# The aggregation is given as the string alias 'sum' instead of np.sum:
# recent pandas versions emit a FutureWarning when NumPy callables are
# passed as aggfunc and recommend the string alias, which gives the same result.
result_pivot = tips.pivot_table(values='total_bill', index='sex', aggfunc='sum')
result_pivot

Unnamed: 0_level_0,total_bill
sex,Unnamed: 1_level_1
Male,3256.82
Female,1570.95


In [22]:
# Confirm the container type produced by pivot_table
type(result_pivot)

pandas.core.frame.DataFrame

# using groupby

In [23]:
# Mean, median and sum of total_bill for every (sex, day) pair.
# String aliases replace np.mean / np.median / np.sum: recent pandas versions
# emit a FutureWarning for NumPy callables in agg(); the resulting column
# names ('mean', 'median', 'sum') are unchanged.
# reset_index() moves the group keys from the index back into regular columns.
(
    tips
    .groupby(['sex', 'day'])['total_bill']
    .agg(['mean', 'median', 'sum'])
    .reset_index()
)

Unnamed: 0,sex,day,mean,median,sum
0,Male,Thur,18.714667,16.975,561.44
1,Male,Fri,19.857,17.215,198.57
2,Male,Sat,20.802542,18.24,1227.35
3,Male,Sun,21.887241,20.725,1269.46
4,Female,Thur,16.715312,13.785,534.89
5,Female,Fri,14.145556,15.38,127.31
6,Female,Sat,19.680357,18.36,551.05
7,Female,Sun,19.872222,17.41,357.7


# using pivot_table

In [24]:
# Same summary with pivot_table: the index columns become a (sex, day)
# MultiIndex and each aggregation gets its own column level.
# String aliases replace np.mean / np.median / np.sum, which recent pandas
# versions flag with a FutureWarning when passed as aggfunc.
tips.pivot_table(values='total_bill',
                 index=['sex', 'day'],
                 aggfunc=['mean', 'median', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,median,sum
Unnamed: 0_level_1,Unnamed: 1_level_1,total_bill,total_bill,total_bill
sex,day,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Male,Thur,18.714667,16.975,561.44
Male,Fri,19.857,17.215,198.57
Male,Sat,20.802542,18.24,1227.35
Male,Sun,21.887241,20.725,1269.46
Female,Thur,16.715312,13.785,534.89
Female,Fri,14.145556,15.38,127.31
Female,Sat,19.680357,18.36,551.05
Female,Sun,19.872222,17.41,357.7


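* `index`: column(s) to group by; they become the row index of the result
* `values`: column(s) to aggregate
* `aggfunc`: aggregation function(s) applied to `values`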

In [25]:
# Median total_bill for every (sex, day) pair.
# 'median' replaces np.median: recent pandas versions emit a FutureWarning
# when NumPy callables are passed as aggfunc.
# fill_value replaces missing cells in the resulting table; every (sex, day)
# pair has data here, so no cell is actually filled with 0.
pivoted = tips.pivot_table(values='total_bill',
                           index=['sex', 'day'],
                           aggfunc='median',
                           fill_value=0)
pivoted

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill
sex,day,Unnamed: 2_level_1
Male,Thur,16.975
Male,Fri,17.215
Male,Sat,18.24
Male,Sun,20.725
Female,Thur,13.785
Female,Fri,15.38
Female,Sat,18.36
Female,Sun,17.41


In [31]:
# Median total_bill for every (smoker, day) pair.
# 'median' replaces np.median: recent pandas versions emit a FutureWarning
# when NumPy callables are passed as aggfunc.
# fill_value is the value written into missing cells of the result
# (presumably set to a conspicuous number so filled cells stand out -- TODO confirm).
# Every (smoker, day) pair has data, so the value never appears in the output.
pivoted = tips.pivot_table(values='total_bill',
                           index=['smoker', 'day'],
                           aggfunc='median',
                           fill_value=22220000)
pivoted

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill
smoker,day,Unnamed: 2_level_1
Yes,Thur,16.47
Yes,Fri,13.42
Yes,Sat,20.39
Yes,Sun,23.1
No,Thur,15.95
No,Fri,19.235
No,Sat,17.82
No,Sun,18.43


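* `pivot_table` has a `fill_value` parameter that replaces missing values in the resulting table; `groupby` has no equivalent parameter. In the two examples above it has no visible effect because every group combination contains data.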