# Use `.reset_index` to turn `.groupby("x").y.agg` into a DataFrame
- A groupby aggregation on a single column returns a Series
- I often need that Series's index labels repeated on every row, as regular columns, for further analysis
- `.reset_index()` to the rescue

In [1]:
import pandas as pd
from pydataset import data

In [2]:
# Load the 'tips' dataset through pydataset's `data` helper.
df = data('tips')
# Preview the first rows to see which columns are available.
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
1,16.99,1.01,Female,No,Sun,Dinner,2
2,10.34,1.66,Male,No,Sun,Dinner,3
3,21.01,3.5,Male,No,Sun,Dinner,3
4,23.68,3.31,Male,No,Sun,Dinner,2
5,24.59,3.61,Female,No,Sun,Dinner,4


## A Series is not a big deal on a one-column groupby

In [3]:
# Mean tip per sex: grouping on a single key returns a Series indexed by `sex`.
(
    df
    .groupby("sex")["tip"]
    .mean()
)

sex
Female    2.833448
Male      3.089618
Name: tip, dtype: float64

In [4]:
# Bingo: reset_index() moves the `sex` index into an ordinary column,
# so the result is a DataFrame with `sex` and `tip` columns.
(
    df
    .groupby("sex")["tip"]
    .mean()
    .reset_index()
)

Unnamed: 0,sex,tip
0,Female,2.833448
1,Male,3.089618


## A Series with a MultiIndex is more challenging, and we need the index labels "duplicated" on every row

In [5]:
# Total bill per (time, smoker) pair: two grouping keys give a Series
# whose index has two levels, with repeated outer labels left blank on display.
(
    df
    .groupby(["time", "smoker"])["total_bill"]
    .sum()
)

time    smoker
Dinner  No        2130.14
        Yes       1530.16
Lunch   No         767.29
        Yes        400.18
Name: total_bill, dtype: float64

## Turning the Series into a DataFrame keeps the multi-level index, which is not what I need

In [6]:
# Wrapping the grouped Series in pd.DataFrame keeps (time, smoker) as a
# two-level index instead of turning them into columns.
pd.DataFrame(
    df
    .groupby(["time", "smoker"])["total_bill"]
    .sum()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill
time,smoker,Unnamed: 2_level_1
Dinner,No,2130.14
Dinner,Yes,1530.16
Lunch,No,767.29
Lunch,Yes,400.18


In [7]:
# Bingo: reset_index() turns both index levels into ordinary columns,
# so every row carries its own `time` and `smoker` values.
(
    df
    .groupby(["time", "smoker"])["total_bill"]
    .sum()
    .reset_index()
)

Unnamed: 0,time,smoker,total_bill
0,Dinner,No,2130.14
1,Dinner,Yes,1530.16
2,Lunch,No,767.29
3,Lunch,Yes,400.18


In [8]:
# Mean tip per (time, smoker, sex): three grouping keys give a Series
# with a three-level index.
(
    df
    .groupby(["time", "smoker", "sex"])["tip"]
    .mean()
)

time    smoker  sex   
Dinner  No      Female    3.044138
                Male      3.158052
        Yes     Female    2.949130
                Male      3.123191
Lunch   No      Female    2.459600
                Male      2.941500
        Yes     Female    2.891000
                Male      2.790769
Name: tip, dtype: float64

## Repeat the MultiIndex labels on every row to show the new level of observation as a DataFrame

In [9]:
# BINGO! reset_index() spells out time, smoker and sex on every row,
# giving one flat row per group.
(
    df
    .groupby(["time", "smoker", "sex"])["tip"]
    .mean()
    .reset_index()
)

Unnamed: 0,time,smoker,sex,tip
0,Dinner,No,Female,3.044138
1,Dinner,No,Male,3.158052
2,Dinner,Yes,Female,2.94913
3,Dinner,Yes,Male,3.123191
4,Lunch,No,Female,2.4596
5,Lunch,No,Male,2.9415
6,Lunch,Yes,Female,2.891
7,Lunch,Yes,Male,2.790769


## This Gets Even More Useful With Multiple Aggregate Functions!

In [10]:
# Passing a list of function names to agg() yields one column per function;
# reset_index() then moves the three grouping keys into columns alongside them.
(
    df
    .groupby(["time", "smoker", "sex"])["tip"]
    .agg(["mean", "median"])
    .reset_index()
)

Unnamed: 0,time,smoker,sex,mean,median
0,Dinner,No,Female,3.044138,3.0
1,Dinner,No,Male,3.158052,3.0
2,Dinner,Yes,Female,2.94913,3.0
3,Dinner,Yes,Male,3.123191,3.0
4,Lunch,No,Female,2.4596,2.0
5,Lunch,No,Male,2.9415,2.405
6,Lunch,Yes,Female,2.891,2.5
7,Lunch,Yes,Male,2.790769,2.2


In [14]:
# Same pattern with four aggregates: each name in the list becomes a column,
# and reset_index() flattens the grouping keys into columns next to them.
(
    df
    .groupby(["time", "smoker", "sex"])["tip"]
    .agg(["min", "median", "max", "std"])
    .reset_index()
)

Unnamed: 0,time,smoker,sex,min,median,max,std
0,Dinner,No,Female,1.0,3.0,5.2,1.117885
1,Dinner,No,Male,1.25,3.0,9.0,1.497034
2,Dinner,Yes,Female,1.0,3.0,6.5,1.306243
3,Dinner,Yes,Male,1.0,3.0,10.0,1.596451
4,Lunch,No,Female,1.25,2.0,5.17,1.078369
5,Lunch,No,Male,1.44,2.405,6.7,1.485623
6,Lunch,Yes,Female,2.0,2.5,5.0,1.057286
7,Lunch,Yes,Male,1.58,2.2,5.0,1.095738


In [13]:
# describe() produces the summary columns (count, mean, std, min, quartiles, max)
# per group; reset_index() flattens the grouping keys into columns as before.
(
    df
    .groupby(["time", "smoker", "sex"])["tip"]
    .describe()
    .reset_index()
)

Unnamed: 0,time,smoker,sex,count,mean,std,min,25%,50%,75%,max
0,Dinner,No,Female,29.0,3.044138,1.117885,1.0,2.45,3.0,3.61,5.2
1,Dinner,No,Male,77.0,3.158052,1.497034,1.25,2.0,3.0,3.71,9.0
2,Dinner,Yes,Female,23.0,2.94913,1.306243,1.0,2.115,3.0,3.5,6.5
3,Dinner,Yes,Male,47.0,3.123191,1.596451,1.0,2.0,3.0,3.72,10.0
4,Lunch,No,Female,25.0,2.4596,1.078369,1.25,1.68,2.0,2.92,5.17
5,Lunch,No,Male,20.0,2.9415,1.485623,1.44,2.0,2.405,3.55,6.7
6,Lunch,Yes,Female,10.0,2.891,1.057286,2.0,2.0025,2.5,3.4175,5.0
7,Lunch,Yes,Male,13.0,2.790769,1.095738,1.58,2.0,2.2,4.0,5.0
