In [2]:
import pandas as pd
import seaborn as sns

# Load seaborn's Titanic sample data and keep only the five columns used below.
df = sns.load_dataset('titanic')[
    ['age', 'sex', 'class', 'fare', 'survived']
]
# Preview the first five rows (5 is also head()'s default).
df.head(5)

Unnamed: 0,age,sex,class,fare,survived
0,22.0,male,Third,7.25,0
1,38.0,female,First,71.2833,1
2,26.0,female,Third,7.925,1
3,35.0,female,First,53.1,1
4,35.0,male,Third,8.05,0


### 피벗 테이블 함수: pivot_table()
- 행 인덱스: `index`
- 열 인덱스: `columns`
- 데이터 값: `values`
- 데이터 집계 함수: `aggfunc`

In [3]:
# Rows hold the three groups of the 'class' column, columns hold the two groups
# of the 'sex' column, and each cell is the mean of 'age' for that pair.
# observed=False spells out pandas' long-standing default for categorical keys;
# newer pandas warns when it is left implicit. It only has an effect if 'class'
# is categorical (presumably so, as loaded by seaborn -- confirm with df.dtypes).
pdf1 = pd.pivot_table(df,
                      index='class',
                      columns='sex',
                      values='age',
                      aggfunc='mean',
                      observed=False)
pdf1

sex,female,male
class,Unnamed: 1_level_1,Unnamed: 2_level_1
First,34.611765,41.281386
Second,28.722973,30.740707
Third,21.75,26.507589


In [4]:
# Same row/column layout, but now aggregate 'survived' with two functions at once.
# observed=False spells out pandas' long-standing default for categorical keys;
# newer pandas warns when it is left implicit. It only has an effect if 'class'
# is categorical (presumably so, as loaded by seaborn -- confirm with df.dtypes).
pdf2 = pd.pivot_table(df,                       # DataFrame to pivot
                      index='class',            # column used for the row index
                      columns='sex',            # column used for the column index
                      values='survived',        # column that supplies the data
                      aggfunc=['mean', 'sum'],  # aggregation functions
                      observed=False)           # keep the historical default explicit
pdf2
# About 50% of the women in third class survived (mean = 0.5).
# A total of 45 men in first class survived (sum = 45).

Unnamed: 0_level_0,mean,mean,sum,sum
sex,female,male,female,male
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
First,0.968085,0.368852,91,45
Second,0.921053,0.157407,70,17
Third,0.5,0.135447,72,47


In [5]:
# Two-level row index (class, sex), columns split by 'survived' (0 / 1), and two
# value columns ('age', 'fare') each aggregated with both mean and max. The
# resulting column index has three levels: aggregation function, value, survived.
# observed=False spells out pandas' long-standing default for categorical keys;
# newer pandas warns when it is left implicit. It only has an effect if 'class'
# is categorical (presumably so, as loaded by seaborn -- confirm with df.dtypes).
pdf3 = pd.pivot_table(df,
                      index=['class', 'sex'],
                      columns='survived',
                      values=['age', 'fare'],
                      aggfunc=['mean', 'max'],
                      observed=False)
pdf3

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,mean,mean,max,max,max,max
Unnamed: 0_level_1,Unnamed: 1_level_1,age,age,fare,fare,age,age,fare,fare
Unnamed: 0_level_2,survived,0,1,0,1,0,1,0,1
class,sex,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
First,female,25.666667,34.939024,110.604167,105.978159,50.0,63.0,151.55,512.3292
First,male,44.581967,36.248,62.89491,74.63732,71.0,80.0,263.0,512.3292
Second,female,36.0,28.080882,18.25,22.288989,57.0,55.0,26.0,65.0
Second,male,33.369048,16.022,19.488965,21.0951,70.0,62.0,73.5,39.0
Third,female,23.818182,19.329787,19.773093,12.464526,48.0,63.0,69.55,31.3875
Third,male,27.255814,22.274211,12.204469,15.579696,74.0,45.0,69.55,56.4958


## 멀티 인덱스
---

In [6]:
import pandas as pd
import seaborn as sns

# Load the same five Titanic columns again for the MultiIndex examples.
df = sns.load_dataset('titanic')[['age', 'sex', 'class', 'fare', 'survived']]
# Group by 'class' first, then 'sex'; the two keys become the levels of the
# resulting row MultiIndex.
# observed=False spells out pandas' long-standing default for categorical keys;
# newer pandas warns when it is left implicit. It only has an effect if 'class'
# is categorical (presumably so, as loaded by seaborn -- confirm with df.dtypes).
grouped = df.groupby(['class', 'sex'], observed=False)
# Mean of each remaining column (age, fare, survived) per (class, sex) group.
gdf = grouped.mean()
gdf

Unnamed: 0_level_0,Unnamed: 1_level_0,age,fare,survived
class,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
First,female,34.611765,106.125798,0.968085
First,male,41.281386,67.226127,0.368852
Second,female,28.722973,21.970121,0.921053
Second,male,30.740707,19.741782,0.157407
Third,female,21.75,16.11881,0.5
Third,male,26.507589,12.661633,0.135447


### 멀티 인덱스의 인덱싱
---
- 멀티 인덱스도 일반 데이터프레임과 동일한 방법(`loc`)으로 인덱싱한다.

In [7]:
# Select by the outer index level only: every row whose 'class' is 'First'.
# The displayed result is indexed by the remaining level, 'sex'.
gdf.loc['First']

Unnamed: 0_level_0,age,fare,survived
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,34.611765,106.125798,0.968085
male,41.281386,67.226127,0.368852


In [8]:
# Select one row by giving a label for both index levels as a tuple (class, sex).
# The displayed result is a Series with one value per column of gdf.
gdf.loc[('First', 'female')]

age          34.611765
fare        106.125798
survived      0.968085
Name: (First, female), dtype: float64