## 17. 멀티 인덱스


## 참고자료
* [Python 완전정복 시리즈] 2편 : Pandas DataFrame 완전정복 : https://wikidocs.net/book/7188

In [2]:
import pandas as pd
import numpy as np

## 하위분류반환 (xs)

In [3]:
# Sample data: two value columns plus three label columns. The label columns
# are promoted to a three-level row MultiIndex, ordered outermost to innermost
# as level0, level1, level2.
data = {
    'col1': [0, 1, 2, 3, 4],
    'col2': [5, 6, 7, 8, 9],
    'level0': ['A', 'A', 'A', 'B', 'B'],
    'level1': ['X', 'X', 'Y', 'Y', 'Z'],
    'level2': ['a', 'a', 'b', 'c', 'a'],
}
df = pd.DataFrame(data).set_index(['level0', 'level1', 'level2'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,col1,col2
level0,level1,level2,Unnamed: 3_level_1,Unnamed: 4_level_1
A,X,a,0,5
A,X,a,1,6
A,Y,b,2,7
B,Y,c,3,8
B,Z,a,4,9


In [4]:
# Cross-section: select the rows whose outermost index level (level0) is 'A'.
# The matched level is dropped, so the result is indexed by level1/level2 only.
df.xs(key='A')

Unnamed: 0_level_0,Unnamed: 1_level_0,col1,col2
level1,level2,Unnamed: 2_level_1,Unnamed: 3_level_1
X,a,0,5
X,a,1,6
Y,b,2,7


In [5]:
# A tuple key matches the leading levels in order: level0 == 'A' and
# level1 == 'X'. Both matched levels are dropped, leaving only level2.
df.xs(key=('A', 'X'))

Unnamed: 0_level_0,col1,col2
level2,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0,5
a,1,6


In [6]:
# level=1 matches the key against the second index level (level1) instead of
# the outermost one; the result keeps level0 and level2.
df.xs(key='Y', level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,col1,col2
level0,level2,Unnamed: 2_level_1,Unnamed: 3_level_1
A,b,2,7
B,c,3,8


In [7]:
# Same selection as above, but drop_level=False keeps the matched level
# (level1) in the result's index.
df.xs(key='Y', level=1, drop_level=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,col1,col2
level0,level1,level2,Unnamed: 3_level_1,Unnamed: 4_level_1
A,Y,b,2,7
B,Y,c,3,8


## 피벗화, 열의 인덱스화 (stack)

In [8]:
# 4x4 frame holding 0..15, with a two-level row index (idx / row) and
# two-level columns (val / col). Used as the input for the stack() examples.
data = [list(range(start, start + 4)) for start in range(0, 16, 4)]
idx = [
    ['idx1', 'idx1', 'idx2', 'idx2'],
    ['row1', 'row2', 'row3', 'row4'],
]
col = [
    ['val1', 'val1', 'val2', 'val2'],
    ['col1', 'col2', 'col3', 'col4'],
]
df = pd.DataFrame(data, index=idx, columns=col)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,val1,val1,val2,val2
Unnamed: 0_level_1,Unnamed: 1_level_1,col1,col2,col3,col4
idx1,row1,0,1,2,3
idx1,row2,4,5,6,7
idx2,row3,8,9,10,11
idx2,row4,12,13,14,15


In [9]:
# With no arguments the innermost column level (col1..col4) is moved into the
# row index. val1/val2 remain as columns; (val, col) pairs that do not exist
# in the original columns show up as NaN.
df.stack()

Unnamed: 0,Unnamed: 1,Unnamed: 2,val1,val2
idx1,row1,col1,0.0,
idx1,row1,col2,1.0,
idx1,row1,col3,,2.0
idx1,row1,col4,,3.0
idx1,row2,col1,4.0,
idx1,row2,col2,5.0,
idx1,row2,col3,,6.0
idx1,row2,col4,,7.0
idx2,row3,col1,8.0,
idx2,row3,col2,9.0,


In [10]:
# level=0 moves the outer column level (val1/val2) into the row index instead;
# col1..col4 remain as columns, with NaN where a combination does not exist.
df.stack(level=0)

Unnamed: 0,Unnamed: 1,Unnamed: 2,col1,col2,col3,col4
idx1,row1,val1,0.0,1.0,,
idx1,row1,val2,,,2.0,3.0
idx1,row2,val1,4.0,5.0,,
idx1,row2,val2,,,6.0,7.0
idx2,row3,val1,8.0,9.0,,
idx2,row3,val2,,,10.0,11.0
idx2,row4,val1,12.0,13.0,,
idx2,row4,val2,,,14.0,15.0


In [11]:
# Stacking both column levels leaves no columns, so the result is a Series
# with a four-level index (idx, row, val, col).
df.stack(level=(0,1))

idx1  row1  val1  col1     0.0
                  col2     1.0
            val2  col3     2.0
                  col4     3.0
      row2  val1  col1     4.0
                  col2     5.0
            val2  col3     6.0
                  col4     7.0
idx2  row3  val1  col1     8.0
                  col2     9.0
            val2  col3    10.0
                  col4    11.0
      row4  val1  col1    12.0
                  col2    13.0
            val2  col3    14.0
                  col4    15.0
dtype: float64

In [12]:
# 2x2 frame with one missing value, used to show how stack() treats NaN.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0 and
# raises AttributeError there.
data = [[np.nan, 1], [2, 3]]
idx = ['row1', 'row2']
col = ['col1', 'col2']
df = pd.DataFrame(data=data, index=idx, columns=col)
df

Unnamed: 0,col1,col2
row1,,1
row2,2.0,3


In [13]:
# Stack the columns into the row index. In the output shown, the NaN entry
# (row1, col1) is omitted from the resulting Series.
# NOTE(review): NaN handling in stack() differs across pandas versions —
# confirm against the installed version.
df.stack()

row1  col2    1.0
row2  col1    2.0
      col2    3.0
dtype: float64

In [14]:
# dropna=False keeps the NaN entry (row1, col1) in the stacked Series.
# NOTE(review): the dropna argument may be deprecated in newer pandas
# releases — confirm against the installed version.
df.stack(dropna=False)

row1  col1    NaN
      col2    1.0
row2  col1    2.0
      col2    3.0
dtype: float64

## 언피벗화, 행의 열로 변환 (unstack)

In [15]:
# Five values under a two-level index (idx / row). Note that despite the
# name `df`, this object is a Series. The (idx1, row3) combination is
# deliberately absent so that unstack() has a gap to fill.
data = list(range(1, 6))
idx = [
    ['idx1', 'idx1', 'idx2', 'idx2', 'idx2'],
    ['row1', 'row2', 'row1', 'row2', 'row3'],
]
df = pd.Series(data, index=idx)
df

idx1  row1    1
      row2    2
idx2  row1    3
      row2    4
      row3    5
dtype: int64

In [16]:
# With no arguments the innermost index level (row1..row3) becomes the
# columns. The missing (idx1, row3) combination appears as NaN.
df.unstack()

Unnamed: 0,row1,row2,row3
idx1,1.0,2.0,
idx2,3.0,4.0,5.0


In [17]:
# fill_value replaces the missing (idx1, row3) combination with '-' instead
# of NaN.
df.unstack(fill_value='-')

Unnamed: 0,row1,row2,row3
idx1,1,2,-
idx2,3,4,5


In [18]:
# Sixteen values (0..15) under a four-level index (idx / row / val / col).
# Each level list is built by repetition rather than written out in full.
# Despite the name `df`, this object is a Series.
data = list(range(16))
level0 = ['idx1'] * 8 + ['idx2'] * 8
level1 = [row for row in ('row1', 'row2', 'row3', 'row4') for _ in range(4)]
level2 = ['val1', 'val1', 'val2', 'val2'] * 4
level3 = ['col1', 'col2', 'col3', 'col4'] * 4
idx = [level0, level1, level2, level3]
df = pd.Series(data, index=idx)
df

idx1  row1  val1  col1     0
                  col2     1
            val2  col3     2
                  col4     3
      row2  val1  col1     4
                  col2     5
            val2  col3     6
                  col4     7
idx2  row3  val1  col1     8
                  col2     9
            val2  col3    10
                  col4    11
      row4  val1  col1    12
                  col2    13
            val2  col3    14
                  col4    15
dtype: int64

In [19]:
# level=2 moves the third index level (val1/val2) to the columns; the
# remaining index is (idx, row, col), with NaN where a combination is absent.
df.unstack(level=2)

Unnamed: 0,Unnamed: 1,Unnamed: 2,val1,val2
idx1,row1,col1,0.0,
idx1,row1,col2,1.0,
idx1,row1,col3,,2.0
idx1,row1,col4,,3.0
idx1,row2,col1,4.0,
idx1,row2,col2,5.0,
idx1,row2,col3,,6.0
idx1,row2,col4,,7.0
idx2,row3,col1,8.0,
idx2,row3,col2,9.0,


In [20]:
# Unstacking levels 2 and 3 together yields two-level columns (val / col)
# over an (idx, row) row index, i.e. a 4x4 frame.
df.unstack(level=(2,3))

Unnamed: 0_level_0,Unnamed: 1_level_0,val1,val1,val2,val2
Unnamed: 0_level_1,Unnamed: 1_level_1,col1,col2,col3,col4
idx1,row1,0,1,2,3
idx1,row2,4,5,6,7
idx2,row3,8,9,10,11
idx2,row4,12,13,14,15


## 인덱스 순서변경 (swaplevel)

In [21]:
# Single-column frame with a three-level row index (idx / val / row), used
# as the input for the swaplevel() examples.
data = {'col': list(range(6))}
level0 = ['idx1'] * 4 + ['idx2'] * 2
level1 = ['val1', 'val1', 'val2', 'val2', 'val3', 'val4']
level2 = [f'row{n}' for n in range(1, 7)]
idx = [level0, level1, level2]
df = pd.DataFrame(data, index=idx)
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,col
idx1,val1,row1,0
idx1,val1,row2,1
idx1,val2,row3,2
idx1,val2,row4,3
idx2,val3,row5,4
idx2,val4,row6,5


In [22]:
# With no arguments, the two innermost (lowest) index levels are swapped:
# (idx, val, row) becomes (idx, row, val).
df.swaplevel()

Unnamed: 0,Unnamed: 1,Unnamed: 2,col
idx1,row1,val1,0
idx1,row2,val1,1
idx1,row3,val2,2
idx1,row4,val2,3
idx2,row5,val3,4
idx2,row6,val4,5


In [23]:
# With a single number, that level is swapped with the innermost (lowest)
# level: (idx, val, row) becomes (row, val, idx).
df.swaplevel(0)

Unnamed: 0,Unnamed: 1,Unnamed: 2,col
row1,val1,idx1,0
row2,val1,idx1,1
row3,val2,idx1,2
row4,val2,idx1,3
row5,val3,idx2,4
row6,val4,idx2,5


In [24]:
# Explicit i/j swap the two named positions: levels 0 and 1 trade places,
# so (idx, val, row) becomes (val, idx, row).
df.swaplevel(i=0, j=1)

Unnamed: 0,Unnamed: 1,Unnamed: 2,col
val1,idx1,row1,0
val1,idx1,row2,1
val2,idx1,row3,2
val2,idx1,row4,3
val3,idx2,row5,4
val4,idx2,row6,5


## 인덱스 제거 (droplevel)

In [25]:
# 4x4 frame holding 0..15, with a two-level row index (idx / row) and
# two-level columns (col / val). Used as the input for the droplevel() examples.
data = [list(range(start, start + 4)) for start in range(0, 16, 4)]
idx = [
    ['idx1', 'idx1', 'idx2', 'idx2'],
    ['row1', 'row2', 'row3', 'row4'],
]
col = [
    ['col1', 'col1', 'col2', 'col2'],
    ['val1', 'val2', 'val3', 'val4'],
]
df = pd.DataFrame(data, index=idx, columns=col)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,col1,col1,col2,col2
Unnamed: 0_level_1,Unnamed: 1_level_1,val1,val2,val3,val4
idx1,row1,0,1,2,3
idx1,row2,4,5,6,7
idx2,row3,8,9,10,11
idx2,row4,12,13,14,15


In [26]:
# axis=0 targets the row index; level=0 removes its outer level (idx1/idx2),
# leaving row1..row4.
df.droplevel(axis=0, level=0)

Unnamed: 0_level_0,col1,col1,col2,col2
Unnamed: 0_level_1,val1,val2,val3,val4
row1,0,1,2,3
row2,4,5,6,7
row3,8,9,10,11
row4,12,13,14,15


In [27]:
# Removes the inner row level (row1..row4); the remaining idx labels are
# no longer unique (idx1, idx1, idx2, idx2).
df.droplevel(axis=0, level=1)

Unnamed: 0_level_0,col1,col1,col2,col2
Unnamed: 0_level_1,val1,val2,val3,val4
idx1,0,1,2,3
idx1,4,5,6,7
idx2,8,9,10,11
idx2,12,13,14,15


In [28]:
# axis=1 targets the columns; level=0 removes the outer column level
# (col1/col2), leaving val1..val4.
df.droplevel(axis=1, level=0)

Unnamed: 0,Unnamed: 1,val1,val2,val3,val4
idx1,row1,0,1,2,3
idx1,row2,4,5,6,7
idx2,row3,8,9,10,11
idx2,row4,12,13,14,15


In [None]:
# The original cell was left unfinished (`axis=` with no value), which is a
# SyntaxError. It is completed with the one combination this section had not
# yet shown: axis=1 with level=1 removes the inner column level (val1..val4),
# leaving the outer labels col1, col1, col2, col2.
df.droplevel(axis=1, level=1)