In [1]:
import numpy as np
import pandas as pd

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity="all"

## Pandas 다중 인덱스 (multi index)
- 행이나 열 인덱스가 계층으로 구성된 인덱스

### 1. 시리즈의 다중 인덱스

###### 1) 난수 데이터를 갖는 시리즈일 때 

In [5]:
index = [np.array(['bar', 'bar', 'baz', 'baz',
                  'foo','foo','qux','qux']),
        np.array(['one', 'two', 'one', 'two',
                 'one', 'two', 'one', 'two'])]
index #두 개의 level을 갖게 됨. 

[array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
       dtype='<U3'),
 array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
       dtype='<U3')]

In [6]:
#두 개의 level을 갖는 (다중) 인덱스를 갖는 시리즈 생성
s1 = pd.Series(np.random.randn(8), index = index)

In [7]:
s1

bar  one   -1.024024
     two   -0.530575
baz  one    1.738948
     two    0.939966
foo  one    0.735257
     two   -0.167458
qux  one    0.037448
     two   -0.131981
dtype: float64

In [8]:
s1.index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

###### 2) 키가 튜플 형태인 딕셔너리의 데이터를 갖는 시리즈일 때 
- 키가 튜플 형태인 딕셔너리 = 시리즈의 인덱스가 여러개 

In [3]:
data = {('James','Eng'):100,
       ('James','Math'):90,
       ('Ted', 'Eng'):90,
       ('Ted', 'Math'):70,
       ('Adam','Eng'):85,
       ('Adam','Math'):90}
data

{('James', 'Eng'): 100,
 ('James', 'Math'): 90,
 ('Ted', 'Eng'): 90,
 ('Ted', 'Math'): 70,
 ('Adam', 'Eng'): 85,
 ('Adam', 'Math'): 90}

In [4]:
s2=pd.Series(data=data)
s2 #멀티 인덱스 형태의 시리즈 ! 

James  Eng     100
       Math     90
Ted    Eng      90
       Math     70
Adam   Eng      85
       Math     90
dtype: int64

###### 3) 멀티 인덱스의 이름 지정 
- **`시리즈.index.names=[ , ]`**

In [5]:
#이름 지정 전 
s2.index.names 
s2

FrozenList([None, None])

James  Eng     100
       Math     90
Ted    Eng      90
       Math     70
Adam   Eng      85
       Math     90
dtype: int64

In [6]:
#이름 지정 후 
s2.index.names=['name','subject']
s2

name   subject
James  Eng        100
       Math        90
Ted    Eng         90
       Math        70
Adam   Eng         85
       Math        90
dtype: int64

###### 4) 인덱싱 

In [16]:
# 요소 1개 추출
s2['James']
s2.James

subject
Eng     100
Math     90
dtype: int64

subject
Eng     100
Math     90
dtype: int64

In [22]:
# 요소 2개 추출 - (상위인덱스, 하위 인덱스)
s2[('James','Eng')]
s2.James.Eng

# 상위 항목 여러개라면 ? 
s2[['James', 'Ted']]
s2.loc[['James','Ted']]

100

100

name   subject
James  Eng        100
       Math        90
Ted    Eng         90
       Math        70
dtype: int64

In [24]:
s2[:, 'Eng']
s2['James', :]

name
James    100
Ted       90
Adam      85
dtype: int64

subject
Eng     100
Math     90
dtype: int64

### 2. 데이터 프레임의 다중 인덱스

###### 1) column 인덱스가 다중일 때

In [6]:
np.random.seed(0)
data = np.round(np.random.randn(5,4),2)
data

array([[ 1.76,  0.4 ,  0.98,  2.24],
       [ 1.87, -0.98,  0.95, -0.15],
       [-0.1 ,  0.41,  0.14,  1.45],
       [ 0.76,  0.12,  0.44,  0.33],
       [ 1.49, -0.21,  0.31, -0.85]])

In [8]:
df = pd.DataFrame(data=data, columns=[['A','A','B','B'],
                                     ['C1','C2','C3','C4']])
#상위 컬럼 : A, B
#하위 컬럼 : C1, C2. ..
df

Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,C1,C2,C3,C4
0,1.76,0.4,0.98,2.24
1,1.87,-0.98,0.95,-0.15
2,-0.1,0.41,0.14,1.45
3,0.76,0.12,0.44,0.33
4,1.49,-0.21,0.31,-0.85


In [9]:
df.columns

MultiIndex([('A', 'C1'),
            ('A', 'C2'),
            ('B', 'C3'),
            ('B', 'C4')],
           )

###### 2) 열 인덱싱

In [10]:
#상위 인덱스 단독 추출
df['B']
df.B

Unnamed: 0,C3,C4
0,0.98,2.24
1,0.95,-0.15
2,0.14,1.45
3,0.44,0.33
4,0.31,-0.85


Unnamed: 0,C3,C4
0,0.98,2.24
1,0.95,-0.15
2,0.14,1.45
3,0.44,0.33
4,0.31,-0.85


In [13]:
#상위 인덱스, 하위 인덱스 추출
df[('B','C3')]
df['B','C3']

0    0.98
1    0.95
2    0.14
3    0.44
4    0.31
Name: (B, C3), dtype: float64

0    0.98
1    0.95
2    0.14
3    0.44
4    0.31
Name: (B, C3), dtype: float64

In [14]:
df.B.C3

0    0.98
1    0.95
2    0.14
3    0.44
4    0.31
Name: C3, dtype: float64

###### 3) column 인덱스 이름 지정
- **`df.columns.names=[ , ]`**

In [16]:
#이름 지정 전
df.columns.names
df

FrozenList([None, None])

Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,C1,C2,C3,C4
0,1.76,0.4,0.98,2.24
1,1.87,-0.98,0.95,-0.15
2,-0.1,0.41,0.14,1.45
3,0.76,0.12,0.44,0.33
4,1.49,-0.21,0.31,-0.85


In [18]:
#이름 지정 후
df.columns.names = ['upper', 'lower']
df

upper,A,A,B,B
lower,C1,C2,C3,C4
0,1.76,0.4,0.98,2.24
1,1.87,-0.98,0.95,-0.15
2,-0.1,0.41,0.14,1.45
3,0.76,0.12,0.44,0.33
4,1.49,-0.21,0.31,-0.85


###### 4) 행 인덱스가 다중일 때

In [21]:
data2 = np.random.randint(1,10,size=(4,4))
df2 = pd.DataFrame(data = data2, 
                  index = [['a', 'a', 'b', 'b'],
                          ['1','2','1','2']],
                  columns = ['A', 'B', 'C', 'D'])
df2

Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,5,4,5,5
a,2,9,5,4,8
b,1,6,6,1,2
b,2,6,4,1,6


In [22]:
df2.index

MultiIndex([('a', '1'),
            ('a', '2'),
            ('b', '1'),
            ('b', '2')],
           )

###### 5) 인덱싱 (행방향)

In [None]:
# 기본 인덱싱
df2['A']

In [23]:
# 행 인덱싱
df2.loc['a']

Unnamed: 0,A,B,C,D
1,5,4,5,5
2,9,5,4,8


In [25]:
df2['a':'c']
df2.loc['a':'c']

Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,5,4,5,5
a,2,9,5,4,8
b,1,6,6,1,2
b,2,6,4,1,6


Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,5,4,5,5
a,2,9,5,4,8
b,1,6,6,1,2
b,2,6,4,1,6


In [26]:
df2.loc[('a','1')]

A    5
B    4
C    5
D    5
Name: (a, 1), dtype: int64

In [27]:
df2.loc['a','1']

A    5
B    4
C    5
D    5
Name: (a, 1), dtype: int64

In [28]:
# iloc 이용

df2.iloc[0]

A    5
B    4
C    5
D    5
Name: (a, 1), dtype: int64

In [29]:
df2.iloc[3,0]

6

###### 6) 행과 열 모두 다중일때

In [38]:
data3 = np.round(np.random.randn(6,4), 2)
data3

array([[ 1.74,  0.59, -0.54, -1.72],
       [-0.58,  1.43,  0.28,  0.79],
       [ 0.32,  0.7 ,  0.39, -0.04],
       [ 0.3 , -0.43,  1.73, -0.87],
       [-0.82, -1.1 ,  0.09,  0.46],
       [ 0.43,  2.07, -0.54, -1.38]])

In [39]:
#컬럼 4개 생성
col1 = ['A']*2 + ['B']*2
col2 = ['C'+str(i) for i in range(1,5)]


In [40]:
#인덱스 6개 생성
idx1 = ['M']*3 + ['F']*3
idx2 = ['id'+str(i) for i in range(1,4)]*2

In [42]:
df3 = pd.DataFrame(data = data3, index=[idx1,idx2],
                  columns=[col1,col2])
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,A,A,B,B
Unnamed: 0_level_1,Unnamed: 1_level_1,C1,C2,C3,C4
M,id1,1.74,0.59,-0.54,-1.72
M,id2,-0.58,1.43,0.28,0.79
M,id3,0.32,0.7,0.39,-0.04
F,id1,0.3,-0.43,1.73,-0.87
F,id2,-0.82,-1.1,0.09,0.46
F,id3,0.43,2.07,-0.54,-1.38


In [44]:
# 인덱스 이름 설정
df3.columns.names=['Cidx1','Cidx2']
df3.index.names=['Ridx1','Ridx2']
df3

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C3,C4
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,1.74,0.59,-0.54,-1.72
M,id2,-0.58,1.43,0.28,0.79
M,id3,0.32,0.7,0.39,-0.04
F,id1,0.3,-0.43,1.73,-0.87
F,id2,-0.82,-1.1,0.09,0.46
F,id3,0.43,2.07,-0.54,-1.38


### 3. 멀티 인덱스 객체
- 멀티 인덱스 객체 자체를 생성하는 방법 (4가지)

###### 1. MultiIndex.from_arrays() 사용
- 배열(array)의 리스트

In [47]:
arrays = np.array([['one', 'two', 'one', 'two'], 
                  ['bar', 'baz', 'foo', 'qux']])
arrays

index = pd.MultiIndex.from_arrays(arrays, 
                                 names=['first', 'second'])
index

array([['one', 'two', 'one', 'two'],
       ['bar', 'baz', 'foo', 'qux']], dtype='<U3')

MultiIndex([('one', 'bar'),
            ('two', 'baz'),
            ('one', 'foo'),
            ('two', 'qux')],
           names=['first', 'second'])

###### 2. MultiIndex.from_tuples() 사용
- 튜플들(tuples)의 리스트

In [50]:
arrays = [['bar','bar','baz','baz','foo','foo','qux','qux'],
         ['one','two','one','two','one','two','one','two']]

tuples = list(zip(*arrays))

tuples

index = pd.MultiIndex.from_tuples(tuples,
                                 name=['first','second'])
index

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

###### 3. MultiIndex.from_product() 사용
- 리스트의 cross product

In [52]:
iterables = [['bar','baz','foo','qux'],['one','two']]
#첫번째와 두번째 level 길이가 다름 > 교차해서 곱 
pd.MultiIndex.from_product(iterables,
                          names=['first','second'])

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

###### 4. MultiIndex.from_frame() 사용
- Dataframe

In [53]:
df = pd.DataFrame([['bar','one'],['bar','two'],
                  ['foo','one'],['foo','two']], 
                 columns=['first','second'])
pd.MultiIndex.from_frame(df)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [9]:
# 활용

data4 = np.round(np.random.randn(4,9),2)

index = pd.MultiIndex.from_product([[1995,2000], ['May','Dec']],
                                    names=['year','month'])

columns = pd.MultiIndex.from_product([['A','B','C'], [1,2,3]],
                                    names=['name', 'count'])

df4 = pd.DataFrame(data = data4, index = index,
                  columns = columns)

df4

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Unnamed: 0_level_1,count,1,2,3,1,2,3,1,2,3
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1995,May,0.06,-0.47,-0.01,-1.5,-0.05,-0.03,0.28,0.05,-0.09
1995,Dec,-0.21,-1.54,1.1,0.47,1.37,-1.15,1.18,0.23,-1.47
2000,May,0.52,0.8,1.03,1.17,-0.44,0.8,-2.08,-0.53,-0.61
2000,Dec,0.86,1.52,1.03,0.69,0.47,2.47,0.14,-0.74,-0.9


### 4. 다중 인덱스의 특정 레벨 제거
- **`droplevel(level, axis)`**

###### 1. 시리즈

In [7]:
s2

name   subject
James  Eng        100
       Math        90
Ted    Eng         90
       Math        70
Adam   Eng         85
       Math        90
dtype: int64

In [8]:
s2.droplevel(0)

subject
Eng     100
Math     90
Eng      90
Math     70
Eng      85
Math     90
dtype: int64

###### 2. 데이터 프레임

In [10]:
df4

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Unnamed: 0_level_1,count,1,2,3,1,2,3,1,2,3
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1995,May,0.06,-0.47,-0.01,-1.5,-0.05,-0.03,0.28,0.05,-0.09
1995,Dec,-0.21,-1.54,1.1,0.47,1.37,-1.15,1.18,0.23,-1.47
2000,May,0.52,0.8,1.03,1.17,-0.44,0.8,-2.08,-0.53,-0.61
2000,Dec,0.86,1.52,1.03,0.69,0.47,2.47,0.14,-0.74,-0.9


In [11]:
# 행인덱스 레벨 제거
df4.droplevel(level=0, axis=0)

name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
month,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
May,0.06,-0.47,-0.01,-1.5,-0.05,-0.03,0.28,0.05,-0.09
Dec,-0.21,-1.54,1.1,0.47,1.37,-1.15,1.18,0.23,-1.47
May,0.52,0.8,1.03,1.17,-0.44,0.8,-2.08,-0.53,-0.61
Dec,0.86,1.52,1.03,0.69,0.47,2.47,0.14,-0.74,-0.9


In [12]:
df4.droplevel(level=1, axis=0)

name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1995,0.06,-0.47,-0.01,-1.5,-0.05,-0.03,0.28,0.05,-0.09
1995,-0.21,-1.54,1.1,0.47,1.37,-1.15,1.18,0.23,-1.47
2000,0.52,0.8,1.03,1.17,-0.44,0.8,-2.08,-0.53,-0.61
2000,0.86,1.52,1.03,0.69,0.47,2.47,0.14,-0.74,-0.9


In [13]:
# 열인덱스 레벨 제거
df4.droplevel(level=0,axis=1)

Unnamed: 0_level_0,count,1,2,3,1,2,3,1,2,3
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1995,May,0.06,-0.47,-0.01,-1.5,-0.05,-0.03,0.28,0.05,-0.09
1995,Dec,-0.21,-1.54,1.1,0.47,1.37,-1.15,1.18,0.23,-1.47
2000,May,0.52,0.8,1.03,1.17,-0.44,0.8,-2.08,-0.53,-0.61
2000,Dec,0.86,1.52,1.03,0.69,0.47,2.47,0.14,-0.74,-0.9


In [14]:
df4.droplevel(level=1, axis=1)

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1995,May,0.06,-0.47,-0.01,-1.5,-0.05,-0.03,0.28,0.05,-0.09
1995,Dec,-0.21,-1.54,1.1,0.47,1.37,-1.15,1.18,0.23,-1.47
2000,May,0.52,0.8,1.03,1.17,-0.44,0.8,-2.08,-0.53,-0.61
2000,Dec,0.86,1.52,1.03,0.69,0.47,2.47,0.14,-0.74,-0.9


### 5. 행 인덱스 레벨 해제 
- **`unstack`**
- 행 인덱스를 열 인덱스로 변환 

In [16]:
# 시리즈 

s = pd.Series([1, 2, 3, 4],
              index=pd.MultiIndex.from_product([['one', 'two'],
                                                ['a', 'b']]))
s

one  a    1
     b    2
two  a    3
     b    4
dtype: int64

In [17]:
s.unstack(level=0)

Unnamed: 0,one,two
a,1,3
b,2,4


In [19]:
s.unstack(level=1)

Unnamed: 0,a,b
one,1,2
two,3,4


In [None]:
# 데이터 프레임 

In [20]:
index = pd.MultiIndex.from_product([[1995, 2000], ['May','Dec']],
                                 names = ['year', 'month'])
columns = pd.MultiIndex.from_product([['A','B','C'],[1,2]],
                                    names = ['name', 'count'])

df =pd.DataFrame(np.round(np.random.randn(4,6), 1),
                  index = index, columns = columns)
df

Unnamed: 0_level_0,name,A,A,B,B,C,C
Unnamed: 0_level_1,count,1,2,1,2,1,2
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
1995,May,0.9,-1.0,1.2,-0.5,1.0,-0.1
1995,Dec,-0.8,0.1,0.4,0.3,0.6,-0.3
2000,May,-1.5,0.2,-0.9,0.2,-0.2,0.7
2000,Dec,0.4,1.5,0.6,-0.2,0.0,-0.7


In [21]:
df.unstack(level=0)

name,A,A,A,A,B,B,B,B,C,C,C,C
count,1,1,2,2,1,1,2,2,1,1,2,2
year,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000
month,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
Dec,-0.8,0.4,0.1,1.5,0.4,0.6,0.3,-0.2,0.6,0.0,-0.3,-0.7
May,0.9,-1.5,-1.0,0.2,1.2,-0.9,-0.5,0.2,1.0,-0.2,-0.1,0.7


In [22]:
df.unstack(level=1)

name,A,A,A,A,B,B,B,B,C,C,C,C
count,1,1,2,2,1,1,2,2,1,1,2,2
month,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May
year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
1995,-0.8,0.9,0.1,-1.0,0.4,1.2,0.3,-0.5,0.6,1.0,-0.3,-0.1
2000,0.4,-1.5,1.5,0.2,0.6,-0.9,-0.2,0.2,0.0,-0.2,-0.7,0.7


### 6. 열 인덱스 레벨 해제 
- **`stack`**
- 열 인덱스를 행 인덱스로 변환 

In [23]:
df = pd.DataFrame([[0, 1], [2, 3]],
                  index=['cat', 'dog'],
                  columns=['weight', 'height'])
df

Unnamed: 0,weight,height
cat,0,1
dog,2,3


In [24]:
df.stack(level=0)

cat  weight    0
     height    1
dog  weight    2
     height    3
dtype: int64

In [25]:
df.stack()

cat  weight    0
     height    1
dog  weight    2
     height    3
dtype: int64

In [26]:
df.stack(level=-1)

cat  weight    0
     height    1
dog  weight    2
     height    3
dtype: int64

In [27]:
multicol1 = pd.MultiIndex.from_tuples([('weight', 'kg'),
                                       ('weight', 'pounds')])
df2 = pd.DataFrame([[1, 2], [2, 4]],
                                    index=['cat', 'dog'],
                                    columns=multicol1)
df2

Unnamed: 0_level_0,weight,weight
Unnamed: 0_level_1,kg,pounds
cat,1,2
dog,2,4


In [32]:
df3=df2.stack()

In [33]:
df3.index

MultiIndex([('cat',     'kg'),
            ('cat', 'pounds'),
            ('dog',     'kg'),
            ('dog', 'pounds')],
           )

In [34]:
df2.stack(level=0)

Unnamed: 0,Unnamed: 1,kg,pounds
cat,weight,1,2
dog,weight,2,4


In [35]:
df2.stack(level=[0,1])

cat  weight  kg        1
             pounds    2
dog  weight  kg        2
             pounds    4
dtype: int64

### 7. 다중인덱스의 레벨 교환
- **`swaplevel()`**

In [36]:
df = pd.DataFrame({"Grade": ["A", "B", "A", "C"]},
                  index=[
                      ["Final exam", "Final exam", "Coursework", "Coursework"],
                      ["History", "Geography", "History", "Geography"],
                      ["January", "February", "March", "April"],],)
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
Final exam,History,January,A
Final exam,Geography,February,B
Coursework,History,March,A
Coursework,Geography,April,C


In [37]:
df.swaplevel()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
Final exam,January,History,A
Final exam,February,Geography,B
Coursework,March,History,A
Coursework,April,Geography,C


In [38]:
df.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Grade,Grade,Grade,Grade
Unnamed: 0_level_1,Unnamed: 1_level_1,April,February,January,March
Coursework,Geography,C,,,
Coursework,History,,,,A
Final exam,Geography,,B,,
Final exam,History,,,A,


In [39]:
df.swaplevel(0,2)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
January,History,Final exam,A
February,Geography,Final exam,B
March,History,Coursework,A
April,Geography,Coursework,C


### 8. 다중인덱스의 행/열 추가


In [40]:
data = np.round(np.random.rand(6, 4), 2)
columns = pd.MultiIndex.from_product([['A','B'],['C1','C2']],
                                   names=['cidx1','cidx2'])
index = pd.MultiIndex.from_product([['M','F'],['id1','id2','id3']],
                                    names=['ridx1','ridx2'])
df = pd.DataFrame(data=data, index=index, columns=columns)
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.8,0.84,0.91,0.57
M,id2,0.71,0.17,0.54,0.85
M,id3,0.38,0.84,0.15,0.25
F,id1,0.23,0.93,0.87,0.04
F,id2,0.94,0.11,0.66,0.95
F,id3,0.36,0.22,0.56,0.85


In [48]:
df2=df.copy()

In [49]:
#첫번째 레벨의 F행과 id1, B컬럼의 C1컬럼
df2.loc['F','id1']['B','C1']=20
df2

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.8,0.84,0.91,0.57
M,id2,0.71,0.17,0.54,0.85
M,id3,0.38,0.84,0.15,0.25
F,id1,0.23,0.93,20.0,0.04
F,id2,0.94,0.11,0.66,0.95
F,id3,0.36,0.22,0.56,0.85


In [50]:
df2.drop(columns=('B','C1'))

Unnamed: 0_level_0,cidx1,A,A,B
Unnamed: 0_level_1,cidx2,C1,C2,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
M,id1,0.8,0.84,0.57
M,id2,0.71,0.17,0.85
M,id3,0.38,0.84,0.25
F,id1,0.23,0.93,0.04
F,id2,0.94,0.11,0.95
F,id3,0.36,0.22,0.85


In [52]:
df2.drop(columns=('B','C1'), inplace=True)
df2

KeyError: 9

In [53]:
df2

Unnamed: 0_level_0,cidx1,A,A,B
Unnamed: 0_level_1,cidx2,C1,C2,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
M,id1,0.8,0.84,0.57
M,id2,0.71,0.17,0.85
M,id3,0.38,0.84,0.25
F,id1,0.23,0.93,0.04
F,id2,0.94,0.11,0.95
F,id3,0.36,0.22,0.85


In [54]:
df2.sum()

cidx1  cidx2
A      C1       3.42
       C2       3.11
B      C2       3.51
dtype: float64

In [55]:
df2.sum(axis=1)

ridx1  ridx2
M      id1      2.21
       id2      1.73
       id3      1.47
F      id1      1.20
       id2      2.00
       id3      1.43
dtype: float64

In [60]:
df2.loc[('Row','Sum')] = df2.sum()
df2

Unnamed: 0_level_0,cidx1,A,A,B,Row,Sum
Unnamed: 0_level_1,cidx2,C1,C2,C2,Sum,Unnamed: 6_level_1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
M,id1,0.8,0.84,0.57,,
M,id2,0.71,0.17,0.85,,
M,id3,0.38,0.84,0.25,,
F,id1,0.23,0.93,0.04,,
F,id2,0.94,0.11,0.95,,
F,id3,0.36,0.22,0.85,,
Row,,,,,,
