In [55]:
import numpy as np
import pandas as pd

In [56]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity='all'

### 참고. 난수 생성 : np.random 모듈

: https://docs.python.org/ko/3/library/random.html

##### np.random.seed(seed값)

- seed : 난수 알고리즘에서 사용하는 기본 값
    - seed 값이 같으면 동일한 난수 발생
    - 예. np.random.seed(10) 


- 계속 변경되는 난수를 생성하려면 시드값이 매번 변하도록 지정
    - 예. np.random.seed(int(time.time()))
    

#### 난수 생성 함수

- random.rand() : 주어진 형태의 난수 배열 생성
- random.randint(최소값, 최대값, size=n) 
    - [최소값, 최대값)의 범위에서 임의의 정수 생성
    
- random.randn() : 표준정규분포(Standard normal distribution)로부터 샘플링된 난수 생성

- random.standard_normal() : 표준정규분포 난수 발생

- random.normal([loc, scale, size]) : 정규분포 난수 생성

- random.random_sample(size) : [0,1)사이의 난수 생성

- random.choice(a[, size, replace, p]) : 주어진 배열로 부터 표본추출

In [57]:
# 고정 시드 값

np.random.seed(20)
np.random.randint(5, size=(4))

array([3, 2, 4, 2])

In [58]:
np.random.randint(5, size=(4))

array([1, 4, 3, 2])

In [59]:
np.random.randint(10,20, size=(4))

array([10, 16, 18, 15])

In [60]:
#변경 시드 값
import time

time.time()

1712036680.509949

In [61]:
np.random.seed(int(time.time()))

### 참고. 파이썬의 random 모듈

#### 정수 난수 발생 함수

##### random.randrange(start, stop[, step])
- range(start, stop, step)에서 임의로 선택된 요소를 반환
- choice(range(start, stop, step))와 동등하지만 실제로 range 객체를 만들지는 않음

##### random.randint(a, b)
- `a <= N <= b` 를 만족하는 임의의 정수 N을 반환
- randrange(a, b+1)의 별칭

#### 시퀀스 난수 발생 함수
##### random.choice(seq)
- 비어 있지 않은 시퀀스 seq에서 임의의 요소를 반환
- seq가 비어 있으면, IndexError를 발생


##### random.choices(population, weights=None, *, cum_weights=None, k=1)
- population에서 중복을 허락하면서(with replacement) 선택한 k 크기의 요소 리스트를 반환
- population이 비어 있으면 IndexError 발생
- weights 시퀀스가 지정되면 상대 가중치에 따라 선택됨
- weights나 cum_weights를 지정하지 않으면 같은 확률로 선택
- weights 시퀀스가 제공되면, population 시퀀스와 길이가 같아야 함
- weights와 cum_weights를 모두 지정하는 것은 TypeError

##### random.sample(population, k, *, counts=None)
- population 시퀀스로부터 추출한 k개 길이의 새 리스트를 반환
- random sampling without replacement

#### 실수 난수 발생 함수
##### random.random()
- `0.0 <= X < 1.0` 사이의 실수 반환

##### random.uniform(a, b)
- `a <= b` 일 때 `a <= N <= b`, `b < a` 일 때 `b <= N <= a`를 만족하는 임의의 부동 소수점 숫자 N을 반환
- 종단 값 b는 방정식 a + (b-a) * random()의 부동 소수점 자리 올림에 따라 범위에 포함되거나 포함되지 않을 수 있음

----

# pandas 다중 인덱스(multi index)

- 행이나 열 인덱스가 계층으로 구성된 인덱스(Hierarchical indexing)

In [62]:
df = pd.DataFrame([[1,2,3,4],[2,3,4,6]],
                 index=['a b'.split()])
df

Unnamed: 0,0,1,2,3
a,1,2,3,4
b,2,3,4,6


In [63]:
df.index

MultiIndex([('a',),
            ('b',)],
           )

In [64]:
df.columns

RangeIndex(start=0, stop=4, step=1)

[학습 내용]

1. 다중 인덱스를 갖는 Series
2. 다중 인덱스를 갖는 DataFrame
3. MultiIndex 객체
4. 다중인덱스의 특정 레벨 제거 : droplevel()
5. 행인덱스 레벨 해제 : unstack()
6. 열인덱스 레벨 해제 : stack()
7. 다중인덱스의 레벨 교환 : swaplevel()
8. 다중인덱스의 행/열 추가
9. 다중인덱스 정렬

### 1. 다중인덱스를 갖는 Series

#### 예1. 난수 데이터를 갖는 Series

In [65]:
arrays = [np.array(["bar", "bar", "baz", "baz",
                    "foo", "foo", "qux", "qux"]),
          np.array(["one", "two", "one", "two",
                    "one", "two", "one", "two"])]
type(arrays)
type(arrays[0])
np.random.seed(0)
s1 = pd.Series(np.random.randn(8), index=arrays)
s1

list

numpy.ndarray

bar  one    1.764052
     two    0.400157
baz  one    0.978738
     two    2.240893
foo  one    1.867558
     two   -0.977278
qux  one    0.950088
     two   -0.151357
dtype: float64

#### 예2. 키를 튜플로 갖는 딕셔너리의 데이터로 Series 생성

In [66]:
# 키를 튜플로 갖는 딕셔너리 데이터
data= {('James', 'Eng'): 100,
        ('James', 'Math') : 90,
        ('Ted', 'Eng') : 90,
        ('Ted', 'Math') : 70,
        ('Adam', 'Eng') : 85,
        ('Adam', 'Math') : 90 }
data

{('James', 'Eng'): 100,
 ('James', 'Math'): 90,
 ('Ted', 'Eng'): 90,
 ('Ted', 'Math'): 70,
 ('Adam', 'Eng'): 85,
 ('Adam', 'Math'): 90}

In [67]:
data_list = [{'name': key[0], 'course': key[1], 'score': value} for key, value in data.items()]
s2 = pd.DataFrame(data_list)
s2

Unnamed: 0,name,course,score
0,James,Eng,100
1,James,Math,90
2,Ted,Eng,90
3,Ted,Math,70
4,Adam,Eng,85
5,Adam,Math,90


In [68]:
s2 = pd.DataFrame(data.values(), 
                  index=pd.MultiIndex.from_tuples(data.keys(), 
                names=['name', 'course']), columns=['score'])
s2

Unnamed: 0_level_0,Unnamed: 1_level_0,score
name,course,Unnamed: 2_level_1
James,Eng,100
James,Math,90
Ted,Eng,90
Ted,Math,70
Adam,Eng,85
Adam,Math,90


In [69]:
s2.index

MultiIndex([('James',  'Eng'),
            ('James', 'Math'),
            (  'Ted',  'Eng'),
            (  'Ted', 'Math'),
            ( 'Adam',  'Eng'),
            ( 'Adam', 'Math')],
           names=['name', 'course'])

#### 인덱스의 이름 지정 : 시리즈.index.names = [ , ]

In [70]:
s2.index.names=['name', 'course']

#### 다중인덱스를 갖는 Series의 인덱싱

- 시리즈[상위인덱스]
- 시리즈.상위인덱스
- 시리즈[(상위인덱스, 하위인덱스)]
- 시리즈.상위인데스,하위인덱스
- 시리즈[:, 하위인덱스]

In [71]:
s2.loc['James']

Unnamed: 0_level_0,score
course,Unnamed: 1_level_1
Eng,100
Math,90


In [72]:
s2.loc['Ted']

Unnamed: 0_level_0,score
course,Unnamed: 1_level_1
Eng,90
Math,70


In [73]:
s2.Ted

AttributeError: 'DataFrame' object has no attribute 'Ted'

In [None]:
s2['James', 'Ted']

In [None]:
s2[('James', 'Eng')]

In [None]:
s2.Ted.Eng

In [None]:
s2[:, 'Eng']

In [None]:
s2['James', :]
#s2['James']

### 2. 다중 인덱스를 갖는 DataFrame

- 데이터 프레임 생성 시 생성자에서 columns인수나 index 인수를  2차원 리스트(행렬) 형태로 지정할 경우

#### 1) column인덱스를 다중 인덱스로 갖는 DataFrame

In [None]:
np.random.seed(10)
data = np.round(np.random.randn(5,4), 2) #round 로 소숫점 아래 둘째 자리로 자름
data

In [74]:
df = pd.DataFrame(data=data, 
                  columns = [['A','A','B','B'],['C1','C2','C3','C4']])
df

Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,C1,C2,C3,C4


In [75]:
df.columns

MultiIndex([('A', 'C1'),
            ('A', 'C2'),
            ('B', 'C3'),
            ('B', 'C4')],
           )

#### 열 인덱싱(1): df[상위인덱스]
- 상위 인덱스의 모든 열에 대한 데이터프레임 반환

In [76]:
df[('A')]

Unnamed: 0,C1,C2


#### 열 인덱싱(2): df[(상위인덱스, 하위인덱스)]
- 인덱스가 하나가 아니므로 묶어서(튜플로) 전달해야 함
- 시리즈로 반환

In [77]:
df[('A','C2')]

Series([], Name: (A, C2), dtype: object)

In [78]:
df['B','C3']
df[('B','C3')]

Series([], Name: (B, C3), dtype: object)

Series([], Name: (B, C3), dtype: object)

#### 열인덱싱(3) :  . 연산자로 확장

- df.상위인덱스
- df.상위인덱스.하위인덱스

In [79]:
df.A

Unnamed: 0,C1,C2


In [80]:
df.A.C1

Series([], Name: C1, dtype: object)

#### 열인덱스 이름 지정 : df.columns.names = []

In [81]:
df.columns.names = ['upper', 'lower']
df

upper,A,A,B,B
lower,C1,C2,C3,C4


#### 2) 행인덱스를 다중 인덱스로 갖는 DataFrame

In [82]:
data2 = np.random.randint(1,10, size=(4,4))
data2

array([[9, 2, 7, 8],
       [8, 9, 2, 6],
       [9, 5, 4, 1],
       [4, 6, 1, 3]])

In [83]:
df2 = pd.DataFrame(data=data2,
                  columns=['A','B','C','D'],
                  index = ['a a b b'.split(), '1 2 1 2'.split()])
df2

Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,9,2,7,8
a,2,8,9,2,6
b,1,9,5,4,1
b,2,4,6,1,3


In [84]:
df2.index

MultiIndex([('a', '1'),
            ('a', '2'),
            ('b', '1'),
            ('b', '2')],
           )

#### 행인덱싱(1) : df.loc[상위인덱스]

In [85]:
df2.loc['a']

Unnamed: 0,A,B,C,D
1,9,2,7,8
2,8,9,2,6


In [86]:
df2.loc[('a','1')]

A    9
B    2
C    7
D    8
Name: (a, 1), dtype: int64

In [87]:
df2.loc[['a','b']]
df2.loc['a':'b']

Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,9,2,7,8
a,2,8,9,2,6
b,1,9,5,4,1
b,2,4,6,1,3


Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,9,2,7,8
a,2,8,9,2,6
b,1,9,5,4,1
b,2,4,6,1,3


#### 행인덱싱(2) : df.loc[(상위인덱스, 하위인덱스)]
- 상위인덱스와 하위인덱스를 튜플로 전달

In [88]:
df2.loc[('a','1')]
df2.loc['a','1']

A    9
B    2
C    7
D    8
Name: (a, 1), dtype: int64

A    9
B    2
C    7
D    8
Name: (a, 1), dtype: int64

#### 인덱서 iloc : df.iloc[  ]
- iloc인덱서는 행이름, 열이름 기반이 아님

In [89]:
df2
df2.iloc[0]

Unnamed: 0,Unnamed: 1,A,B,C,D
a,1,9,2,7,8
a,2,8,9,2,6
b,1,9,5,4,1
b,2,4,6,1,3


A    9
B    2
C    7
D    8
Name: (a, 1), dtype: int64

In [90]:
df2.iloc[1,1:]

B    9
C    2
D    6
Name: (a, 2), dtype: int64

In [91]:
df2.iloc[0,2]

7

#### 3)  행과 열에 모두 다중인덱스를 갖는 DataFrame

In [92]:
np.random.seed(0)
data3 = np.round(np.random.randn(6,4),2)
data3
col1 = ['A']*2 + ['B']*2
col1
col2 = ['C'+str(i) for i in range(1,5)]
col2

columns = [col1, col2]
columns
idx1 = ['M']*3 + ['F']*3
idx2 = ['id_'+str(i) for i in range(1,4)]*2
index = [idx1, idx2]
index

array([[ 1.76,  0.4 ,  0.98,  2.24],
       [ 1.87, -0.98,  0.95, -0.15],
       [-0.1 ,  0.41,  0.14,  1.45],
       [ 0.76,  0.12,  0.44,  0.33],
       [ 1.49, -0.21,  0.31, -0.85],
       [-2.55,  0.65,  0.86, -0.74]])

['A', 'A', 'B', 'B']

['C1', 'C2', 'C3', 'C4']

[['A', 'A', 'B', 'B'], ['C1', 'C2', 'C3', 'C4']]

[['M', 'M', 'M', 'F', 'F', 'F'],
 ['id_1', 'id_2', 'id_3', 'id_1', 'id_2', 'id_3']]

In [93]:
df3 = pd.DataFrame(data=data3, index=index, columns=columns)
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,A,A,B,B
Unnamed: 0_level_1,Unnamed: 1_level_1,C1,C2,C3,C4
M,id_1,1.76,0.4,0.98,2.24
M,id_2,1.87,-0.98,0.95,-0.15
M,id_3,-0.1,0.41,0.14,1.45
F,id_1,0.76,0.12,0.44,0.33
F,id_2,1.49,-0.21,0.31,-0.85
F,id_3,-2.55,0.65,0.86,-0.74


#### 행/열 각 인덱스에 이름(names) 설정

- 이름을 지정하면 직관성이 높아지고 편리하게 사용할 수 있음
- 열이름/행이름 구분하는데 용이
- 문법
    - df.columns.names = 값 또는 리스트
    - df.index.names = 값 또는 리스트

In [94]:
df3.index.names = ['Ridx1', 'Ridx2']
df3.columns.names = ['Cidx1', 'Cidx2']
df3

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C3,C4
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,1.76,0.4,0.98,2.24
M,id_2,1.87,-0.98,0.95,-0.15
M,id_3,-0.1,0.41,0.14,1.45
F,id_1,0.76,0.12,0.44,0.33
F,id_2,1.49,-0.21,0.31,-0.85
F,id_3,-2.55,0.65,0.86,-0.74


In [95]:
df3['A']

Unnamed: 0_level_0,Cidx2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_1,Unnamed: 3_level_1
M,id_1,1.76,0.4
M,id_2,1.87,-0.98
M,id_3,-0.1,0.41
F,id_1,0.76,0.12
F,id_2,1.49,-0.21
F,id_3,-2.55,0.65


In [96]:
df3.loc['M', 'A']

Cidx2,C1,C2
Ridx2,Unnamed: 1_level_1,Unnamed: 2_level_1
id_1,1.76,0.4
id_2,1.87,-0.98
id_3,-0.1,0.41


In [97]:
df3.B

Unnamed: 0_level_0,Cidx2,C3,C4
Ridx1,Ridx2,Unnamed: 2_level_1,Unnamed: 3_level_1
M,id_1,0.98,2.24
M,id_2,0.95,-0.15
M,id_3,0.14,1.45
F,id_1,0.44,0.33
F,id_2,0.31,-0.85
F,id_3,0.86,-0.74


In [98]:
df3.B.loc['F']

Cidx2,C3,C4
Ridx2,Unnamed: 1_level_1,Unnamed: 2_level_1
id_1,0.44,0.33
id_2,0.31,-0.85
id_3,0.86,-0.74


In [99]:
df3.B.loc['F', 'id_1']

Cidx2
C3    0.44
C4    0.33
Name: (F, id_1), dtype: float64

### 3. MultiIndex 객체
- https://pandas.pydata.org/docs/user_guide/advanced.html

- 생성 방법
1. MultiIndex.from_arrays() 사용 : 배열(array)의 리스트
2. MultiIndex.from_tuples() : 튜플들(tuples)의 리스트
3. MultiIndex.from_product() : 리스트의 cross product
4. MultiIndex.from_frame() : DataFrame

In [100]:
# 1. MultiIndex.from_arrays() 이용한 다중인덱스 생성

arrays = np.array([['one', 'two', 'one', 'two'],
                   ['bar', 'baz', 'foo', 'quz']])

arrays
index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second'])
index

array([['one', 'two', 'one', 'two'],
       ['bar', 'baz', 'foo', 'quz']], dtype='<U3')

MultiIndex([('one', 'bar'),
            ('two', 'baz'),
            ('one', 'foo'),
            ('two', 'quz')],
           names=['first', 'second'])

In [101]:
# 2. MultiIndex.from_tuples() 이용한 다중인덱스 생성

lists = [['one', 'two', 'one', 'two'],
        ['bar', 'baz', 'foo', 'quz']]
lists
tuples = list(zip(*lists))
tuples

index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
index

[['one', 'two', 'one', 'two'], ['bar', 'baz', 'foo', 'quz']]

[('one', 'bar'), ('two', 'baz'), ('one', 'foo'), ('two', 'quz')]

MultiIndex([('one', 'bar'),
            ('two', 'baz'),
            ('one', 'foo'),
            ('two', 'quz')],
           names=['first', 'second'])

In [102]:
# 3. MultiIndex.from_product() 사용한 다중인덱스 생성

iterables = [['bar', 'baz', 'foo', 'quz'],
              ['one', 'two']]

index = pd.MultiIndex.from_product(iterables, names=['first','second'])
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('quz', 'one'),
            ('quz', 'two')],
           names=['first', 'second'])

In [103]:
# 3. MultiIndex.from_product() 사용한 다중인덱스 생성

iterables = [['bar', 'baz', 'foo', 'quz'],
              ['one', 'two'],
                ['1', '2', '3']]

index = pd.MultiIndex.from_product(iterables, names=['first','second', 'third'])
index

MultiIndex([('bar', 'one', '1'),
            ('bar', 'one', '2'),
            ('bar', 'one', '3'),
            ('bar', 'two', '1'),
            ('bar', 'two', '2'),
            ('bar', 'two', '3'),
            ('baz', 'one', '1'),
            ('baz', 'one', '2'),
            ('baz', 'one', '3'),
            ('baz', 'two', '1'),
            ('baz', 'two', '2'),
            ('baz', 'two', '3'),
            ('foo', 'one', '1'),
            ('foo', 'one', '2'),
            ('foo', 'one', '3'),
            ('foo', 'two', '1'),
            ('foo', 'two', '2'),
            ('foo', 'two', '3'),
            ('quz', 'one', '1'),
            ('quz', 'one', '2'),
            ('quz', 'one', '3'),
            ('quz', 'two', '1'),
            ('quz', 'two', '2'),
            ('quz', 'two', '3')],
           names=['first', 'second', 'third'])

In [104]:
# 4. MultiIndex.from_frame()를 사용한 다중인덱스 생성

idx_df = pd.DataFrame([['bar', 'one'],['bar', 'two'],
                     ['baz', 'one'], ['baz', 'two']],
                     columns=['first', 'second'])
idx_df
index = pd.MultiIndex.from_frame(idx_df)
index

Unnamed: 0,first,second
0,bar,one
1,bar,two
2,baz,one
3,baz,two


MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two')],
           names=['first', 'second'])

#### 예제. MultiIndex객체 생성하여 다중인덱스 설정

In [105]:
data4 = np.round(np.random.randn(4,9),1)
data4

index = pd.MultiIndex.from_product([[1995, 2000], ['May', 'Dec']], 
                                   names=['year', 'month'])
index

columns = pd.MultiIndex.from_product([['A','B','C'],[1,2,3]], 
                                     names=['name', 'count'])
columns

df4 = pd.DataFrame(data=data4, index=index, columns=columns)
df4

array([[ 2.3, -1.5,  0. , -0.2,  1.5,  1.5,  0.2,  0.4, -0.9],
       [-2. , -0.3,  0.2,  1.2,  1.2, -0.4, -0.3, -1. , -1.4],
       [-1.7,  2. , -0.5, -0.4, -1.3,  0.8, -1.6, -0.2, -0.9],
       [ 0.4, -0.5, -1.2, -0. ,  0.4,  0.1,  0.3, -0.6, -0.4]])

MultiIndex([(1995, 'May'),
            (1995, 'Dec'),
            (2000, 'May'),
            (2000, 'Dec')],
           names=['year', 'month'])

MultiIndex([('A', 1),
            ('A', 2),
            ('A', 3),
            ('B', 1),
            ('B', 2),
            ('B', 3),
            ('C', 1),
            ('C', 2),
            ('C', 3)],
           names=['name', 'count'])

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Unnamed: 0_level_1,count,1,2,3,1,2,3,1,2,3
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1995,May,2.3,-1.5,0.0,-0.2,1.5,1.5,0.2,0.4,-0.9
1995,Dec,-2.0,-0.3,0.2,1.2,1.2,-0.4,-0.3,-1.0,-1.4
2000,May,-1.7,2.0,-0.5,-0.4,-1.3,0.8,-1.6,-0.2,-0.9
2000,Dec,0.4,-0.5,-1.2,-0.0,0.4,0.1,0.3,-0.6,-0.4


In [106]:
df4.A

Unnamed: 0_level_0,count,1,2,3
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1995,May,2.3,-1.5,0.0
1995,Dec,-2.0,-0.3,0.2
2000,May,-1.7,2.0,-0.5
2000,Dec,0.4,-0.5,-1.2


In [107]:
df4.index
df4.loc[1995]

MultiIndex([(1995, 'May'),
            (1995, 'Dec'),
            (2000, 'May'),
            (2000, 'Dec')],
           names=['year', 'month'])

name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
month,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
May,2.3,-1.5,0.0,-0.2,1.5,1.5,0.2,0.4,-0.9
Dec,-2.0,-0.3,0.2,1.2,1.2,-0.4,-0.3,-1.0,-1.4


In [108]:
df4.reset_index()

name,year,month,A,A,A,B,B,B,C,C,C
count,Unnamed: 1_level_1,Unnamed: 2_level_1,1,2,3,1,2,3,1,2,3
0,1995,May,2.3,-1.5,0.0,-0.2,1.5,1.5,0.2,0.4,-0.9
1,1995,Dec,-2.0,-0.3,0.2,1.2,1.2,-0.4,-0.3,-1.0,-1.4
2,2000,May,-1.7,2.0,-0.5,-0.4,-1.3,0.8,-1.6,-0.2,-0.9
3,2000,Dec,0.4,-0.5,-1.2,-0.0,0.4,0.1,0.3,-0.6,-0.4


### 4. 다중인덱스의 특정 레벨 제거 : droplevel(level, axis)

#### 1) 시리즈의 다중인덱스 레벨 제거

In [109]:
s2.droplevel(0)
s2

Unnamed: 0_level_0,score
course,Unnamed: 1_level_1
Eng,100
Math,90
Eng,90
Math,70
Eng,85
Math,90


Unnamed: 0_level_0,Unnamed: 1_level_0,score
name,course,Unnamed: 2_level_1
James,Eng,100
James,Math,90
Ted,Eng,90
Ted,Math,70
Adam,Eng,85
Adam,Math,90


#### 2) 데이터프레임에서 다중인덱스 레벨 제거

In [110]:
s2.droplevel(1)

Unnamed: 0_level_0,score
name,Unnamed: 1_level_1
James,100
James,90
Ted,90
Ted,70
Adam,85
Adam,90


#### 행인덱스 레벨 제거

In [111]:
df4.droplevel(level=0)
df4.droplevel(level=0, axis=0)

name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
month,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
May,2.3,-1.5,0.0,-0.2,1.5,1.5,0.2,0.4,-0.9
Dec,-2.0,-0.3,0.2,1.2,1.2,-0.4,-0.3,-1.0,-1.4
May,-1.7,2.0,-0.5,-0.4,-1.3,0.8,-1.6,-0.2,-0.9
Dec,0.4,-0.5,-1.2,-0.0,0.4,0.1,0.3,-0.6,-0.4


name,A,A,A,B,B,B,C,C,C
count,1,2,3,1,2,3,1,2,3
month,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
May,2.3,-1.5,0.0,-0.2,1.5,1.5,0.2,0.4,-0.9
Dec,-2.0,-0.3,0.2,1.2,1.2,-0.4,-0.3,-1.0,-1.4
May,-1.7,2.0,-0.5,-0.4,-1.3,0.8,-1.6,-0.2,-0.9
Dec,0.4,-0.5,-1.2,-0.0,0.4,0.1,0.3,-0.6,-0.4


#### 열인덱스 레벨 제거

In [112]:
df4
df4.droplevel(level=0, axis=1) #name A B C가 사라짐

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Unnamed: 0_level_1,count,1,2,3,1,2,3,1,2,3
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1995,May,2.3,-1.5,0.0,-0.2,1.5,1.5,0.2,0.4,-0.9
1995,Dec,-2.0,-0.3,0.2,1.2,1.2,-0.4,-0.3,-1.0,-1.4
2000,May,-1.7,2.0,-0.5,-0.4,-1.3,0.8,-1.6,-0.2,-0.9
2000,Dec,0.4,-0.5,-1.2,-0.0,0.4,0.1,0.3,-0.6,-0.4


Unnamed: 0_level_0,count,1,2,3,1,2,3,1,2,3
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1995,May,2.3,-1.5,0.0,-0.2,1.5,1.5,0.2,0.4,-0.9
1995,Dec,-2.0,-0.3,0.2,1.2,1.2,-0.4,-0.3,-1.0,-1.4
2000,May,-1.7,2.0,-0.5,-0.4,-1.3,0.8,-1.6,-0.2,-0.9
2000,Dec,0.4,-0.5,-1.2,-0.0,0.4,0.1,0.3,-0.6,-0.4


In [113]:
df4.droplevel(level=1, axis=1)

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1995,May,2.3,-1.5,0.0,-0.2,1.5,1.5,0.2,0.4,-0.9
1995,Dec,-2.0,-0.3,0.2,1.2,1.2,-0.4,-0.3,-1.0,-1.4
2000,May,-1.7,2.0,-0.5,-0.4,-1.3,0.8,-1.6,-0.2,-0.9
2000,Dec,0.4,-0.5,-1.2,-0.0,0.4,0.1,0.3,-0.6,-0.4


### 5. 행인덱스 레벨 해제 : unstack()

: 행인덱스 -> 열인덱스로 변환

#### 1) 시리즈의 다중인덱스 레벨 해제

- 해제된 레벨 인덱스는 열인덱스로 변경되며, 데이터프레임으로 반환됨

In [114]:
s1

bar  one    1.764052
     two    0.400157
baz  one    0.978738
     two    2.240893
foo  one    1.867558
     two   -0.977278
qux  one    0.950088
     two   -0.151357
dtype: float64

#### 마지막 레벨 해제 : unstack(level=-1)

In [115]:
s1
s1.unstack()

bar  one    1.764052
     two    0.400157
baz  one    0.978738
     two    2.240893
foo  one    1.867558
     two   -0.977278
qux  one    0.950088
     two   -0.151357
dtype: float64

Unnamed: 0,one,two
bar,1.764052,0.400157
baz,0.978738,2.240893
foo,1.867558,-0.977278
qux,0.950088,-0.151357


#### 첫번째 레벨 해제 : unstack(level=0)

In [116]:
s1.unstack(level=0)

Unnamed: 0,bar,baz,foo,qux
one,1.764052,0.978738,1.867558,0.950088
two,0.400157,2.240893,-0.977278,-0.151357


#### 2) 데이터프레임의 다중 행인덱스 레벨 해제

- 데이터프레임의 인덱스 레벨 해제
- 해제된 레벨은 열인덱스 중 가장 마지막 레벨이 됨

[형식] DataFrame.unstack(level=-1, fill_value=None)


- level : int, str, or list of these, default= -1 (last level)
    - Level(s) of index to unstack, can pass level name.

- fill_value : int, str or dict
    - Replace NaN with this value if the unstack produces missing values.


- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.unstack.html

In [117]:
df4

Unnamed: 0_level_0,name,A,A,A,B,B,B,C,C,C
Unnamed: 0_level_1,count,1,2,3,1,2,3,1,2,3
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1995,May,2.3,-1.5,0.0,-0.2,1.5,1.5,0.2,0.4,-0.9
1995,Dec,-2.0,-0.3,0.2,1.2,1.2,-0.4,-0.3,-1.0,-1.4
2000,May,-1.7,2.0,-0.5,-0.4,-1.3,0.8,-1.6,-0.2,-0.9
2000,Dec,0.4,-0.5,-1.2,-0.0,0.4,0.1,0.3,-0.6,-0.4


In [118]:
df4.unstack(level=0) #year이 컬럼으로 바뀜

name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
year,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000
month,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
Dec,-2.0,0.4,-0.3,-0.5,0.2,-1.2,1.2,-0.0,1.2,0.4,-0.4,0.1,-0.3,0.3,-1.0,-0.6,-1.4,-0.4
May,2.3,-1.7,-1.5,2.0,0.0,-0.5,-0.2,-0.4,1.5,-1.3,1.5,0.8,0.2,-1.6,0.4,-0.2,-0.9,-0.9


In [119]:
df4.unstack(level='year') #year이 컬럼으로 바뀜

name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
year,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000,1995,2000
month,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
Dec,-2.0,0.4,-0.3,-0.5,0.2,-1.2,1.2,-0.0,1.2,0.4,-0.4,0.1,-0.3,0.3,-1.0,-0.6,-1.4,-0.4
May,2.3,-1.7,-1.5,2.0,0.0,-0.5,-0.2,-0.4,1.5,-1.3,1.5,0.8,0.2,-1.6,0.4,-0.2,-0.9,-0.9


In [120]:
df4.unstack(level='month')

name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
month,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May
year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
1995,-2.0,2.3,-0.3,-1.5,0.2,0.0,1.2,-0.2,1.2,1.5,-0.4,1.5,-0.3,0.2,-1.0,0.4,-1.4,-0.9
2000,0.4,-1.7,-0.5,2.0,-1.2,-0.5,-0.0,-0.4,0.4,-1.3,0.1,0.8,0.3,-1.6,-0.6,-0.2,-0.4,-0.9


In [121]:
df4.unstack()

name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
month,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May
year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
1995,-2.0,2.3,-0.3,-1.5,0.2,0.0,1.2,-0.2,1.2,1.5,-0.4,1.5,-0.3,0.2,-1.0,0.4,-1.4,-0.9
2000,0.4,-1.7,-0.5,2.0,-1.2,-0.5,-0.0,-0.4,0.4,-1.3,0.1,0.8,0.3,-1.6,-0.6,-0.2,-0.4,-0.9


In [122]:
df4.unstack(1)

name,A,A,A,A,A,A,B,B,B,B,B,B,C,C,C,C,C,C
count,1,1,2,2,3,3,1,1,2,2,3,3,1,1,2,2,3,3
month,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May,Dec,May
year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
1995,-2.0,2.3,-0.3,-1.5,0.2,0.0,1.2,-0.2,1.2,1.5,-0.4,1.5,-0.3,0.2,-1.0,0.4,-1.4,-0.9
2000,0.4,-1.7,-0.5,2.0,-1.2,-0.5,-0.0,-0.4,0.4,-1.3,0.1,0.8,0.3,-1.6,-0.6,-0.2,-0.4,-0.9


### 6. 열인덱스 레벨 해제 : stack()

: 열인덱스 -> 행인덱스로 변환

#### 데이터프레임에서 열인덱스 레벨 해제

- 지정한 열인덱스가 행인덱스의 마지막 레벨로 변환 추가됨
- single level 열인덱스를 갖는 경우 시리즈로 반환
- multi level 열인덱스를 갖는 경우 데이터프레임 반환

[형식] DataFrame.stack(level=- 1, dropna=True)

- level위치 또는 열이름 지정
- level : int, str, list, default= -1
- dropna : bool, default True

- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.stack.html

#### 예1. 1차 레벨을 가진 데이터프레임

In [123]:
df5 = pd.DataFrame([[0,1],[2,3]], 
                  index=['cat','dog'],
                  columns=['weight', 'height'])
df5

Unnamed: 0,weight,height
cat,0,1
dog,2,3


In [124]:
df5.unstack()

weight  cat    0
        dog    2
height  cat    1
        dog    3
dtype: int64

In [125]:
df5.stack()

cat  weight    0
     height    1
dog  weight    2
     height    3
dtype: int64

In [126]:
df5.stack(level=0)

cat  weight    0
     height    1
dog  weight    2
     height    3
dtype: int64

#### 예2. 다중레벨을 갖는 데이터프레임

In [127]:
df

upper,A,A,B,B
lower,C1,C2,C3,C4


In [128]:
df.stack()

Unnamed: 0_level_0,upper,A,B
Unnamed: 0_level_1,lower,Unnamed: 2_level_1,Unnamed: 3_level_1


In [129]:
df.stack(level=0)

Unnamed: 0_level_0,lower,C1,C2,C3,C4
Unnamed: 0_level_1,upper,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [130]:
df.stack(level=[0,1])

Series([], dtype: object)

#### 예3. 열인덱스 이름을 갖는 데이터프레임

In [131]:
df

upper,A,A,B,B
lower,C1,C2,C3,C4


In [132]:
multicol2 = pd.MultiIndex.from_tuples([('weight','kg'),
                                      ('height', 'm')])

df6 = pd.DataFrame([[1.0,2.0],[3.0,4.0]],
                  index=['cat','dog'],
                  columns=multicol2)
df6

Unnamed: 0_level_0,weight,height
Unnamed: 0_level_1,kg,m
cat,1.0,2.0
dog,3.0,4.0


In [133]:
df6.stack()

Unnamed: 0,Unnamed: 1,weight,height
cat,kg,1.0,
cat,m,,2.0
dog,kg,3.0,
dog,m,,4.0


### 7. 다중인덱스의 레벨 교환 : swaplevel()

[형식] DataFrame.swaplevel(i=- 2, j=- 1, axis=0)

- i, j : int or str
    - Levels of the indices to be swapped. Can pass level name as string.

- axis : 0 or ‘index’, 1 or ‘columns', default=0
    - The axis to swap levels on
    - 0 or‘index’ for row-wise
    - 1 or ‘columns’ for column-wise

- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.swaplevel.html

In [134]:
df = pd.DataFrame({"Grade": ["A", "B", "A", "C"]},
                  index=[
                      ["Final exam", "Final exam", "Coursework", "Coursework"],
                      ["History", "Geography", "History", "Geography"],
                      ["January", "February", "March", "April"],],)
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
Final exam,History,January,A
Final exam,Geography,February,B
Coursework,History,March,A
Coursework,Geography,April,C


In [135]:
df.swaplevel()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
Final exam,January,History,A
Final exam,February,Geography,B
Coursework,March,History,A
Coursework,April,Geography,C


In [136]:
df.swaplevel(0,1)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Grade
History,Final exam,January,A
Geography,Final exam,February,B
History,Coursework,March,A
Geography,Coursework,April,C


### [정리] 다중인덱스의 접근 방법

- 인덱스가 하나가 아니므로 묶어서(튜플로) 전달
- 열 접근 : df[(튜플)]
- 행 접근 : df.loc[(튜플)]
- 참고. df.iloc[]은 정수위치로 접근하여 다중인덱스에 구애받지 않음

-----

### 8. 다중인덱스의 행/열 추가

In [149]:
data = np.round(np.random.rand(6, 4), 2)
columns = pd.MultiIndex.from_product([['A','B'],['C1','C2']],
                                   names=['cidx1','cidx2'])
index = pd.MultiIndex.from_product([['M','F'],['id1','id2','id3']],
                                    names=['ridx1','ridx2'])
df = pd.DataFrame(data=data, index=index, columns=columns)
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.02,0.3,0.66,0.29
M,id2,0.62,0.43,0.14,0.3
M,id3,0.57,0.59,0.57,0.65
F,id1,0.65,0.43,0.9,0.37
F,id2,0.44,0.89,0.81,0.7
F,id3,0.1,0.92,0.71,1.0


In [141]:
df.loc[('F','id1'),('B','C1')]= 100
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.74,0.96,0.25,0.58
M,id2,0.59,0.57,0.22,0.95
M,id3,0.45,0.85,0.7,0.3
F,id1,0.81,0.4,100.0,0.58
F,id2,0.88,0.69,0.73,0.5
F,id3,0.96,0.64,0.42,0.61


#### 값입력을 위한 위치 지정 실수?

In [141]:
df.loc[('F','id1'),('B','C1')]= 100
df

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.74,0.96,0.25,0.58
M,id2,0.59,0.57,0.22,0.95
M,id3,0.45,0.85,0.7,0.3
F,id1,0.81,0.4,100.0,0.58
F,id2,0.88,0.69,0.73,0.5
F,id3,0.96,0.64,0.42,0.61


In [142]:
df.drop(columns=[('B','C1')])
df

Unnamed: 0_level_0,cidx1,A,A,B
Unnamed: 0_level_1,cidx2,C1,C2,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
M,id1,0.74,0.96,0.58
M,id2,0.59,0.57,0.95
M,id3,0.45,0.85,0.3
F,id1,0.81,0.4,0.58
F,id2,0.88,0.69,0.5
F,id3,0.96,0.64,0.61


Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.74,0.96,0.25,0.58
M,id2,0.59,0.57,0.22,0.95
M,id3,0.45,0.85,0.7,0.3
F,id1,0.81,0.4,100.0,0.58
F,id2,0.88,0.69,0.73,0.5
F,id3,0.96,0.64,0.42,0.61


#### 각 행의 총합을 마지막 열로 추가

In [143]:
df[('Row', 'Sum')] = df2.sum(axis=1)
df

Unnamed: 0_level_0,cidx1,A,A,B,B,Row
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2,Sum
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
M,id1,0.74,0.96,0.25,0.58,
M,id2,0.59,0.57,0.22,0.95,
M,id3,0.45,0.85,0.7,0.3,
F,id1,0.81,0.4,100.0,0.58,200.0
F,id2,0.88,0.69,0.73,0.5,
F,id3,0.96,0.64,0.42,0.61,


#### 각 열의 총합을 마지막 행으로 추가

In [144]:
df.loc[('Col','Sum')] = df.sum(axis=0)
df

Unnamed: 0_level_0,cidx1,A,A,B,B,Row,Sum
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2,Sum,Unnamed: 7_level_1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
M,id1,0.74,0.96,0.25,0.58,,
M,id2,0.59,0.57,0.22,0.95,,
M,id3,0.45,0.85,0.7,0.3,,
F,id1,0.81,0.4,100.0,0.58,200.0,
F,id2,0.88,0.69,0.73,0.5,,
F,id3,0.96,0.64,0.42,0.61,,
Col,,,,,,,


In [148]:
df.loc[('Col','Sum'),:] = df.sum(axis=0)
df

Unnamed: 0_level_0,cidx1,A,A,B,B,Row,Sum
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2,Sum,Unnamed: 7_level_1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
M,id1,0.74,0.96,0.25,0.58,,
M,id2,0.59,0.57,0.22,0.95,,
M,id3,0.45,0.85,0.7,0.3,,
F,id1,0.81,0.4,100.0,0.58,200.0,
F,id2,0.88,0.69,0.73,0.5,,
F,id3,0.96,0.64,0.42,0.61,,
Col,,,,,,,
Col,Sum,13.29,12.33,306.96,10.56,600.0,0.0


### 9. 다중인덱스 정렬

### 1) sort_index()

[형식] sort_index(*, axis=0, level=None, ascending=True, inplace=False,)

- 행/열 인덱스 기준으로 정렬
- 기본 정렬 방식 : 오름차순 정렬
- 내림차순 : ascending=Flase 설정

####  행인덱스 정렬

In [150]:
df.sort_index()
df.sort_index(ascending=True)

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
F,id1,0.65,0.43,0.9,0.37
F,id2,0.44,0.89,0.81,0.7
F,id3,0.1,0.92,0.71,1.0
M,id1,0.02,0.3,0.66,0.29
M,id2,0.62,0.43,0.14,0.3
M,id3,0.57,0.59,0.57,0.65


Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
F,id1,0.65,0.43,0.9,0.37
F,id2,0.44,0.89,0.81,0.7
F,id3,0.1,0.92,0.71,1.0
M,id1,0.02,0.3,0.66,0.29
M,id2,0.62,0.43,0.14,0.3
M,id3,0.57,0.59,0.57,0.65


In [152]:
df.sort_index(level=1)
df.sort_index(level=1, ascending=True)

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
F,id1,0.65,0.43,0.9,0.37
M,id1,0.02,0.3,0.66,0.29
F,id2,0.44,0.89,0.81,0.7
M,id2,0.62,0.43,0.14,0.3
F,id3,0.1,0.92,0.71,1.0
M,id3,0.57,0.59,0.57,0.65


Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
F,id1,0.65,0.43,0.9,0.37
M,id1,0.02,0.3,0.66,0.29
F,id2,0.44,0.89,0.81,0.7
M,id2,0.62,0.43,0.14,0.3
F,id3,0.1,0.92,0.71,1.0
M,id3,0.57,0.59,0.57,0.65


#### 열인덱스 기준으로 정렬

In [155]:
df
df.sort_index(axis=1, ascending=False)

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.02,0.3,0.66,0.29
M,id2,0.62,0.43,0.14,0.3
M,id3,0.57,0.59,0.57,0.65
F,id1,0.65,0.43,0.9,0.37
F,id2,0.44,0.89,0.81,0.7
F,id3,0.1,0.92,0.71,1.0


Unnamed: 0_level_0,cidx1,B,B,A,A
Unnamed: 0_level_1,cidx2,C2,C1,C2,C1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.29,0.66,0.3,0.02
M,id2,0.3,0.14,0.43,0.62
M,id3,0.65,0.57,0.59,0.57
F,id1,0.37,0.9,0.43,0.65
F,id2,0.7,0.81,0.89,0.44
F,id3,1.0,0.71,0.92,0.1


In [156]:
df.sort_index(axis=1, level=1, ascending=False)

Unnamed: 0_level_0,cidx1,B,A,B,A
Unnamed: 0_level_1,cidx2,C2,C2,C1,C1
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.29,0.3,0.66,0.02
M,id2,0.3,0.43,0.14,0.62
M,id3,0.65,0.59,0.57,0.57
F,id1,0.37,0.43,0.9,0.65
F,id2,0.7,0.89,0.81,0.44
F,id3,1.0,0.92,0.71,0.1


In [157]:
df.sort_index(axis=1, level=1)

Unnamed: 0_level_0,cidx1,A,B,A,B
Unnamed: 0_level_1,cidx2,C1,C1,C2,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.02,0.66,0.3,0.29
M,id2,0.62,0.14,0.43,0.3
M,id3,0.57,0.57,0.59,0.65
F,id1,0.65,0.9,0.43,0.37
F,id2,0.44,0.81,0.89,0.7
F,id3,0.1,0.71,0.92,1.0


### 2) sort_values()

[형식] ort_values(by, *, axis=0, ascending=True, inplace=False, )

- 특정 컬럼 값을 기준으로 정렬
- by = 특정컬럼
    - 특정컬럼이 다중인덱스 일 경우 컬럼명을 튜플로 전달

#### df의 A.C1 컬럼을 기준으로 정렬

In [158]:
df.sort_values(by=('A','C1')) #A의 C1을 기준으로 오름차순 정렬

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.02,0.3,0.66,0.29
F,id3,0.1,0.92,0.71,1.0
F,id2,0.44,0.89,0.81,0.7
M,id3,0.57,0.59,0.57,0.65
M,id2,0.62,0.43,0.14,0.3
F,id1,0.65,0.43,0.9,0.37


In [161]:
df.sort_values(by=[('A','C1'),('B','C2')]) #A의 C1, B의 C2 기준으로 오름차순 정렬

Unnamed: 0_level_0,cidx1,A,A,B,B
Unnamed: 0_level_1,cidx2,C1,C2,C1,C2
ridx1,ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id1,0.02,0.3,0.66,0.29
F,id3,0.1,0.92,0.71,1.0
F,id2,0.44,0.89,0.81,0.7
M,id3,0.57,0.59,0.57,0.65
M,id2,0.62,0.43,0.14,0.3
F,id1,0.65,0.43,0.9,0.37


-----------