In [1]:
import numpy as np
import pandas as pd

# 단일 계층 인덱스 

In [2]:
# Six values (1..6) labelled with 'a', 'b', 'c' repeated twice, so every
# label occurs two times in this single-level index.
s = pd.Series(
    np.arange(1, 7),
    index=['a', 'b', 'c', 'a', 'b', 'c'],
)
s

a    1
b    2
c    3
a    4
b    5
c    6
dtype: int64

In [3]:
s.index  # the index attribute of the pd.Series object

Index(['a', 'b', 'c', 'a', 'b', 'c'], dtype='object')

In [5]:
s.index.nlevels  # nlevels attribute of the pd.Index object: the number of index levels

1

In [6]:
# The index of a pd.Series or pd.DataFrame may contain duplicate values,
# so a single label can select more than one element.
s.loc['a']

a    1
a    4
dtype: int64

# 계층적 색인(Hierarchical Index) 

In [7]:
# Two parallel label arrays form a two-level (hierarchical) index:
# the outer level holds 'a'/'b'/'c', the inner level holds the integers.
s = pd.Series(
    np.arange(1, 7) * 10,
    index=pd.MultiIndex.from_arrays([
        ['a', 'a', 'b', 'b', 'c', 'c'],
        [1, 2, 3, 1, 2, 3],
    ]),
)
s

a  1    10
   2    20
b  3    30
   1    40
c  2    50
   3    60
dtype: int64

In [8]:
s.index

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 3),
            ('b', 1),
            ('c', 2),
            ('c', 3)],
           )

In [9]:
s.index.nlevels

2

* nlevels의 값이 2 이상인 경우 loc 사용 방법: 
    * 첫 번째 레벨의 인덱스만 가지고 인덱싱 가능
    * 두 번째 레벨의 인덱스만으로는 인덱싱 불가능!
    * 다중 레벨 인덱스는 튜플 형태로 인덱싱 사용

In [16]:
s.loc['a']

1    10
2    20
dtype: int64

In [None]:
# A MultiIndex cannot be indexed through .loc with a second-level label alone:
# the key 1 is looked up among the first-level labels and is not found there.
# The KeyError is the point of this cell, so it is caught and displayed
# instead of being left as commented-out code.
try:
    s.loc[1]
except KeyError as err:
    print(f'KeyError: {err}')

In [17]:
s.loc[('a', 1)]

10

In [69]:
s

a  1    10
   2    20
b  3    30
   1    40
c  2    50
   3    60
dtype: int64

In [70]:
s.loc['a':'b']  # slicing by first-level labels; both endpoints are included

a  1    10
   2    20
b  3    30
   1    40
dtype: int64

In [71]:
s.loc['b':'c']

b  3    30
   1    40
c  2    50
   3    60
dtype: int64

In [None]:
# Slicing with full tuple keys needs a lexsorted MultiIndex. Here the second
# level is out of order under 'b' (3 comes before 1), so pandas raises
# UnsortedIndexError; calling sort_index() first makes such a slice valid.
# The error is the point of this cell, so it is caught and displayed.
try:
    s.loc[('a', 1):('b', 3)]
except pd.errors.UnsortedIndexError as err:
    print(f'UnsortedIndexError: {err}')

In [74]:
s.loc[[('a', 1), ('b', 1)]]   # fancy indexing: indexing with a list (here, a list of tuple keys)

a  1    10
b  1    40
dtype: int64

In [75]:
# swaplevel() method: swaps the levels of the index
s

a  1    10
   2    20
b  3    30
   1    40
c  2    50
   3    60
dtype: int64

In [76]:
s.swaplevel()

1  a    10
2  a    20
3  b    30
1  b    40
2  c    50
3  c    60
dtype: int64

In [77]:
s.swaplevel().loc[1]

a    10
b    40
dtype: int64

In [78]:
s

a  1    10
   2    20
b  3    30
   1    40
c  2    50
   3    60
dtype: int64

In [83]:
# sort_index(): sorts the index
s.swaplevel().sort_index()

1  a    10
   b    40
2  a    20
   c    50
3  b    30
   c    60
dtype: int64

In [84]:
s.swaplevel().sort_index().loc[1:2]
# chain call: swaplevel -> sort_index -> .loc slice written as one expression

1  a    10
   b    40
2  a    20
   c    50
dtype: int64

# DataFrame의 Multi-index

* row multi-index

In [20]:
# Row MultiIndex: the outer level groups rows under 'a'/'b'/'c' and the inner
# level gives each row its own label; the columns stay a single-level index.
df = pd.DataFrame(
    np.arange(1, 19).reshape(6, 3),
    index=pd.MultiIndex.from_arrays([      # row index
        ['a', 'a', 'b', 'b', 'c', 'c'],
        ['a1', 'a2', 'b1', 'b2', 'c1', 'c2'],
    ]),
    columns=list('ABC'),                   # column index (names)
)
df

Unnamed: 0,Unnamed: 1,A,B,C
a,a1,1,2,3
a,a2,4,5,6
b,b1,7,8,9
b,b2,10,11,12
c,c1,13,14,15
c,c2,16,17,18


In [21]:
df.index   # index attribute of the pd.DataFrame object: the row index

MultiIndex([('a', 'a1'),
            ('a', 'a2'),
            ('b', 'b1'),
            ('b', 'b2'),
            ('c', 'c1'),
            ('c', 'c2')],
           )

In [None]:
df.columns # columns attribute of the pd.DataFrame object: the column index

In [22]:
df.index.nlevels  # nlevels attribute of the pd.Index object: the number of index levels

2

In [23]:
df.columns.nlevels 

1

In [24]:
df.loc['a']   # indexing: select the rows under first-level label 'a'

Unnamed: 0,A,B,C
a1,1,2,3
a2,4,5,6


In [25]:
df.loc['a':'b']  # slicing

Unnamed: 0,Unnamed: 1,A,B,C
a,a1,1,2,3
a,a2,4,5,6
b,b1,7,8,9
b,b2,10,11,12


In [None]:
# A full (first-level, second-level) tuple key addresses one specific row of this frame.
df.loc[('a', 'a1')]

In [29]:
df.swaplevel().loc['a1']  # indexing

Unnamed: 0,A,B,C
a,1,2,3


In [28]:
df.swaplevel().loc['a1':'b1']   # slicing

Unnamed: 0,Unnamed: 1,A,B,C
a1,a,1,2,3
a2,a,4,5,6
b1,b,7,8,9


In [31]:
df.loc[['a', 'c']]  # list indexing - fancy indexing

Unnamed: 0,Unnamed: 1,A,B,C
a,a1,1,2,3
a,a2,4,5,6
c,c1,13,14,15
c,c2,16,17,18


In [32]:
df.swaplevel().loc[['a1', 'b1']]

Unnamed: 0,Unnamed: 1,A,B,C
a1,a,1,2,3
b1,b,7,8,9


* row/column multi-index 

In [34]:
# Both axes are hierarchical here: rows are (group, item) pairs and
# columns are (group, variable) pairs.
df = pd.DataFrame(
    np.arange(1, 19).reshape((6, 3)),
    index=pd.MultiIndex.from_arrays([
        ['A', 'A', 'B', 'B', 'C', 'C'],
        ['a1', 'a2', 'a1', 'a2', 'a1', 'a2'],
    ]),
    columns=pd.MultiIndex.from_arrays([
        ['a', 'a', 'b'],
        ['v1', 'v2', 'v1'],
    ]),
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,v1,v2,v1
A,a1,1,2,3
A,a2,4,5,6
B,a1,7,8,9
B,a2,10,11,12
C,a1,13,14,15
C,a2,16,17,18


In [35]:
df.index

MultiIndex([('A', 'a1'),
            ('A', 'a2'),
            ('B', 'a1'),
            ('B', 'a2'),
            ('C', 'a1'),
            ('C', 'a2')],
           )

In [36]:
df.columns

MultiIndex([('a', 'v1'),
            ('a', 'v2'),
            ('b', 'v1')],
           )

In [37]:
df.index.nlevels

2

In [38]:
df.columns.nlevels

2

In [39]:
df.loc['A']

Unnamed: 0_level_0,a,a,b
Unnamed: 0_level_1,v1,v2,v1
a1,1,2,3
a2,4,5,6


In [41]:
# df.loc['a1'] does not work here: 'a1' is a second-level row label.
# Swap the two row levels first so that 'a1' becomes a first-level label.
df.swaplevel().loc['a1']

Unnamed: 0_level_0,a,a,b
Unnamed: 0_level_1,v1,v2,v1
A,1,2,3
B,7,8,9
C,13,14,15


In [42]:
# Selecting columns from a DataFrame by a first-level column name
df['a']

Unnamed: 0,Unnamed: 1,v1,v2
A,a1,1,2
A,a2,4,5
B,a1,7,8
B,a2,10,11
C,a1,13,14
C,a2,16,17


In [46]:
# df['v1'] does not work:
# a second-level column name alone cannot be used for indexing!
# Swap the column levels (axis=1) so that 'v1' becomes a first-level name.
df.swaplevel(axis=1)['v1']

Unnamed: 0,Unnamed: 1,a,b
A,a1,1,3
A,a2,4,6
B,a1,7,9
B,a2,10,12
C,a1,13,15
C,a2,16,18


# DataFrame column <==> row index 

`pandas.DataFrame.set_index()` 메서드:
* DataFrame의 column(들)을 row index로 변환한 DataFrame을 리턴
* keys 파라미터: row index로 변환할 컬럼 이름(들)

`pandas.DataFrame.reset_index()` 메서드:
* DataFrame의 row index(들)을 column으로 변환한 DataFrame을 리턴


In [48]:
# Fix the legacy global NumPy seed so the random scores below are reproducible.
np.random.seed(1)

# Ten students: classes 1 and 2 with ids 1..5 each. The subject columns are
# drawn in the order korean, science, math (integers from 0 to 100 inclusive);
# the order matters because all three share one random stream.
score = pd.DataFrame({
    'class': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
    'id': list(range(1, 6)) * 2,
    **{subject: np.random.randint(0, 101, 10)
       for subject in ('korean', 'science', 'math')},
})
score

Unnamed: 0,class,id,korean,science,math
0,1,1,37,76,29
1,1,2,12,71,14
2,1,3,72,6,50
3,1,4,9,25,68
4,1,5,75,50,87
5,2,1,5,20,87
6,2,2,79,18,94
7,2,3,64,84,96
8,2,4,16,11,86
9,2,5,1,28,13


In [49]:
score.shape

(10, 5)

In [50]:
# Keep only the rows of score whose 'class' value equals 1 (boolean mask).
score.loc[score['class'] == 1]

Unnamed: 0,class,id,korean,science,math
0,1,1,37,76,29
1,1,2,12,71,14
2,1,3,72,6,50
3,1,4,9,25,68
4,1,5,75,50,87


In [52]:
# Turn the 'class' column into the row index; the frame loses one column.
df2 = score.set_index('class')
df2.shape

(10, 4)

In [53]:
df2

Unnamed: 0_level_0,id,korean,science,math
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,37,76,29
1,2,12,71,14
1,3,72,6,50
1,4,9,25,68
1,5,75,50,87
2,1,5,20,87
2,2,79,18,94
2,3,64,84,96
2,4,16,11,86
2,5,1,28,13


In [54]:
df2.loc[1]

Unnamed: 0_level_0,id,korean,science,math
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,37,76,29
1,2,12,71,14
1,3,72,6,50
1,4,9,25,68
1,5,75,50,87


In [56]:
df2.reset_index()

Unnamed: 0,class,id,korean,science,math
0,1,1,37,76,29
1,1,2,12,71,14
2,1,3,72,6,50
3,1,4,9,25,68
4,1,5,75,50,87
5,2,1,5,20,87
6,2,2,79,18,94
7,2,3,64,84,96
8,2,4,16,11,86
9,2,5,1,28,13


In [57]:
df3 = score.set_index(keys='id')
df3

Unnamed: 0_level_0,class,korean,science,math
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,37,76,29
2,1,12,71,14
3,1,72,6,50
4,1,9,25,68
5,1,75,50,87
1,2,5,20,87
2,2,79,18,94
3,2,64,84,96
4,2,16,11,86
5,2,1,28,13


In [58]:
df3.reset_index()

Unnamed: 0,id,class,korean,science,math
0,1,1,37,76,29
1,2,1,12,71,14
2,3,1,72,6,50
3,4,1,9,25,68
4,5,1,75,50,87
5,1,2,5,20,87
6,2,2,79,18,94
7,3,2,64,84,96
8,4,2,16,11,86
9,5,2,1,28,13


In [59]:
df4 = score.set_index(keys=['class', 'id'])
df4

Unnamed: 0_level_0,Unnamed: 1_level_0,korean,science,math
class,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,37,76,29
1,2,12,71,14
1,3,72,6,50
1,4,9,25,68
1,5,75,50,87
2,1,5,20,87
2,2,79,18,94
2,3,64,84,96
2,4,16,11,86
2,5,1,28,13


In [61]:
 df4.reset_index()  # convert every level of the multi-index back into columns

Unnamed: 0,class,id,korean,science,math
0,1,1,37,76,29
1,1,2,12,71,14
2,1,3,72,6,50
3,1,4,9,25,68
4,1,5,75,50,87
5,2,1,5,20,87
6,2,2,79,18,94
7,2,3,64,84,96
8,2,4,16,11,86
9,2,5,1,28,13


In [66]:
# Only the level at position 1 ('id') is moved back to a column; 'class' stays as the row index.
df4.reset_index(level=1)

Unnamed: 0_level_0,id,korean,science,math
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,37,76,29
1,2,12,71,14
1,3,72,6,50
1,4,9,25,68
1,5,75,50,87
2,1,5,20,87
2,2,79,18,94
2,3,64,84,96
2,4,16,11,86
2,5,1,28,13


In [68]:
# The level can also be chosen by name: 'id' becomes a column, 'class' stays as the row index.
df4.reset_index(level='id')

Unnamed: 0_level_0,id,korean,science,math
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,37,76,29
1,2,12,71,14
1,3,72,6,50
1,4,9,25,68
1,5,75,50,87
2,1,5,20,87
2,2,79,18,94
2,3,64,84,96
2,4,16,11,86
2,5,1,28,13
