# 데이터프레임의 인덱스 조작

## 인덱스 설정 및 제거

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 인덱스 설정 및 제거

In [5]:
# Seed NumPy's global random generator once so the values below are
# reproducible (a second, immediately-overridden seed call had no effect).
np.random.seed(0)
# Stack the letters A-E on top of a 3x5 block of rounded random numbers and
# transpose the result into 5 rows x 4 columns.  Because letters and numbers
# share one array, NumPy stores every value as a string, so C2-C4 hold text.
df1 = pd.DataFrame(
    np.vstack([list("ABCDE"), np.round(np.random.rand(3, 5), 2)]).T,
    columns=["C1", "C2", "C3", "C4"],
)
df1

Unnamed: 0,C1,C2,C3,C4
0,A,0.55,0.65,0.79
1,B,0.72,0.44,0.53
2,C,0.6,0.89,0.57
3,D,0.54,0.96,0.93
4,E,0.42,0.38,0.07


In [9]:
# Promote column C1 to the row index.  set_index returns a new frame by
# default, so df1 itself is left unchanged.
df2 = df1.set_index("C1")
df2

Unnamed: 0_level_0,C2,C3,C4
C1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,0.55,0.65,0.79
B,0.72,0.44,0.53
C,0.6,0.89,0.57
D,0.54,0.96,0.93
E,0.42,0.38,0.07


In [10]:
# Turn the C1 index back into an ordinary column, modifying df2 in place.
# The old index is kept as a column by default (drop defaults to False).
df2.reset_index(inplace=True)
df2

Unnamed: 0,C1,C2,C3,C4
0,A,0.55,0.65,0.79
1,B,0.72,0.44,0.53
2,C,0.6,0.89,0.57
3,D,0.54,0.96,0.93
4,E,0.42,0.38,0.07


5명의 학생의 국어, 영어, 수학 점수를 나타내는 데이터프레임을 다음과 같이 만든다.

학생 이름을 나타내는 열을 포함시키지 않고 데이터프레임 df_score1을 생성한 후, df_score1.index 속성에 학생 이름 목록을 지정하여 인덱스를 설정한다. reset_index 명령으로 이 인덱스 열을 일반 데이터 열로 바꾸어 데이터프레임 df_score2를 만든다.

학생 이름을 나타내는 열을 일반 데이터 열로 포함하는 데이터프레임 df_score2에 set_index 명령을 적용하여 학생 이름을 나타내는 열을 다시 인덱스로 변경한다.

In [11]:
# Fix the seed so the same scores are generated on every run.
np.random.seed(2021)
# Five students x three subjects; integer scores drawn from [40, 100).
data = np.random.randint(40, 100, (5, 3))
df_score1 = pd.DataFrame(data, columns=['국어', '영어', '수학'])
df_score1

Unnamed: 0,국어,영어,수학
0,92,61,97
1,40,85,70
2,62,84,67
3,69,61,69
4,64,52,94


In [12]:
# Replace the default 0..4 row labels with the students' names.
df_score1.index = pd.Index(
    ['Ailee', 'Elizabeth', 'James', 'Maria', 'Bryan']
)
df_score1

Unnamed: 0,국어,영어,수학
Ailee,92,61,97
Elizabeth,40,85,70
James,62,84,67
Maria,69,61,69
Bryan,64,52,94


In [14]:
# Name the index '이름' first, then move it out into a regular column;
# the new column takes the index's name, and df_score1 is left untouched.
df_score2 = df_score1.rename_axis('이름').reset_index()
df_score2

Unnamed: 0,이름,국어,영어,수학
0,Ailee,92,61,97
1,Elizabeth,40,85,70
2,James,62,84,67
3,Maria,69,61,69
4,Bryan,64,52,94


In [15]:
# Make the '이름' column the row index again, modifying df_score2 in place.
df_score2.set_index(keys='이름', inplace=True)
df_score2

Unnamed: 0_level_0,국어,영어,수학
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ailee,92,61,97
Elizabeth,40,85,70
James,62,84,67
Maria,69,61,69
Bryan,64,52,94


## 다중 인덱스

In [17]:
# Fix the seed so the random values are reproducible.
np.random.seed(0)
# 6x4 frame of rounded standard-normal values with two-level labels on both
# axes: rows are (Ridx1, Ridx2) pairs, columns are (Cidx1, Cidx2) pairs.
df4 = pd.DataFrame(
    np.random.randn(6, 4).round(2),
    index=pd.MultiIndex.from_arrays(
        [list("MMMFFF"), [f"id_{n}" for n in (1, 2, 3)] * 2],
        names=["Ridx1", "Ridx2"],
    ),
    columns=pd.MultiIndex.from_arrays(
        [list("AABB"), list("CDCD")],
        names=["Cidx1", "Cidx2"],
    ),
)
df4

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C,D,C,D
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,1.76,0.4,0.98,2.24
M,id_2,1.87,-0.98,0.95,-0.15
M,id_3,-0.1,0.41,0.14,1.45
F,id_1,0.76,0.12,0.44,0.33
F,id_2,1.49,-0.21,0.31,-0.85
F,id_3,-2.55,0.65,0.86,-0.74
