##### 데이터프레임 인덱스 조작

In [1]:
import numpy as np
import pandas as pd

##### NumPy 난수 생성 함수 복습

In [2]:
# Split the string into a list of its individual characters.
list('ABCD')

['A', 'B', 'C', 'D']

In [3]:
# random.randint(low, high=None, size=None, dtype=int)
# Draws integers from the half-open interval [low, high).
np.random.seed(123)  # fixed seed so the draw is reproducible

low, high = 3, 10
sample_10 = np.random.randint(low, high, size=10)

print(sample_10)

[9 8 9 5 7 5 9 4 6 5]


In [4]:
# random.randn(d0, d1, ..., dn): draw samples of the requested shape from
# the standard normal distribution.
sample_2_5 = np.random.randn(2, 5)
# Show the raw values first, then the same values rounded to 2 decimals.
print(sample_2_5, sample_2_5.round(2), sep="\n")

[[ 0.32210607 -0.05151772 -0.20420096  1.97934843 -1.61930007]
 [-1.11396442 -0.44744072  1.66840161 -0.14337247 -0.6191909 ]]
[[ 0.32 -0.05 -0.2   1.98 -1.62]
 [-1.11 -0.45  1.67 -0.14 -0.62]]


In [5]:
# random.rand(d0, d1, ..., dn): draw samples of the requested shape from
# the uniform distribution over [0, 1).
sample_3_5 = np.random.rand(3, 5)
print(sample_3_5)
# Same values rounded to 2 decimals for easier reading.
print(sample_3_5.round(2))

[[0.73799541 0.18249173 0.17545176 0.53155137 0.53182759]
 [0.63440096 0.84943179 0.72445532 0.61102351 0.72244338]
 [0.32295891 0.36178866 0.22826323 0.29371405 0.63097612]]
[[0.74 0.18 0.18 0.53 0.53]
 [0.63 0.85 0.72 0.61 0.72]
 [0.32 0.36 0.23 0.29 0.63]]


In [6]:
# random.shuffle(x): reorder the contents of a sequence in place.
np.random.seed(123)  # fixed seed so the resulting order is reproducible

x = [2, 9, 1, 0, 8, 6, 3, 7, 5, 4]
np.random.shuffle(x)  # mutates x directly; nothing is assigned back

print(x)

[8, 2, 7, 6, 5, 0, 9, 3, 4, 1]


In [7]:
# random.choice(a, size=None, replace=True, p=None)
# Weighted sampling from `a`, where p[i] is the probability of drawing a[i].
np.random.seed(123)  # fixed seed so both draws are reproducible

a = [2, 6, 0, 12, 20]
p = [0.2, 0.2, 0.15, 0.25, 0.2]

# Without replacement: the 4 drawn values are all distinct.
sample_4_no_repl = np.random.choice(a=a, size=4, replace=False, p=p)
print(sample_4_no_repl)

# With replacement: the same value may be drawn more than once.
sample_9_repl = np.random.choice(a=a, size=9, replace=True, p=p)
print(sample_9_repl)

[12  6 20  0]
[20 12  0  6  6 12  0  2  6]


In [8]:
# random.permutation(x): return a randomly permuted copy of a sequence.
np.random.seed(123)  # fixed seed so the permutation is reproducible

x = [2, 9, 1, 0, 8, 6, 3, 7, 5, 4]
perm_x = np.random.permutation(x)  # result is a new array

print(perm_x)

[8 2 7 6 5 0 9 3 4 1]


In [9]:
# numpy.unique(ar): sorted unique elements of an array.
# With `axis` given, whole rows (axis=0) or columns (axis=1) are deduplicated.
print(np.unique([5, 5, 4, 4, 7, 7]), end="\n\n")

ar = np.array([[1, 0, 0, 1], [1, 0, 0, 1], [2, 3, 4, 2]])
# Original array, its unique rows, then its unique columns, each separated
# by a blank line.
print(ar, np.unique(ar, axis=0), np.unique(ar, axis=1), sep="\n\n")

[4 5 7]

[[1 0 0 1]
 [1 0 0 1]
 [2 3 4 2]]

[[1 0 0 1]
 [2 3 4 2]]

[[0 0 1]
 [0 0 1]
 [3 4 2]]


##### np.random에는 np.random.normal, standard_t, poisson, exponential, chisquare, uniform, ... 등이 있다.

##### 인덱스 설정 및 제거

In [10]:
# Stack a row of letters on top of a 3x5 block of rounded uniform samples.
# Mixing strings and floats in one array coerces every element to a string.
np.vstack([list('ABCDE'), np.round(np.random.rand(3,5),2)])

array([['A', 'B', 'C', 'D', 'E'],
       ['0.41', '0.58', '0.14', '0.4', '0.63'],
       ['0.32', '0.24', '0.69', '0.59', '0.63'],
       ['0.44', '0.08', '0.71', '0.43', '0.3']], dtype='<U32')

In [13]:
np.random.seed(2021)  # fixed seed so the table is reproducible

# One row of letters plus three rows of rounded uniform samples; after the
# transpose each of those four rows becomes one column of the frame.
col_letters = list('ABCDE')
col_values = np.round(np.random.rand(3, 5), 2)
stacked = np.vstack([col_letters, col_values])

df1 = pd.DataFrame(stacked.T, columns=['C1', 'C2', 'C3', 'C4'])
df1

Unnamed: 0,C1,C2,C3,C4
0,A,0.61,0.13,0.1
1,B,0.73,0.18,0.06
2,C,0.14,0.75,0.96
3,D,0.31,0.66,0.62
4,E,1.0,0.78,0.09


In [14]:
# Promote column 'C1' to the row index. set_index returns a new frame, so
# df1 itself keeps its default integer index.
df2 = df1.set_index(keys='C1')
df2

Unnamed: 0_level_0,C2,C3,C4
C1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,0.61,0.13,0.1
B,0.73,0.18,0.06
C,0.14,0.75,0.96
D,0.31,0.66,0.62
E,1.0,0.78,0.09


In [15]:
# Move the 'C1' index back into an ordinary column. With inplace=True the
# frame is modified directly, so the result is not assigned back to df2.
df2.reset_index(inplace=True)
df2

Unnamed: 0,C1,C2,C3,C4
0,A,0.61,0.13,0.1
1,B,0.73,0.18,0.06
2,C,0.14,0.75,0.96
3,D,0.31,0.66,0.62
4,E,1.0,0.78,0.09


##### 연습문제 4.5.1

In [32]:
np.random.seed(2021)  # fixed seed so the scores are reproducible

# Five rows of integer scores in [40, 100), one column per subject.
subjects = ['국어', '영어', '수학']
data = np.random.randint(40, 100, size=(5, len(subjects)))

df_score1 = pd.DataFrame(data, columns=subjects)
df_score1

Unnamed: 0,국어,영어,수학
0,92,61,97
1,40,85,70
2,62,84,67
3,69,61,69
4,64,52,94


In [33]:
# Label each row with a student name and title the index '이름' in a single
# assignment.
df_score1.index = pd.Index(
    ['Rubin', 'Kiwon', 'jiyoung', 'Seungock', 'Julie'], name='이름'
)
df_score1

Unnamed: 0_level_0,국어,영어,수학
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rubin,92,61,97
Kiwon,40,85,70
jiyoung,62,84,67
Seungock,69,61,69
Julie,64,52,94


In [34]:
# Turn the '이름' index back into an ordinary column. reset_index returns a
# new frame by default, so df_score1 keeps its named index.
df_score2 = df_score1.reset_index()
df_score2

Unnamed: 0,이름,국어,영어,수학
0,Rubin,92,61,97
1,Kiwon,40,85,70
2,jiyoung,62,84,67
3,Seungock,69,61,69
4,Julie,64,52,94


In [35]:
# Make the '이름' column the row index again, modifying df_score2 directly
# (inplace=True) instead of assigning a new frame.
df_score2.set_index('이름', inplace=True)
df_score2

Unnamed: 0_level_0,국어,영어,수학
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rubin,92,61,97
Kiwon,40,85,70
jiyoung,62,84,67
Seungock,69,61,69
Julie,64,52,94


##### 다중 인덱스

In [36]:
np.random.seed(2021)  # fixed seed so the values are reproducible

# Two-level column labels: every combination of (A, B) with (C, D).
col_index = pd.MultiIndex.from_product(
    [['A', 'B'], ['C', 'D']],
    names=['Cidx1', 'Cidx2'],
)
# Two-level row labels: every combination of (M, F) with (id_1, id_2, id_3).
row_index = pd.MultiIndex.from_product(
    [['M', 'F'], [f'id_{i}' for i in range(1, 4)]],
    names=['Ridx1', 'Ridx2'],
)

df4 = pd.DataFrame(
    np.round(np.random.randn(6, 4), 2),
    index=row_index,
    columns=col_index,
)
df4

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C,D,C,D
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,1.49,0.68,-0.42,-0.81
M,id_2,0.56,-0.71,1.13,0.65
M,id_3,0.11,0.42,0.12,-0.84
F,id_1,0.41,0.1,-1.91,1.1
F,id_2,-1.4,-0.23,-1.34,0.3
F,id_3,-0.72,2.54,1.32,0.07
