# Random Shuffling

## suffling a vector

In [1]:
import numpy as np
a = np.arange(10)
np.random.shuffle(a) # return 값이 없음. 
print(a) # 이렇게 두 줄로 만들어주기 


[0 1 2 3 4 5 6 7 8 9]
[9 6 2 3 7 4 5 8 1 0]


## shuffling a matrix

In [3]:
import numpy as np
M = np.arange(15).reshape(5,-1)
print(M)

np.random.shuffle(M) # 가장 바깥쪽 차원부터 shuffling, 여기서는 row
print(M)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]
[[ 3  4  5]
 [12 13 14]
 [ 9 10 11]
 [ 6  7  8]
 [ 0  1  2]]


In [5]:
import numpy as np
# row-wise
M = np.arange(15).reshape(5,-1)
print(M)

# column-wise로 shuffling을 해주고 싶을 때
N = np.arange(15).reshape(5,-1)
print(N)
N = N.T  # transpose
np.random.shuffle(N) # shuffling
N = N.T # transpose again
print(N)


[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]
[[ 0  2  1]
 [ 3  5  4]
 [ 6  8  7]
 [ 9 11 10]
 [12 14 13]]


## shuffling a matrix

In [17]:
import numpy as np
from termcolor import colored
np.set_printoptions(precision=2)

N = 10
a = np.linspace(0,10,N)
b = np.linspace(0,100,N)
print(colored('before shuffling', 'green'))
print(a)
print(b)

indices = np.arange(N)
np.random.shuffle(indices)
a_shuffle = a[indices] # 같은 위치로 random shuffling
b_shuffle = b[indices] # 같은 위치로 random shuffling

print(colored('after shuffling', 'red')) 
print(a_shuffle)
print(b_shuffle)

[32mbefore shuffling[0m
[ 0.    1.11  2.22  3.33  4.44  5.56  6.67  7.78  8.89 10.  ]
[  0.    11.11  22.22  33.33  44.44  55.56  66.67  77.78  88.89 100.  ]
[31mafter shuffling[0m
[ 5.56  3.33  0.    8.89 10.    4.44  2.22  1.11  7.78  6.67]
[ 55.56  33.33   0.    88.89 100.    44.44  22.22  11.11  77.78  66.67]


## shuffling X,Y data

In [25]:
import numpy as np
from termcolor import colored

N, n_feature = 5, 6
n_class = 100

x_data = np.random.normal(0,1,(N, n_feature))
y_data = np.random.randint(0, n_class, N)
print(colored('before shuffling', 'green'))
print(x_data.shape,'\n', x_data)
print(y_data.shape, '\n', y_data)

indices=np.arange(N)
np.random.shuffle(indices)
x_data = x_data[indices]
y_data = y_data[indices]

print(colored('after shuffling', 'red'))
print(x_data.shape,'\n', x_data)
print(y_data.shape, '\n', y_data)

[32mbefore shuffling[0m
(5, 6) 
 [[ 0.97 -0.5   2.01  0.64 -2.09 -0.03]
 [-0.86 -0.75  0.01 -0.78 -1.03  1.69]
 [ 1.02  0.7  -0.64 -0.72  0.4  -1.32]
 [ 0.57  0.28  1.56 -1.21 -1.36  0.59]
 [ 0.67 -0.2   0.8  -1.46  0.07 -0.54]]
(5,) 
 [45 27 91 30 65]
[31mafter shuffling[0m
(5, 6) 
 [[ 0.57  0.28  1.56 -1.21 -1.36  0.59]
 [ 0.67 -0.2   0.8  -1.46  0.07 -0.54]
 [-0.86 -0.75  0.01 -0.78 -1.03  1.69]
 [ 1.02  0.7  -0.64 -0.72  0.4  -1.32]
 [ 0.97 -0.5   2.01  0.64 -2.09 -0.03]]
(5,) 
 [30 65 27 91 45]


## shuffling and getting minibatches

In [27]:
import numpy as np

N, n_feature = 200, 5
n_class = 100
batch_size = 32
iters = 8

x_data = np.random.normal(0,1,(N, n_feature))
y_data = np.random.randint(0, n_class, N)

for iter_ in range(iters):
    indices = np.arange(N)
    np.random.shuffle(indices)
    batch_indices = indices[:batch_size] # 200개 중에서 앞에 32개만 가져옴
    
    x_batch = x_data[batch_indices]
    y_batch = y_data[batch_indices]
    
    print('iter : ', iter_)
    print('X/Y : ', x_batch.shape, y_batch.shape)
    
    
"""
indices를 반복문 돌릴때마다 가져옴
중복 가능
"""

iter :  0
X/Y :  (32, 5) (32,)
iter :  1
X/Y :  (32, 5) (32,)
iter :  2
X/Y :  (32, 5) (32,)
iter :  3
X/Y :  (32, 5) (32,)
iter :  4
X/Y :  (32, 5) (32,)
iter :  5
X/Y :  (32, 5) (32,)
iter :  6
X/Y :  (32, 5) (32,)
iter :  7
X/Y :  (32, 5) (32,)


In [28]:
import numpy as np

N, n_feature = 200, 5
n_class = 100
batch_size = 32
iters = 8

x_data = np.random.normal(0,1,(N, n_feature))
y_data = np.random.randint(0, n_class, N)


indices = np.arange(N)
np.random.shuffle(indices)
for iter_ in range(iters):
    batch_indices = indices[iter_*batch_size : (iter_+1)*batch_size]
    
    x_batch = x_data[batch_indices]
    y_batch = y_data[batch_indices]
    
    print('iter : ', iter_)
    print('X/Y : ', x_batch.shape, y_batch.shape)
    
"""
달라진 점: 하나의 mini batch씩 가져옴 
무조건 데이터 셋을 다 보게 됨. 
중복 x
"""

iter :  0
X/Y :  (32, 5) (32,)
iter :  1
X/Y :  (32, 5) (32,)
iter :  2
X/Y :  (32, 5) (32,)
iter :  3
X/Y :  (32, 5) (32,)
iter :  4
X/Y :  (32, 5) (32,)
iter :  5
X/Y :  (32, 5) (32,)
iter :  6
X/Y :  (8, 5) (8,)
iter :  7
X/Y :  (0, 5) (0,)


# One-hot Encoding/Decoding

## One-hot Encoding with one sample

In [48]:
"""label 자체가 인덱스가 됨"""
import numpy as np
n_class = 5

for _ in range(5):
    label = np.random.randint(0,n_class, 1)
    print(f"before one hot encoding : {label}")
    
    label_enc = np.zeros(n_class)
    label_enc[label] = 1 # label 자체가 index가 됨. 
    print(f'after encoding :\n{label_enc}')

before one hot encoding : [4]
after encoding :
[0. 0. 0. 0. 1.]
before one hot encoding : [2]
after encoding :
[0. 0. 1. 0. 0.]
before one hot encoding : [1]
after encoding :
[0. 1. 0. 0. 0.]
before one hot encoding : [1]
after encoding :
[0. 1. 0. 0. 0.]
before one hot encoding : [4]
after encoding :
[0. 0. 0. 0. 1.]


## one-hot decoding with one sample

In [68]:
import numpy as np
n_class = 5

label = np.random.randint(0, n_class, 1)
label_enc = np.zeros((n_class, ))
label_enc[label] = 1

print(f"before one-hot decoding : {label_enc}")

# method.1
label = np.nonzero(label_enc)[0] # 내가 가진 인덱스를 다시 찾음.
print(f"label(method.1) : {label}")

# method.2
label = np.where(label_enc == 1)[0] # 1인 부분을 찾기
print(f"label(method.2) : {label}")

# method.3
label = np.argmax(label_enc).flatten() # flatten 하면 vector로 변환 가능. 
print(f"label(method3) : {label}")

before one-hot decoding : [0. 0. 0. 0. 1.]
label(method.1) : [4]
label(method.2) : [4]
label(method3) : [4]


## one-hot encoding with n samples

***선생님 설명***

In [107]:
import numpy as np
import warnings
warnings.filterwarnings('ignore')
N, n_class = 10, 5

labels = np.random.randint(0, n_class, (N, ))
print(f"before one hot encoding : {labels}")

"""
0을 미리 깔아놓고
인덱싱을 통해 위치를 찾아서 1을 깔아줌.
"""
labels_enc = np.zeros((N, n_class), dtype=np.int)
labels_enc[np.arange(N), labels] = 1 # for loop 필요 없음. 
labels_enc

before one hot encoding : [2 4 1 2 4 4 2 1 1 1]


array([[0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 1, 0, 0, 0]])

In [119]:
import numpy as np
import warnings
warnings.filterwarnings('ignore')
N, n_class = 10, 5

labels = np.random.randint(0, n_class, (N, ))
labels_enc = np.zeros((N, n_class), dtype=np.int)
labels_enc[np.arange(N), labels] = 1 # for loop 필요 없음. 
print(f'before decoding :\n{labels_enc}')

# method.1
labels = np.nonzero(labels_enc)[1]
print(f"labels(method.1) : {labels}")

# method.2
labels = np.where(labels_enc == 1)[1]
print(f'labels(method.2) : {labels}')

# method.3
labels = np.argmax(labels_enc, axis=1)
print(f"labels(method.3) : {labels}")

before decoding :
[[0 1 0 0 0]
 [1 0 0 0 0]
 [0 0 1 0 0]
 [1 0 0 0 0]
 [0 0 0 0 1]
 [0 1 0 0 0]
 [0 0 0 0 1]
 [0 0 1 0 0]
 [1 0 0 0 0]
 [0 0 1 0 0]]
labels(method.1) : [1 0 2 0 4 1 4 2 0 2]
labels(method.2) : [1 0 2 0 4 1 4 2 0 2]
labels(method.3) : [1 0 2 0 4 1 4 2 0 2]


In [None]:
!git init
!git add .
!git commit -m "0413 numpy exercise part 3 added"
!git pus