# 넘파이 API - ndarray의 차원 다루기

## reshape으로 새로운 차원 만들기

In [8]:
import numpy as np
a = np.arange(9)
print(a.shape)

b = a.reshape((1,9)) # 차원을 추가해도 원소의 개수가 바뀌지 않음. 
c = a.reshape((9,1))
print(b.shape)
print(c.shape, '\n')
print(b) # 1차원에서 2차원 벡터로 변환된
print(c)

(9,)
(1, 9)
(9, 1) 

[[0 1 2 3 4 5 6 7 8]]
[[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]


In [10]:
a = np.arange(9)
b = a.reshape((9,1,1))
print(b)
print(b.ndim)

[[[0]]

 [[1]]

 [[2]]

 [[3]]

 [[4]]

 [[5]]

 [[6]]

 [[7]]

 [[8]]]
3


In [11]:
a = np.random.normal(size=(100,200))
b = a.reshape(1,100,200)
c = a.reshape(100,200,1)
print(b)
print(c)

[[[ 0.55155559 -0.68889215 -0.10275958 ... -1.3741744   0.96360457
    0.88740921]
  [ 0.42041343  1.3586086   0.12614845 ... -2.10873921 -1.7696725
    1.07432554]
  [-0.74557472 -0.24593871  1.55133685 ...  0.16252787 -1.20805123
    0.12188254]
  ...
  [ 0.81108552 -1.73029275 -1.86798207 ...  0.28701923 -0.46376178
   -0.39850927]
  [-0.84309431  0.01622533 -0.51725967 ... -0.37488017 -1.75428158
   -1.14907727]
  [ 1.54922526  0.47787542  0.7906656  ... -1.12770473  0.5697377
    0.38154578]]]
[[[ 0.55155559]
  [-0.68889215]
  [-0.10275958]
  ...
  [-1.3741744 ]
  [ 0.96360457]
  [ 0.88740921]]

 [[ 0.42041343]
  [ 1.3586086 ]
  [ 0.12614845]
  ...
  [-2.10873921]
  [-1.7696725 ]
  [ 1.07432554]]

 [[-0.74557472]
  [-0.24593871]
  [ 1.55133685]
  ...
  [ 0.16252787]
  [-1.20805123]
  [ 0.12188254]]

 ...

 [[ 0.81108552]
  [-1.73029275]
  [-1.86798207]
  ...
  [ 0.28701923]
  [-0.46376178]
  [-0.39850927]]

 [[-0.84309431]
  [ 0.01622533]
  [-0.51725967]
  ...
  [-0.37488017]
  [-

In [27]:
a = (10,20)
print(*a, '\n') # 하나하나 따로 나옴

a = np.random.normal(size=(100,150))
print(a.shape)
print(*a.shape)
print((1, *a.shape)) # 자동으로 바뀌는 것을 확인할 수 있음. 
print((*a.shape, 1))

10 20 

(100, 150)
100 150
(1, 100, 150)
(100, 150, 1)


***making new dimensions***

In [29]:
a = np.random.normal(size=(100,200))
b = a.reshape(1, *a.shape)
c = a.reshape(*a.shape, 1)

print(a.shape)
print(b.shape, c.shape)

(100, 200)
(1, 100, 200) (100, 200, 1)


## 슬라이싱으로 새로운 차원 만들기와 expand_dims

In [33]:
import numpy as np
a = np.arange(9)

row_vec1 = a[np.newaxis, :] # np.newaxis (내부적으로는 None)
row_vec2 = a[None, :]

col_vec1 = a[:, np.newaxis]
col_vec2 = a[:, None]

print(row_vec1.shape, row_vec2.shape) # 새로운 차원이 추가됨. 
print(col_vec1.shape, col_vec2.shape)

(1, 9) (1, 9)
(9, 1) (9, 1)


In [34]:
a = np.arange(9)
b = a[np.newaxis, np.newaxis, :]
c = a[np.newaxis, :, np.newaxis]
d = a[:, np.newaxis, np.newaxis]
print(b.shape, c.shape, d.shape)
print(b.ndim, c.ndim, d.ndim) # 1차원에서 3차원으로 변경됨. 

(1, 1, 9) (1, 9, 1) (9, 1, 1)
3 3 3


In [35]:
a = np.random.normal(size=(100,200))
b = a[np.newaxis, ...]
c = a[..., np.newaxis] # 단점 : 가운데에 새로운 축을 넣지 못함 (but 이런 경우가 대다수)
print(b.shape, c.shape)

(1, 100, 200) (100, 200, 1)


In [40]:
a = np.arange(9)
print(a)
b = np.expand_dims(a, axis=0)
c = np.expand_dims(a, axis=(0,2))
d = np.expand_dims(a, axis=-1)
e = np.expand_dims(a, axis=(0,-1))
print(b, '\n')
print(c, '\n')
print(d, '\n')
print(e, '\n')

[0 1 2 3 4 5 6 7 8]
[[0 1 2 3 4 5 6 7 8]] 

[[[0]
  [1]
  [2]
  [3]
  [4]
  [5]
  [6]
  [7]
  [8]]] 

[[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]] 

[[[0]
  [1]
  [2]
  [3]
  [4]
  [5]
  [6]
  [7]
  [8]]] 



## ndarray 쓸모없는 차원 없애기

In [47]:
import numpy as np
a = np.ones(shape=(1,10)) # dummy dimension ==> 1
b = a.reshape((10,))
c = a.reshape(-1)
d = a.flatten()

print(a, '\n')
print(b, '\n')
print(c, '\n')
print(d, '\n')

[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] 

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 



In [53]:
a = np.ones(shape=(1,3,4))
b = np.ones(shape=(3,4,1))
c = a.reshape(*a.shape[1:])
d = b.reshape(*b.shape[:-1])

print(a)
print('-'*65)
print(b)
print('-'*65)
print(c)
print('-'*65)
print(d)
print(d.shape)

[[[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]]
-----------------------------------------------------------------
[[[1.]
  [1.]
  [1.]
  [1.]]

 [[1.]
  [1.]
  [1.]
  [1.]]

 [[1.]
  [1.]
  [1.]
  [1.]]]
-----------------------------------------------------------------
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
-----------------------------------------------------------------
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
(3, 4)


In [54]:
a = np.arange(9).reshape((3,3))
row, col = a[1, :], a[:, 1]
print(a.shape)
print(row.shape)
print(col.shape)

(3, 3)
(3,)
(3,)


In [55]:
# 차원을 없앨때 인덱싱을 사용할 수 있음. 
a = np.arange(9).reshape(1,-1)
b = np.arange(9).reshape(-1,1)

c = a[0, :] # 하나의 차원에 대하여 인덱싱하면 하나의 차원이 떨어져 나온다. 
d = a[:, 0]

print(a.shape, b.shape)
print(c.shape, d.shape)

(1, 9) (9, 1)
(9,) (1,)


***removing dimension***

In [59]:
# 차원을 없앨 때 인덱싱 사용. 
a = np.ones(shape=(1,3,4))
b = np.ones(shape=(3,4,1))

c = a[0, ...]
d = a[..., 0]

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [67]:
a = np.ones(shape=(1,3,4))
c = np.squeeze(a)
d = a.squeeze()

print(a)
print(c.shape)
print(d.shape)

[[[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]]
(3, 4)
(3, 4)


In [70]:
# 쓰지 않는 차원은 모두 없애줌. 
# np.squeeze()

a = np.ones(shape=(1,1,3,1,3,1))
b = a.squeeze()
print(b.shape) # 얘들의 차원은 무엇인지 알려줘야 함 (어떤 차원이 남았는지 알려주어야 함.)

(3, 3)


## ndarray 차원 바꾸기

In [71]:
# 차원들끼리 교체

In [75]:
import numpy as np

# np.swapaxes API ==> 2가지 차원을 바꿀때 유용함. 
a = np.random.normal(size=(3,4,5,6))
b = np.swapaxes(a, 0, 1)
c = np.swapaxes(a, 0, 2)
d = np.swapaxes(a, 0, 3)

print(a.shape)
print(b.shape)
print(c.shape)
print(d.shape)

(3, 4, 5, 6)
(4, 3, 5, 6)
(5, 4, 3, 6)
(6, 4, 5, 3)


In [76]:
a = np.random.normal(size=(3,200,100))
b = np.swapaxes(a, 0, -1)
print(a.shape)
print(b.shape)

(3, 200, 100)
(100, 200, 3)


In [77]:
# np.moveaxis API
a = np.random.normal(size=(3,4,5,6))
b = np.moveaxis(a, source=0, destination=1)
c = np.moveaxis(a, source=0, destination=2)
d = np.moveaxis(a, source=0, destination=-1)

print(a.shape)
print(b.shape)
print(c.shape)
print(d.shape)

(3, 4, 5, 6)
(4, 3, 5, 6)
(4, 5, 3, 6)
(4, 5, 6, 3)


In [78]:
# transpose
# 차원을 그래도 뒤집어줌. 

import numpy as np
a = np.random.normal(size=(3,4))

b = np.transpose(a)
c = a.T

print(a.shape, b.shape, c.shape)

(3, 4) (4, 3) (4, 3)


In [81]:
# 차원을 임의대로 한번에 바꿀 수 있음. 
a = np.random.normal(size=(3,4,5))
b = np.transpose(a, axes = (0,1,2))
c = np.transpose(a, axes = (1,2,0))
d = np.transpose(a, axes = (2,0,1))
e = np.transpose(a, axes = (2,1,0))
print(a.shape)
print(b.shape)
print(c.shape)
print(d.shape)
print(e.shape)


(3, 4, 5)
(3, 4, 5)
(4, 5, 3)
(5, 3, 4)
(5, 4, 3)


# Merging ndarrays

## hstack, vstack

In [82]:
import numpy as np
a = np.random.randint(0, 10, (4,))
b = np.random.randint(0, 10, (4,))

print(a.shape, b.shape)

(4,) (4,)


In [85]:
vstack = np.vstack([a,b])
hstack = np.hstack([a,b])

print(vstack.shape)
print(vstack)
print('-'*65)
print(hstack.shape)
print(hstack)

(2, 4)
[[9 3 3 9]
 [6 8 3 7]]
-----------------------------------------------------------------
(8,)
[9 3 3 9 6 8 3 7]


In [86]:
a = np.random.randint(0,3,(1,3))
b = np.random.randint(0,3,(1,3))

vstack = np.vstack([a,b]) 
hstack = np.hstack([a,b]) # 차원이 바뀜

print(vstack)
print(hstack)

[[0 2 0]
 [2 2 2]]
[[0 2 0 2 2 2]]


In [88]:
a = np.random.randint(0,3,(3,1))
b = np.random.randint(0,3,(3,1))

vstack = np.vstack([a,b])
hstack = np.hstack([a,b])

print(vstack.shape) # 이번에 vstack의 차원이 바뀜. 
print(vstack)
print('-'*65)
print(hstack.shape)
print(hstack)

(6, 1)
[[2]
 [1]
 [2]
 [0]
 [0]
 [1]]
-----------------------------------------------------------------
(3, 2)
[[2 0]
 [1 0]
 [2 1]]


In [99]:
a = np.random.randint(0,10,(3,4))
b = np.random.randint(0,10,(4,))
print(a)
print(b)
print('-'*65)
vstack = np.vstack([a,b])
print(vstack)
print(vstack.shape)
print('-'*65)

a = np.random.randint(0,10,(3,4))
# b = np.random.randint(0,10,(3,)) 이렇게 하면 오류가 남
b = np.random.randint(0,10,(3,1))
hstack = np.hstack([a,b])
print(a)
print(b)
print(hstack)

[[3 0 6 7]
 [6 3 0 4]
 [4 7 2 9]]
[1 7 5 5]
-----------------------------------------------------------------
[[3 0 6 7]
 [6 3 0 4]
 [4 7 2 9]
 [1 7 5 5]]
(4, 4)
-----------------------------------------------------------------
[[0 0 2 6]
 [8 2 8 5]
 [6 6 7 8]]
[[8]
 [7]
 [3]]
[[0 0 2 6 8]
 [8 2 8 5 7]
 [6 6 7 8 3]]


In [104]:
# vstack
dataset = np.empty((0,4)) # (1,4)는 데이터를 차지하고 있음. 0은 데이터 차지 x
print(dataset.shape) # 데이터가 없는 상태

for iter in range(5): # 5번 iteration 
    data_sample = np.random.uniform(0,5, (1,4))
    dataset = np.vstack((dataset, data_sample))
    
print(dataset)

(0, 4)
[[1.40663085e+00 4.38531667e+00 4.87826889e+00 2.76195926e+00]
 [4.62328024e+00 3.50585552e+00 1.48667868e+00 4.71815746e+00]
 [2.90467819e+00 2.67974911e+00 2.60637302e+00 2.16495541e+00]
 [4.71878059e-01 1.53089429e+00 2.38819622e+00 7.29061998e-04]
 [3.94083558e+00 6.26644469e-01 1.26038029e+00 4.25267078e+00]]


In [107]:
# hstack 
dataset = np.empty((4,0)) # (1,4)는 데이터를 차지하고 있음. 0은 데이터 차지 x
print(dataset.shape) # 데이터가 없는 상태

for iter in range(5): # 5번 iteration 
    data_sample = np.random.uniform(0,5, (4,))
    data_sample_reshaped = data_sample.reshape(-1, 1)
    dataset = np.hstack((dataset, data_sample_reshaped))
    
print(dataset)

(4, 0)
[[2.40659305 3.14721685 2.77482897 1.0185376  1.78563353]
 [1.28225694 1.95625965 3.94027607 1.07793003 4.23726382]
 [1.3943962  2.15368332 3.17266806 1.38016145 2.34005238]
 [0.01491237 0.91796148 0.57794864 3.11423016 0.7857276 ]]


In [108]:
# effecient way!
import numpy as np

a = np.random.randint(0,10, (1,4))
b = np.random.randint(0,10, (1,4))
c = np.random.randint(0,10, (1,4))

arr_list = [a,b,c]
vstack = np.vstack(arr_list)
print(vstack)

[[4 4 0 1]
 [7 3 4 9]
 [8 4 9 8]]


In [119]:
# 처음에 빈 리스트를 만들기
# append를 해주고 최종적으로 stack 해주기 
from time import time
start_time = time()
dataset_tmp = list()
for iter in range(100):
    data_sample = np.random.uniform(0,5,(1,4)) 
    dataset_tmp.append(data_sample) # list에 append해주기
dataset = np.vstack(dataset_tmp)
end_time = time()
print('working time : ', end_time - start_time)
print(dataset.shape)

"""
아래와 비교했을 때, 빈리스트를 만들어 먼저 vstack or hstack을 하고 싶은 리스트들을 넣어주고
그 다음에 vstack or hstack을 해주는 것이 시간을 더 줄일 수 있음. 
"""

working time :  0.004045724868774414
(100, 4)


In [120]:
# 비교 
start_time = time()
dataset = np.empty((0,4)) 
for iter in range(100): # 5번 iteration 
    data_sample = np.random.uniform(0,5, (1,4))
    dataset = np.vstack((dataset, data_sample))
end_time = time()
print('working time : ', end_time - start_time)
print(dataset.shape)

working time :  0.0050432682037353516
(100, 4)


## np.concatenate, np.dstack, np.stack

In [121]:
# tensor에 좀더 많이 사용됨
# vstack, hstack은 일반적으로 matrices에 많이 사용됨. 

In [122]:
# np.concatenate
import numpy as np
a = np.random.randint(0,10,(3,))
b = np.random.randint(0,10,(4,))

concat = np.concatenate([a,b])
concat0 = np.concatenate([a,b], axis=0)
print(concat)
print(concat0)

[7 1 9 9 9 2 8]
[7 1 9 9 9 2 8]


In [123]:
import numpy as np
a = np.random.randint(0,10,(3,))
b = np.random.randint(0,10,(4,))
c = np.random.randint(0,10,(5,))

concat = np.concatenate([a,b,c], axis=0)
print(concat)


[2 9 0 3 6 1 0 6 3 0 9 5]


In [124]:
# 간단한 연산의 경우에는 np.concatenate가 아닌 np.hstack, np.vstack을 더 사용해라
a = np.random.randint(0,10,(1,3))
b = np.random.randint(0,10,(1,3))

axis0 = np.concatenate([a,b], axis=0)
axis1 = np.concatenate([a,b], axis=1)
axis_n1 = np.concatenate([a,b], axis=-1)

print(axis0, axis1, axis_n1, sep='\n')

[[7 1 0]
 [2 5 2]]
[[7 1 0 2 5 2]]
[[7 1 0 2 5 2]]


In [127]:
a = np.random.randint(0,10,(3,4)) # row의 개수가 같음. 
b = np.random.randint(0,10,(3,2))

np.concatenate([a,b], axis=1) # axis=0으로 안됨. 

array([[5, 3, 6, 9, 1, 8],
       [8, 8, 9, 3, 0, 8],
       [1, 7, 2, 1, 6, 6]])

In [129]:
# np.concatenate
import numpy as np
a = np.random.randint(0,10,(3,4,5))
b = np.random.randint(0,10,(10,4,5))
concat0 = np.concatenate([a,b], axis=0)
concat0

array([[[5, 9, 5, 6, 0],
        [1, 0, 4, 7, 0],
        [7, 2, 7, 7, 3],
        [2, 4, 1, 2, 9]],

       [[9, 3, 8, 8, 7],
        [2, 4, 3, 6, 6],
        [0, 5, 1, 5, 4],
        [0, 2, 9, 5, 1]],

       [[5, 3, 0, 5, 3],
        [9, 2, 1, 6, 2],
        [5, 1, 8, 0, 6],
        [1, 9, 4, 9, 3]],

       [[7, 0, 0, 9, 0],
        [1, 0, 3, 5, 7],
        [2, 3, 9, 1, 3],
        [7, 7, 0, 5, 8]],

       [[0, 1, 7, 9, 3],
        [7, 4, 4, 8, 3],
        [8, 6, 1, 5, 7],
        [8, 2, 3, 1, 3]],

       [[8, 1, 5, 3, 8],
        [8, 0, 3, 4, 6],
        [4, 9, 2, 9, 9],
        [2, 1, 7, 0, 9]],

       [[8, 6, 3, 5, 0],
        [7, 1, 2, 2, 2],
        [4, 8, 5, 7, 5],
        [2, 2, 5, 5, 7]],

       [[7, 1, 8, 3, 4],
        [8, 6, 4, 8, 4],
        [5, 8, 8, 8, 1],
        [3, 0, 3, 1, 2]],

       [[2, 6, 1, 9, 1],
        [2, 4, 6, 6, 0],
        [1, 5, 4, 6, 6],
        [2, 2, 5, 4, 8]],

       [[3, 5, 9, 3, 0],
        [0, 8, 7, 3, 2],
        [0, 1, 1, 3, 0],
       

In [131]:
# list에 append해주는 것이 연산 속도가 더 빠름
dataset_tmp = list()
for _ in range(100):
    data_sample = np.random.uniform(0,5,(1,4))
    dataset_tmp.append(data_sample)
    
concat = np.concatenate(dataset_tmp, axis=1)
print(concat.shape)

(1, 400)


In [134]:
# 새로운 차원을 만들면서 스택
# 제일 안쪽 차원을 기준으로 
R = np.random.randint(0,10,(100,200))
G = np.random.randint(0,10, size=R.shape)
B = np.random.randint(0,10, size=R.shape)

image = np.dstack([R,G,B]) # dimension stack 
image.shape

(100, 200, 3)

In [135]:
a1 = np.random.randint(0,10,(100,200,3))
a2 = np.random.randint(0,10,(100,200,3))
a3 = np.random.randint(0,10,(100,200,3))

d = np.dstack([a1,a2,a3])
d.shape


(100, 200, 9)

In [139]:
# np.stack
# 새로운 차원을 만들면서 stack

R = np.random.randint(0,10,(100,200))
G = np.random.randint(0,10, size=R.shape)
B = np.random.randint(0,10, size=R.shape)

print(np.stack([R,G,B]).shape)


R = np.random.randint(0,10,(100,200, 300))
G = np.random.randint(0,10, size=R.shape)
B = np.random.randint(0,10, size=R.shape)

print(np.stack([R,G,B]).shape)

# np.dstack보다 더 general하게 사용가능
R = np.random.randint(0,10,(100,200, 300))
G = np.random.randint(0,10, size=R.shape)
B = np.random.randint(0,10, size=R.shape)
print('-'*65)
print(np.stack([R,G,B], axis=1).shape)


(3, 100, 200)
(3, 100, 200, 300)
-----------------------------------------------------------------
(100, 3, 200, 300)
