# 넘파이 배열 합치기
데이터과학 분야에서는 행벡터가 기본 형태
- 차원이 동일한 배열을 합치는 경우
    - np.hstack, np.vstack, np.concatenate 함수를 사용한다.
- 배열의 형태에 따라 적절한 함수를 골라 쓰면 가독성이 높아진다.
    - 행렬: np.hstack, np.vstack, np.dstack
    - 일반적인 텐서: np.concatenate, np.stack

In [1]:
import numpy as np

## ✅ np.hstack, np.vstack
- 합칠 배열들을 리스트(대괄호)나 튜플(소괄호)로 묶어 하나의 인자로 전달한다.
- 주로 행렬에 대해 사용한다.

In [2]:
# Two 1-D arrays of length 4 filled with random integers in [0, 10).
a = np.random.randint(0, 10, size=4)
b = np.random.randint(0, 10, size=4)
print(f'a = {a}')
print(f'b = {b}')

# With 1-D inputs, vstack uses each array as one row -> (2, 4),
# while hstack joins them end to end -> (8,).
vstack = np.vstack((a, b))
hstack = np.hstack((a, b))

print(f'\nvstack: {vstack.shape}\n', vstack)
print(f'hstack: {hstack.shape}\n', hstack)

a = [6 7 4 1]
b = [7 7 2 2]

vstack: (2, 4)
 [[6 7 4 1]
 [7 7 2 2]]
hstack: (8,)
 [6 7 4 1 7 7 2 2]


In [3]:
# Two row vectors stored as 2-D arrays of shape (1, 3).
a = np.random.randint(0, 10, size=(1, 3))
b = np.random.randint(0, 10, size=(1, 3))
print(f'a = {a}')
print(f'b = {b}')

# vstack adds rows -> (2, 3); hstack adds columns -> (1, 6).
vstack = np.vstack([a, b])
hstack = np.hstack([a, b])

print(f'\nvstack: {vstack.shape}\n', vstack)
print(f'hstack: {hstack.shape}\n', hstack)

a = [[8 0 8]]
b = [[4 3 1]]

vstack: (2, 3)
 [[8 0 8]
 [4 3 1]]
hstack: (1, 6)
 [[8 0 8 4 3 1]]


In [4]:
# Column vectors: 2-D arrays of shape (3, 1).
a = np.random.randint(0, 10, size=(3, 1))
b = np.random.randint(0, 10, size=(3, 1))
print(f'a =\n {a}')
print(f'b =\n {b}')

# vstack appends b below a -> (6, 1); hstack places b beside a -> (3, 2).
vstack = np.vstack([a, b])
hstack = np.hstack([a, b])

print(f'\nvstack: {vstack.shape}\n', vstack)
print(f'hstack: {hstack.shape}\n', hstack)

a =
 [[9]
 [1]
 [4]]
b =
 [[5]
 [4]
 [4]]

vstack: (6, 1)
 [[9]
 [1]
 [4]
 [5]
 [4]
 [4]]
hstack: (3, 2)
 [[9 5]
 [1 4]
 [4 4]]


In [5]:
# A (3, 4) matrix and a 1-D array whose length equals the matrix's column count.
a = np.random.randint(0, 10, size=(3, 4))
b = np.random.randint(0, 10, size=4)
print(f'a =\n {a}')
print(f'b =\n {b}')

# vstack treats the 1-D array as a single row, so b becomes the new last row.
vstack = np.vstack((a, b))
print(f'\nvstack: {vstack.shape}\n', vstack)

# ⚠️ Raises an error: hstack cannot join the 2-D `a` with the 1-D `b`.
# hstack = np.hstack([a, b])
# print('hstack: {}\n'.format(hstack.shape), hstack)

a =
 [[0 2 5 0]
 [9 8 4 2]
 [7 4 4 8]]
b =
 [5 2 4 2]

vstack: (4, 4)
 [[0 2 5 0]
 [9 8 4 2]
 [7 4 4 8]
 [5 2 4 2]]


In [6]:
# A (3, 4) matrix and a 1-D array whose length equals the matrix's row count.
a = np.random.randint(0, 10, size=(3, 4))
b = np.random.randint(0, 10, size=3)
print(f'a =\n {a}')
print(f'b =\n {b}')

# ⚠️ Raises an error: hstack cannot join the 2-D `a` with the 1-D `b`.
# hstack = np.hstack([a, b])
# print('\nhstack: {}\n'.format(hstack.shape), hstack)

# 📍 To append new data on the right (hstack), first turn it into a
# column vector of shape (3, 1).
hstack = np.hstack((a, b[:, np.newaxis]))
print(f'hstack: {hstack.shape}\n', hstack)

a =
 [[7 6 0 9]
 [1 6 2 1]
 [4 6 4 8]]
b =
 [9 7 2]
hstack: (3, 5)
 [[7 6 0 9 9]
 [1 6 2 1 7]
 [4 6 4 8 2]]


In [7]:
# Example: growing an array with vstack inside a for loop (not recommended).
# Every iteration builds a brand-new array from the old one plus one row.
# np.empty defaults to a float dtype, so the stacked result is float
# even though each sample holds integers.
dataset = np.empty(shape=(0, 4))
for i in range(5):
    sample = np.random.randint(0, 5, size=(1, 4))
    dataset = np.vstack((dataset, sample))
    print(f'iter: {i}', f'shape: {dataset.shape}')

print(f'\ndataset: {dataset.shape}\n', dataset)

iter: 0 shape: (1, 4)
iter: 1 shape: (2, 4)
iter: 2 shape: (3, 4)
iter: 3 shape: (4, 4)
iter: 4 shape: (5, 4)

dataset: (5, 4)
 [[1. 0. 4. 3.]
 [1. 3. 2. 4.]
 [4. 4. 1. 0.]
 [2. 3. 2. 2.]
 [3. 4. 1. 3.]]


In [8]:
# Example: growing an array with hstack inside a for loop (not recommended).
# Every iteration builds a brand-new array from the old one plus one column.
# np.empty defaults to a float dtype, so the stacked result is float
# even though each sample holds integers.
dataset = np.empty(shape=(4, 0))
for i in range(5):
    sample = np.random.randint(0, 5, size=(4, 1))
    dataset = np.hstack((dataset, sample))
    print(f'iter: {i}', f'shape: {dataset.shape}')

print(f'\ndataset: {dataset.shape}\n', dataset)

iter: 0 shape: (4, 1)
iter: 1 shape: (4, 2)
iter: 2 shape: (4, 3)
iter: 3 shape: (4, 4)
iter: 4 shape: (4, 5)

dataset: (4, 5)
 [[3. 1. 1. 3. 2.]
 [3. 2. 0. 3. 2.]
 [1. 0. 2. 0. 1.]
 [3. 0. 1. 0. 3.]]


---

In [9]:
# Better than stacking inside a loop, option 1:
# create the three (1, 4) row vectors, then stack them in a single call.
a, b, c = (np.random.randint(0, 10, size=(1, 4)) for _ in range(3))

# All at once: pass the whole list to vstack.
arr_list = [a, b, c]

vstack = np.vstack(arr_list)
print(f'vstack: {vstack.shape}\n', vstack)

vstack: (3, 4)
 [[1 6 4 7]
 [5 6 6 0]
 [7 6 9 7]]


In [10]:
# Better than stacking inside a loop, option 2:
# collect every (1, 4) sample in a plain Python list first,
# then call vstack once on the whole list.
dataset_tmp = [np.random.randint(0, 5, size=(1, 4)) for _ in range(10)]
dataset = np.vstack(dataset_tmp)
print(f'dataset: {dataset.shape}\n', dataset)

dataset: (10, 4)
 [[1 3 4 2]
 [0 1 0 3]
 [0 1 2 1]
 [3 3 2 2]
 [4 1 0 4]
 [1 4 4 3]
 [4 4 3 2]
 [3 1 1 1]
 [0 4 3 2]
 [0 3 2 3]]


## ✅ np.concatenate
- 합칠 배열들을 리스트(대괄호)나 튜플(소괄호)로 묶어 하나의 인자로 전달한다.
- 주로 일반적인 텐서에 대해 사용한다. (compared to np.hstack/np.vstack)
- np.hstack/np.vstack을 대체 가능하다.

In [11]:
# 1-D: the arrays may have different lengths (3 and 4).
a = np.random.randint(0, 10, size=3)
b = np.random.randint(0, 10, size=4)
print(f'a =\n {a}')
print(f'b =\n {b}')

# Without an explicit axis, concatenate joins along axis 0.
concat = np.concatenate((a, b))
print(f'\nconcat: {concat.shape}\n', concat)

# Passing axis=0 explicitly gives the same result.
concat_axis0 = np.concatenate((a, b), axis=0)
print(f'\nconcat_axis0: {concat_axis0.shape}\n', concat_axis0)

a =
 [2 3 9]
b =
 [1 0 1 2]

concat: (7,)
 [2 3 9 1 0 1 2]

concat_axis0: (7,)
 [2 3 9 1 0 1 2]


In [12]:
# 2-D (axis=0, 1): two arrays of shape (1, 3).
a = np.random.randint(0, 10, size=(1, 3))
b = np.random.randint(0, 10, size=(1, 3))
print(f'a =\n {a}')
print(f'b =\n {b}')

# axis=0 joins along rows -> (2, 3).
concat_axis0 = np.concatenate((a, b), axis=0)
print(f'\nconcat_axis0: {concat_axis0.shape}\n', concat_axis0)

# axis=1 joins along columns -> (1, 6).
concat_axis1 = np.concatenate((a, b), axis=1)
print(f'\nconcat_axis1: {concat_axis1.shape}\n', concat_axis1)

# axis=-1 is the last axis, which for 2-D input is the same as axis=1.
concat_axis_m1 = np.concatenate((a, b), axis=-1)
print(f'\nconcat_axis_m1: {concat_axis_m1.shape}\n', concat_axis_m1)

a =
 [[9 3 7]]
b =
 [[0 8 2]]

concat_axis0: (2, 3)
 [[9 3 7]
 [0 8 2]]

concat_axis1: (1, 6)
 [[9 3 7 0 8 2]]

concat_axis_m1: (1, 6)
 [[9 3 7 0 8 2]]


In [13]:
# 2-D: same number of rows (3), different number of columns (4 and 2).
a = np.random.randint(0, 10, size=(3, 4))
b = np.random.randint(0, 10, size=(3, 2))
print(f'a =\n {a}')
print(f'b =\n {b}')

# Can be replaced by hstack.
concat_axis1 = np.concatenate((a, b), axis=1)
print(f'\nconcat_axis1: {concat_axis1.shape}\n', concat_axis1)

# ⚠️ Raises an error: the column counts (4 vs 2) differ, so the arrays
# cannot be joined along axis 0.
# concat_axis0 = np.concatenate([a, b], axis=0)
# print('\nconcat_axis0: {}\n'.format(concat_axis0.shape), concat_axis0)

a =
 [[2 5 8 1]
 [5 6 4 3]
 [3 0 4 4]]
b =
 [[2 7]
 [4 0]
 [7 7]]

concat_axis1: (3, 6)
 [[2 5 8 1 2 7]
 [5 6 4 3 4 0]
 [3 0 4 4 7 7]]


In [14]:
# 3-D (axis=0, 1, 2): the arrays differ only in the size of axis 0 (5 vs 8).
a = np.random.randint(0, 10, size=(5, 4, 5))
b = np.random.randint(0, 10, size=(8, 4, 5))
print(f'a shape: {a.shape}')
print(f'b shape: {b.shape}')

# Joining along axis 0 adds up the axis-0 sizes -> (13, 4, 5).
concat_axis0 = np.concatenate((a, b), axis=0)
print(f'\nconcat_axis0: {concat_axis0.shape}')

# ⚠️ Raises an error: the axis-0 sizes (5 vs 8) differ, so the arrays
# cannot be joined along axis 1.
# concat_axis1 = np.concatenate([a, b], axis=1)
# print('\nconcat_axis1: {}'.format(concat_axis1.shape))

a shape: (5, 4, 5)
b shape: (8, 4, 5)

concat_axis0: (13, 4, 5)


In [15]:
# 3-D: the arrays differ only in the size of axis 1 (4 vs 6).
a = np.random.randint(0, 10, size=(3, 4, 5))
b = np.random.randint(0, 10, size=(3, 6, 5))
print(f'a shape: {a.shape}')
print(f'b shape: {b.shape}')

# Joining along axis 1 adds up the axis-1 sizes -> (3, 10, 5).
concat_axis1 = np.concatenate((a, b), axis=1)
print(f'\nconcat_axis1: {concat_axis1.shape}')

# ⚠️ Raises an error: the axis-1 sizes (4 vs 6) differ, so the arrays
# cannot be joined along axis 0.
# concat_axis0 = np.concatenate([a, b], axis=0)
# print('\nconcat_axis0: {}'.format(concat_axis0.shape))

a shape: (3, 4, 5)
b shape: (3, 6, 5)

concat_axis1: (3, 10, 5)


In [16]:
# 3-D: the arrays differ only in the size of axis 2 (3 vs 7).
a = np.random.randint(0, 10, size=(3, 2, 3))
b = np.random.randint(0, 10, size=(3, 2, 7))
print(f'a shape: {a.shape}')
print(f'b shape: {b.shape}')

# Joining along axis 2 adds up the axis-2 sizes -> (3, 2, 10).
concat_axis2 = np.concatenate((a, b), axis=2)
print(f'\nconcat_axis2: {concat_axis2.shape}')

a shape: (3, 2, 3)
b shape: (3, 2, 7)

concat_axis2: (3, 2, 10)


In [17]:
# For-loop example 1 (axis=0; vstack could be used instead).
# Gather the (1, 4) samples in a list, then join them with one call.
dataset_tmp = []
for _ in range(10):
    dataset_tmp.append(np.random.randint(0, 5, size=(1, 4)))

dataset = np.concatenate(dataset_tmp, axis=0)
print(f'dataset: {dataset.shape}\n', dataset)

dataset: (10, 4)
 [[2 4 2 1]
 [1 3 2 3]
 [1 4 4 4]
 [3 1 1 3]
 [3 0 3 4]
 [1 1 0 2]
 [0 2 1 3]
 [3 3 0 1]
 [1 1 3 0]
 [0 4 0 4]]


In [18]:
# For-loop example 2 (axis=1; hstack could be used instead).
# Gather the (4, 1) samples in a list, then join them with one call.
dataset_tmp = []
for _ in range(10):
    dataset_tmp.append(np.random.randint(0, 5, size=(4, 1)))

dataset = np.concatenate(dataset_tmp, axis=1)
print(f'dataset: {dataset.shape}\n', dataset)

dataset: (4, 10)
 [[1 4 2 1 3 3 0 0 3 2]
 [1 1 0 4 2 0 3 0 2 0]
 [1 0 1 1 3 0 3 1 3 1]
 [0 3 2 2 2 3 4 2 3 0]]


## ✅ np.dstack
주로 행렬(2차원 배열)에 대해 사용하며, 세 번째 축(깊이 방향)을 따라 배열을 쌓는다.

In [20]:
# Three (100, 200) matrices standing in for the R, G and B channels.
R = np.random.randint(0, 10, size=(100, 200))
G = np.random.randint(0, 10, R.shape)
B = np.random.randint(0, 10, R.shape)

# dstack joins 2-D inputs along a new third axis -> (100, 200, 3).
image = np.dstack((R, G, B))
print(f'image shape: {image.shape}')

image shape: (100, 200, 3)


In [21]:
# Three arrays that are already 3-D, each of shape (100, 200, 3).
a = np.random.randint(0, 10, size=(100, 200, 3))
b = np.random.randint(0, 10, a.shape)
c = np.random.randint(0, 10, a.shape)

# For 3-D inputs dstack joins along the existing third axis -> (100, 200, 9).
d = np.dstack((a, b, c))
print(f'd shape: {d.shape}')

d shape: (100, 200, 9)


## ✅ np.stack

In [22]:
# Three 2-D arrays of identical shape (100, 200).
a, b, c = (np.random.randint(0, 10, size=(100, 200)) for _ in range(3))

# stack inserts a new axis (axis 0 by default) -> (3, 100, 200).
stacked_shape = np.stack((a, b, c)).shape
print('ndim == 2:', stacked_shape)

ndim == 2: (3, 100, 200)


In [24]:
# Three 3-D arrays of identical shape (100, 200, 300).
a, b, c = (np.random.randint(0, 10, size=(100, 200, 300)) for _ in range(3))

# stack inserts a new axis (axis 0 by default) -> (3, 100, 200, 300).
stacked_shape = np.stack((a, b, c)).shape
print('ndim == 3:', stacked_shape)

ndim == 3: (3, 100, 200, 300)


In [27]:
# Three 3-D arrays of identical shape (100, 200, 300).
a, b, c = (np.random.randint(0, 10, size=(100, 200, 300)) for _ in range(3))

# The axis argument picks where the new length-3 axis goes in the result;
# axis=-1 puts it last, the same position as axis=3 here.
for axis in (0, 1, 2, 3, -1):
    print(f'axis={axis}:', np.stack((a, b, c), axis=axis).shape)

axis=0: (3, 100, 200, 300)
axis=1: (100, 3, 200, 300)
axis=2: (100, 200, 3, 300)
axis=3: (100, 200, 300, 3)
axis=-1: (100, 200, 300, 3)
