In [2]:
import pandas as pd
import numpy as np

## 배열 연산

* NumPy의 배열 연산은 벡터화(vectorized) 연산을 사용
* 일반적으로 NumPy의 범용 함수(universal functions)를 통해 구현
* 배열 요소에 대한 반복적인 계산을 효율적으로 수행

### 브로드캐스팅(Broadcasting)

In [6]:
a1 = np.array([1, 2, 3])
print(a1)
print(a1 + 5)

a2 = np.arange(1, 10).reshape(3, 3)
print(a2)
print(a1 + a2)

b2 = np.array([1, 2, 3]).reshape(3, 1)
print(b2)
print(a1 + b2)

[1 2 3]
[6 7 8]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[ 2  4  6]
 [ 5  7  9]
 [ 8 10 12]]
[[1]
 [2]
 [3]]
[[2 3 4]
 [3 4 5]
 [4 5 6]]


### 산술 연산(Arithmetic Operators)

In [15]:
a1 = np.arange(1, 10)
print(a1)
print(a1 + 1)
print(np.add(a1, 10))
print(a1 - 2)
print(np.subtract(a1, 10))
print(-a1)
print(np.negative(a1))
print(a1 * 2)
print(np.multiply(a1, 2))
print(a1 / 2)
print(np.divide(a1, 2))
print(a1 // 2)
print(np.floor_divide(a1, 2))    # 나눈값 내림
print(a1 ** 2)
print(np.power(a1, 2))
print(a1 % 2)
print(np.mod(a1, 2))

[1 2 3 4 5 6 7 8 9]
[ 2  3  4  5  6  7  8  9 10]
[11 12 13 14 15 16 17 18 19]
[-1  0  1  2  3  4  5  6  7]
[-9 -8 -7 -6 -5 -4 -3 -2 -1]
[-1 -2 -3 -4 -5 -6 -7 -8 -9]
[-1 -2 -3 -4 -5 -6 -7 -8 -9]
[ 2  4  6  8 10 12 14 16 18]
[ 2  4  6  8 10 12 14 16 18]
[0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5]
[0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5]
[0 1 1 2 2 3 3 4 4]
[0 1 1 2 2 3 3 4 4]
[ 1  4  9 16 25 36 49 64 81]
[ 1  4  9 16 25 36 49 64 81]
[1 0 1 0 1 0 1 0 1]
[1 0 1 0 1 0 1 0 1]


In [17]:
a1 = np.arange(1, 10)
print(a1)
b1 = np.random.randint(1, 10, size=9)
print(b1)
print(a1 + b1)
print(a1 - b1)
print(a1 * b1)
print(a1 / b1)
print(a1 // b1)
print(a1 ** b1)
print(a1 % b1)

[1 2 3 4 5 6 7 8 9]
[8 7 8 9 8 9 9 7 7]
[ 9  9 11 13 13 15 16 15 16]
[-7 -5 -5 -5 -3 -3 -2  1  2]
[ 8 14 24 36 40 54 63 56 63]
[0.125      0.28571429 0.375      0.44444444 0.625      0.66666667
 0.77777778 1.14285714 1.28571429]
[0 0 0 0 0 0 0 1 1]
[       1      128     6561   262144   390625 10077696 40353607  2097152
  4782969]
[1 2 3 4 5 6 7 1 2]


In [21]:
a2 = np.arange(1, 10).reshape(3, 3)
print(a2)
b2 = np.random.randint(1, 10, size=(3, 3))
print(b2)
print(a2 + b2)
print(a2 - b2)
print(a2 * b2)
print(a2 / b2)
print(a2 // b2)
print(a2 ** b2)
print(a2 % b2)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[4 2 2]
 [6 4 6]
 [2 4 6]]
[[ 5  4  5]
 [10  9 12]
 [ 9 12 15]]
[[-3  0  1]
 [-2  1  0]
 [ 5  4  3]]
[[ 4  4  6]
 [24 20 36]
 [14 32 54]]
[[0.25       1.         1.5       ]
 [0.66666667 1.25       1.        ]
 [3.5        2.         1.5       ]]
[[0 1 1]
 [0 1 1]
 [3 2 1]]
[[     1      4      9]
 [  4096    625  46656]
 [    49   4096 531441]]
[[1 0 1]
 [4 1 0]
 [1 0 3]]


#### 절대값 함수(Absolute Function)

* `absolute()`, `abs()`: 내장된 절대값 함수

In [22]:
a1 = np.random.randint(-10, 10, size=5)
print(a1)
print(np.absolute(a1))
print(np.abs(a1))

[-8  5 -8  8  8]
[8 5 8 8 8]
[8 5 8 8 8]


#### 제곱/제곱근 함수

* `square`, `sqrt`: 제곱, 제곱근 함수

In [23]:
print(a1)
print(np.square(a1))
print(np.sqrt(a1))

[-8  5 -8  8  8]
[64 25 64 64 64]
[       nan 2.23606798        nan 2.82842712 2.82842712]


  This is separate from the ipykernel package so we can avoid doing imports until


#### 지수와 로그 함수 (Exponential and Log Function)


In [25]:
a1 = np.random.randint(1, 10, size=5)
print(a1)
print(np.exp(a1))
print(np.exp2(a1))
print(np.power(a1, 2))

[1 7 7 4 5]
[   2.71828183 1096.63315843 1096.63315843   54.59815003  148.4131591 ]
[  2. 128. 128.  16.  32.]
[ 1 49 49 16 25]


In [26]:
print(a1)
print(np.log(a1))
print(np.log2(a1))
print(np.log10(a1))

[1 7 7 4 5]
[0.         1.94591015 1.94591015 1.38629436 1.60943791]
[0.         2.80735492 2.80735492 2.         2.32192809]
[0.         0.84509804 0.84509804 0.60205999 0.69897   ]


#### 삼각 함수(Trigonometrical Function)


In [27]:
t = np.linspace(0, np.pi, 3)
print(t)
print(np.sin(t))
print(np.cos(t))
print(np.tan(t))

[0.         1.57079633 3.14159265]
[0.0000000e+00 1.0000000e+00 1.2246468e-16]
[ 1.000000e+00  6.123234e-17 -1.000000e+00]
[ 0.00000000e+00  1.63312394e+16 -1.22464680e-16]


In [28]:
x = [-1, 0, 1]
print(x)
print(np.arcsin(x))
print(np.arccos(x))
print(np.arctan(x))

[-1, 0, 1]
[-1.57079633  0.          1.57079633]
[3.14159265 1.57079633 0.        ]
[-0.78539816  0.          0.78539816]


### 집계 함수(Aggregate Functions)

#### sum(): 합 계산

In [29]:
a2 = np.random.randint(1, 10, size=(3, 3))
print(a2)
print(a2.sum(), np.sum(a2))
print(a2.sum(axis=0), np.sum(a2, axis=0))
print(a2.sum(axis=1), np.sum(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
41 41
[14 13 14] [14 13 14]
[13 14 14] [13 14 14]


#### cumsum(): 누적합 계산

In [30]:
print(a2)
print(np.cumsum(a2))
print(np.cumsum(a2, axis=0))
print(np.cumsum(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
[ 2  8 13 16 20 27 36 39 41]
[[ 2  6  5]
 [ 5 10 12]
 [14 13 14]]
[[ 2  8 13]
 [ 3  7 14]
 [ 9 12 14]]


#### diff(): 차분 계산

In [31]:
print(a2)
print(np.diff(a2))
print(np.diff(a2, axis=0))
print(np.diff(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
[[ 4 -1]
 [ 1  3]
 [-6 -1]]
[[ 1 -2  2]
 [ 6 -1 -5]]
[[ 4 -1]
 [ 1  3]
 [-6 -1]]


#### prod(): 곱 계산

In [32]:
print(a2)
print(np.prod(a2))
print(np.prod(a2, axis=0))
print(np.prod(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
272160
[54 72 70]
[60 84 54]


#### cumprod(): 누적곱 계산

In [33]:
print(a2)
print(np.cumprod(a2))
print(np.cumprod(a2, axis=0))
print(np.cumprod(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
[     2     12     60    180    720   5040  45360 136080 272160]
[[ 2  6  5]
 [ 6 24 35]
 [54 72 70]]
[[ 2 12 60]
 [ 3 12 84]
 [ 9 27 54]]


#### dot()/matmul(): 점곱/행렬곱 계산

In [34]:
print(a2)
b2 = np.ones_like(a2)
print(b2)
print(np.dot(a2, b2))
print(np.matmul(a2, b2))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[13 13 13]
 [14 14 14]
 [14 14 14]]
[[13 13 13]
 [14 14 14]
 [14 14 14]]


#### tensordot(): 텐서곱 계산

In [36]:
print(a1)
print(b2)
print(np.tensordot(a2, b2))
print(np.tensordot(a2, b2, axes=0))
print(np.tensordot(a2, b2, axes=1))

[1 7 7 4 5]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
41
[[[[2 2 2]
   [2 2 2]
   [2 2 2]]

  [[6 6 6]
   [6 6 6]
   [6 6 6]]

  [[5 5 5]
   [5 5 5]
   [5 5 5]]]


 [[[3 3 3]
   [3 3 3]
   [3 3 3]]

  [[4 4 4]
   [4 4 4]
   [4 4 4]]

  [[7 7 7]
   [7 7 7]
   [7 7 7]]]


 [[[9 9 9]
   [9 9 9]
   [9 9 9]]

  [[3 3 3]
   [3 3 3]
   [3 3 3]]

  [[2 2 2]
   [2 2 2]
   [2 2 2]]]]
[[13 13 13]
 [14 14 14]
 [14 14 14]]


#### cross(): 벡터곱

In [37]:
x = [1, 2, 3]
y = [4, 5, 6]
print(np.cross(x, y)) 

[-3  6 -3]


#### inner()/outer(): 내적/외적

In [38]:
print(a2)
print(b2)
print(np.inner(a2, b2))
print(np.outer(a2, b2))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[13 13 13]
 [14 14 14]
 [14 14 14]]
[[2 2 2 2 2 2 2 2 2]
 [6 6 6 6 6 6 6 6 6]
 [5 5 5 5 5 5 5 5 5]
 [3 3 3 3 3 3 3 3 3]
 [4 4 4 4 4 4 4 4 4]
 [7 7 7 7 7 7 7 7 7]
 [9 9 9 9 9 9 9 9 9]
 [3 3 3 3 3 3 3 3 3]
 [2 2 2 2 2 2 2 2 2]]


#### mean(): 평균 계산

In [40]:
print(a2)
print(np.mean(a2))
print(np.mean(a2, axis=0))
print(np.mean(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
4.555555555555555
[4.66666667 4.33333333 4.66666667]
[4.33333333 4.66666667 4.66666667]


#### std(): 표준 편차 계산

In [41]:
print(a2)
print(np.std(a2))
print(np.std(a2, axis=0))
print(np.std(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
2.266230894930126
[3.09120617 1.24721913 2.05480467]
[1.69967317 1.69967317 3.09120617]


#### var(): 분산 계산

In [42]:
print(a2)
print(np.var(a2))
print(np.var(a2, axis=0))
print(np.var(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
5.135802469135801
[9.55555556 1.55555556 4.22222222]
[2.88888889 2.88888889 9.55555556]


#### min(): 최소값

In [43]:
print(a2)
print(np.min(a2))
print(np.min(a2, axis=0))
print(np.min(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
2
[2 3 2]
[2 3 2]


#### max(): 최대값

In [44]:
print(a2)
print(np.max(a2))
print(np.max(a2, axis=0))
print(np.max(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
9
[9 6 7]
[6 7 9]


#### argmin(): 최소값 인덱스

In [45]:
print(a2)
print(np.argmin(a2))
print(np.argmin(a2, axis=0))
print(np.argmin(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
0
[0 2 2]
[0 0 2]


#### argmax(): 최대값 인덱스

In [46]:
print(a2)
print(np.argmax(a2))
print(np.argmax(a2, axis=0))
print(np.argmax(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
6
[2 0 1]
[1 2 0]


#### median(): 중앙값

In [47]:
print(a2)
print(np.median(a2))
print(np.median(a2, axis=0))
print(np.median(a2, axis=1))

[[2 6 5]
 [3 4 7]
 [9 3 2]]
4.0
[3. 4. 5.]
[5. 4. 3.]


#### percentile(): 백분위 수



In [50]:
a1 = np.array([0, 1, 2, 3])
print(a1)
print(np.percentile(a1, [0, 20, 40, 60, 80, 100], interpolation='linear'))
print(np.percentile(a1, [0, 20, 40, 60, 80, 100], interpolation='higher'))
print(np.percentile(a1, [0, 20, 40, 60, 80, 100], interpolation='lower'))
print(np.percentile(a1, [0, 20, 40, 60, 80, 100], interpolation='nearest'))
print(np.percentile(a1, [0, 20, 40, 60, 80, 100], interpolation='midpoint'))

[0 1 2 3]
[0.  0.6 1.2 1.8 2.4 3. ]
[0 1 2 2 3 3]
[0 0 1 1 2 3]
[0 1 1 2 2 3]
[0.  0.5 1.5 1.5 2.5 3. ]


#### any()

In [51]:
a2 = np.array([[False, False, False],
               [False, True, True],
               [False, True, True]])
print(a2)
print(np.any(a2))
print(np.any(a2, axis=0))
print(np.any(a2, axis=1))

[[False False False]
 [False  True  True]
 [False  True  True]]
True
[False  True  True]
[False  True  True]


#### all()

In [52]:
a2 = np.array([[False, False, True],
               [True, True, True],
               [False, True, True]])
print(a2)
print(np.all(a2))
print(np.all(a2, axis=0))
print(np.all(a2, axis=1))

[[False False  True]
 [ True  True  True]
 [False  True  True]]
False
[False False  True]
[False  True False]


### 비교 연산(Comparison Operators)


In [56]:
a1 = np.arange(1, 10)
print(a1)
print(a1 == 5)
print(a1 != 5)
print(a1 < 5)
print(a1 <= 5)
print(a1 > 5)
print(a1 >= 5)

[1 2 3 4 5 6 7 8 9]
[False False False False  True False False False False]
[ True  True  True  True False  True  True  True  True]
[ True  True  True  True False False False False False]
[ True  True  True  True  True False False False False]
[False False False False False  True  True  True  True]
[False False False False  True  True  True  True  True]


In [58]:
a2 = np.arange(1, 10).reshape(3, 3)
print(a2)
print(np.sum(a2))
print(np.count_nonzero(a2 > 5))    # 0이 아닌 것 갯수 세기
print(np.sum(a2 > 5))
print(np.sum(a2 > 5, axis=0))
print(np.sum(a2 > 5, axis=1))
print(np.any(a2 > 5))
print(np.any(a2 > 5, axis=0))
print(np.any(a2 > 5, axis=1))
print(np.all(a2 > 5))
print(np.all(a2 > 5, axis=0))
print(np.all(a2 > 5, axis=1))

[[1 2 3]
 [4 5 6]
 [7 8 9]]
45
4
4
[1 1 2]
[0 1 3]
True
[ True  True  True]
[False  True  True]
False
[False False False]
[False False  True]


In [59]:
a1 = np.array([1, 2, 3, 4, 5])
print(a1)
b1 = np.array([1, 2, 3, 3, 4])
print(b1)
print(np.isclose(a1, b1))

[1 2 3 4 5]
[1 2 3 3 4]
[ True  True  True False False]


In [63]:
a1 = np.array([np.nan, 2, np.inf, 4, np.NINF])
print(a1)
print(np.isnan(a1))
print(np.isinf(a1))
print(np.isfinite(a1))

[ nan   2.  inf   4. -inf]
[ True False False False False]
[False False  True False  True]
[False  True False  True False]


#### 불리언 연산자(Boolean Operators)


In [66]:
a2 = np.arange(1, 10).reshape(3, 3)
print(a2)

print((a2 > 5) & (a2 < 8))    # AND
print(a2[(a2 > 5) & (a2 < 8)])

print((a2 > 5) | (a2 < 8))    # OR
print(a2[(a2 > 5) | (a2 < 8)])

print((a2 > 5) ^ (a2 < 8))    # XOR
print(a2[(a2 > 5) ^ (a2 < 8)])

print(~(a2 > 5))    # NOT
print(a2[~(a2 > 5)])

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[False False False]
 [False False  True]
 [ True False False]]
[6 7]
[[ True  True  True]
 [ True  True  True]
 [ True  True  True]]
[1 2 3 4 5 6 7 8 9]
[[ True  True  True]
 [ True  True False]
 [False  True  True]]
[1 2 3 4 5 8 9]
[[ True  True  True]
 [ True  True False]
 [False False False]]
[1 2 3 4 5]


### 배열 정렬

In [69]:
a1 = np.random.randint(1, 10, size=10)
print(a1)
print(np.sort(a1))
print(a1)    # 원본 배열은 정렬 반영되지 않음
print(np.argsort(a1))
print(a1)
print(a1.sort())
print(a1)

[1 9 1 2 7 9 8 8 7 8]
[1 1 2 7 7 8 8 8 9 9]
[1 9 1 2 7 9 8 8 7 8]
[0 2 3 4 8 6 7 9 1 5]
[1 9 1 2 7 9 8 8 7 8]
None
[1 1 2 7 7 8 8 8 9 9]


In [70]:
a2 = np.random.randint(1, 10, size=(3, 3))
print(a2)
print(np.sort(a2, axis=0))
print(np.sort(a2, axis=1))

[[8 6 3]
 [8 1 7]
 [2 3 2]]
[[2 1 2]
 [8 3 3]
 [8 6 7]]
[[3 6 8]
 [1 7 8]
 [2 2 3]]


#### 부분 정렬

* `partition()`: 배열에서 k개의 작은 값을 반환

In [72]:
a1 = np.random.randint(1, 10, size=10)
print(a1)
print(np.partition(a1, 3))

[4 7 4 2 2 4 5 2 2 5]
[2 2 2 2 4 4 5 7 4 5]


In [75]:
a2 = np.random.randint(1, 10, size=(5, 5))
print(a2)
print(np.partition(a2, 3))
print(np.partition(a2, 3, axis=0))
print(np.partition(a2, 3, axis=1))

[[1 9 5 7 3]
 [6 3 1 7 7]
 [4 4 2 4 8]
 [5 6 3 5 4]
 [7 8 7 6 3]]
[[1 3 5 7 9]
 [3 1 6 7 7]
 [4 2 4 4 8]
 [3 4 5 5 6]
 [6 3 7 7 8]]
[[1 6 3 5 3]
 [4 4 2 4 3]
 [5 3 1 6 4]
 [6 8 5 7 7]
 [7 9 7 7 8]]
[[1 3 5 7 9]
 [3 1 6 7 7]
 [4 2 4 4 8]
 [3 4 5 5 6]
 [6 3 7 7 8]]
