# Numpy

**NumPy**는 **Python**을 사용한 과학 컴퓨팅의 기본 패키지로서, 다음과 같은 것들을 포함한다. :

- 강력한 N-차원 배열 객체
- 정교한 (브로드케스팅) 함수들
- C / C ++ 및 Fortran 코드들을 통합하는 도구들
- 유용한 선형 대수학, 푸리에 변환 및 난수 기능 등.

명백한 과학적 사용 외에도 **NumPy**는 일반 데이터의 효율적인 다차원 컨테이너로 사용될 수 있다. 임의의 데이터 유형을 정의 할 수 있으며, 이를 통해 **NumPy**를 다양한 데이터베이스와 원활하고 신속하게 통합될 수 있다.

라이브러리 설명서 : http://www.numpy.org/

### Python list 보다 numpy 배열이 갖는 3가지 장점
- 더 작은 공간
- 더 빠름
- 더 편리함

In [179]:
import numpy as np
import sys

SIZE = 1000

list = range(SIZE)
print(sys.getsizeof(5)*len(list))

array = np.arange(SIZE)
print(array.size * array.itemsize)

28000
4000


<img src="http://jakevdp.github.io/images/array_vs_list.png" width="600">

In [180]:
import time

list_1 = range(SIZE)
list_2 = range(SIZE)

array_1 = np.arange(SIZE)
array_2 = np.arange(SIZE)

start = time.time()
result = [ (x+y) for x, y in zip(list_1, list_2) ]
print("python list took: ", (time.time() - start) * 10000 )

start = time.time()
result = array_1 + array_2
print("numpy array took: ", (time.time() - start) * 10000 )

python list took:  9.980201721191406
numpy array took:  0.0


## 기본적 사용법

In [181]:
import numpy as np

# declare a vector using a list as the argument
v = np.array([1,2,3,4])
v

array([1, 2, 3, 4])

In [182]:
# declare a matrix unsing a nested list as the argument
M = np.array([[1,2],[3,4]])
M

array([[1, 2],
       [3, 4]])

In [183]:
# the same core type with different shapes
type(v), type(M)

(numpy.ndarray, numpy.ndarray)

In [184]:
M.size, M.itemsize, M.nbytes, M.ndim

(4, 4, 16, 2)

In [185]:
v[0], M[1], M[1,1]

(1, array([3, 4]), 4)

In [186]:
M[0,:]= 0
M

array([[0, 0],
       [3, 4]])

In [187]:
# generate a sequence of numbers
x = np.arange(0,10,1)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [188]:
np.linspace(0,10,25) # arguments (start, end, No of samples)

array([  0.        ,   0.41666667,   0.83333333,   1.25      ,
         1.66666667,   2.08333333,   2.5       ,   2.91666667,
         3.33333333,   3.75      ,   4.16666667,   4.58333333,
         5.        ,   5.41666667,   5.83333333,   6.25      ,
         6.66666667,   7.08333333,   7.5       ,   7.91666667,
         8.33333333,   8.75      ,   9.16666667,   9.58333333,  10.        ])

In [189]:
np.logspace(0,10,10, base=e)

array([  1.00000000e+00,   3.03773178e+00,   9.22781435e+00,
         2.80316249e+01,   8.51525577e+01,   2.58670631e+02,
         7.85771994e+02,   2.38696456e+03,   7.25095809e+03,
         2.20264658e+04])

In [190]:
x, y = np.mgrid[0:5, 0:5]
x

array([[0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3],
       [4, 4, 4, 4, 4]])

In [191]:
y

array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4]])

In [192]:
from numpy import random

random.rand(5,5)

array([[ 0.3929095 ,  0.28754676,  0.16813711,  0.62587683,  0.49427862],
       [ 0.66516796,  0.14704125,  0.41794746,  0.78291295,  0.09686488],
       [ 0.78322828,  0.58392262,  0.4857383 ,  0.46220178,  0.05291539],
       [ 0.9307325 ,  0.97195014,  0.94158599,  0.21987834,  0.16793632],
       [ 0.24437609,  0.80710058,  0.60591645,  0.03140475,  0.81070446]])

In [193]:
random.randn(5,5)

array([[ 0.2078565 ,  1.01966015,  0.08681706,  0.41149333, -0.68538716],
       [-0.76924021,  0.6624104 , -0.09454701,  0.91249079, -0.13200543],
       [ 0.38915614,  1.28111091,  0.40633935,  0.25769852,  0.46664445],
       [-0.39525387, -0.718634  , -1.24476756, -2.22379595, -0.14998114],
       [ 0.13763369, -0.06745029, -0.53532316,  1.2152198 , -0.20333796]])

In [194]:
np.diag([1,2,3])

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

In [195]:
# slicing works just like with lists
A = np.array([1,2,3,4,5])
A[1:3]

array([2, 3])

In [196]:
A = np.array( [ [ n + m*10 for n in range(5) ] for m in range(5)] )
A

array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14],
       [20, 21, 22, 23, 24],
       [30, 31, 32, 33, 34],
       [40, 41, 42, 43, 44]])

In [197]:
row_indices = [1,3,4]
A[row_indices]

array([[10, 11, 12, 13, 14],
       [30, 31, 32, 33, 34],
       [40, 41, 42, 43, 44]])

In [198]:
# indes masking
B = np.array( [n for n in range(5)])
row_mask = np.array([True, False, True, False, False])
B[row_mask]

array([0, 2])

## Linear Algebra

In [199]:
v1 = np.arange(0,5)
v1 + 2

array([2, 3, 4, 5, 6])

In [200]:
v1 * 2

array([0, 2, 4, 6, 8])

In [201]:
v1 * v1

array([ 0,  1,  4,  9, 16])

In [202]:
np.dot(v1,v1)

30

In [203]:
v2 = np.ones(5)
A, v2, np.dot(A,v2)

(array([[ 0,  1,  2,  3,  4],
        [10, 11, 12, 13, 14],
        [20, 21, 22, 23, 24],
        [30, 31, 32, 33, 34],
        [40, 41, 42, 43, 44]]),
 array([ 1.,  1.,  1.,  1.,  1.]),
 array([  10.,   60.,  110.,  160.,  210.]))

In [204]:
# cast changes behavior of + - * etc. to use matrix algebra
M = np.matrix(A)
M * M

matrix([[ 300,  310,  320,  330,  340],
        [1300, 1360, 1420, 1480, 1540],
        [2300, 2410, 2520, 2630, 2740],
        [3300, 3460, 3620, 3780, 3940],
        [4300, 4510, 4720, 4930, 5140]])

In [205]:
C = np.matrix([[1j, 2j],[3j, 4j]])
C

matrix([[ 0.+1.j,  0.+2.j],
        [ 0.+3.j,  0.+4.j]])

In [206]:
np.conjugate(C)

matrix([[ 0.-1.j,  0.-2.j],
        [ 0.-3.j,  0.-4.j]])

In [207]:
# inverse matrix
C.I

matrix([[ 0.+2.j ,  0.-1.j ],
        [ 0.-1.5j,  0.+0.5j]])

## Statistics


In [208]:
A, np.mean(A[:,3])

(array([[ 0,  1,  2,  3,  4],
        [10, 11, 12, 13, 14],
        [20, 21, 22, 23, 24],
        [30, 31, 32, 33, 34],
        [40, 41, 42, 43, 44]]), 23.0)

In [209]:
np.std(A[:,3]), np.var(A[:,3])

(14.142135623730951, 200.0)

In [210]:
A[:,3].min(), A[:,3].max()

(3, 43)

In [211]:
d = np.arange(1,11)
np.sum(d), np.prod(d)

(55, 3628800)

In [212]:
np.cumsum(d)

array([ 1,  3,  6, 10, 15, 21, 28, 36, 45, 55], dtype=int32)

In [213]:
np.cumprod(d)

array([      1,       2,       6,      24,     120,     720,    5040,
         40320,  362880, 3628800], dtype=int32)

In [214]:
np.trace(A)

110

In [215]:
m = random.rand(3,3)
m

array([[ 0.36140459,  0.58724261,  0.76600206],
       [ 0.73333442,  0.11134117,  0.91554426],
       [ 0.86884017,  0.61781494,  0.46333611]])

In [216]:
m.max(), m.max(axis=0)

(0.91554426333357808, array([ 0.86884017,  0.61781494,  0.91554426]))

In [217]:
A

array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14],
       [20, 21, 22, 23, 24],
       [30, 31, 32, 33, 34],
       [40, 41, 42, 43, 44]])

In [218]:
n, m = A.shape
B = A.reshape((1,n*m))
B

array([[ 0,  1,  2,  3,  4, 10, 11, 12, 13, 14, 20, 21, 22, 23, 24, 30, 31,
        32, 33, 34, 40, 41, 42, 43, 44]])

In [219]:
B[0,0:5] = 5
B

array([[ 5,  5,  5,  5,  5, 10, 11, 12, 13, 14, 20, 21, 22, 23, 24, 30, 31,
        32, 33, 34, 40, 41, 42, 43, 44]])

In [220]:
# also change
A

array([[ 5,  5,  5,  5,  5],
       [10, 11, 12, 13, 14],
       [20, 21, 22, 23, 24],
       [30, 31, 32, 33, 34],
       [40, 41, 42, 43, 44]])

In [221]:
# creates a copy
B = A.flatten()
B

array([ 5,  5,  5,  5,  5, 10, 11, 12, 13, 14, 20, 21, 22, 23, 24, 30, 31,
       32, 33, 34, 40, 41, 42, 43, 44])

In [222]:
# can insert a dimension in an array
v = np.array([1,2,3])
v[:,newaxis], v[:,newaxis].shape, v[newaxis,:].shape

(array([[1],
        [2],
        [3]]), (3, 1), (1, 3))

In [223]:
np.repeat(v,3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3])

In [224]:
np.tile(v,3)

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

In [225]:
w = np.array([5,6])
np.concatenate((v,w), axis=0)

array([1, 2, 3, 5, 6])