In [2]:
def pprint(arr):
    """Print a short summary of an array followed by its contents.

    Parameters
    ----------
    arr : object exposing ``shape``, ``ndim`` and ``dtype``
        The array to describe (an ``ndarray`` in every call in this notebook).

    Returns
    -------
    None
        The summary is written to standard output.
    """
    print(f"type : {type(arr)}")
    summary = f"shape : {arr.shape}, dimension : {arr.ndim}, dtype : {arr.dtype}"
    print(summary)
    # arr is passed as a second argument, so print() inserts its default
    # separator (one space) between the newline and the array text.
    print("Array’s Data :\n", arr)

In [3]:
import numpy as np

In [4]:
# Build a 1-D ndarray from a flat Python list and inspect it with pprint.
arr = [1, 2, 3]
a = np.array(arr)
pprint(a)

type : <class 'numpy.ndarray'>
shape : (3,), dimension : 1, dtype : int32
Array’s Data :
 [1 2 3]


In [5]:
# A list of two 3-element lists becomes a 2-D array; dtype=float forces
# floating-point storage even though every input value is an integer.
arr = [[1, 2, 3], [4, 5, 6]]
a = np.array(arr, dtype = float)
pprint(a)

type : <class 'numpy.ndarray'>
shape : (2, 3), dimension : 2, dtype : float64
Array’s Data :
 [[1. 2. 3.]
 [4. 5. 6.]]


In [6]:
# One more level of list nesting yields a 3-D array of shape (2, 2, 3).
arr = [[[1, 2, 3], [4, 5, 6]], [[3, 2, 1], [4, 5, 6]]]
a = np.array(arr, dtype = float)
pprint(a)

type : <class 'numpy.ndarray'>
shape : (2, 2, 3), dimension : 3, dtype : float64
Array’s Data :
 [[[1. 2. 3.]
  [4. 5. 6.]]

 [[3. 2. 1.]
  [4. 5. 6.]]]


In [7]:
# np.identity(4) builds the 4x4 identity matrix; np.eye with k=1 instead
# places the ones on the diagonal directly above the main one.
a = np.identity(4, dtype = int)
b = np.eye(4, 4, k=1, dtype=int)
print(a)
print(b)

[[1 0 0 0]
 [0 1 0 0]
 [0 0 1 0]
 [0 0 0 1]]
[[0 1 0 0]
 [0 0 1 0]
 [0 0 0 1]
 [0 0 0 0]]


In [8]:
# Create a NumPy array filled with random values.
# NOTE(review): no seed is set, so the values differ on every run.
data = np.random.randn(2, 3)
data

array([[ 0.00247255, -1.12723231,  0.57165483],
       [-0.22749753, -0.50531839, -0.08728507]])

In [9]:
# Size of the array along each dimension.
data.shape

(2, 3)

In [10]:
# Data type of the elements stored in the array.
data.dtype

dtype('float64')

### ndarray 생성

In [15]:
data1 = [6, 7, 8, 0, 1]
arr1 = np.array(data1)
arr1

array([6, 7, 8, 0, 1])

In [12]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [16]:
# Check the number of dimensions of the array.
arr1.ndim

1

In [14]:
arr2.ndim

2

In [17]:
type(arr2)

numpy.ndarray

In [18]:
arr2.dtype

dtype('int32')

In [19]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [20]:
np.zeros((3, 6)) # A multi-dimensional shape is passed as a tuple.

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [21]:
np.zeros((2, 3, 4))

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [22]:
# NumPy counterpart of Python's built-in range(); the result is an ndarray.
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [23]:
# The dtype argument sets the element type explicitly at creation time.
# Note: this rebinds arr1 and arr2, which earlier cells also define.
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)

In [24]:
# astype() casts an array to another data type; start by checking the
# dtype NumPy inferred for a plain list of integers.
arr = np.array([1, 2, 3, 4, 5])
arr.dtype

dtype('int32')

In [25]:
# Cast the integer array to float64 and confirm the resulting dtype.
float_arr = arr.astype(np.float64)
float_arr.dtype

dtype('float64')

In [26]:
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])

In [27]:
# Casting float to int drops the fractional part (3.7 -> 3, -2.6 -> -2);
# the values are not rounded.
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10])

In [28]:
# Byte strings that hold numeric text, used to show string -> float casting.
# np.bytes_ is used instead of np.string_: that alias was removed in
# NumPy 2.0, while np.bytes_ names the same type on older and newer releases.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
numeric_strings

array([b'1.25', b'-9.6', b'42'], dtype='|S4')

In [29]:
# astype(float) parses the numeric text into floating-point numbers.
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

### Numpy 배열의 산술연산

In [43]:
arr = np.array([[1, 2, 3], [4, 5, 6]])

In [39]:
arr * arr

array([[ 1,  4,  9],
       [16, 25, 36]])

In [40]:
arr - arr

array([[0, 0, 0],
       [0, 0, 0]])

In [41]:
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [42]:
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [44]:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [45]:
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

### 인덱싱과 슬라이싱

In [46]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [47]:
arr[5]

5

In [48]:
# Slice syntax is [start:end]; the element at index `end` is not included.
arr[5:8]

array([5, 6, 7])

In [49]:
# Assigning a scalar to a slice writes that value into every selected element.
arr[5:8] = 12

In [50]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [51]:
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [52]:
# A slice is not a copy: it is the same data under a different name,
# so writing through arr_slice also changes arr.
arr_slice[1] = 12345
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

In [53]:
# [:] addresses every element of the slice, so all of its positions in arr change.
# Slices share data because copying large arrays would waste space;
# use .copy() when an independent copy is needed.
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [54]:
# Indexing a multi-dimensional array.
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [55]:
arr2d[2] # Returns a 1-D array (the third row).

array([7, 8, 9])

In [56]:
# Chained indexing: take row 0 first, then element 2 of that row.
arr2d[0][2]

3

In [57]:
# Element access is written [row][col] or as a single [row, col] index;
# the comma form also accepts slices: [row_start:end, col_start:end].
arr2d[0, 2]

3

In [58]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [59]:
arr3d[0] # A 2-D array (the first 2x3 block).

array([[1, 2, 3],
       [4, 5, 6]])

In [60]:
old_value = arr3d[0].copy() # Make a real copy so the original values are preserved.
# Scalar assignment overwrites every element of the first block.
arr3d[0] = 42

In [61]:
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [62]:
old_value

array([[1, 2, 3],
       [4, 5, 6]])

In [63]:
# Restore the first block from the saved copy.
arr3d[0] = old_value
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [64]:
# Two indices on a 3-D array leave one dimension: row 0 of block 1.
arr3d[1, 0]

array([7, 8, 9])

In [65]:
x = arr3d[1]
x

array([[ 7,  8,  9],
       [10, 11, 12]])

In [66]:
x[0]

array([7, 8, 9])

In [67]:
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [68]:
arr[1:6]

array([ 1,  2,  3,  4, 64])

In [69]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [70]:
arr[:2]

array([0, 1])

In [71]:
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [72]:
arr2d[1, :2]

array([4, 5])

In [73]:
arr2d[:, :]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Boolean 배열을 이용하기

In [86]:
# One name per row of data: names has 7 entries and data has 7 rows.
# NOTE(review): randn is unseeded, so data differs on every run.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
print(names)
print(data)

['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']
[[-1.76935709  1.06044499 -1.07976478 -0.80865172]
 [-0.70188399  0.48422799  0.18736798  1.35515717]
 [-0.77406131 -0.6343232  -1.08314498 -1.64222976]
 [ 1.29333515  0.88633534  1.22450347 -1.95588394]
 [-0.63855254 -0.82503141  0.68420141 -1.16899012]
 [ 2.07320978 -1.36018802 -0.02429293  1.68881688]
 [-0.82102647 -1.84299648  1.96670765  0.29153259]]


In [87]:
# Comparing an array with a scalar is done per element and yields a boolean array.
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [88]:
# Index forms: [condition], [range], [row, col], [row_start:row_end, col_start:col_end].
# Here a boolean condition selects the rows of data whose name is 'Bob'.
data[names=='Bob']

array([[-1.76935709,  1.06044499, -1.07976478, -0.80865172],
       [ 1.29333515,  0.88633534,  1.22450347, -1.95588394]])

In [89]:
# Boolean row selection combined with a column slice (columns 2 onward).
data[names=='Bob', 2:]

array([[-1.07976478, -0.80865172],
       [ 1.22450347, -1.95588394]])

In [90]:
# A condition can be negated with != or with ~.
# Select the elements that are not 'Bob'.
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [91]:
# ~ inverts the boolean mask, selecting every row whose name is not 'Bob'.
data[~(names == 'Bob')]

array([[-0.70188399,  0.48422799,  0.18736798,  1.35515717],
       [-0.77406131, -0.6343232 , -1.08314498, -1.64222976],
       [-0.63855254, -0.82503141,  0.68420141, -1.16899012],
       [ 2.07320978, -1.36018802, -0.02429293,  1.68881688],
       [-0.82102647, -1.84299648,  1.96670765,  0.29153259]])

In [92]:
# Logical operators for combining masks: & (and), | (or).
mask = (names == 'Bob') | (names == 'Will')
mask

array([ True, False,  True,  True,  True, False, False])

In [93]:
data[mask]

array([[-1.76935709,  1.06044499, -1.07976478, -0.80865172],
       [-0.77406131, -0.6343232 , -1.08314498, -1.64222976],
       [ 1.29333515,  0.88633534,  1.22450347, -1.95588394],
       [-0.63855254, -0.82503141,  0.68420141, -1.16899012]])

In [94]:
# Replace every negative element of data with 0.
data[data < 0] = 0
data

array([[0.        , 1.06044499, 0.        , 0.        ],
       [0.        , 0.48422799, 0.18736798, 1.35515717],
       [0.        , 0.        , 0.        , 0.        ],
       [1.29333515, 0.88633534, 1.22450347, 0.        ],
       [0.        , 0.        , 0.68420141, 0.        ],
       [2.07320978, 0.        , 0.        , 1.68881688],
       [0.        , 0.        , 1.96670765, 0.29153259]])

In [95]:
# Set every row whose name is not 'Joe' to 7.
data[names != 'Joe'] = 7

In [96]:
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.48422799, 0.18736798, 1.35515717],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [2.07320978, 0.        , 0.        , 1.68881688],
       [0.        , 0.        , 1.96670765, 0.29153259]])

In [97]:
# Allocate an uninitialised 8x4 array, then fill row i with the value i.
arr = np.empty((8, 4))

for i, row in enumerate(arr):
    # Each row is a view into arr, so writing to it fills arr in place.
    row[:] = i

arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [98]:
arr[4]

array([4., 4., 4., 4.])

In [99]:
arr[[4, 3, 0, 6]] # Select several rows at once, in the listed order.

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [100]:
# Negative indices count back from the last row.
arr[[-3, -5, -7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [102]:
# The integers 0..31 laid out as 8 rows of 4 columns.
arr = np.arange(32).reshape((8, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [103]:
# The two index lists are paired up position by position, so this picks
# the elements at (1, 0), (5, 3), (7, 1) and (2, 2) as a 1-D result.
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [104]:
# [row, col]: select the rows first, then reorder the columns of that result.
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [105]:
# Transpose: the matrix obtained by swapping rows and columns.
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [106]:
# .T gives the transpose: the (3, 5) array is shown as (5, 3).
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [107]:
# Two 2x2 integer matrices for the matrix arithmetic examples.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
# One print call with a newline separator shows A, then B on the next lines.
print(A, B, sep="\n")

[[1 2]
 [3 4]]
[[5 6]
 [7 8]]


In [108]:
# Both + and * work element by element; * is not the matrix product.
print(A + B)
print(A * B)

[[ 6  8]
 [10 12]]
[[ 5 12]
 [21 32]]


In [109]:
# Matrix product.
np.dot(A, B)

array([[19, 22],
       [43, 50]])

### 유니버셜 함수 : 배열의 각 원소를 빠르게 처리하는 함수
- ufunc 라고 불리기도 함

In [110]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [111]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [112]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [113]:
# Two random vectors of length 8, used as inputs for a two-argument ufunc.
# NOTE(review): randn is unseeded, so these values differ on every run.
x = np.random.randn(8)
y = np.random.randn(8)
print(x)
print(y)

[-0.18573565 -1.09018123 -1.06168591  0.06203339 -0.33083497  1.79592011
 -1.30537012 -1.16430336]
[ 8.78201372e-01  2.88444523e-01 -1.25281824e+00  8.17068324e-01
  5.47880799e-01 -8.24099531e-01 -1.97165486e-03 -2.15258901e+00]


In [114]:
# Element-by-element maximum: each position holds the larger of x[i] and y[i].
np.maximum(x,y)

array([ 0.87820137,  0.28844452, -1.06168591,  0.81706832,  0.5478808 ,
        1.79592011, -0.00197165, -1.16430336])

### 정렬

In [115]:
arr = np.random.randn(8)

In [116]:
# sort() reorders arr itself in ascending order; nothing is assigned back.
arr.sort()
arr

array([-0.74821052, -0.06734334,  0.28291286,  0.69627552,  0.90761454,
        1.12069649,  1.19423942,  1.21491687])

In [117]:
# np.unique() returns the distinct values of the array in sorted order.
np.unique(names)

array(['Bob', 'Joe', 'Will'], dtype='<U4')

In [118]:
# Duplicates are removed and the remaining values come back sorted.
ints = np.array([3, 3, 3, 3, 4, 5, 5, 6, 1, 1, 2, 5])
np.unique(ints)

array([1, 2, 3, 4, 5, 6])

In [119]:
# Pure-Python way to get the same distinct names: deduplicate with set(), then sort.
sorted(set(names))

['Bob', 'Joe', 'Will']