## 파이썬 데이터분석
# NUMPY 심화

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### 비교식
- ```> < >= <= == !=```
- 불리언으로 변환할 때 0 은 False, 0 이 아닌 모든 숫자(1 포함)는 True 로 취급된다

In [2]:
# Build a 4x3 integer matrix holding 0..11, filled row by row.
a = np.reshape(np.arange(12), (4, 3))
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [3]:
# Element-wise comparison: yields a boolean array with the same shape as `a`.
a> 5

array([[False, False, False],
       [False, False, False],
       [ True,  True,  True],
       [ True,  True,  True]])

In [4]:
# Boolean mask that is True where the element of `a` is even.
(a%2) == 0

array([[ True, False,  True],
       [False,  True, False],
       [ True, False,  True],
       [False,  True, False]])

In [6]:
# Cast to bool: 0 becomes False, every non-zero value becomes True.
a.astype('bool')

array([[False,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [8]:
# Keep the boolean mask, then cast it to integers: True -> 1, False -> 0.
b = (a>5)
b.astype('int')

array([[0, 0, 0],
       [0, 0, 0],
       [1, 1, 1],
       [1, 1, 1]])

In [9]:
(a>5).sum() # True counts as 1 and False as 0, so the sum is the number of True values

6

In [10]:
# Build a 4x3 integer matrix counting down from 11 to 0, filled row by row.
b = np.reshape(np.arange(11, -1, -1), (4, 3))
b

array([[11, 10,  9],
       [ 8,  7,  6],
       [ 5,  4,  3],
       [ 2,  1,  0]])

In [11]:
# Compare two same-shaped arrays element by element.
a>b

array([[False, False, False],
       [False, False, False],
       [ True,  True,  True],
       [ True,  True,  True]])

In [12]:
# Element-wise AND of two boolean masks: True where the value is both
# greater than 5 and even. `&` is the logical-AND operator for boolean
# arrays; multiplying the masks gives the same result but hides the intent.
(a > 5) & ((a % 2) == 0)

array([[False, False, False],
       [False, False, False],
       [ True, False,  True],
       [False,  True, False]])

### 원본 데이터를 보존해야 할 때는 copy() 로 복사본을 만들어 수정하자

In [13]:
# Modify an independent copy so that the original array stays untouched.
a = np.reshape(np.arange(12), (4, 3))
b = a.copy()
b[0, 0] = -1  # only the copy changes
display(a, b)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

array([[-1,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [15]:
a = np.reshape(np.arange(12), (4, 3))
# Call copy() on the slice when the original must not be modified; the
# write below then affects only `b`.
b = a[1:3, 1:3].copy()
b[0, 0] = -1
display(a, b)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

array([[-1,  5],
       [ 7,  8]])

### 고급 검색 방법

In [16]:
# Load the iris CSV and keep its first four columns as a plain 2-D NumPy
# array for the indexing examples that follow.
data = pd.read_csv('iris.csv')
# to_numpy() is the recommended replacement for the legacy .values attribute.
iris = data.iloc[:, :4].to_numpy()
iris

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [17]:
# (number of rows, number of columns) of the `iris` array.
iris.shape

(150, 4)

In [18]:
iris[0] # first row

array([5.1, 3.5, 1.4, 0.2])

In [19]:
iris[:, 0] # first column

array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,
       4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,
       5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,
       5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4,
       6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6,
       6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7,
       6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5,
       6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3,
       6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5,
       7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2,
       7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. , 6.9, 6.7, 6.9, 5.8,
       6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9])

In [20]:
iris[ [0,1] ] # first and second rows

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2]])

In [21]:
iris[:, [0,1] ] # first and second columns

array([[5.1, 3.5],
       [4.9, 3. ],
       [4.7, 3.2],
       [4.6, 3.1],
       [5. , 3.6],
       [5.4, 3.9],
       [4.6, 3.4],
       [5. , 3.4],
       [4.4, 2.9],
       [4.9, 3.1],
       [5.4, 3.7],
       [4.8, 3.4],
       [4.8, 3. ],
       [4.3, 3. ],
       [5.8, 4. ],
       [5.7, 4.4],
       [5.4, 3.9],
       [5.1, 3.5],
       [5.7, 3.8],
       [5.1, 3.8],
       [5.4, 3.4],
       [5.1, 3.7],
       [4.6, 3.6],
       [5.1, 3.3],
       [4.8, 3.4],
       [5. , 3. ],
       [5. , 3.4],
       [5.2, 3.5],
       [5.2, 3.4],
       [4.7, 3.2],
       [4.8, 3.1],
       [5.4, 3.4],
       [5.2, 4.1],
       [5.5, 4.2],
       [4.9, 3.1],
       [5. , 3.2],
       [5.5, 3.5],
       [4.9, 3.1],
       [4.4, 3. ],
       [5.1, 3.4],
       [5. , 3.5],
       [4.5, 2.3],
       [4.4, 3.2],
       [5. , 3.5],
       [5.1, 3.8],
       [4.8, 3. ],
       [5.1, 3.8],
       [4.6, 3.2],
       [5.3, 3.7],
       [5. , 3.3],
       [7. , 3.2],
       [6.4, 3.2],
       [6.9,

In [24]:
# Integer-list indexing returns the listed rows in the given order;
# negative indices count from the end.
iris[ [0,2,-3,-1] ]

array([[5.1, 3.5, 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [6.5, 3. , 5.2, 2. ],
       [5.9, 3. , 5.1, 1.8]])

In [25]:
# An index may be repeated: this returns the second row three times.
iris[ [1,1,1] ]

array([[4.9, 3. , 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2]])

In [26]:
# Repeated indices again: the first row twice, then the last row twice.
iris[ [0,0,-1,-1] ]

array([[5.1, 3.5, 1.4, 0.2],
       [5.1, 3.5, 1.4, 0.2],
       [5.9, 3. , 5.1, 1.8],
       [5.9, 3. , 5.1, 1.8]])

In [27]:
# Boolean list on the column axis: keeps the columns flagged True
# (here the first and the last).
iris[:, [True,False,False,True]]

array([[5.1, 0.2],
       [4.9, 0.2],
       [4.7, 0.2],
       [4.6, 0.2],
       [5. , 0.2],
       [5.4, 0.4],
       [4.6, 0.3],
       [5. , 0.2],
       [4.4, 0.2],
       [4.9, 0.1],
       [5.4, 0.2],
       [4.8, 0.2],
       [4.8, 0.1],
       [4.3, 0.1],
       [5.8, 0.2],
       [5.7, 0.4],
       [5.4, 0.4],
       [5.1, 0.3],
       [5.7, 0.3],
       [5.1, 0.3],
       [5.4, 0.2],
       [5.1, 0.4],
       [4.6, 0.2],
       [5.1, 0.5],
       [4.8, 0.2],
       [5. , 0.2],
       [5. , 0.4],
       [5.2, 0.2],
       [5.2, 0.2],
       [4.7, 0.2],
       [4.8, 0.2],
       [5.4, 0.4],
       [5.2, 0.1],
       [5.5, 0.2],
       [4.9, 0.1],
       [5. , 0.2],
       [5.5, 0.2],
       [4.9, 0.1],
       [4.4, 0.2],
       [5.1, 0.2],
       [5. , 0.3],
       [4.5, 0.3],
       [4.4, 0.2],
       [5. , 0.6],
       [5.1, 0.4],
       [4.8, 0.3],
       [5.1, 0.2],
       [4.6, 0.2],
       [5.3, 0.2],
       [5. , 0.2],
       [7. , 1.4],
       [6.4, 1.5],
       [6.9,

In [28]:
# Small 4x3 matrix (values 0..11) used for the boolean-indexing examples.
a = np.reshape(np.arange(12), (4, 3))
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [29]:
# Boolean list on the row axis: keeps the rows flagged True
# (here the first and the last).
a[ [True,False,False,True] ]

array([[ 0,  1,  2],
       [ 9, 10, 11]])

In [30]:
# Boolean list on the column axis: keeps the first and third columns.
a[:, [True,False,True]]

array([[ 0,  2],
       [ 3,  5],
       [ 6,  8],
       [ 9, 11]])

In [31]:
# Integer index: returns the first row as a 1-D array.
a[0]

array([0, 1, 2])

In [32]:
# List index: returns the first row but keeps the result 2-D (one row).
a[ [0] ]

array([[0, 1, 2]])

In [33]:
# Integer column index: returns the first column as a 1-D array.
a[:, 0]

array([0, 3, 6, 9])

In [34]:
# List column index: returns the first column but keeps it 2-D (one column).
a[:, [0] ]

array([[0],
       [3],
       [6],
       [9]])

### 비교식을 이용한 데이터 검색

In [35]:
iris[iris[:,0]>5] # rows (samples) whose first attribute is greater than 5

array([[5.1, 3.5, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [5.4, 3.7, 1.5, 0.2],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [5.1, 3.3, 1.7, 0.5],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [5.5, 3.5, 1.3, 0.2],
       [5.1, 3.4, 1.5, 0.2],
       [5.1, 3.8, 1.9, 0.4],
       [5.1, 3.8, 1.6, 0.2],
       [5.3, 3.7, 1.5, 0.2],
       [7. , 3.2, 4.7, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.9, 3.1, 4.9, 1.5],
       [5.5, 2.3, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5],
       [5.7, 2.8, 4.5, 1.3],
       [6.3, 3.3, 4.7, 1.6],
       [6.6, 2.9, 4.6, 1.3],
       [5.2, 2.7, 3.9, 1.4],
       [5.9, 3. , 4.2, 1.5],
       [6. , 2.2, 4. , 1. ],
       [6.1, 2.9, 4.7, 1.4],
       [5.6, 2

In [36]:
# Boolean mask with one entry per row: True where the first column exceeds 5.
iris[:,0]>5

array([ True, False, False, False, False,  True, False, False, False,
       False,  True, False, False, False,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True, False, False, False,
        True,  True, False, False,  True,  True,  True, False, False,
        True, False, False,  True, False, False, False, False,  True,
       False,  True, False,  True, False,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [38]:
iris[(iris[:,0]>5) & (iris[:,-1]<=0.2)] # & is element-wise and, | is element-wise or

array([[5.1, 3.5, 1.4, 0.2],
       [5.4, 3.7, 1.5, 0.2],
       [5.8, 4. , 1.2, 0.2],
       [5.4, 3.4, 1.7, 0.2],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [5.5, 3.5, 1.3, 0.2],
       [5.1, 3.4, 1.5, 0.2],
       [5.1, 3.8, 1.6, 0.2],
       [5.3, 3.7, 1.5, 0.2]])

In [39]:
# The parentheses around each comparison are required because `&` binds
# more tightly than `>` and `<=`.
(iris[:,0]>5) & (iris[:,-1]<=0.2)

array([ True, False, False, False, False, False, False, False, False,
       False,  True, False, False, False,  True, False, False, False,
       False, False,  True, False, False, False, False, False, False,
        True,  True, False, False, False,  True,  True, False, False,
        True, False, False,  True, False, False, False, False, False,
       False,  True, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

### np.where() 함수
- 조건만 주면(`np.where(조건)`) 조건을 만족하는 원소의 위치(인덱스)를 알려준다
- 값을 함께 주면(`np.where(조건, x, y)`) 조건이 참인 자리는 x, 거짓인 자리는 y 로 채운 새 배열을 돌려준다

In [40]:
# Small 4x3 matrix (values 0..11) used for the np.where() examples.
a = np.reshape(np.arange(12), (4, 3))
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [41]:
np.where(a>5) # result is a tuple (row indices, column indices) of the elements matching the condition

(array([2, 2, 2, 3, 3, 3], dtype=int64),
 array([0, 1, 2, 0, 1, 2], dtype=int64))

In [42]:
np.where((a%2)==0) # positions (row indices, column indices) of the even values

(array([0, 0, 1, 2, 2, 3], dtype=int64),
 array([0, 2, 1, 0, 2, 1], dtype=int64))

In [44]:
np.where(a>5, 1, -1) # element-wise "1 if a>5 else -1"

array([[-1, -1, -1],
       [-1, -1, -1],
       [ 1,  1,  1],
       [ 1,  1,  1]])

In [45]:
# Keep the original value where a>5; use -1 everywhere else.
np.where(a>5, a, -1)

array([[-1, -1, -1],
       [-1, -1, -1],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [47]:
# Where a>5 take a%2 (1 for odd, 0 for even values); use 0 everywhere else.
np.where(a>5, a%2, 0)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 1, 0],
       [1, 0, 1]])