### 기계학습 모델(Machine Learning Models)
  * 질적(qualitative) 값, 수적/양적(quantitative) 값
    * 질적 값: 분류(classification)
    * 양적 값: 회귀(regression)
  * 손실함수(loss function): 입력값 x로부터 얻은 예측값 ŷ이 실제로 측정된 결과값 y와 얼마나 떨어져 있는지를 계산하는 함수

#### 지도학습 모델 종류
  * 결정트리(decision tree)
  * 최근접 이웃 탐색(nearest neighbor search)
  * 서포트 벡터 머신(SVM: support vector machine)
  * 선형 회귀(linear regression)

In [1]:
from sklearn.datasets import load_iris

In [2]:
# Load the iris dataset and show, one item per line: the feature names,
# the class names, the first sample's feature vector, and that sample's label.
iris = load_iris()
print(
    iris.feature_names,
    iris.target_names,
    iris.data[0],
    iris.target[0],
    sep='\n',
)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
['setosa' 'versicolor' 'virginica']
[5.1 3.5 1.4 0.2]
0


In [3]:
# Walk the labels and feature rows in lockstep, printing each sample's
# position, label, and feature vector.
for idx, (t, row) in enumerate(zip(iris.target, iris.data)):
    print(f'Example {idx}: label {t}, features {row}')

Example 0: label 0, features [5.1 3.5 1.4 0.2]
Example 1: label 0, features [4.9 3.  1.4 0.2]
Example 2: label 0, features [4.7 3.2 1.3 0.2]
Example 3: label 0, features [4.6 3.1 1.5 0.2]
Example 4: label 0, features [5.  3.6 1.4 0.2]
Example 5: label 0, features [5.4 3.9 1.7 0.4]
Example 6: label 0, features [4.6 3.4 1.4 0.3]
Example 7: label 0, features [5.  3.4 1.5 0.2]
Example 8: label 0, features [4.4 2.9 1.4 0.2]
Example 9: label 0, features [4.9 3.1 1.5 0.1]
Example 10: label 0, features [5.4 3.7 1.5 0.2]
Example 11: label 0, features [4.8 3.4 1.6 0.2]
Example 12: label 0, features [4.8 3.  1.4 0.1]
Example 13: label 0, features [4.3 3.  1.1 0.1]
Example 14: label 0, features [5.8 4.  1.2 0.2]
Example 15: label 0, features [5.7 4.4 1.5 0.4]
Example 16: label 0, features [5.4 3.9 1.3 0.4]
Example 17: label 0, features [5.1 3.5 1.4 0.3]
Example 18: label 0, features [5.7 3.8 1.7 0.3]
Example 19: label 0, features [5.1 3.8 1.5 0.3]
Example 20: label 0, features [5.4 3.4 1.7 0.2]
Ex

In [4]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn import tree

In [5]:
iris = load_iris()
# Rows held out for evaluation; their labels are 0, 1 and 2 respectively.
test = [0, 50, 100]

# Evaluation split: just the held-out rows and their labels.
test_data, test_target = iris.data[test], iris.target[test]

# Training split: everything except the held-out rows.
# axis=0 makes np.delete drop whole rows of the 2-D feature array.
train_data = np.delete(iris.data, test, axis=0)
train_target = np.delete(iris.target, test)

# Build the decision tree and fit it on the training split
# (fit() returns the estimator, so the call can be chained).
clf = tree.DecisionTreeClassifier().fit(train_data, train_target)

# Show the true labels first, then the tree's predictions for the same rows.
print(test_target, clf.predict(test_data), sep='\n')

[0 1 2]
[0 1 2]


In [6]:
# Small 2x2 matrix used to compare flatten() orderings.
a = np.array([[1, 2], [3, 4]])
# order='C' (row-major) is flatten()'s default, spelled out here for clarity:
# the rows are read off one after another.
print(a.flatten(order='C'))

[1 2 3 4]


In [7]:
# order='F' (column-major) reads the matrix column by column instead.
print(a.flatten(order='F'))

[1 3 2 4]


In [8]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn import tree

In [9]:
iris = load_iris()
# Row positions to leave out of the training data.
test_idx = [0, 50, 100]
# axis=0 deletes whole rows, so the result keeps its 2-D layout; the printed
# array starts at what was originally the second sample.
train_data = np.delete(iris.data, test_idx, axis=0)
print(train_data)

[[4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.2]
 [5.  3.2 1.2 0.2]
 [5.5 3.5 1.3 0.2]
 [4.9 3.6 1.4 0.1]
 [4.4 3.  1.3 0.2]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [4.5 2.3 1.3 0.3]
 [4.4 3.2 1.3 0.2]
 [5.  3.5 1.6 0.6]
 [5.1 3.8 1.9 0.4]
 [4.8 3.  1.4 0.3]
 [5.1 3.8 1.6 0.2]
 [4.6 3.2 1.4 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.3 1.4 0.2]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.9 1.5]
 [5.5 2.3 4.  1.3]
 [6.5 2.8 4.

In [10]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn import tree

In [12]:
iris = load_iris()

# Bare trailing expression (no print call): the notebook displays its value
# as the cell result, which is why the output below is an array(...) repr.
iris.data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [13]:
# NOTE: the result seems to differ from the book, probably because of a
# library version change.
# The recorded output has shape (150, 3) with only the first column removed,
# i.e. the boolean list acts as a per-column mask (True = delete).
# NOTE(review): NumPy's np.delete docs say booleans were cast to the integers
# 0 and 1 before version 1.19 -- presumably the behaviour the book shows;
# confirm against the book's NumPy version.
test_idx = [True, False, False, False]
train_data = np.delete(iris.data, test_idx, axis=1)
print(train_data.shape)
print(train_data)

(150, 3)
[[3.5 1.4 0.2]
 [3.  1.4 0.2]
 [3.2 1.3 0.2]
 [3.1 1.5 0.2]
 [3.6 1.4 0.2]
 [3.9 1.7 0.4]
 [3.4 1.4 0.3]
 [3.4 1.5 0.2]
 [2.9 1.4 0.2]
 [3.1 1.5 0.1]
 [3.7 1.5 0.2]
 [3.4 1.6 0.2]
 [3.  1.4 0.1]
 [3.  1.1 0.1]
 [4.  1.2 0.2]
 [4.4 1.5 0.4]
 [3.9 1.3 0.4]
 [3.5 1.4 0.3]
 [3.8 1.7 0.3]
 [3.8 1.5 0.3]
 [3.4 1.7 0.2]
 [3.7 1.5 0.4]
 [3.6 1.  0.2]
 [3.3 1.7 0.5]
 [3.4 1.9 0.2]
 [3.  1.6 0.2]
 [3.4 1.6 0.4]
 [3.5 1.5 0.2]
 [3.4 1.4 0.2]
 [3.2 1.6 0.2]
 [3.1 1.6 0.2]
 [3.4 1.5 0.4]
 [4.1 1.5 0.1]
 [4.2 1.4 0.2]
 [3.1 1.5 0.2]
 [3.2 1.2 0.2]
 [3.5 1.3 0.2]
 [3.6 1.4 0.1]
 [3.  1.3 0.2]
 [3.4 1.5 0.2]
 [3.5 1.3 0.3]
 [2.3 1.3 0.3]
 [3.2 1.3 0.2]
 [3.5 1.6 0.6]
 [3.8 1.9 0.4]
 [3.  1.4 0.3]
 [3.8 1.6 0.2]
 [3.2 1.4 0.2]
 [3.7 1.5 0.2]
 [3.3 1.4 0.2]
 [3.2 4.7 1.4]
 [3.2 4.5 1.5]
 [3.1 4.9 1.5]
 [2.3 4.  1.3]
 [2.8 4.6 1.5]
 [2.8 4.5 1.3]
 [3.3 4.7 1.6]
 [2.4 3.3 1. ]
 [2.9 4.6 1.3]
 [2.7 3.9 1.4]
 [2.  3.5 1. ]
 [3.  4.2 1.5]
 [2.2 4.  1. ]
 [2.9 4.7 1.4]
 [2.9 3.6 1.3]
 [3.1 4.4 1.4]
 

In [14]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn import tree

In [15]:
iris = load_iris()
test_idx = [0, 50, 100]

train_target = np.delete(iris.target, test_idx)
# No axis is passed here, so np.delete works on the flattened feature array
# and drops three single values rather than three rows; the result is 1-D.
train_data = np.delete(iris.data, test_idx)

print(train_data.shape)

# One "<position>th, <value>" line per remaining element.
print(''.join(f'{idx}th, {e}\n' for idx, e in enumerate(train_data)))

(597,)
0th, 3.5
1th, 1.4
2th, 0.2
3th, 4.9
4th, 3.0
5th, 1.4
6th, 0.2
7th, 4.7
8th, 3.2
9th, 1.3
10th, 0.2
11th, 4.6
12th, 3.1
13th, 1.5
14th, 0.2
15th, 5.0
16th, 3.6
17th, 1.4
18th, 0.2
19th, 5.4
20th, 3.9
21th, 1.7
22th, 0.4
23th, 4.6
24th, 3.4
25th, 1.4
26th, 0.3
27th, 5.0
28th, 3.4
29th, 1.5
30th, 0.2
31th, 4.4
32th, 2.9
33th, 1.4
34th, 0.2
35th, 4.9
36th, 3.1
37th, 1.5
38th, 0.1
39th, 5.4
40th, 3.7
41th, 1.5
42th, 0.2
43th, 4.8
44th, 3.4
45th, 1.6
46th, 0.2
47th, 4.8
48th, 3.0
49th, 0.1
50th, 4.3
51th, 3.0
52th, 1.1
53th, 0.1
54th, 5.8
55th, 4.0
56th, 1.2
57th, 0.2
58th, 5.7
59th, 4.4
60th, 1.5
61th, 0.4
62th, 5.4
63th, 3.9
64th, 1.3
65th, 0.4
66th, 5.1
67th, 3.5
68th, 1.4
69th, 0.3
70th, 5.7
71th, 3.8
72th, 1.7
73th, 0.3
74th, 5.1
75th, 3.8
76th, 1.5
77th, 0.3
78th, 5.4
79th, 3.4
80th, 1.7
81th, 0.2
82th, 5.1
83th, 3.7
84th, 1.5
85th, 0.4
86th, 4.6
87th, 3.6
88th, 1.0
89th, 0.2
90th, 5.1
91th, 3.3
92th, 1.7
93th, 0.5
94th, 4.8
95th, 3.4
96th, 1.9
97th, 0.2
98th, 3.0
99th, 1.6
100