# Dataset
[sklearn.datasets](https://scikit-learn.org/stable/datasets/index.html)

### 学習データとテストデータの分割

In [1]:
import random
import numpy as np
from sklearn import datasets

random.seed(0)  # fix the stdlib RNG so both shuffles below are repeatable

TEST_RATIO = 0.1  # fraction of every dataset held out as test data


def _shuffle_together(data, target):
    """Reorder ``data`` and ``target`` with one shared random permutation.

    The two arrays correspond row by row, so they are permuted with the same
    index array instead of being shuffled independently.

    Args:
        data: array of samples that accepts an integer index array.
        target: array of labels with the same length as ``data``.

    Returns:
        Tuple ``(shuffled_data, shuffled_target)``.
    """
    shuffle_idx = np.arange(len(data))  # indices 0 .. len(data) - 1
    random.shuffle(shuffle_idx)  # in-place shuffle driven by the seeded stdlib RNG
    return data[shuffle_idx], target[shuffle_idx]


def _split_off_test(data, target, test_ratio):
    """Split already-shuffled arrays into a training part and a test part.

    The first ``int(len(data) * test_ratio)`` rows become the test set and
    every remaining row goes to the training set.

    Args:
        data: shuffled array of samples.
        target: shuffled array of labels, aligned with ``data``.
        test_ratio: fraction of rows to hold out (truncated to whole rows).

    Returns:
        Tuple ``(train_data, train_target, test_data, test_target)``.
    """
    test_num = int(len(data) * test_ratio)
    return data[test_num:], target[test_num:], data[:test_num], target[:test_num]


# --- iris ---
iris = datasets.load_iris()
iris_data, iris_target = _shuffle_together(iris.data, iris.target)
(
    iris_train_data,
    iris_train_target,
    iris_test_data,
    iris_test_target,
) = _split_off_test(iris_data, iris_target, TEST_RATIO)

print(f"iris: train({len(iris_train_data)}) test({len(iris_test_data)})")

# --- digits (same procedure; shuffled after iris so the RNG stream order is unchanged) ---
digits = datasets.load_digits()
digits_data, digits_target = _shuffle_together(digits.data, digits.target)
(
    digits_train_data,
    digits_train_target,
    digits_test_data,
    digits_test_target,
) = _split_off_test(digits_data, digits_target, TEST_RATIO)

print(f"digits: train({len(digits_train_data)}) test({len(digits_test_data)})")

iris: train(135) test(15)
digits: train(1618) test(179)


# SVM(support vector machine, サポートベクトルマシン)

[sklearn.svm.LinearSVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC)

[sklearn.svm.SVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC)

[User Guide](https://scikit-learn.org/stable/modules/svm.html#svm-classification)

In [2]:
from sklearn.svm import LinearSVC

# Fit a linear SVM on the iris training split and print its score on the
# held-out test split.
# random_state is pinned because LinearSVC may shuffle the data randomly in its
# dual solver, and the `random.seed(0)` call in the split cell only seeds
# Python's stdlib RNG -- it does not make this fit repeatable on its own.
model = LinearSVC(random_state=0)
model.fit(iris_train_data, iris_train_target)

print(model.score(iris_test_data, iris_test_target))

1.0




In [3]:
# Fit a linear SVM on the digits training split and print its score on the
# held-out test split.
# random_state is pinned because LinearSVC may shuffle the data randomly in its
# dual solver, and the `random.seed(0)` call in the split cell only seeds
# Python's stdlib RNG -- it does not make this fit repeatable on its own.
# NOTE(review): the features are passed in unscaled; if this fit emits a
# ConvergenceWarning, consider scaling the inputs or raising max_iter --
# confirm on a fresh run.
model = LinearSVC(random_state=0)
model.fit(digits_train_data, digits_train_target)

print(model.score(digits_test_data, digits_test_target))

0.9553072625698324




In [4]:
from sklearn.svm import SVC

# Try each SVC kernel on iris: print the kernel name, fit on the training
# split, then print the score on the test split.
for kernel in ('linear', 'poly', 'rbf', 'sigmoid'):
    print(kernel)
    # fit() returns the fitted estimator, so construction and training chain.
    model = SVC(kernel=kernel).fit(iris_train_data, iris_train_target)
    print(model.score(iris_test_data, iris_test_target))

linear
1.0
poly
1.0
rbf
1.0
sigmoid
0.26666666666666666


In [5]:
from sklearn.svm import SVC

# Try each SVC kernel on digits: print the kernel name, fit on the training
# split, then print the score on the test split.
for kernel in ('linear', 'poly', 'rbf', 'sigmoid'):
    print(kernel)
    # fit() returns the fitted estimator, so construction and training chain.
    model = SVC(kernel=kernel).fit(digits_train_data, digits_train_target)
    print(model.score(digits_test_data, digits_test_target))

linear
0.9776536312849162
poly
0.9888268156424581
rbf
0.9888268156424581
sigmoid
0.9217877094972067
