In [13]:
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
# Irisデータセットをロード
iris = datasets.load_iris()
# k-NNのインスタンスを生成
knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
# クロスバリデーション実行
scores = cross_val_score(knn, iris.data, iris.target, cv=5)

print("Cross-validation scores: {}".format(scores))
print("Average cross-validation score: {:.2f}".format(scores.mean()))


Cross-validation scores: [0.96666667 1.         0.93333333 0.96666667 1.        ]
Average cross-validation score: 0.97


In [12]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Irisデータセットをロード
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
# トレーニングデータとテストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)
# k-NNのインスタンスを生成
knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
# k-NNのモデル学習
knn.fit(X_train, y_train)
# スコア表示
print("Accuracy on training set: {:.3f}".format(knn.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(knn.score(X_test, y_test)))

Accuracy on training set: 0.962
Accuracy on test set: 0.978


In [14]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
# Irisデータセットをロード
iris = datasets.load_iris()
# 1、2列目の特徴量を抽出
X = iris.data[:, [0, 1]]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)
# k-NNのインスタンスを生成
knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
# k-NNのモデル学習
knn.fit(X_train, y_train)
pred = knn.predict(X_test)
print("Accuracy: {:.3f}".format(accuracy_score(y_test, pred)))
print("Confusion matrix:¥n{}".format(confusion_matrix(y_test, pred)))

from sklearn.metrics import classification_report
print(classification_report(y_test, pred))

Accuracy: 0.711
Confusion matrix:¥n[[15  0  0]
 [ 0  7  8]
 [ 0  5 10]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.58      0.47      0.52        15
           2       0.56      0.67      0.61        15

    accuracy                           0.71        45
   macro avg       0.71      0.71      0.71        45
weighted avg       0.71      0.71      0.71        45



In [17]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import cross_val_score, KFold
from scipy.stats import sem
from sklearn.metrics import classification_report

# Irisデータセットをロード
iris = datasets.load_iris()

# 決定木の深さの指定 max_depthは任意の整数
clf = DecisionTreeClassifier(max_depth = 5)
score = cross_val_score(estimator = clf, X = iris.data, y = iris.target, cv = 5)
clf.fit(X,y)
pred = clf.predict(X)

print(score)
print ("Mean score: {} (+/-{})".format( np.mean (score), sem(score)))
print(classification_report(y,pred ))



[0.96666667 0.96666667 0.9        1.         1.        ]
Mean score: 0.9666666666666668 (+/-0.018257418583505533)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       0.79      0.74      0.76        50
           2       0.75      0.80      0.78        50

    accuracy                           0.85       150
   macro avg       0.85      0.85      0.85       150
weighted avg       0.85      0.85      0.85       150



In [42]:
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

import random

if __name__ == '__main__':

    # データセットを読み込む
    iris = load_iris()
    x = iris.data
    y = iris.target

    # 読み込んだデータセットをシャッフルする
    p = list(zip(x, y))
    random.shuffle(p)
    x, y = zip(*p)

    # # 学習データの件数を指定する
    # train_size = 100
    # test_size = len(x) - train_size

    # データセットを学習データとテストデータに分割する
    train_x = x[:train_size]
    train_y = y[:train_size]
    test_x = x[train_size:]
    test_y = y[train_size:]

    # ニューラルネットワークの学習を行う
    mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=200)
    mlp.fit(x,y)
    # pred = mlp.predict(x)

    # 学習させたモデルを使ってテストデータに対する予測を出力する
    count = 0
    pred = mlp.predict(test_x)
    for i in range(test_size):
        #print('[{0}] correct:{1}, predict:{2}'.format(i, test_y[i], pred[i]))
        if pred[i] == test_y[i]:
            count += 1

    # 予測結果から正答率を算出する
    score = float(count) / test_size
    print('{0} / {1} = {2}'.format(count, test_size, score))
    print(classification_report(test_y,pred))


48 / 50 = 0.96
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        18
           1       1.00      0.87      0.93        15
           2       0.89      1.00      0.94        17

    accuracy                           0.96        50
   macro avg       0.96      0.96      0.96        50
weighted avg       0.96      0.96      0.96        50





In [6]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np

# Irisデータセットをロード
iris = datasets.load_iris()
# 1、2列目の特徴量を抽出
X = iris.data[:, [0, 1]]
# クラスラベルを取得
y = iris.target
# トレーニングデータとテストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)
# 決定木の生成
tree = DecisionTreeClassifier(max_depth=4, random_state=0)
tree.fit(X_train, y_train)
print("Accuracy on training set: {:.3f}".format(tree.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(tree.score(X_test, y_test)))

Accuracy on training set: 0.876
Accuracy on test set: 0.689
