In [1]:
import pandas
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pandas.read_csv(url, names=names)

In [2]:
from sklearn import model_selection
# Split-out validation dataset
array = dataset.values
X = array[:,0:4]
Y = array[:,4]
validation_size = 0.20
seed = 7
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(X, Y, test_size=validation_size, random_state=seed)

In [3]:
# Test options and evaluation metric
seed = 7
scoring = 'accuracy'

In [4]:
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
# Spot Check Algorithms
models = []
models.append(('NB', GaussianNB()))
models.append(('Decision Tree', DecisionTreeClassifier()))
models.append(('Stochastic Gradient Descent', SGDClassifier()))
# evaluate each model in turn
results = []
names = []
for name, model in models:
    kfold = model_selection.KFold(n_splits=10, random_state=seed, shuffle=True)
    cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

NB: 0.966667 (0.040825)
Decision Tree: 0.958333 (0.076830)
Stochastic Gradient Descent: 0.833333 (0.134371)


In [5]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
# Make predictions on validation dataset
nb = GaussianNB()
nb.fit(X_train, Y_train)
predictions = nb.predict(X_validation)
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))

0.8333333333333334
[[7 0 0]
 [0 9 3]
 [0 2 9]]
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00         7
Iris-versicolor       0.82      0.75      0.78        12
 Iris-virginica       0.75      0.82      0.78        11

       accuracy                           0.83        30
      macro avg       0.86      0.86      0.86        30
   weighted avg       0.84      0.83      0.83        30



In [6]:
classification_report(Y_validation, predictions)

'                 precision    recall  f1-score   support\n\n    Iris-setosa       1.00      1.00      1.00         7\nIris-versicolor       0.82      0.75      0.78        12\n Iris-virginica       0.75      0.82      0.78        11\n\n       accuracy                           0.83        30\n      macro avg       0.86      0.86      0.86        30\n   weighted avg       0.84      0.83      0.83        30\n'

In [7]:
confusion_matrix(Y_validation, predictions)

array([[7, 0, 0],
       [0, 9, 3],
       [0, 2, 9]], dtype=int64)