In [1]:
# joblib is imported as a standalone package: the vendored copy that used to
# live at sklearn.externals.joblib was deprecated and later removed from
# scikit-learn, so importing it from there fails on current releases.
import joblib
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# Load the iris dataset (1936!) - https://archive.ics.uci.edu/ml/datasets/iris
# 150 samples for 3 different types of irises (Setosa, Versicolour and Virginica)
# The rows are the samples and the columns are: Sepal Length, Sepal Width, Petal Length and Petal Width.
dataset = datasets.load_iris()

# Quick look at the data: shape and first 10 rows of the feature matrix,
# then shape and first 10 entries of the class-label vector.
print(dataset.data.shape)
print(dataset.data[:10])
print(dataset.target.shape)
print(dataset.target[:10])

# Fit a logistic regression model to the data
model = LogisticRegression(solver='liblinear', multi_class='auto')
model.fit(dataset.data, dataset.target)

# Save model for future use (reload later with joblib.load('irismodel.pkl'))
joblib.dump(model, 'irismodel.pkl')

# Make predictions
# NOTE: predictions are made on the same samples the model was fitted on, so
# the numbers below describe the fit to the training data, not performance on
# unseen data.
expected = dataset.target
predicted = model.predict(dataset.data)

# Display metrics
# Precision measures the impact of false positives: TP/(TP+FP)
# Recall measures the impact of false negatives : TP/(TP+FN)
# F1 is the harmonic mean of precision and recall: (2*Recall*Precision)/(Recall+Precision)
print(metrics.classification_report(expected, predicted))

# Display confusion matrix
print(metrics.confusion_matrix(expected, predicted))


(150, 4)
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]]
(150,)
[0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       0.98      0.90      0.94        50
           2       0.91      0.98      0.94        50

    accuracy                           0.96       150
   macro avg       0.96      0.96      0.96       150
weighted avg       0.96      0.96      0.96       150

[[50  0  0]
 [ 0 45  5]
 [ 0  1 49]]


