In [None]:
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [None]:
# Load the classic iris data set (dating back to 1936):
# https://archive.ics.uci.edu/ml/datasets/iris
# It holds 150 samples covering 3 iris species (Setosa, Versicolour, Virginica).
# Each row is one sample; the columns are Sepal Length, Sepal Width,
# Petal Length and Petal Width.
iris = datasets.load_iris()

# Preview the feature matrix, then the labels: shape first, then the first 10 entries.
print(iris.data.shape, iris.data[:10], sep="\n")
print(iris.target.shape, iris.target[:10], sep="\n")

In [None]:
# Partition the samples into a training portion and a held-out test portion
# (test_size=0.33); the fixed random_state keeps the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.33,
    random_state=123,
)

In [None]:
# Fit a logistic regression model to the training data.
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
# NOTE(review): newer scikit-learn releases deprecate the `multi_class` argument;
# confirm against the installed version before upgrading.
model = LogisticRegression(solver='liblinear', multi_class='ovr')
model.fit(X_train, y_train)

# Save the fitted model to disk for future use.
# The vendored `sklearn.externals.joblib` was deprecated and later removed from
# scikit-learn, so the standalone `joblib` package (which scikit-learn itself
# depends on) is imported directly instead.
import joblib
joblib.dump(model, 'irismodel.pkl')

In [None]:
# Pair the true test labels with the model's predictions for the test features.
expected, predicted = y_test, model.predict(X_test)

In [None]:
# Show per-class evaluation metrics.
#   Precision = TP / (TP + FP)  -- penalised by false positives
#   Recall    = TP / (TP + FN)  -- penalised by false negatives
#   F1        = (2 * Recall * Precision) / (Recall + Precision)
#               -- the harmonic mean of precision and recall
print(metrics.classification_report(y_true=expected, y_pred=predicted))

In [None]:
# Show the confusion matrix computed from the true and predicted test labels.
print(metrics.confusion_matrix(y_true=expected, y_pred=predicted))