In [49]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

## Load Iris dataset
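You can download the Iris data from the internet, for example from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/). The cell below expects the file at `../data/iris.csv` with no header row.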

In [50]:
# Read the Iris CSV. header=None tells pandas the file has no header row,
# so the column names are supplied explicitly via `names`.
df = pd.read_csv(
    '../data/iris.csv',
    names=['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'label'],
    header=None,
)

# Preview the first rows of the loaded frame.
display(df.head(20))

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,label
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
5,5.4,3.9,1.7,0.4,Iris-setosa
6,4.6,3.4,1.4,0.3,Iris-setosa
7,5.0,3.4,1.5,0.2,Iris-setosa
8,4.4,2.9,1.4,0.2,Iris-setosa
9,4.9,3.1,1.5,0.1,Iris-setosa


## Convert text labels to numeric indices

In [51]:
# Replace the text class names in 'label' with integer indices.
# The fitted encoder is kept in `le`; note that df['label'] is overwritten in place.
le = LabelEncoder()
le.fit(df['label'])
df['label'] = le.transform(df['label'])
display(df.head(20))

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,label
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
6,4.6,3.4,1.4,0.3,0
7,5.0,3.4,1.5,0.2,0
8,4.4,2.9,1.4,0.2,0
9,4.9,3.1,1.5,0.1,0


## Split data into training and test datasets, prepare training data

In [52]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
train, test = train_test_split(df, test_size=0.3, random_state=42)

# Separate the target column from the training features.
# `columns=` is used instead of a positional axis argument, which
# pandas 2.0+ no longer accepts for DataFrame.drop.
target = train['label']
train = train.drop(columns='label')

display(train.head(20))

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width
81,5.5,2.4,3.7,1.0
133,6.3,2.8,5.1,1.5
137,6.4,3.1,5.5,1.8
75,6.6,3.0,4.4,1.4
109,7.2,3.6,6.1,2.5
96,5.7,2.9,4.2,1.3
105,7.6,3.0,6.6,2.1
66,5.6,3.0,4.5,1.5
0,5.1,3.5,1.4,0.2
122,7.7,2.8,6.7,2.0


## Train the logistic regression model

In [53]:
# Build a logistic regression classifier that uses the newton-cg solver,
# then fit it on the training features and their labels.
lr = LogisticRegression(solver='newton-cg')
lr = lr.fit(train, target)

## Evaluate the model on the test data

In [54]:
# Separate the ground-truth labels from the test features.
# `columns=` is used instead of a positional axis argument, which
# pandas 2.0+ no longer accepts for DataFrame.drop.
expected = test['label']
test = test.drop(columns='label')

# Score the fitted model on the held-out rows and show the result.
accuracy = lr.score(test, expected)
accuracy

0.9555555555555556

In [55]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict a class for every held-out row, then print the confusion matrix
# followed by the per-class classification report.
predictions = lr.predict(test)
for build_report in (confusion_matrix, classification_report):
    print(build_report(expected, predictions))

[[19  0  0]
 [ 0 11  2]
 [ 0  0 13]]
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        19
          1       1.00      0.85      0.92        13
          2       0.87      1.00      0.93        13

avg / total       0.96      0.96      0.96        45

