In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

In [2]:
data_iris = load_iris()
dir(data_iris)

['DESCR', 'data', 'feature_names', 'filename', 'target', 'target_names']

In [3]:
data_iris['feature_names']

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [4]:
df = pd.DataFrame(data_iris['data'], columns=['sl', 'sw', 'pl', 'pw'])
df

Unnamed: 0,sl,sw,pl,pw
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [5]:
df['target'] = data_iris['target']
df

Unnamed: 0,sl,sw,pl,pw,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [7]:
df['species'] = df['target'].apply(lambda x: data_iris['target_names'][x])
df

Unnamed: 0,sl,sw,pl,pw,target,species
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2,virginica
146,6.3,2.5,5.0,1.9,2,virginica
147,6.5,3.0,5.2,2.0,2,virginica
148,6.2,3.4,5.4,2.3,2,virginica


In [8]:
from sklearn.model_selection import train_test_split

In [22]:
xtr, xts, ytr, yts = train_test_split(df[['sl', 'sw', 'pl', 'pw']], df['target'])

In [23]:
from sklearn.linear_model import LogisticRegression

In [24]:
logreg = LogisticRegression(solver='liblinear', multi_class='auto')
logreg.fit(xtr, ytr)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [25]:
from sklearn.metrics import confusion_matrix, classification_report, jaccard_score, accuracy_score

In [26]:
pred = logreg.predict(xts)

In [27]:
print('Actual :', np.array(yts))
print('Prediction :', pred)

Actual : [1 2 1 1 0 2 1 2 1 0 1 0 0 0 2 1 1 1 0 1 1 1 0 2 2 0 1 0 0 0 2 1 1 2 1 2 0
 2]
Prediction : [1 2 1 2 0 2 1 2 1 0 1 0 0 0 2 1 2 1 0 1 1 1 0 2 2 0 1 0 0 0 2 1 1 2 1 2 0
 2]


In [29]:
res = pd.DataFrame({'Actual': np.array(yts), 'Pred': pred})
res['Result'] = (res['Actual']==res['Pred'])
res.sort_values('Result', ascending=False)

Unnamed: 0,Actual,Pred,Result
0,1,1,True
28,0,0,True
21,1,1,True
22,0,0,True
23,2,2,True
24,2,2,True
25,0,0,True
26,1,1,True
27,0,0,True
29,0,0,True


## Multiclass Confusion Matrix

In [31]:
conf_iris = pd.DataFrame(confusion_matrix(yts, pred), columns=['pred0', 'pred1', 'pred2'], index=['act0', 'act1', 'act2'])
conf_iris

Unnamed: 0,pred0,pred1,pred2
act0,12,0,0
act1,0,14,2
act2,0,0,10


In [32]:
print(classification_report(yts, pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      0.88      0.93        16
           2       0.83      1.00      0.91        10

    accuracy                           0.95        38
   macro avg       0.94      0.96      0.95        38
weighted avg       0.96      0.95      0.95        38



## Jaccard Index

### Binary
$\displaystyle J(y,y_p) = \frac{|y\cap y_p|}{|y\cup y_p|}$

In [33]:
y = np.array([1,1,1,0,1])
yp = np.array([0,1,1,1,1])

In [34]:
print(jaccard_score(y,yp))

0.6


### Multiclass
$\displaystyle J(y,y_p) = \frac{\sum^{n}_{i=1}\frac{TP_i}{TP_i + FP_i + FN_i}}{n}$

In [36]:
conf_iris

Unnamed: 0,pred0,pred1,pred2
act0,12,0,0
act1,0,14,2
act2,0,0,10


In [37]:
print(jaccard_score(yts, pred, average=None))

[1.         0.875      0.83333333]


In [41]:
np.mean(jaccard_score(yts, pred, average=None))

0.9027777777777778

In [40]:
(1+0.875+0.833) / 3

0.9026666666666667