# Classification metrics in multi-class classification

In [24]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score , confusion_matrix , precision_score , recall_score ,f1_score

In [2]:
# Read the Iris dataset from the CSV file in the working directory,
# then show (rows, columns) as a quick sanity check on what was loaded.
df = pd.read_csv(filepath_or_buffer='iris.csv')
df.shape

(150, 6)

In [3]:
# Preview the first five rows of the raw frame (Species is still text here).
df.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [8]:
# Replace the species names with integer class ids.
# The fitted encoder is kept so the ids can be mapped back to names later.
encoder = LabelEncoder()
encoded_species = encoder.fit_transform(df['Species'])

# Overwrites the Species column in place with the encoded ids.
df['Species'] = encoded_species

In [9]:
# Preview the first five rows again: Species now holds integer class ids.
df.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,0
1,2,4.9,3.0,1.4,0.2,0
2,3,4.7,3.2,1.3,0.2,0
3,4,4.6,3.1,1.5,0.2,0
4,5,5.0,3.6,1.4,0.2,0


In [11]:
# Features: the four measurement columns, selected by name
# (everything between the leading Id column and the trailing Species column).
x = df.loc[:, 'SepalLengthCm':'PetalWidthCm']
# Target: the encoded Species column (the last column of the frame).
y = df['Species']

x.shape, y.shape

((150, 4), (150,))

In [12]:
# Display the feature matrix selected from df (all columns except the first and last).
x

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [13]:
# Display the target series (the last column of df, i.e. the encoded Species ids).
y

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: Species, Length: 150, dtype: int64

In [15]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [16]:
# Report the shape of each split, one per line:
# train features, test features, train target, test target.
print(
    x_train.shape,
    x_test.shape,
    y_train.shape,
    y_test.shape,
    sep='\n',
)

(120, 4)
(30, 4)
(120,)
(30,)


In [14]:
# The two classifiers compared throughout this notebook.
clf1 = LogisticRegression()

# A decision tree permutes the candidate features randomly at every split, so
# without a fixed seed equally good splits can be resolved differently between
# runs and the reported metrics drift. Pin the seed (same value as the
# train/test split) so Restart & Run All reproduces the same tree.
clf2 = DecisionTreeClassifier(random_state=2)

In [17]:
# Train both classifiers on the same training split.
clf1.fit(X=x_train, y=y_train)
clf2.fit(X=x_train, y=y_train)

In [18]:
# Predict the held-out rows with each model:
# y_pred1 -> logistic regression, y_pred2 -> decision tree.
y_pred1, y_pred2 = (model.predict(x_test) for model in (clf1, clf2))

# Accuracy score

In [20]:
# Accuracy = fraction of test rows whose predicted class equals the true class.
print(
    "Accuracy of Logistic Regression",
    accuracy_score(y_true=y_test, y_pred=y_pred1),
)
print(
    "Accuracy of Decision Trees",
    accuracy_score(y_true=y_test, y_pred=y_pred2),
)

Accuracy of Logistic Regression 0.9666666666666667
Accuracy of Decision Trees 0.9333333333333333


# Confusion matrix

In [21]:
# Confusion matrix for the logistic regression model.
# Rows are the actual classes, columns are the predicted classes:
# entry [i, j] counts test rows of true class i that were predicted as class j.
print("Logistic Regression Confusion Matrix\n")

# Take the class labels from the fitted model instead of hard-coding range(0, 3).
# Passing `labels=` pins the matrix to one row/column per known class even when
# a class is absent from this test split, so the matrix shape always matches
# the row/column labels given to the DataFrame.
lr_labels = clf1.classes_
pd.DataFrame(
    confusion_matrix(y_test, y_pred1, labels=lr_labels),
    index=pd.Index(lr_labels, name="Actual"),
    columns=pd.Index(lr_labels, name="Predicted"),
)

Logistic Regression Confusion Matrix



Unnamed: 0,0,1,2
0,14,0,0
1,0,7,1
2,0,0,8


In [22]:
# Confusion matrix for the decision tree model.
# Rows are the actual classes, columns are the predicted classes:
# entry [i, j] counts test rows of true class i that were predicted as class j.
print("Decision Tree Confusion Matrix\n")

# Take the class labels from the fitted model instead of hard-coding range(0, 3).
# Passing `labels=` pins the matrix to one row/column per known class even when
# a class is absent from this test split, so the matrix shape always matches
# the row/column labels given to the DataFrame.
dt_labels = clf2.classes_
pd.DataFrame(
    confusion_matrix(y_test, y_pred2, labels=dt_labels),
    index=pd.Index(dt_labels, name="Actual"),
    columns=pd.Index(dt_labels, name="Predicted"),
)

Decision Tree Confusion Matrix



Unnamed: 0,0,1,2
0,14,0,0
1,0,7,1
2,0,1,7


In [23]:
# Side-by-side table of the true label and each model's prediction for every
# test row. The frame is built in one step and indexed by y_test's index so
# each row keeps the original row label from df; the prediction arrays are in
# the same row order as x_test / y_test.
result = pd.DataFrame(
    {
        'Actual Label': y_test,
        'Logistic Regression Prediction': y_pred1,
        'Decision Tree Prediction': y_pred2,
    },
    index=y_test.index,
)

# Show ten rows; the fixed seed makes the same rows appear on every run.
result.sample(10, random_state=2)

Unnamed: 0,Actual Label,Logistic Regression Prediction,Decision Tree Prediction
89,1,1,1
35,0,0,0
5,0,0,0
128,2,2,2
129,2,2,1
48,0,0,0
6,0,0,0
108,2,2,2
85,1,1,1
45,0,0,0


In [28]:
# Per-class precision, recall and F1 for logistic regression.
# average=None returns one score per class instead of a single averaged number.
print('Logistic Regression')
print(
    'precision score ',
    precision_score(y_true=y_test, y_pred=y_pred1, average=None),
)
print(
    'recall score ',
    recall_score(y_true=y_test, y_pred=y_pred1, average=None),
)
print(
    'f1 score ',
    f1_score(y_true=y_test, y_pred=y_pred1, average=None),
)

Logistic Regression
precision score  [1.         1.         0.88888889]
recall score  [1.    0.875 1.   ]
f1 score  [1.         0.93333333 0.94117647]


In [29]:
# Per-class precision, recall and F1 for the decision tree.
# average=None returns one score per class instead of a single averaged number.
print('Decision Tree')
print(
    'precision score ',
    precision_score(y_true=y_test, y_pred=y_pred2, average=None),
)
print(
    'recall score ',
    recall_score(y_true=y_test, y_pred=y_pred2, average=None),
)
print(
    'f1 score ',
    f1_score(y_true=y_test, y_pred=y_pred2, average=None),
)

Decision Tree
precision score  [1.    0.875 0.875]
recall score  [1.    0.875 0.875]
f1 score  [1.    0.875 0.875]
