In [11]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

In [12]:
# Toy dataset: a single numeric feature `x` (1..20) and a two-class name
# label `y`. The labels are written in run-length form: six 'Sarah', a mixed
# middle section, then six 'Zaenab'.
data = dict(
    x=np.arange(1, 21),
    y=np.array(
        ['Sarah'] * 6
        + ['Zaenab', 'Sarah', 'Zaenab', 'Zaenab']
        + ['Sarah', 'Sarah', 'Zaenab', 'Sarah']
        + ['Zaenab'] * 6
    ),
)
df = pd.DataFrame(data)
df

Unnamed: 0,x,y
0,1,Sarah
1,2,Sarah
2,3,Sarah
3,4,Sarah
4,5,Sarah
5,6,Sarah
6,7,Zaenab
7,8,Sarah
8,9,Zaenab
9,10,Zaenab


Sarah = 0 (negative class), Zaenab = 1 (positive class)

In [13]:
# Fit a one-feature logistic regression on the full toy set, then predict
# back on the very same rows (no train/test split in this demo).
X = df[['x']]
model = LogisticRegression(solver='lbfgs')
model.fit(X, df['y'])

# Keep actual and predicted labels as plain Python lists for later cells.
y = df['y'].tolist()
yp = model.predict(X).tolist()

print(y, yp, sep='\n')

['Sarah', 'Sarah', 'Sarah', 'Sarah', 'Sarah', 'Sarah', 'Zaenab', 'Sarah', 'Zaenab', 'Zaenab', 'Sarah', 'Sarah', 'Zaenab', 'Sarah', 'Zaenab', 'Zaenab', 'Zaenab', 'Zaenab', 'Zaenab', 'Zaenab']
['Sarah', 'Sarah', 'Sarah', 'Sarah', 'Sarah', 'Sarah', 'Sarah', 'Sarah', 'Sarah', 'Sarah', 'Zaenab', 'Zaenab', 'Zaenab', 'Zaenab', 'Zaenab', 'Zaenab', 'Zaenab', 'Zaenab', 'Zaenab', 'Zaenab']


### Confusion Matrix cara 1

- | pred Sarah | pred Zaenab
- | - | -
__actual Sarah__ | 7 | 3
__actual Zaenab__ | 3 | 7

### Confusion Matrix cara 2

In [15]:
# Store the predictions next to the actual labels for the cells below.
df['yp'] = yp

# Pin the class order explicitly so the row/column captions below are
# guaranteed to line up with the matrix, instead of relying on sklearn's
# default (sorted) label order happening to match hand-written captions.
class_order = ['Sarah', 'Zaenab']
cm = confusion_matrix(y, yp, labels=class_order)

# Rows are the actual class, columns are the predicted class.
dfConf = pd.DataFrame(
    cm,
    index=[f'actual {name}' for name in class_order],
    columns=[f'pred {name}' for name in class_order],
)
dfConf

Unnamed: 0,pred Sarah,pred Zaenab
actual Sarah,7,3
actual Zaenab,3,7


### Confusion Matrix cara 3

In [32]:
# Manual confusion-matrix counts.
# Positive class is 'Zaenab' (1); negative class is 'Sarah' (0).
#
# The masks are combined with `&` instead of chaining df[mask1][mask2]:
# chained boolean indexing makes pandas re-align the second mask against the
# already-filtered frame and emit a "Boolean Series key will be reindexed"
# UserWarning.
actual_zaenab = df['y'] == 'Zaenab'
actual_sarah = df['y'] == 'Sarah'
pred_zaenab = df['yp'] == 'Zaenab'
pred_sarah = df['yp'] == 'Sarah'

tp = int((actual_zaenab & pred_zaenab).sum())  # actual positive, predicted positive
tn = int((actual_sarah & pred_sarah).sum())    # actual negative, predicted negative
fp = int((actual_sarah & pred_zaenab).sum())   # actual negative, predicted positive
fn = int((actual_zaenab & pred_sarah).sum())   # actual positive, predicted negative

# Rows = actual class, columns = predicted class, both in [Sarah, Zaenab]
# order, i.e. [[tn, fp], [fn, tp]] -- the same layout sklearn's
# confusion_matrix produces for these labels.
dfConf = pd.DataFrame(np.array([[tn, fp], [fn, tp]]),
                      columns=['pred Sarah', 'pred Zaenab'],
                      index=['actual Sarah', 'actual Zaenab'])
dfConf

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


Unnamed: 0,pred Sarah,pred Zaenab
actual Sarah,7,3
actual Zaenab,3,7


### Cek Metrics cara Manual

In [51]:
# Hand-computed metrics from the four confusion-matrix counts.
# Assumes tp/tn/fp/fn follow the usual definitions with Zaenab as the
# positive class.
n_total = tp + tn + fp + fn
print('Akurasi =', (tp + tn) / n_total)
print('Error Rate =', (fp + fn) / n_total)

n_pos = tp + fn   # denominator shared by Recall(+) and FN Rate
n_neg = fp + tn   # denominator shared by Recall(-) and FP Rate

rclP = tp / n_pos
print('Recall(+) =', rclP)
print('FP Rate =', fp / n_neg)
rclN = tn / n_neg
print('Recall(-) =', rclN)
print('FN Rate =', fn / n_pos)
prc = tp / (tp + fp)
print('Precision(+) =', prc)
print('Precision(-) =', tn / (tn + fn))

# Balanced accuracy is the mean of the two per-class recalls.
print('Balanced Accuracy =', (rclP + rclN) / 2)

# F1 is the harmonic mean of precision and recall of the positive class.
rcl = rclP
print('F1 score =', 2 * ((prc * rcl) / (prc +  rcl)))

Akurasi = 0.7
Error Rate = 0.3
Recall(+) = 0.7
FP Rate = 0.3
Recall(-) = 0.7
FN Rate = 0.3
Precision(+) = 0.7
Precision(-) = 0.7
Balanced Accuracy = 0.7
F1 score = 0.7


### Cek Metrics cara Sklearn

- __Accuracy__

In [44]:
from sklearn.metrics import accuracy_score

# Way 1: let the fitted estimator score itself on the same data.
print('model.score =', model.score(df[['x']], df['y']))

# Way 2: compare the stored actual and predicted label columns.
acc = accuracy_score(df['y'], df['yp'])
print('accuracy_score =', acc)

# Error rate is the complement of accuracy; rounded to one decimal for display.
print('Error Rate =', round(1 - acc, 1))

model.score = 0.7
accuracy_score = 0.7
Error Rate = 0.3


- __Recall__

In [48]:
from sklearn.metrics import recall_score

# Recall per class: each label in turn is treated as the positive one.
for tag, positive in (('Recall+', 'Zaenab'), ('Recall-', 'Sarah')):
    print(f'{tag} =', recall_score(df['y'], df['yp'], pos_label=positive))

Recall+ = 0.7
Recall- = 0.7


- __Precision__

In [50]:
from sklearn.metrics import precision_score

# Precision for each class, selecting the class through `pos_label`.
y_true, y_pred = df['y'], df['yp']
print('Precision+ =', precision_score(y_true, y_pred, pos_label='Zaenab'))
print('Precision- =', precision_score(y_true, y_pred, pos_label='Sarah'))

Precision+ = 0.7
Precision- = 0.7


- __Balanced Accuracy__

In [52]:
from sklearn.metrics import balanced_accuracy_score

# Balanced accuracy as computed by sklearn (average of the per-class recalls).
bal_acc = balanced_accuracy_score(df['y'], df['yp'])
print('Balanced Accuracy =', bal_acc)

Balanced Accuracy = 0.7


- __F1 Score__

In [56]:
from sklearn.metrics import f1_score

# F1 for each class, keyed by the sign used in the printed caption.
f1_by_sign = {
    '+': f1_score(df['y'], df['yp'], pos_label='Zaenab'),
    '-': f1_score(df['y'], df['yp'], pos_label='Sarah'),
}
for sign, score in f1_by_sign.items():
    print(f'F1 Score{sign} =', score)

F1 Score+ = 0.7
F1 Score- = 0.7


### Function Langsung

In [63]:
from sklearn.metrics import precision_recall_fscore_support

# With no `average` argument, each returned array holds one value per class,
# ordered [Sarah, Zaenab].
# support = number of samples of each class in the TRUE labels (df['y']),
# not in the predictions (df['yp']).
prc, rcl, f1, sp = precision_recall_fscore_support(df['y'], df['yp'])
prc, rcl, f1, sp

(array([0.7, 0.7]),
 array([0.7, 0.7]),
 array([0.7, 0.7]),
 array([10, 10], dtype=int64))

In [67]:
from sklearn.metrics import classification_report

# Text summary of precision / recall / f1 / support per class plus averages.
report = classification_report(df['y'], df['yp'])
print(report)

              precision    recall  f1-score   support

       Sarah       0.70      0.70      0.70        10
      Zaenab       0.70      0.70      0.70        10

    accuracy                           0.70        20
   macro avg       0.70      0.70      0.70        20
weighted avg       0.70      0.70      0.70        20



In [68]:
# Same metrics collapsed to a single number per metric, once per averaging mode.
for avg in ('micro', 'macro', 'weighted'):
    print(precision_recall_fscore_support(df['y'], df['yp'], average=avg))

(0.7, 0.7, 0.7, None)
(0.7, 0.7, 0.7, None)
(0.7, 0.7, 0.7, None)


### Perbedaan Average Micro, Macro, & Weighted

In [70]:
# Precision per class.
# Assumes tp/tn/fp/fn follow the usual definitions with Zaenab as the
# positive class, so (tn + fn) is everything predicted Sarah and (tp + fp)
# is everything predicted Zaenab.
precSa = tn / (tn + fn)
precZa = tp / (tp + fp)

# Support = number of TRUE samples of each class. These are the weights of
# the weighted average; hard-coding both weights to 1 would make "weighted"
# identical to "macro" by construction.
suppSa = int((df['y'] == 'Sarah').sum())
suppZa = int((df['y'] == 'Zaenab').sum())

# macro    : plain mean of the per-class precisions
# weighted : mean of the per-class precisions weighted by class support
# micro    : pool the counts of all classes first, then compute one precision
precMacro = (precSa + precZa) / 2
precWeighted = ((precSa * suppSa) + (precZa * suppZa)) / (suppSa + suppZa)
precMicro = (tn + tp) / (tn + fn + tp + fp)

precMacro, precWeighted, precMicro

(0.7, 0.7, 0.7)

# Tugas

In [None]:
# Exercise data: (actual, predicted) label pairs for a three-class problem.
pairs = [
    ('Kucing', 'Kucing'),
    ('Anjing', 'Kucing'),
    ('Gajah', 'Gajah'),
    ('Kucing', 'Kucing'),
    ('Anjing', 'Kucing'),
    ('Gajah', 'Gajah'),
]
y, yp = (list(column) for column in zip(*pairs))

# What does the confusion matrix look like?