# Classification quality measures

In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score,\
    recall_score, f1_score, roc_auc_score, precision_recall_curve

In [2]:
%%capture 
%run nihil_ml_utils.ipynb

In [3]:
# read classification.csv into a DataFrame; later cells use its
# 'true' and 'pred' columns
clf_data = pd.read_csv(filepath_or_buffer=to_path('classification.csv'))

In [4]:
# split the rows into the four confusion-matrix groups:
# true positives, false positives, false negatives and true negatives
tp_data = clf_data.loc[clf_data['true'].eq(True) & clf_data['pred'].eq(True)]
fp_data = clf_data.loc[clf_data['true'].eq(False) & clf_data['pred'].eq(True)]
fn_data = clf_data.loc[clf_data['true'].eq(True) & clf_data['pred'].eq(False)]
tn_data = clf_data.loc[clf_data['true'].eq(False) & clf_data['pred'].eq(False)]

In [6]:
# number of rows in each confusion-matrix group
tp, fp, fn, tn = map(len, (tp_data, fp_data, fn_data, tn_data))
print(f'Classification errors (tp, fp, fn, tn): {tp} {fp} {fn} {tn}')

Classification errors (tp, fp, fn, tn): 43 34 59 64


In [8]:
# main binary classification measures

In [9]:
# accuracy: share of correctly classified objects among all counted objects,
# computed by hand from the confusion-matrix counts and via scikit-learn
accuracy = (tp + tn) / sum((tp, fp, fn, tn))
acc_sk = accuracy_score(y_true=clf_data['true'], y_pred=clf_data['pred'])

In [10]:
# precision: among the objects the classifier labelled as positive,
# the fraction that really are positive
precision = tp / (tp + fp)
prec_sk = precision_score(y_true=clf_data['true'], y_pred=clf_data['pred'])

In [11]:
# recall: among all truly positive objects, the fraction the classifier
# labelled as positive
recall = tp / (tp + fn)
recall_sk = recall_score(y_true=clf_data['true'], y_pred=clf_data['pred'])

In [12]:
# F-measure (F1): harmonic mean of precision and recall
F_meas = 2 * precision * recall / (precision + recall)
F_meas_sk = f1_score(y_true=clf_data['true'], y_pred=clf_data['pred'])
# hand-computed accuracy, precision, recall and F-measure, two decimals each
print(f'{accuracy:.2f} {precision:.2f} {recall:.2f} {F_meas:.2f}')

0.54 0.56 0.42 0.48


In [None]:
# Probability classification measures

In [14]:
# load the classifier scores; the 'true' column holds the ground-truth labels
scores_data = pd.read_csv(to_path('scores.csv'))
y_true = scores_data['true']
# every other column is treated as one classifier's scores. The label column
# is removed by name instead of by position (columns[1:]), so the result stays
# correct even if 'true' is not the first column of the file; column order of
# the remaining classifiers is preserved.
y_pred = scores_data.drop(columns='true')

In [15]:
# scores_rocauc maps each classifier (a column of y_pred) to the float
# ROC AUC that roc_auc_score returns for that column
scores_rocauc = {
    clf_name: roc_auc_score(y_true, y_pred[clf_name])
    for clf_name in y_pred.columns
}
# (classifier, roc_auc) pairs ordered from the highest ROC AUC to the lowest
res = sorted(
    scores_rocauc.items(),
    key=lambda name_and_auc: name_and_auc[1],
    reverse=True,
)
print(f'Best roc auc is {res[0][1]} by classifier {res[0][0]}')

Best roc auc is 0.719187675070028 by classifier score_logreg


In [16]:
# Precision-Recall curve: for every classifier, find the highest precision
# reachable while recall stays at or above 0.7
best_prec = {}
for column in y_pred.columns:
    # precision_recall_curve returns (precision_array, recall_array,
    # threshold_array); the thresholds are not needed here
    prec_arr, recall_arr, _thresholds = precision_recall_curve(
        y_true, scores_data[column])
    curve = pd.DataFrame({'precision': prec_arr, 'recall': recall_arr})
    best_prec[column] = curve.loc[curve['recall'] >= 0.7, 'precision'].max()
# report the classifier whose best precision is the largest
print(f'Best precision witn recall >= 0.7 is classifier:'
      f' {max(best_prec, key=best_prec.get)}')

Best precision witn recall >= 0.7 is classifier: score_tree
