# Purpose


Test a few additional classification metrics that we can log and then use to select the best models.

# Notebook setup

In [50]:
# Load IPython's autoreload extension; mode 2 reloads modules before each cell executes,
# so edits to local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [51]:
from datetime import datetime
import gc
import os
import logging
from pathlib import Path
from pprint import pprint

import mlflow

import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import seaborn as sns


import subclu

from subclu.utils import set_working_directory
from subclu.utils.eda import (
    setup_logging, counts_describe, value_counts_and_pcts,
    notebook_display_config, print_lib_versions,
    style_df_numeric
)
from subclu.utils.mlflow_logger import MlflowLogger, log_clf_report_and_conf_matrix

from subclu.utils.data_irl_style import (
    get_colormap, theme_dirl
)


# Print the versions of the key libraries so the environment is recorded alongside the results.
print_lib_versions([mlflow, np, pd, plotly, sns, subclu])

python		v 3.7.10
===
mlflow		v: 1.16.0
numpy		v: 1.19.5
pandas		v: 1.2.4
plotly		v: 4.14.3
seaborn		v: 0.11.1
subclu		v: 0.4.1


In [4]:
# plotting
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.dates as mdates
# Use matplotlib's built-in 'default' style sheet for all figures.
plt.style.use('default')

# Project helpers imported from subclu.utils.eda; presumably they configure logging output
# and notebook display options -- TODO confirm against their definitions.
setup_logging()
notebook_display_config()

# Test additional classification metrics

In [31]:
# Imports for the metric experiments in this section.
# `random` drives the toy predictions; the sklearn functions are the metrics under test.
# (log_clf_report_and_conf_matrix is already imported from subclu.utils.mlflow_logger
# in the notebook setup cell, so no further subclu import is needed here.)
import random

from sklearn.metrics import (
    classification_report,
    accuracy_score, precision_recall_fscore_support,
    confusion_matrix
)

In [63]:
# Seed both random number generators used below so that "Restart & Run All" reproduces
# the same toy data, and therefore the same metric values, on every run.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

n_array_test = 99
# Ground truth: an imbalanced draw over the three classes (20% / 30% / 50%).
y_true = np.random.choice(['sports', 'politics', 'gaming', ], size=n_array_test, p=[.2, .3, .5])
# Predictions: for each true label, pick uniformly from the three classes plus three extra
# copies of the true label, so the correct class is chosen with probability 4/6.
y_pred = [random.choice(['sports', 'politics', 'gaming', ] + [v] * 3) for v in y_true]

In [64]:
# Text report: per-class precision / recall / F1 plus the averaged rows, to 3 decimals.
clf_report_text = classification_report(y_true, y_pred, digits=3)
print(clf_report_text)

              precision    recall  f1-score   support

      gaming      0.902     0.698     0.787        53
    politics      0.571     0.714     0.635        28
      sports      0.478     0.611     0.537        18

    accuracy                          0.687        99
   macro avg      0.651     0.675     0.653        99
weighted avg      0.732     0.687     0.699        99



In [69]:
# Overall accuracy first, then the (precision, recall, f-score, support) tuple
# for each averaging strategy, to compare against the text report.
print('accuracy', accuracy_score(y_true, y_pred))

averaged_scores = {
    avg_: precision_recall_fscore_support(y_true, y_pred, beta=1, average=avg_)
    for avg_ in ('macro', 'weighted')
}
for avg_, scores_ in averaged_scores.items():
    print(f'{avg_}_avg', scores_)

accuracy 0.6868686868686869
macro_avg (0.6507094884613442, 0.674503344314665, 0.6529133477758283, None)
weighted_avg (0.7316966055036045, 0.6868686868686869, 0.6985830163481088, None)


In [70]:
# Pass the label order explicitly; with average=None each returned array holds
# one value per label.
lbls_ = ['gaming', 'politics', 'sports']
precision_recall_fscore_support(
    y_true,
    y_pred,
    labels=lbls_,
    beta=1,
    average=None,
)

(array([0.90243902, 0.57142857, 0.47826087]),
 array([0.69811321, 0.71428571, 0.61111111]),
 array([0.78723404, 0.63492063, 0.53658537]),
 array([53, 28, 18]))

If we supply the labels, the order of the arrays will match the labels and we can use this to create a dict, which we can then convert into a dataframe.

In [71]:
# Build a column-oriented dict of per-class metrics (one list/array per column),
# show it as a dataframe, then print each "<metric>-<class>" value on its own line.
col_class_labels = 'class'
beta = 1

class_order = ['gaming', 'sports', 'politics',]
metric_names = ['precision', 'recall', f'f{beta}_score', 'support']
metric_arrays = precision_recall_fscore_support(
    y_true, y_pred, beta=beta, labels=class_order, average=None
)

# Class labels go in first so they become the first column of the dataframe.
d_class_metrics = {col_class_labels: class_order}
d_class_metrics.update(zip(metric_names, metric_arrays))
display(pd.DataFrame(d_class_metrics))

for metric_, values_ in d_class_metrics.items():
    if metric_ == col_class_labels:
        continue
    for class_, val_ in zip(d_class_metrics[col_class_labels], values_):
        print(f"{metric_}-{class_}", val_)

Unnamed: 0,class,precision,recall,f1_score,support
0,gaming,0.902439,0.698113,0.787234,53
1,sports,0.478261,0.611111,0.536585,18
2,politics,0.571429,0.714286,0.634921,28


precision-gaming 0.9024390243902439
precision-sports 0.4782608695652174
precision-politics 0.5714285714285714
recall-gaming 0.6981132075471698
recall-sports 0.6111111111111112
recall-politics 0.7142857142857143
f1_score-gaming 0.7872340425531914
f1_score-sports 0.5365853658536586
f1_score-politics 0.634920634920635
support-gaming 53
support-sports 18
support-politics 28


In [54]:
# Exercise the project's logging helper on the toy data.
# Judging by the argument names, nothing is saved locally or sent to MLflow here and the
# metrics are only echoed to the console -- TODO confirm against the helper's definition.
# NOTE(review): the saved output of this cell reports supports of 50/36/13, which differ from
# the 53/28/18 shown by the classification report cell; it appears to come from an earlier
# random draw, so re-run this cell after regenerating y_true / y_pred.
log_clf_report_and_conf_matrix(
    y_true=y_true,
    y_pred=y_pred,
    data_fold_name='k2',
    class_labels=['gaming', 'politics', 'sports'],
    save_path=None,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
    log_to_console=True,
    remove_files_from_local_path=False,
    print_clf_df=True,
    return_confusion_mx=False,
)

05:41:59 | INFO | "Start processing K2 metrics for logging"
  0%|          | 0/15 [00:00<?, ?it/s]05:41:59 | INFO | " K2 gaming-f1_score: 0.816327"
05:41:59 | INFO | " K2 gaming-support: 50"
05:41:59 | INFO | " K2 politics-f1_score: 0.677419"
05:41:59 | INFO | " K2 politics-support: 36"
05:41:59 | INFO | " K2 sports-f1_score: 0.526316"
05:41:59 | INFO | " K2 sports-support: 13"
05:41:59 | INFO | " K2 accuracy: 0.717172"
05:41:59 | INFO | " K2 precision_macro_avg: 0.680342"
05:41:59 | INFO | " K2 recall_macro_avg: 0.717521"
05:41:59 | INFO | " K2 f1_score_macro_avg: 0.673354"
05:41:59 | INFO | " K2 support_macro_avg: 99"
05:41:59 | INFO | " K2 precision_weighted_avg: 0.767107"
05:41:59 | INFO | " K2 recall_weighted_avg: 0.717172"
05:41:59 | INFO | " K2 f1_score_weighted_avg: 0.727733"
05:41:59 | INFO | " K2 support_weighted_avg: 99"
100%|██████████| 15/15 [00:00<00:00, 1152.53it/s]

             precision recall  f1-score  support
class                                           
gaming           0.833  0.800     0.816       50
politics         0.808  0.583     0.677       36
sports           0.400  0.769     0.526       13
macro avg        0.680  0.718     0.673       99
weighted avg     0.767  0.717     0.728       99
accuracy                          0.717       99





In [25]:
# Same per-class metrics with a different label order, assembled as a dict literal
# and rendered as a dataframe (one row per class).
clf_labels = ['sports', 'gaming', 'politics', ]
precision_, recall_, f1_, support_ = precision_recall_fscore_support(
    y_true, y_pred, beta=1, labels=clf_labels, average=None
)
d_clf_report = {
    'class': clf_labels,
    'precision': precision_,
    'recall': recall_,
    'f1_score': f1_,
    'support': support_,
}
pd.DataFrame(d_clf_report)

Unnamed: 0,class,precision,recall,f1_score,support
0,sports,0.4,0.769231,0.526316,13
1,gaming,0.833333,0.8,0.816327,50
2,politics,0.807692,0.583333,0.677419,36


In [11]:
# Same call without `average`: per-label arrays are still returned, one entry per
# label in the order given (politics, sports, gaming).
precision_recall_fscore_support(
    y_true,
    y_pred,
    beta=1,
    labels=['politics', 'sports', 'gaming', ],
)

(array([0.66666667, 0.58823529, 0.8       ]),
 array([0.8       , 0.76923077, 0.58333333]),
 array([0.72727273, 0.66666667, 0.6746988 ]),
 array([25, 26, 48]))