# Purpose


Test a few additional classification metrics that we can log and then use to select the best models.

# Notebook setup

In [1]:
# Load IPython's autoreload extension; mode 2 re-imports all modules before each cell executes,
# so edits to imported source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [11]:
from datetime import datetime
import gc
import os
import logging
from pathlib import Path
from pprint import pprint

import mlflow

import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import seaborn as sns


import subclu

from subclu.utils import set_working_directory
from subclu.utils.eda import (
    setup_logging, counts_describe, value_counts_and_pcts,
    notebook_display_config, print_lib_versions,
    style_df_numeric
)
from subclu.utils.mlflow_logger import MlflowLogger, log_clf_report_and_conf_matrix

from subclu.utils.data_irl_style import (
    get_colormap, theme_dirl
)


print_lib_versions([mlflow, np, pd, plotly, sns, subclu])

python		v 3.7.10
===
mlflow		v: 1.16.0
numpy		v: 1.19.5
pandas		v: 1.2.4
plotly		v: 4.14.3
seaborn		v: 0.11.1
subclu		v: 0.4.1


In [12]:
# plotting
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.dates as mdates
# Reset matplotlib to its built-in default style sheet.
plt.style.use('default')

# Project helpers imported from subclu.utils.eda.
# NOTE(review): presumably these configure logging output and notebook display options -- confirm in subclu.utils.eda.
setup_logging()
notebook_display_config()

# Test additional classification metrics

In [13]:
import random

from sklearn.metrics import (
    classification_report,
    accuracy_score, precision_recall_fscore_support,
    confusion_matrix
)

from subclu.utils import ml_metrics

In [14]:
# Seed BOTH generators used below (numpy draws y_true, stdlib `random` draws y_pred)
# so that re-running this cell -- or Restart & Run All -- always yields the same labels.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

n_array_test = 99

# Imbalanced multiclass ground truth: 'anime' and 'music' are rare (5% each).
y_true = np.random.choice(
    ['sports', 'politics', 'gaming', 'anime', 'music'],
    size=n_array_test,
    p=[.2, .3, .4, .05, .05],
)

# Noisy predictions: each prediction is drawn from 3 fixed classes plus 2 copies of the true label,
# so 'anime' and 'music' can only be predicted when they are the true label.
y_pred = [random.choice(['sports', 'politics', 'gaming', ] + [v] * 2) for v in y_true]

In [24]:
# Seed both generators (numpy for y_true_binary, stdlib `random` for y_pred_binary) so the
# binary test case is reproducible regardless of which cells ran before this one.
SEED_BINARY = 42
np.random.seed(SEED_BINARY)
random.seed(SEED_BINARY)

# Imbalanced binary ground truth: ~80% 'sports', ~20% 'gaming'.
y_true_binary = np.random.choice(
    ['sports', 'gaming'],
    size=n_array_test,
    p=[.8, .2],
)

# Predictions are biased towards 'sports': candidates are 2x 'sports' + 1x the true label,
# so 'gaming' is only ever predicted (1 in 3 chance) when it is the true label.
y_pred_binary = [random.choice(['sports'] * 2 + [v] * 1) for v in y_true_binary]

In [25]:
# Hard-coded ground-truth labels (99 items). Unlike the randomly generated arrays,
# these are fixed so the metric outputs in the cells below can be compared across runs.
y_true_manual = [
    'politics', 'politics', 'anime', 'politics', 'gaming', 'sports',
    'gaming', 'politics', 'politics', 'gaming', 'gaming', 'gaming',
    'politics', 'politics', 'sports', 'gaming', 'sports', 'politics',
    'sports', 'sports', 'politics', 'gaming', 'gaming', 'gaming',
    'politics', 'gaming', 'music', 'sports', 'sports', 'gaming',
    'politics', 'gaming', 'politics', 'politics', 'politics',
    'politics', 'music', 'politics', 'politics', 'sports', 'sports',
    'gaming', 'gaming', 'politics', 'sports', 'gaming', 'politics',
    'sports', 'politics', 'music', 'politics', 'politics', 'gaming',
    'politics', 'gaming', 'gaming', 'gaming', 'gaming', 'gaming',
    'politics', 'sports', 'gaming', 'politics', 'anime', 'gaming',
    'politics', 'sports', 'politics', 'sports', 'gaming', 'gaming',
    'gaming', 'music', 'gaming', 'gaming', 'gaming', 'politics',
    'politics', 'sports', 'gaming', 'gaming', 'politics', 'gaming',
    'gaming', 'politics', 'gaming', 'music', 'sports', 'politics',
    'sports', 'gaming', 'sports', 'sports', 'gaming', 'politics',
    'gaming', 'gaming', 'music', 'politics'
]

# Hard-coded predictions aligned position-by-position with y_true_manual.
# 'anime' never appears here, so its precision/recall are undefined and get reported as 0.
y_pred_manual = [
    'politics', 'politics', 'politics', 'politics', 'gaming', 'sports',
    'gaming', 'politics', 'politics', 'gaming', 'politics', 'politics',
    'politics', 'politics', 'gaming', 'gaming', 'politics', 'politics',
    'sports', 'sports', 'politics', 'politics', 'gaming', 'gaming',
    'politics', 'gaming', 'sports', 'sports', 'sports', 'politics',
    'gaming', 'gaming', 'gaming', 'sports', 'sports', 'politics',
    'sports', 'politics', 'politics', 'politics', 'sports', 'gaming',
    'gaming', 'gaming', 'sports', 'gaming', 'politics', 'sports',
    'politics', 'gaming', 'gaming', 'sports', 'politics', 'politics',
    'politics', 'politics', 'gaming', 'gaming', 'gaming', 'politics',
    'sports', 'gaming', 'politics', 'sports', 'sports', 'sports',
    'sports', 'politics', 'sports', 'gaming', 'gaming', 'gaming',
    'politics', 'politics', 'gaming', 'sports', 'sports', 'gaming',
    'sports', 'gaming', 'gaming', 'sports', 'gaming', 'sports',
    'sports', 'gaming', 'gaming', 'sports', 'sports', 'sports',
    'gaming', 'sports', 'gaming', 'politics', 'gaming', 'gaming',
    'gaming', 'music', 'sports'
]

## Log classification report AND confusion matrix in one call

In [31]:
# Baseline to compare against: sklearn's text report for the hand-written labels.
# zero_division=0 reports undefined scores as 0.
manual_report_text = classification_report(
    y_true_manual,
    y_pred_manual,
    zero_division=0,
    digits=3,
)
print(manual_report_text)

              precision    recall  f1-score   support

       anime      0.000     0.000     0.000         2
      gaming      0.722     0.684     0.703        38
       music      1.000     0.167     0.286         6
    politics      0.594     0.559     0.576        34
      sports      0.500     0.789     0.612        19

    accuracy                          0.616        99
   macro avg      0.563     0.440     0.435        99
weighted avg      0.638     0.616     0.602        99



In [32]:
# Expectation: with sort_labels_by_support=False the classes come out in the same order
# as in sklearn's classification_report.
ml_metrics.log_classification_report_and_confusion_matrix(
    # inputs
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    class_labels=None,
    beta=1,
    # naming & ordering
    data_fold_name='k100',
    sort_labels_by_support=False,
    # output switches: print the dataframes, send nothing to mlflow
    save_path='mlruns/logs/test_classification_report3',
    log_df_to_console=True,
    log_metrics_to_console=False,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
    # support is not logged as a metric
    log_support_avg=False,
    log_support_per_class=False,
)

01:48:46 | INFO | "df metrics:
      class  precision    recall  f1_score  support data_fold
0     anime   0.000000  0.000000  0.000000        2      k100
1    gaming   0.722222  0.684211  0.702703       38      k100
2     music   1.000000  0.166667  0.285714        6      k100
3  politics   0.593750  0.558824  0.575758       34      k100
4    sports   0.500000  0.789474  0.612245       19      k100"
01:48:46 | INFO | "df metrics:
          class  precision    recall  f1_score  support data_fold
0     macro_avg   0.563194  0.439835  0.435284       99      k100
1  weighted_avg   0.637696  0.616162  0.602277       99      k100"
01:48:46 | INFO | "confusion matrix k100:
          anime  gaming  music  politics  sports
anime         0       0      0         1       1
gaming        0      26      0         9       3
music         0       2      1         1       2
politics      0       6      0        19       9
sports        0       2      0         2      15"


In [21]:
# Expectation: with sort_labels_by_support=True the classes with the most support
# (count of true labels) are listed first.
sorted_report_kwargs = dict(
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    data_fold_name='k100',
    beta=1,
    class_labels=None,
    sort_labels_by_support=True,
    save_path='mlruns/logs/test_classification_report3',
    log_metrics_to_console=False,
    log_df_to_console=True,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
    log_support_avg=False,
    log_support_per_class=False,
)
ml_metrics.log_classification_report_and_confusion_matrix(**sorted_report_kwargs)

18:00:47 | INFO | "df metrics:
      class  precision    recall  f1_score  support data_fold
0    gaming   0.722222  0.684211  0.702703       38      k100
1  politics   0.593750  0.558824  0.575758       34      k100
2    sports   0.500000  0.789474  0.612245       19      k100
3     music   1.000000  0.166667  0.285714        6      k100
4     anime   0.000000  0.000000  0.000000        2      k100"
18:00:47 | INFO | "df metrics:
          class  precision    recall  f1_score  support data_fold
0     macro_avg   0.563194  0.439835  0.435284       99      k100
1  weighted_avg   0.637696  0.616162  0.602277       99      k100"
18:00:47 | INFO | "confusion matrix k100:
          gaming  politics  sports  music  anime
gaming        26         9       3      0      0
politics       6        19       9      0      0
sports         2         2      15      0      0
music          2         1       2      1      0
anime          0         1       1      0      0"


### Check Binary case

In [27]:
# sklearn's text report for the two-class case, used as the reference for the next cell.
binary_report_text = classification_report(
    y_true_binary,
    y_pred_binary,
    digits=3,
)
print(binary_report_text)

              precision    recall  f1-score   support

      gaming      1.000     0.227     0.370        22
      sports      0.819     1.000     0.901        77

    accuracy                          0.828        99
   macro avg      0.910     0.614     0.635        99
weighted avg      0.859     0.828     0.783        99



In [30]:
# Same helper on the binary labels; individual metrics are also echoed to the console here.
ml_metrics.log_classification_report_and_confusion_matrix(
    # inputs
    y_true=y_true_binary,
    y_pred=y_pred_binary,
    class_labels=None,
    beta=1,
    # naming & ordering
    data_fold_name='kbinary',
    sort_labels_by_support=True,
    # output switches: console only, nothing sent to mlflow
    save_path='mlruns/logs/test_classification_report3',
    log_metrics_to_console=True,
    log_df_to_console=True,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
    # support is not logged as a metric
    log_support_avg=False,
    log_support_per_class=False,
)

01:46:47 | INFO | "precision-sports-kbinary: 0.8191489361702128"
01:46:47 | INFO | "precision-gaming-kbinary: 1.0"
01:46:47 | INFO | "recall-sports-kbinary: 1.0"
01:46:47 | INFO | "recall-gaming-kbinary: 0.22727272727272727"
01:46:47 | INFO | "f1_score-sports-kbinary: 0.9005847953216375"
01:46:47 | INFO | "f1_score-gaming-kbinary: 0.37037037037037035"
01:46:47 | INFO | "df metrics:
    class  precision    recall  f1_score  support data_fold
0  sports   0.819149  1.000000  0.900585       77   kbinary
1  gaming   1.000000  0.227273  0.370370       22   kbinary"
01:46:48 | INFO | "precision-macro_avg-kbinary: 0.9095744680851063"
01:46:48 | INFO | "precision-weighted_avg-kbinary: 0.8593380614657211"
01:46:48 | INFO | "recall-macro_avg-kbinary: 0.6136363636363636"
01:46:48 | INFO | "recall-weighted_avg-kbinary: 0.8282828282828283"
01:46:48 | INFO | "f1_score-macro_avg-kbinary: 0.635477582846004"
01:46:48 | INFO | "f1_score-weighted_avg-kbinary: 0.7827593675546893"
01:46:48 | INFO | "df metr

## Confusion matrix

In [18]:
# Raw sklearn confusion matrix (no labels), as a reference for the labeled versions below.
manual_conf_matrix = confusion_matrix(
    y_true_manual,
    y_pred_manual,
)
print(manual_conf_matrix)

[[ 0  0  0  1  1]
 [ 0 26  0  9  3]
 [ 0  2  1  1  2]
 [ 0  6  0 19  9]
 [ 0  2  0  2 15]]


In [33]:
# Labeled confusion matrix, without sorting the classes by support.
conf_mx_unsorted_kwargs = dict(
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    data_fold_name='k100',
    class_labels=None,
    save_path='mlruns/logs/test_classification_report2',
    log_df_to_console=False,
    log_artifacts_to_mlflow=False,
    sort_labels_by_support=False,
)
ml_metrics.log_confusion_matrix(**conf_mx_unsorted_kwargs)

Unnamed: 0,anime,gaming,music,politics,sports
anime,0,0,0,1,1
gaming,0,26,0,9,3
music,0,2,1,1,2
politics,0,6,0,19,9
sports,0,2,0,2,15


In [34]:
# Same matrix, now with classes ordered by support (most frequent true label first).
ml_metrics.log_confusion_matrix(
    # inputs
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    class_labels=None,
    # naming & ordering
    data_fold_name='k100',
    sort_labels_by_support=True,
    # output switches
    save_path='mlruns/logs/test_classification_report2',
    log_df_to_console=False,
    log_artifacts_to_mlflow=False,
)

Unnamed: 0,gaming,politics,sports,music,anime
gaming,26,9,3,0,0
politics,6,19,9,0,0
sports,2,2,15,0,0
music,2,1,2,1,0
anime,0,1,1,0,0


In [20]:
# Pass an explicit label order together with sort_labels_by_support=True to see which one wins.
l_class_labels = ['sports', 'politics', 'gaming', 'anime', 'music']
conf_mx_explicit_kwargs = dict(
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    data_fold_name='k100',
    class_labels=l_class_labels,
    save_path='mlruns/logs/test_classification_report2',
    log_df_to_console=False,
    log_artifacts_to_mlflow=False,
    sort_labels_by_support=True,
)
ml_metrics.log_confusion_matrix(**conf_mx_explicit_kwargs)

Unnamed: 0,sports,politics,gaming,anime,music
sports,15,2,2,0,0
politics,9,19,6,0,0
gaming,3,9,26,0,0
anime,1,1,0,0,0
music,2,1,2,0,1


In [119]:
# Edge case: predictions contain ONLY the two rarest classes (49x 'anime' then 50x 'music').
rare_only_preds = ['anime'] * 49 + ['music'] * 50
ml_metrics.log_confusion_matrix(
    y_true=y_true_manual,
    y_pred=rare_only_preds,
    class_labels=None,
    data_fold_name='k100',
    sort_labels_by_support=True,
    save_path='mlruns/logs/test_classification_report2',
    log_df_to_console=False,
    log_artifacts_to_mlflow=False,
)

Unnamed: 0,gaming,politics,sports,music,anime
gaming,0,0,0,23,15
politics,0,0,0,14,20
sports,0,0,0,8,11
music,0,0,0,4,2
anime,0,0,0,1,1


### Use new function to replicate classification report
Working with `classification_report` can be messy, so try `precision_recall_fscore_support` instead; it makes it easier to iterate over the metrics for logging & saving.

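- `accuracy` = `recall-weighted_avg` (support-weighted recall is the total number of correct predictions divided by the total number of samples, which is exactly accuracy)
    - so we may not need to calculate accuracy separately, but we need a way to note that `recall-weighted_avg` is the same as accuracy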



### Aggregate AND per-class metrics


In [121]:
# Aggregate + per-class metrics with explicit labels; echo every metric (support included) to the console.
l_class_labels = ['sports', 'politics', 'gaming', 'anime', 'music']
ml_metrics.log_classification_report_and_confusion_matrix(
    # inputs
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    class_labels=l_class_labels,
    beta=1,
    data_fold_name='k100',
    # output switches: console only, nothing sent to mlflow
    save_path='mlruns/logs/test_classification_report1',
    log_metrics_to_console=True,
    log_df_to_console=True,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
    # also log support as a metric
    log_support_avg=True,
    log_support_per_class=True,
)

23:06:32 | INFO | "precision-sports-k100: 0.5"
23:06:32 | INFO | "precision-politics-k100: 0.59375"
23:06:32 | INFO | "precision-gaming-k100: 0.7222222222222222"
23:06:32 | INFO | "precision-anime-k100: 0.0"
23:06:32 | INFO | "precision-music-k100: 1.0"
23:06:32 | INFO | "recall-sports-k100: 0.7894736842105263"
23:06:32 | INFO | "recall-politics-k100: 0.5588235294117647"
23:06:32 | INFO | "recall-gaming-k100: 0.6842105263157895"
23:06:32 | INFO | "recall-anime-k100: 0.0"
23:06:32 | INFO | "recall-music-k100: 0.16666666666666666"
23:06:32 | INFO | "f1_score-sports-k100: 0.6122448979591837"
23:06:32 | INFO | "f1_score-politics-k100: 0.5757575757575757"
23:06:32 | INFO | "f1_score-gaming-k100: 0.7027027027027027"
23:06:32 | INFO | "f1_score-anime-k100: 0.0"
23:06:32 | INFO | "f1_score-music-k100: 0.2857142857142857"
23:06:32 | INFO | "support-sports-k100: 19"
23:06:32 | INFO | "support-politics-k100: 34"
23:06:32 | INFO | "support-gaming-k100: 38"
23:06:32 | INFO | "support-anime-k100: 2"

In [97]:
# Same call as the previous cell, but only the summary dataframes are printed
# (log_metrics_to_console=False).
l_class_labels = ['sports', 'politics', 'gaming', 'anime', 'music']
df_only_report_kwargs = dict(
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    data_fold_name='k100',
    beta=1,
    class_labels=l_class_labels,
    save_path='mlruns/logs/test_classification_report1',
    log_metrics_to_console=False,
    log_df_to_console=True,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
    log_support_avg=True,
    log_support_per_class=True,
)
ml_metrics.log_classification_report_and_confusion_matrix(**df_only_report_kwargs)

22:11:28 | INFO | "df metrics:
      class  precision    recall  f1_score  support data_fold
0    sports   0.500000  0.789474  0.612245       19      k100
1  politics   0.593750  0.558824  0.575758       34      k100
2    gaming   0.722222  0.684211  0.702703       38      k100
3     anime   0.000000  0.000000  0.000000        2      k100
4     music   1.000000  0.166667  0.285714        6      k100"
22:11:28 | INFO | "df metrics:
          class  precision    recall  f1_score  support data_fold
0     macro_avg   0.563194  0.439835  0.435284       99      k100
1  weighted_avg   0.637696  0.616162  0.602277       99      k100"


### Aggregate metrics

In [92]:
# Macro and weighted averages in a single call (average='macro_and_weighted').
l_class_labels = ['sports', 'politics', 'gaming', 'anime', 'music']
ml_metrics.log_precision_recall_fscore_support(
    # inputs
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    class_labels=l_class_labels,
    beta=1,
    average='macro_and_weighted',
    data_fold_name='k100',
    # output switches: echo metrics to console, nothing sent to mlflow
    save_path='mlruns/logs/test_classification_report',
    log_metrics_to_console=True,
    log_df_to_console=False,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
)

19:59:18 | INFO | "precision-macro_avg-k100: 0.5631944444444444"
19:59:18 | INFO | "precision-weighted_avg-k100: 0.6376964085297419"
19:59:18 | INFO | "recall-macro_avg-k100: 0.4398348813209495"
19:59:18 | INFO | "recall-weighted_avg-k100: 0.6161616161616161"
19:59:18 | INFO | "f1_score-macro_avg-k100: 0.4352838924267496"
19:59:18 | INFO | "f1_score-weighted_avg-k100: 0.602276758120914"


Unnamed: 0,class,precision,recall,f1_score,support,data_fold
0,macro_avg,0.563194,0.439835,0.435284,99,k100
1,weighted_avg,0.637696,0.616162,0.602277,99,k100


In [93]:
# Weighted and macro averages computed separately; both calls share every argument except `average`.
single_avg_kwargs = dict(
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    data_fold_name='k100',
    beta=1,
    class_labels=None,
    save_path=None,
    log_metrics_to_console=True,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
    log_df_to_console=False,
)

# The first result has to be display()-ed explicitly because only the last expression of a cell is rendered.
weighted_avg_result = ml_metrics.log_precision_recall_fscore_support(
    average='weighted',
    **single_avg_kwargs
)
display(weighted_avg_result)

ml_metrics.log_precision_recall_fscore_support(
    average='macro',
    **single_avg_kwargs
)

19:59:22 | INFO | "precision-weighted_avg-k100: 0.6376964085297419"
19:59:22 | INFO | "recall-weighted_avg-k100: 0.6161616161616161"
19:59:22 | INFO | "f1_score-weighted_avg-k100: 0.602276758120914"


Unnamed: 0,class,precision,recall,f1_score,support,data_fold
0,weighted_avg,0.637696,0.616162,0.602277,99,k100


19:59:22 | INFO | "precision-macro_avg-k100: 0.5631944444444444"
19:59:22 | INFO | "recall-macro_avg-k100: 0.4398348813209495"
19:59:22 | INFO | "f1_score-macro_avg-k100: 0.4352838924267496"


Unnamed: 0,class,precision,recall,f1_score,support,data_fold
0,macro_avg,0.563194,0.439835,0.435284,99,k100


### per-class metrics

In [90]:
# Reference per-class numbers from sklearn, to check the per-class logging calls below.
per_class_reference_text = classification_report(
    y_true_manual,
    y_pred_manual,
    digits=3,
    zero_division=0,
)
print(per_class_reference_text)

              precision    recall  f1-score   support

       anime      0.000     0.000     0.000         2
      gaming      0.722     0.684     0.703        38
       music      1.000     0.167     0.286         6
    politics      0.594     0.559     0.576        34
      sports      0.500     0.789     0.612        19

    accuracy                          0.616        99
   macro avg      0.563     0.440     0.435        99
weighted avg      0.638     0.616     0.602        99



In [87]:
# Per-class metrics (average=None) with explicit labels; nothing is saved (save_path=None).
l_class_labels = ['sports', 'politics', 'gaming', 'anime', 'music']
ml_metrics.log_precision_recall_fscore_support(
    # inputs
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    class_labels=l_class_labels,
    beta=1,
    average=None,
    data_fold_name='k100',
    # output switches: echo metrics (support included) to console, nothing sent to mlflow
    save_path=None,
    log_metrics_to_console=True,
    log_df_to_console=False,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
    log_support=True,
)

19:52:54 | INFO | "precision-sports-k100: 0.5"
19:52:54 | INFO | "precision-politics-k100: 0.59375"
19:52:54 | INFO | "precision-gaming-k100: 0.7222222222222222"
19:52:54 | INFO | "precision-anime-k100: 0.0"
19:52:54 | INFO | "precision-music-k100: 1.0"
19:52:54 | INFO | "recall-sports-k100: 0.7894736842105263"
19:52:54 | INFO | "recall-politics-k100: 0.5588235294117647"
19:52:54 | INFO | "recall-gaming-k100: 0.6842105263157895"
19:52:54 | INFO | "recall-anime-k100: 0.0"
19:52:54 | INFO | "recall-music-k100: 0.16666666666666666"
19:52:54 | INFO | "f1_score-sports-k100: 0.6122448979591837"
19:52:54 | INFO | "f1_score-politics-k100: 0.5757575757575757"
19:52:54 | INFO | "f1_score-gaming-k100: 0.7027027027027027"
19:52:54 | INFO | "f1_score-anime-k100: 0.0"
19:52:54 | INFO | "f1_score-music-k100: 0.2857142857142857"
19:52:54 | INFO | "support-sports-k100: 19"
19:52:54 | INFO | "support-politics-k100: 34"
19:52:54 | INFO | "support-gaming-k100: 38"
19:52:54 | INFO | "support-anime-k100: 2"

Unnamed: 0,class,precision,recall,f1_score,support,data_fold
0,sports,0.5,0.789474,0.612245,19,k100
1,politics,0.59375,0.558824,0.575758,34,k100
2,gaming,0.722222,0.684211,0.702703,38,k100
3,anime,0.0,0.0,0.0,2,k100
4,music,1.0,0.166667,0.285714,6,k100


In [88]:
# Same per-class call as the previous cell, this time with a save_path set.
l_class_labels = ['sports', 'politics', 'gaming', 'anime', 'music']
per_class_save_kwargs = dict(
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    data_fold_name='k100',
    beta=1,
    average=None,
    class_labels=l_class_labels,
    save_path='mlruns/logs/test_classification_report',
    log_metrics_to_console=True,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
    log_df_to_console=False,
    log_support=True,
)
ml_metrics.log_precision_recall_fscore_support(**per_class_save_kwargs)

19:55:50 | INFO | "precision-sports-k100: 0.5"
19:55:50 | INFO | "precision-politics-k100: 0.59375"
19:55:50 | INFO | "precision-gaming-k100: 0.7222222222222222"
19:55:50 | INFO | "precision-anime-k100: 0.0"
19:55:50 | INFO | "precision-music-k100: 1.0"
19:55:50 | INFO | "recall-sports-k100: 0.7894736842105263"
19:55:50 | INFO | "recall-politics-k100: 0.5588235294117647"
19:55:50 | INFO | "recall-gaming-k100: 0.6842105263157895"
19:55:50 | INFO | "recall-anime-k100: 0.0"
19:55:50 | INFO | "recall-music-k100: 0.16666666666666666"
19:55:50 | INFO | "f1_score-sports-k100: 0.6122448979591837"
19:55:50 | INFO | "f1_score-politics-k100: 0.5757575757575757"
19:55:50 | INFO | "f1_score-gaming-k100: 0.7027027027027027"
19:55:50 | INFO | "f1_score-anime-k100: 0.0"
19:55:50 | INFO | "f1_score-music-k100: 0.2857142857142857"
19:55:50 | INFO | "support-sports-k100: 19"
19:55:50 | INFO | "support-politics-k100: 34"
19:55:50 | INFO | "support-gaming-k100: 38"
19:55:50 | INFO | "support-anime-k100: 2"

Unnamed: 0,class,precision,recall,f1_score,support,data_fold
0,sports,0.5,0.789474,0.612245,19,k100
1,politics,0.59375,0.558824,0.575758,34,k100
2,gaming,0.722222,0.684211,0.702703,38,k100
3,anime,0.0,0.0,0.0,2,k100
4,music,1.0,0.166667,0.285714,6,k100


In [84]:
# Per-class metrics WITHOUT class labels, to see how the helper names the classes
# when class_labels=None.
ml_metrics.log_precision_recall_fscore_support(
    # inputs
    y_true=y_true_manual,
    y_pred=y_pred_manual,
    class_labels=None,
    beta=1,
    average=None,
    data_fold_name='k100',
    # output switches: everything off, only the returned dataframe is shown
    save_path=None,
    log_metrics_to_console=False,
    log_df_to_console=False,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
)

Unnamed: 0,class,precision,recall,f1_score,support,data_fold
0,class_0,0.0,0.0,0.0,2,k100
1,class_1,0.722222,0.684211,0.702703,38,k100
2,class_2,1.0,0.166667,0.285714,6,k100
3,class_3,0.59375,0.558824,0.575758,34,k100
4,class_4,0.5,0.789474,0.612245,19,k100


In [54]:
# Prototype: build the aggregate (macro / weighted) metrics by hand with
# precision_recall_fscore_support, then print them as "<metric>-<class>" pairs.
from collections import defaultdict

col_class_labels = 'class'
beta = 1

accuracy = accuracy_score(y_true, y_pred)
print('accuracy', accuracy)

d_class_metrics = defaultdict(list)
for avg_ in ['macro', 'weighted']:
    class_name = f'{avg_}_avg'
    d_class_metrics[col_class_labels].append(class_name)
    # zero_division=0 gives the same scores as the default 'warn' (undefined -> 0) but without
    # the warning noise, and matches the classification_report calls in this notebook.
    score_tuple = precision_recall_fscore_support(
        y_true, y_pred, beta=beta, average=avg_, zero_division=0
    )
    # score_tuple is (precision, recall, fbeta, support)
    d_class_metrics['precision'].append(score_tuple[0])
    d_class_metrics['recall'].append(score_tuple[1])
    d_class_metrics[f'f{beta}_score'].append(score_tuple[2])

display(pd.DataFrame(d_class_metrics))

# Sanity check: support-weighted recall is (total correct / total samples), i.e. accuracy.
recall_by_avg = dict(zip(d_class_metrics[col_class_labels], d_class_metrics['recall']))
assert np.isclose(accuracy, recall_by_avg['weighted_avg']), \
    "weighted-average recall should equal accuracy"

for metric_ in [k for k in d_class_metrics.keys() if k != col_class_labels]:
    for class_, val_ in zip(d_class_metrics[col_class_labels], d_class_metrics[metric_]):
        print(f"{metric_}-{class_}", val_)

accuracy 0.6060606060606061


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,class,precision,recall,f1_score
0,macro_avg,0.359969,0.402993,0.373818
1,weighted_avg,0.58129,0.606061,0.586018


precision-macro_avg 0.3599686379928316
precision-weighted_avg 0.5812900510481156
recall-macro_avg 0.4029927760577916
recall-weighted_avg 0.6060606060606061
f1_score-macro_avg 0.3738181818181818
f1_score-weighted_avg 0.5860177532904804


In [15]:
# Per-class scores restricted to three labels; each returned array follows the order of `lbls_`.
lbls_ = ['gaming', 'politics', 'sports']
precision_recall_fscore_support(
    y_true,
    y_pred,
    labels=lbls_,
    average=None,
    beta=1,
)

(array([0.88235294, 0.51282051, 0.61538462]),
 array([0.6122449 , 0.76923077, 0.66666667]),
 array([0.72289157, 0.61538462, 0.64      ]),
 array([49, 26, 24]))

If we supply the labels, the order of the arrays will match the labels and we can use this to create a dict, which we can then convert into a dataframe.

In [16]:
# Turn the per-class score arrays into a dict keyed by metric name, then into a dataframe.
col_class_labels = 'class'
beta = 1
ordered_labels = ['gaming', 'sports', 'politics',]

# One array per metric, each aligned with `ordered_labels`.
per_class_scores = precision_recall_fscore_support(
    y_true, y_pred, beta=beta, labels=ordered_labels, average=None
)
metric_names = ['precision', 'recall', f'f{beta}_score', 'support']

d_class_metrics = {col_class_labels: ordered_labels}
d_class_metrics.update(zip(metric_names, per_class_scores))
display(pd.DataFrame(d_class_metrics))

# Emit "<metric>-<class> value" lines, skipping the label column itself.
for metric_label, metric_values in d_class_metrics.items():
    if metric_label == col_class_labels:
        continue
    for label_, score_ in zip(d_class_metrics[col_class_labels], metric_values):
        print(f"{metric_label}-{label_}", score_)

Unnamed: 0,class,precision,recall,f1_score,support
0,gaming,0.882353,0.612245,0.722892,49
1,sports,0.615385,0.666667,0.64,24
2,politics,0.512821,0.769231,0.615385,26


precision-gaming 0.8823529411764706
precision-sports 0.6153846153846154
precision-politics 0.5128205128205128
recall-gaming 0.6122448979591837
recall-sports 0.6666666666666666
recall-politics 0.7692307692307693
f1_score-gaming 0.7228915662650602
f1_score-sports 0.64
f1_score-politics 0.6153846153846154
support-gaming 49
support-sports 24
support-politics 26


In [54]:
# Earlier helper from subclu.utils.mlflow_logger, run on the random labels for comparison.
legacy_report_kwargs = dict(
    y_true=y_true,
    y_pred=y_pred,
    data_fold_name='k2',
    class_labels=['gaming', 'politics', 'sports'],
    save_path=None,
    log_metrics_to_mlflow=False,
    log_artifacts_to_mlflow=False,
    log_to_console=True,
    remove_files_from_local_path=False,
    print_clf_df=True,
    return_confusion_mx=False,
)
log_clf_report_and_conf_matrix(**legacy_report_kwargs)

05:41:59 | INFO | "Start processing K2 metrics for logging"
  0%|          | 0/15 [00:00<?, ?it/s]05:41:59 | INFO | " K2 gaming-f1_score: 0.816327"
05:41:59 | INFO | " K2 gaming-support: 50"
05:41:59 | INFO | " K2 politics-f1_score: 0.677419"
05:41:59 | INFO | " K2 politics-support: 36"
05:41:59 | INFO | " K2 sports-f1_score: 0.526316"
05:41:59 | INFO | " K2 sports-support: 13"
05:41:59 | INFO | " K2 accuracy: 0.717172"
05:41:59 | INFO | " K2 precision_macro_avg: 0.680342"
05:41:59 | INFO | " K2 recall_macro_avg: 0.717521"
05:41:59 | INFO | " K2 f1_score_macro_avg: 0.673354"
05:41:59 | INFO | " K2 support_macro_avg: 99"
05:41:59 | INFO | " K2 precision_weighted_avg: 0.767107"
05:41:59 | INFO | " K2 recall_weighted_avg: 0.717172"
05:41:59 | INFO | " K2 f1_score_weighted_avg: 0.727733"
05:41:59 | INFO | " K2 support_weighted_avg: 99"
100%|██████████| 15/15 [00:00<00:00, 1152.53it/s]

             precision recall  f1-score  support
class                                           
gaming           0.833  0.800     0.816       50
politics         0.808  0.583     0.677       36
sports           0.400  0.769     0.526       13
macro avg        0.680  0.718     0.673       99
weighted avg     0.767  0.717     0.728       99
accuracy                          0.717       99





In [25]:
# Build a classification-report style dataframe straight from the per-class score arrays.
clf_labels = ['sports', 'gaming', 'politics', ]
clf_scores = precision_recall_fscore_support(
    y_true, y_pred, beta=1, labels=clf_labels, average=None
)

# Column order: class, precision, recall, f1_score, support
d_clf_report = {'class': clf_labels}
for column_name, column_values in zip(['precision', 'recall', 'f1_score', 'support'], clf_scores):
    d_clf_report[column_name] = column_values
pd.DataFrame(d_clf_report)

Unnamed: 0,class,precision,recall,f1_score,support
0,sports,0.4,0.769231,0.526316,13
1,gaming,0.833333,0.8,0.816327,50
2,politics,0.807692,0.583333,0.677419,36


In [11]:
# Same per-class call with the labels in a different order, to see the arrays follow that order.
precision_recall_fscore_support(
    y_true,
    y_pred,
    labels=['politics', 'sports', 'gaming', ],
    beta=1,
)

(array([0.66666667, 0.58823529, 0.8       ]),
 array([0.8       , 0.76923077, 0.58333333]),
 array([0.72727273, 0.66666667, 0.6746988 ]),
 array([25, 26, 48]))