In [14]:
import pandas as pd
import numpy as np


In [15]:
def cm_performance(cm, rnd=5):
    """
    Compute summary performance metrics from a binary (2x2) confusion matrix.

    The matrix is printed (followed by a legend of its layout) and the
    metrics are returned as a two-column dataframe.

    Parameters:
    cm (array-like): Binary confusion matrix holding exactly four entries,
                     laid out as:
                     [[TN, FP],
                      [FN, TP]]
                     Nested lists are accepted as well as numpy arrays.
    rnd (int, optional): Number of decimal places to round the performance metrics. Default is 5.

    Returns:
    pandas.DataFrame: A dataframe with columns 'Metric' and 'Value', one row per metric:
                      - 'Accuracy': (TN + TP) / total.
                      - 'Error rate': 1 - accuracy.
                      - 'Sensitivity (Recall)': TP / (FN + TP).
                      - 'Specificity': TN / (TN + FP).
                      - 'Precision': TP / (FP + TP).
                      - 'F1': Harmonic mean of precision and recall.
                      - 'F2': F-beta score with beta = 2 (more weight on recall).
                      - 'F0.5': F-beta score with beta = 0.5 (more weight on precision).
                      A metric whose denominator is zero is reported as 0
                      instead of NaN; for a matrix with no samples at all,
                      every metric (including the error rate) is 0.

    Raises:
    ValueError: If cm does not contain exactly four entries.

    Example:
    >>> import numpy as np
    >>> cm = np.array([[50, 10],
    ...                [5,  35]])
    >>> cm_performance(cm)
    [[50 10]
     [ 5 35]]
    TN,  FP
    FN, TP
                     Metric    Value
    0              Accuracy  0.85000
    1            Error rate  0.15000
    2  Sensitivity (Recall)  0.87500
    3           Specificity  0.83333
    4             Precision  0.77778
    5                    F1  0.82353
    6                    F2  0.85366
    7                  F0.5  0.79545
    """
    cm = np.asarray(cm)
    if cm.size != 4:
        raise ValueError(
            f"cm must hold exactly 4 entries [[TN, FP], [FN, TP]], got shape {cm.shape}"
        )

    # .tolist() yields plain Python numbers, so the sums below cannot wrap
    # around for narrow integer dtypes.
    TN, FP, FN, TP = cm.ravel().tolist()
    total = TN + FP + FN + TP

    def ratio(numerator, denominator):
        # An undefined ratio (zero denominator) is reported as 0.
        return numerator / denominator if denominator != 0 else 0.0

    def f_beta(beta_squared):
        # F-beta = (1 + b^2) * P * R / (b^2 * P + R)
        denominator = (beta_squared * precision) + sensitivity_recall
        return ratio((1 + beta_squared) * precision * sensitivity_recall, denominator)

    accuracy = ratio(TN + TP, total)
    error_rate = 1 - accuracy if total != 0 else 0.0
    sensitivity_recall = ratio(TP, FN + TP)
    specificity = ratio(TN, TN + FP)
    precision = ratio(TP, FP + TP)
    f1 = f_beta(1)
    f2 = f_beta(4)
    f05 = f_beta(0.25)

    data = {
        'Metric': ['Accuracy', 'Error rate', 'Sensitivity (Recall)',
                   'Specificity', 'Precision', 'F1', 'F2', 'F0.5'],
        'Value': [accuracy, error_rate, sensitivity_recall, specificity,
                  precision, f1, f2, f05],
    }

    performance_df = pd.DataFrame(data)
    print(cm)
    print("TN,  FP\nFN, TP")
    return performance_df.round(rnd)


In [16]:
def multiclass_cm_metrics(cm, rnd=5):
    """
    Compute one-vs-rest performance metrics for every class of a confusion matrix.

    Class i is treated as the positive class and all other classes as the
    negative class. Rows of the matrix are read as actual classes and
    columns as predicted classes, so cm[i, j] counts samples of class i
    that were predicted as class j. The matrix is printed before the
    metrics are returned.

    Parameters:
    cm (array-like): Square confusion matrix of any size (n_classes x n_classes).
                     Nested lists are accepted as well as numpy arrays.
    rnd (int, optional): Number of decimal places to round the performance metrics. Default is 5.

    Returns:
    pandas.DataFrame: One row per metric and one column per class
                      ('Class 0', 'Class 1', ...):
                      - 'Accuracy': (TP + TN) / total for the one-vs-rest split.
                      - 'Error rate': 1 - accuracy.
                      - 'Sensitivity (Recall)': TP / (TP + FN).
                      - 'Specificity': TN / (TN + FP).
                      - 'Precision': TP / (TP + FP).
                      - 'F1': Harmonic mean of precision and recall.
                      - 'F2': F-beta score with beta = 2 (more weight on recall).
                      - 'F0.5': F-beta score with beta = 0.5 (more weight on precision).
                      A metric whose denominator is zero is reported as 0;
                      for a matrix with no samples at all, every metric
                      (including the error rate) is 0.

    Raises:
    ValueError: If cm is not a square two-dimensional matrix.

    Example:
    >>> import numpy as np
    >>> cm = np.array([[50, 10, 5],
    ...                [5, 35, 5],
    ...                [5, 10, 40]])
    >>> multiclass_cm_metrics(cm)
    Confusion Matrix:
    [[50 10  5]
     [ 5 35  5]
     [ 5 10 40]]
                          Class 0  Class 1  Class 2
    Accuracy              0.84848  0.81818  0.84848
    Error rate            0.15152  0.18182  0.15152
    Sensitivity (Recall)  0.76923  0.77778  0.72727
    Specificity           0.90000  0.83333  0.90909
    Precision             0.83333  0.63636  0.80000
    F1                    0.80000  0.70000  0.76190
    F2                    0.78125  0.74468  0.74074
    F0.5                  0.81967  0.66038  0.78431
    """
    cm = np.asarray(cm)
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise ValueError(
            f"cm must be a square 2-D confusion matrix, got shape {cm.shape}"
        )

    def ratio(numerator, denominator):
        # An undefined ratio (zero denominator) is reported as 0.
        return numerator / denominator if denominator != 0 else 0

    def f_beta(precision, recall, beta_squared):
        # F-beta = (1 + b^2) * P * R / (b^2 * P + R)
        denominator = (beta_squared * precision) + recall
        return ratio((1 + beta_squared) * precision * recall, denominator)

    classes = cm.shape[0]
    metrics = ['Accuracy', 'Error rate', 'Sensitivity (Recall)',
               'Specificity', 'Precision', 'F1', 'F2', 'F0.5']
    performance_dict = {metric: [] for metric in metrics}

    # These totals do not depend on the class, so compute them once.
    total = np.sum(cm)
    actual_totals = np.sum(cm, axis=1)     # row sums: samples per actual class
    predicted_totals = np.sum(cm, axis=0)  # column sums: samples per predicted class

    # Calculate one-vs-rest metrics for each class
    for i in range(classes):
        TP = cm[i, i]
        FN = actual_totals[i] - TP
        FP = predicted_totals[i] - TP
        TN = total - (TP + FP + FN)

        accuracy = ratio(TP + TN, total)
        error_rate = 1 - accuracy if total != 0 else 0
        sensitivity_recall = ratio(TP, TP + FN)
        specificity = ratio(TN, TN + FP)
        precision = ratio(TP, TP + FP)
        f1 = f_beta(precision, sensitivity_recall, 1)
        f2 = f_beta(precision, sensitivity_recall, 4)
        f05 = f_beta(precision, sensitivity_recall, 0.25)

        class_values = (accuracy, error_rate, sensitivity_recall, specificity,
                        precision, f1, f2, f05)
        for metric, value in zip(metrics, class_values):
            performance_dict[metric].append(value)

    # Build with classes as rows, then transpose so classes become columns
    performance_df = pd.DataFrame(performance_dict,
                                  index=[f'Class {i}' for i in range(classes)])
    performance_df = performance_df.T.round(rnd)

    print("Confusion Matrix:")
    print(cm)
    return performance_df


In [19]:
# Demo: per-class metrics for a three-class confusion matrix,
# written with one matrix row per source line.
cm = np.array([
    [50, 10, 5],
    [5, 35, 5],
    [5, 10, 40],
])
print(multiclass_cm_metrics(cm))

Confusion Matrix:
[[50 10  5]
 [ 5 35  5]
 [ 5 10 40]]
                      Class 0  Class 1  Class 2
Accuracy              0.84848  0.81818  0.84848
Error rate            0.15152  0.18182  0.15152
Sensitivity (Recall)  0.76923  0.77778  0.72727
Specificity           0.90000  0.83333  0.90909
Precision             0.83333  0.63636  0.80000
F1                    0.80000  0.70000  0.76190
F2                    0.78125  0.74468  0.74074
F0.5                  0.81967  0.66038  0.78431
