# Sklearn Classification Report to Pandas Dataframe

This notebook adapts scikit-learn's `classification_report` (original source: https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/metrics/classification.py#L1363) so that it returns the report as a pandas DataFrame instead of a formatted text report.

In [1]:
# required packages
import pandas as pd
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, classification_report

In [43]:
def my_classification_report(y_true, y_pred, labels=None, target_names=None,
                             sample_weight=None, digits=2):
    """Build a classification report as a pandas DataFrame.

    Per-class precision, recall, F1 and support are computed with
    ``precision_recall_fscore_support`` and followed by a final
    ``'avg/total'`` row holding the support-weighted averages of the three
    scores and the total support.

    Parameters
    ----------
    y_true : array-like
        Real classes.
    y_pred : array-like
        Predicted classes.
    labels : array-like, optional
        Classes to report. They are sorted before use, so the rows always
        appear in ascending label order. When omitted, the unique values
        of ``y_true`` are used.
    target_names : sequence, optional
        Display names used as row labels, given in the same order as
        ``labels`` (or in ascending class order when ``labels`` is
        omitted). When omitted, the labels themselves are the row labels.
    sample_weight : array-like, optional
        Forwarded unchanged to ``precision_recall_fscore_support``.
    digits : int, default 2
        Number of decimals kept for precision, recall and F1. The support
        column is always rounded to whole numbers.

    Returns
    -------
    pandas.DataFrame
        Columns ``precision``, ``recall``, ``f1_score`` and ``support``;
        one row per class plus the trailing ``'avg/total'`` row.

    Raises
    ------
    ValueError
        If ``target_names`` does not contain one name per label.
    ZeroDivisionError
        Raised by ``np.average`` when the total support is zero.
    """
    # Identity test on purpose: ``labels == None`` is evaluated element-wise
    # for ndarrays, and the resulting array has no single truth value.
    if labels is None:
        labels = np.unique(np.array(y_true))
        order = None
    else:
        labels = np.array(labels)
        # Remember the permutation so target_names can follow the labels.
        order = np.argsort(labels)
        labels = labels[order]

    colname = ['precision', 'recall', 'f1_score', 'support']

    # Row labels: the display names when supplied, the raw labels otherwise.
    if target_names is None:
        rownames = labels.tolist()
    else:
        rownames = list(target_names)
        if len(rownames) != len(labels):
            raise ValueError(
                'target_names has {} entries but there are {} labels'.format(
                    len(rownames), len(labels)))
        if order is not None:
            rownames = [rownames[i] for i in order]
    rownames.append('avg/total')

    # Per-class metrics, one value per entry of `labels`.
    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred,
                                                  labels=labels,
                                                  average=None,
                                                  sample_weight=sample_weight)

    # Summary row: support-weighted mean of each score, plus total support.
    summary = np.array([np.average(p, weights=s),
                        np.average(r, weights=s),
                        np.average(f1, weights=s),
                        np.sum(s)], dtype=float)

    # One row per class (columns in `colname` order) followed by the summary.
    table = np.vstack([np.column_stack([p, r, f1, s]).astype(float), summary])

    # np.round replaces np.round_, which was removed in NumPy 2.0. Scores
    # are rounded to `digits`; support is rounded once, straight to whole
    # numbers, so a weighted support is never rounded twice.
    table[:, :3] = np.round(table[:, :3], decimals=digits)
    table[:, 3] = np.round(table[:, 3], decimals=0)

    return pd.DataFrame(table, columns=colname, index=rownames)
    
    

In [44]:
# Test 1: `labels` is passed in unsorted order; the DataFrame report is
# printed first, then scikit-learn's own text report for comparison.
y_true = [0, 1, 2, 2, 2]
y_pred = [0, 0, 2, 2, 1]

print(my_classification_report(y_true=y_true, y_pred=y_pred, labels=[1, 2, 0]))
print(classification_report(y_true=y_true, y_pred=y_pred))


           precision  recall  f1_score  support
0                0.5    1.00      0.67      1.0
1                0.0    0.00      0.00      1.0
2                1.0    0.67      0.80      3.0
avg/total        0.7    0.60      0.61      5.0
             precision    recall  f1-score   support

          0       0.50      1.00      0.67         1
          1       0.00      0.00      0.00         1
          2       1.00      0.67      0.80         3

avg / total       0.70      0.60      0.61         5



In [46]:
# Test 2: same comparison on a second set of true and predicted classes,
# again with `labels` passed in unsorted order.
y_true = [2, 2, 0, 1, 0]
y_pred = [2, 0, 0, 1, 1]

print(my_classification_report(y_true=y_true, y_pred=y_pred, labels=[1, 2, 0]))
print(classification_report(y_true=y_true, y_pred=y_pred))


           precision  recall  f1_score  support
0                0.5     0.5      0.50      2.0
1                0.5     1.0      0.67      1.0
2                1.0     0.5      0.67      2.0
avg/total        0.7     0.6      0.60      5.0
             precision    recall  f1-score   support

          0       0.50      0.50      0.50         2
          1       0.50      1.00      0.67         1
          2       1.00      0.50      0.67         2

avg / total       0.70      0.60      0.60         5

