In [36]:
import ast
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize

def clean_report_df(filepath):
    """Load a saved classification-report CSV and flatten it to one row per classifier.

    The file is read without a header and transposed. The first transposed row
    (the file's own row labels) is discarded and the remaining six columns are
    named ``classifier``, ``No``, ``Yes``, ``accuracy``, ``Macro Avg`` and
    ``Micro Avg``. The four non-accuracy columns must each hold the text of a
    Python dict literal with ``precision``, ``recall``, ``f1-score`` and
    ``support`` entries.

    NOTE(review): the outputs saved in this notebook show different precision
    and recall values under "Micro Avg". A true micro average of a
    single-label problem would normally have them equal, so this column may
    actually hold a weighted average -- confirm against the code that wrote
    the CSV before relying on the label.

    Parameters
    ----------
    filepath : str or path-like
        Location of the report CSV; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``classifier`` (the text before the first ``(`` of the
        stored label). The first five columns are ``No``, ``Yes``,
        ``accuracy``, ``Macro Avg`` and ``Micro Avg``: the four dict columns
        hold the parsed dicts with every key suffixed by ``_<column>``, and
        ``accuracy`` is left exactly as read. They are followed by one flat
        column per metric and report column, in the order ``precision_No``,
        ``recall_No``, ``f1-score_No``, ``support_No``, ``precision_Yes``, ...
        A metric missing from a dict shows up as NaN in its flat column.

    Raises
    ------
    ValueError, SyntaxError
        From ``ast.literal_eval`` when a dict cell is not a valid literal.
    """
    report_columns = ['classifier', 'No', 'Yes', 'accuracy', 'Macro Avg', 'Micro Avg']
    dict_columns = ['No', 'Yes', 'Macro Avg', 'Micro Avg']
    keys = ['precision', 'recall', 'f1-score', 'support']

    report_df = pd.read_csv(filepath, header=None).T
    # Row 0 of the transposed frame only carries the file's row labels; the
    # fixed names above replace them.
    report_df = report_df.drop(report_df.index[0])
    report_df.columns = report_columns
    # Keep just the class name, dropping the "(param=..., ...)" part of the label.
    report_df['classifier'] = report_df['classifier'].str.split('(').str[0]
    report_df = report_df.set_index('classifier')

    def parse_metrics(raw, col):
        """Parse one dict literal and suffix each known metric key with ``_<col>``."""
        metrics = ast.literal_eval(raw)
        # Look metrics up by name rather than by position so the labels stay
        # correct whatever order the stored dict happens to use.
        return {key + '_' + col: metrics[key] for key in keys if key in metrics}

    expanded = []
    for col in dict_columns:
        report_df[col] = report_df[col].apply(lambda raw: parse_metrics(raw, col))
        # One frame per report column, built in a single call and sharing
        # report_df's index. Passing `columns` pins the column order and
        # yields NaN for any metric a dict did not contain. Using the plain
        # DataFrame constructor also avoids the deprecated
        # pandas.io.json.json_normalize import path.
        expanded.append(
            pd.DataFrame(
                report_df[col].tolist(),
                index=report_df.index,
                columns=[key + '_' + col for key in keys],
            )
        )

    return pd.concat([report_df] + expanded, axis=1)

In [44]:
# clean_report_df puts its five raw report columns first; everything from
# position 5 onward is a flattened metric column. Support counts are dropped
# and the remaining metric columns are shown in alphabetical order.
df = (
    clean_report_df('Data/Output/report_df.csv')
    .iloc[:, 5:]
    .pipe(lambda metrics: metrics[sorted(name for name in metrics.columns if 'support' not in name)])
)
df

Unnamed: 0_level_0,f1-score_Macro Avg,f1-score_Micro Avg,f1-score_No,f1-score_Yes,precision_Macro Avg,precision_Micro Avg,precision_No,precision_Yes,recall_Macro Avg,recall_Micro Avg,recall_No,recall_Yes
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
GradientBoostingClassifier,0.496476,0.991621,0.992952,0.0,0.499321,0.997303,0.998642,0.0,0.493663,0.986002,0.987326,0.0
RandomForestClassifier,0.496676,0.992803,0.993351,0.0,0.49972,0.998889,0.999441,0.0,0.493668,0.986791,0.987336,0.0


In [38]:
import plotly.graph_objects as go

# NOTE: `df` is assigned by more than one cell in this notebook, so this figure
# shows whichever report was loaded most recently -- run the report_df.csv
# load cell immediately before this one.
fig = go.Figure(
    data=go.Heatmap(
        z=df.values.tolist(),
        x=df.columns.tolist(),
        y=df.index.tolist(),
        # Support columns are filtered out of `df`, leaving precision/recall/f1
        # scores; pinning the colour range to 0-1 keeps this heatmap directly
        # comparable with the other one in the notebook.
        zmin=0,
        zmax=1,
        hoverongaps=False,
        xgap=3,
        ygap=3,
        colorbar=dict(title='score'),
    )
)
fig.update_layout(
    title='Classification metrics per classifier (report_df.csv)',
    xaxis_title='metric',
    yaxis_title='classifier',
)
fig.show()

# Upsampled

In [42]:
# Same preparation as for the first report, applied to the upsampled run:
# skip the five raw report columns, drop support counts, and order the
# remaining metric columns alphabetically.
df = (
    clean_report_df('Data/Output/report_df_upsampled.csv')
    .iloc[:, 5:]
    .pipe(lambda metrics: metrics[sorted(name for name in metrics.columns if 'support' not in name)])
)
df

Unnamed: 0_level_0,f1-score_Macro Avg,f1-score_Micro Avg,f1-score_No,f1-score_Yes,precision_Macro Avg,precision_Micro Avg,precision_No,precision_Yes,recall_Macro Avg,recall_Micro Avg,recall_No,recall_Yes
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
GradientBoostingClassifier,0.644724,0.647274,0.623044,0.666403,0.64579,0.652633,0.587617,0.703963,0.647833,0.646047,0.663017,0.632649
RandomForestClassifier,0.996686,0.996687,0.996661,0.996712,0.996672,0.996708,0.993343,1.0,0.996723,0.996686,1.0,0.993445


In [43]:
import plotly.graph_objects as go

# NOTE: `df` is assigned by more than one cell in this notebook, so this figure
# shows whichever report was loaded most recently -- run the
# report_df_upsampled.csv load cell immediately before this one.
fig = go.Figure(
    data=go.Heatmap(
        z=df.values.tolist(),
        x=df.columns.tolist(),
        y=df.index.tolist(),
        # Support columns are filtered out of `df`, leaving precision/recall/f1
        # scores; pinning the colour range to 0-1 keeps this heatmap directly
        # comparable with the other one in the notebook.
        zmin=0,
        zmax=1,
        hoverongaps=False,
        xgap=3,
        ygap=3,
        colorbar=dict(title='score'),
    )
)
fig.update_layout(
    title='Classification metrics per classifier (report_df_upsampled.csv)',
    xaxis_title='metric',
    yaxis_title='classifier',
)
fig.show()