In [36]:
import ast
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize
import plotly.graph_objects as go

def _relabel_metrics(metrics, col, keys):
    """Return ``metrics`` re-keyed as ``'<metric>_<col>'`` for each name in ``keys``.

    When every name in ``keys`` is present in ``metrics`` the values are looked
    up by name, so the labels stay correct whatever order the source dict was
    written in. Otherwise the values are paired with ``keys`` by position.
    """
    if all(key in metrics for key in keys):
        values = [metrics[key] for key in keys]
    else:
        # Positional fallback for dicts that use other key names.
        values = list(metrics.values())
    return dict(zip([key + '_' + col for key in keys], values))


def clean_report_df(filepath):
    """Load a classification-report CSV and flatten its per-class metric dicts.

    The file is read without a header and transposed, so every column of the
    CSV becomes one row of the result. The first transposed row is discarded
    and the six remaining columns are named ``classifier``, ``No``, ``Yes``,
    ``accuracy``, ``Macro Avg`` and ``Micro Avg`` by position.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``classifier`` (the text before the first ``'('`` of each
        classifier label). The first five columns are ``No``, ``Yes``,
        ``accuracy``, ``Macro Avg`` and ``Micro Avg``; the four dict columns
        hold the re-keyed metric dicts and ``accuracy`` is left exactly as
        read. They are followed by one flat column per metric and group, named
        ``'<metric>_<group>'`` (``precision_No``, ``recall_No``, ...,
        ``support_Micro Avg``), in that order.

    Raises
    ------
    ValueError
        If the file does not yield exactly six columns after transposing, or a
        metric cell is not a valid Python literal (``SyntaxError`` is also
        possible for malformed cells).
    """
    dict_columns = ['No', 'Yes', 'Macro Avg', 'Micro Avg']
    keys = ['precision', 'recall', 'f1-score', 'support']

    report_df = pd.read_csv(filepath, header=None).T
    # The first transposed row only carries the file's own row labels; it is
    # dropped and the columns are named explicitly instead.
    report_df = report_df.drop(report_df.index[0])
    # NOTE(review): names are assigned purely by position -- confirm that the
    # sixth row of the file really is a micro average before trusting the
    # 'Micro Avg' label.
    report_df.columns = ['classifier', 'No', 'Yes', 'accuracy', 'Macro Avg', 'Micro Avg']
    report_df['classifier'] = report_df['classifier'].apply(lambda x: x.split('(')[0])
    report_df = report_df.set_index('classifier')

    expanded = []
    for col in dict_columns:
        # Cells arrive as the string repr of a dict; literal_eval parses them
        # without executing arbitrary code.
        report_df[col] = report_df[col].apply(
            lambda cell: _relabel_metrics(ast.literal_eval(cell), col, keys)
        )
        # One frame per group, built in a single pass and aligned on the same
        # classifier index as report_df.
        expanded.append(pd.DataFrame(report_df[col].tolist(), index=report_df.index))

    # Original columns first, flattened metrics after: callers slice off the
    # leading five columns by position.
    return pd.concat([report_df] + expanded, axis=1)

In [45]:
def create_heatmap(df):
    """Draw ``df`` as a Plotly heatmap and display it.

    The column labels of ``df`` run along the x axis, its index labels along
    the y axis, and each cell value drives the colour of one tile. The figure
    is shown with ``fig.show()``; nothing is returned.
    """
    row_labels = df.index.values.tolist()
    cell_values = df.values.tolist()

    heatmap = go.Heatmap(
        z=cell_values,
        x=df.columns,
        y=row_labels,
        # Missing cells get no hover label.
        hoverongaps=False,
        # Leave a 3 px gutter between neighbouring tiles in both directions.
        xgap=3,
        ygap=3,
    )
    figure = go.Figure(data=heatmap)
    figure.show()

In [51]:
# Skip the five leading columns returned by clean_report_df, leave out every
# 'support' column, and order what remains alphabetically before plotting.
report_metrics = clean_report_df('Data/Output/report_df.csv').iloc[:, 5:]
report_plot_columns = sorted(name for name in report_metrics.columns if 'support' not in name)
df = report_metrics[report_plot_columns]
create_heatmap(df)

# Upsampled

In [52]:
# Same preparation for the upsampled report: skip the five leading columns,
# leave out every 'support' column, and order the rest alphabetically.
upsampled_metrics = clean_report_df('Data/Output/report_df_upsampled.csv').iloc[:, 5:]
upsampled_plot_columns = sorted(name for name in upsampled_metrics.columns if 'support' not in name)
df_upsampled = upsampled_metrics[upsampled_plot_columns]
create_heatmap(df_upsampled)