# Imports

In [1]:
import ast
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Model load

In [2]:
# Qing-Cheng: directory searched for result files, and the label used for it.
path_qing_cheng = '../model/qing_cheng'
qing_cheng_model_name = 'Qing-Cheng'

# Vallen: directory searched for result files, and the label used for it.
path_vallen = '../model/vallen'
vallen_model_name = 'Vallen'

In [3]:
def get_file_paths(path: str) -> list:
    """Collect the ``all_results.csv`` files found under *path*.

    The tree rooted at *path* is walked top-down. Every directory that
    contains at least one file whose name ends with ``'all_results.csv'``
    contributes one dictionary of the form ``{'all_results': <file path>}``.

    Directories are visited in sorted name order, so the returned list is
    deterministic across runs and file systems. This matters because the
    result lists of different models are later combined by position.

    Args:
        path: Root directory to search.

    Returns:
        A list of dictionaries, one per matching directory, in traversal
        order. The list is empty when nothing matches or *path* does not
        exist.
    """
    file_paths = []

    for dir_path, dir_names, file_names in os.walk(path):
        # os.walk yields entries in arbitrary order; sorting the directory
        # list in place makes the top-down traversal order reproducible.
        dir_names.sort()

        matches = [
            file_name
            for file_name in sorted(file_names)
            if file_name.endswith('all_results.csv')
        ]

        if matches:
            # As before, only one file per directory is kept; with sorted
            # names the choice (the last match) is now deterministic too.
            file_paths.append({'all_results': os.path.join(dir_path, matches[-1])})

    return file_paths

In [4]:
# Locate the result files of both models, Qing-Cheng first and Vallen second.
paths_qing_cheng, paths_vallen = map(get_file_paths, (path_qing_cheng, path_vallen))

In [5]:
def load_model_result(path_dict: dict):
    """Read the results CSV referenced by *path_dict* into a DataFrame.

    Args:
        path_dict: Mapping whose ``'all_results'`` entry is the path of the
            CSV file to read.

    Returns:
        A dictionary with a single key, ``'all_results'``, holding the
        DataFrame produced by ``pd.read_csv``.
    """
    results_frame = pd.read_csv(path_dict['all_results'])
    loaded = dict(all_results=results_frame)
    return loaded

In [6]:
# Load one result dictionary per discovered file, preserving the path order.
qing_cheng_models = list(map(load_model_result, paths_qing_cheng))
vallen_models = list(map(load_model_result, paths_vallen))

# Analysis

In [7]:
def process_scores(scores: list):
    """Average a list of score sequences position by position.

    The sequences in *scores* are combined with ``zip``, so the first
    returned value is the mean of every sequence's first element, the second
    is the mean of every second element, and so on. If the sequences differ
    in length, positions beyond the shortest one are ignored.

    Args:
        scores: Sequence of equally structured score sequences.

    Returns:
        A list with one ``np.mean`` result per position; empty when *scores*
        is empty.
    """
    per_position_means = []

    for position_values in zip(*scores):
        per_position_means.append(np.mean(position_values))

    return per_position_means

In [8]:
def process_fold_results(df: pd.DataFrame, model_name):
    """Expand the serialized fold results into per-metric mean columns.

    The column ``'Fold Results (accuracy, precision, recall, f1)'`` holds
    text that is parsed into a Python object and passed to
    ``process_scores``, which averages it position by position. Positions
    0-3 of that average are stored in the new columns ``accuracy``,
    ``precision``, ``recall`` and ``f1``, and the original text column is
    removed. A ``model`` label column is added as well.

    Args:
        df: Results table. It must contain the fold-results column and the
            columns ``num_units``, ``batch_size``, ``epochs`` and
            ``drop_out``. The input frame is not modified.
        model_name: Prefix used when building the ``model`` label.

    Returns:
        A new DataFrame with the metric and ``model`` columns added.

    Raises:
        ValueError, SyntaxError: If a fold-results cell is not a plain
            Python literal.
    """
    fold_column = 'Fold Results (accuracy, precision, recall, f1)'
    metric_names = ('accuracy', 'precision', 'recall', 'f1')

    # SECURITY: this used to call eval() on text read from a CSV file, which
    # executes arbitrary code embedded in the data. literal_eval only accepts
    # Python literals (numbers, strings, tuples, lists, dicts, ...).
    # NOTE(review): if the files contain non-literal reprs such as
    # `np.float64(0.9)`, this now raises instead of evaluating them - confirm
    # the on-disk format.
    fold_results = df[fold_column].apply(ast.literal_eval)
    metric_means = fold_results.apply(process_scores)

    # drop() returns a new frame, so the caller's DataFrame stays untouched.
    new_df = df.drop(columns=[fold_column])

    for position, metric in enumerate(metric_names):
        new_df[metric] = metric_means.apply(
            lambda means, position=position: means[position]
        )

    def build_label(row):
        # Label format: <model_name>(<num_units>,<batch_size>,<epochs>,<drop_out>)
        return f"{model_name}({row['num_units']},{row['batch_size']},{row['epochs']},{row['drop_out']})"

    new_df['model'] = new_df.apply(build_label, axis=1)

    return new_df

In [9]:
def process_model_results(model_dict: dict, model_name: str):
    """Normalise and expand the results table stored in *model_dict*.

    The DataFrame under ``model_dict['all_results']`` has its columns
    renamed to snake_case and is then passed through
    ``process_fold_results``. The dictionary is updated in place.

    Args:
        model_dict: Dictionary holding a DataFrame under ``'all_results'``.
        model_name: Name forwarded to ``process_fold_results``.

    Returns:
        None.
    """
    renamed_columns = {
        'Num Units': 'num_units',
        'Batch Size': 'batch_size',
        'Epochs': 'epochs',
        'Dense Activation': 'dense_activation',
        'drop': 'drop_out',
    }

    renamed_frame = model_dict['all_results'].rename(columns=renamed_columns)

    # The renamed frame is stored before fold processing, so it stays in the
    # dictionary even if the next step raises.
    model_dict['all_results'] = renamed_frame
    model_dict['all_results'] = process_fold_results(renamed_frame, model_name)

In [10]:
# Post-process every loaded result in place, Qing-Cheng first and Vallen second.
for model_group, group_name in (
    (qing_cheng_models, qing_cheng_model_name),
    (vallen_models, vallen_model_name),
):
    for model in model_group:
        process_model_results(model, group_name)

# Plot

In [11]:
def plot_best_model_by_metric(model_a: pd.DataFrame, model_b: pd.DataFrame, metric: str='accuracy'):
    """Draw a bar chart comparing the top row of two result tables.

    From each table the row with the highest value of *metric* is selected;
    the two rows are concatenated and shown as bars, one per ``model`` label.

    Args:
        model_a: First results table; needs the *metric* and ``model`` columns.
        model_b: Second results table with the same columns.
        metric: Column used both for ranking and as bar height.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    top_rows = [
        frame.sort_values(by=metric, ascending=False).iloc[:1]
        for frame in (model_a, model_b)
    ]
    best_models = pd.concat(top_rows)

    sns.barplot(data=best_models, x='model', y=metric, hue='model')

    # Fixed y ticks every 0.1 starting at 0, so separate charts share a scale.
    tick_positions = np.arange(0, 1.1, 0.1)
    plt.yticks(tick_positions)
    plt.show()

## Accuracy

In [12]:
# Compare the best rows by accuracy. The two model lists are paired by
# position; zip stops at the shorter list instead of raising IndexError.
for qing_cheng_model, vallen_model in zip(qing_cheng_models, vallen_models):
    plot_best_model_by_metric(
        qing_cheng_model['all_results'],
        vallen_model['all_results'],
        'accuracy',
    )

## Precision

In [13]:
# Compare the best rows by precision. The two model lists are paired by
# position; zip stops at the shorter list instead of raising IndexError.
for qing_cheng_model, vallen_model in zip(qing_cheng_models, vallen_models):
    plot_best_model_by_metric(
        qing_cheng_model['all_results'],
        vallen_model['all_results'],
        'precision',
    )

## Recall

In [14]:
# Compare the best rows by recall. The two model lists are paired by
# position; zip stops at the shorter list instead of raising IndexError.
for qing_cheng_model, vallen_model in zip(qing_cheng_models, vallen_models):
    plot_best_model_by_metric(
        qing_cheng_model['all_results'],
        vallen_model['all_results'],
        'recall',
    )

## F1

In [15]:
# Compare the best rows by F1. The two model lists are paired by
# position; zip stops at the shorter list instead of raising IndexError.
for qing_cheng_model, vallen_model in zip(qing_cheng_models, vallen_models):
    plot_best_model_by_metric(
        qing_cheng_model['all_results'],
        vallen_model['all_results'],
        'f1',
    )

# ---

In [32]:
# Highest-accuracy row of the Qing-Cheng result table at list position 6.
qing_cheng_models[6]['all_results'].sort_values(by='accuracy', ascending=False).iloc[:1]

In [33]:
# Highest-accuracy row of the Vallen result table at list position 6.
vallen_models[6]['all_results'].sort_values(by='accuracy', ascending=False).iloc[:1]