# URLs para obtenção dos datasets (obtidos em 07/06/2021)
- ### Explicação dos datasets: https://repositoriodatasharingfapesp.uspdigital.usp.br/
- ### Dataset do Hospital Albert Einstein: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/98 - (2020-06-30)
- ### Dataset do Hospital Sírio-Libanês: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/97 - (2020-06-30)
- ### Dataset do hospital Beneficência Portuguesa: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/101 - (2021-04-28)
- ### Dataset do Grupo Fleury: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/99 - (2020-06-30)
- ### Dataset do Hospital das Clínicas da Faculdade de Medicina da Universidade de São Paulo: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/100 - (2021-02-17)

# 1. Importing Libraries

In [1]:
import json
import os
import sys

# Extend the import search path with "<working directory>/../libs" before the
# project imports that follow (presumably where `plot` and `config` live —
# confirm). The path depends on the directory the notebook is launched from.
current_path = os.path.abspath(os.getcwd())
sys.path.append(f"{current_path}/../libs")

from plot import plot_confusion_matrix, plot_table
from config import load_config

# 2. Reading Configs

In [2]:
# Load the project settings used by every cell below (PLOTS_PATH, MODELS_PATH,
# REPORT_PATH, DATASETS, MODELS).
# NOTE(review): "../config.yaml" is a relative path; presumably it is resolved
# against the current working directory — confirm in load_config.
config = load_config("../config.yaml")

# 3. Auxiliary Functions

In [3]:
def get_rows_label(model):
    """Build the row labels of a metrics table.

    Returns one "fold <i>" label per entry of ``model`` (numbered from 0),
    followed by the two summary labels "average" and "standard deviation".
    Only ``len(model)`` is used; the contents of ``model`` are not inspected.
    """
    fold_labels = [f"fold {index}" for index in range(len(model))]
    return fold_labels + ["average", "standard deviation"]


In [4]:
def add_avg_row(data):
    """Append the per-column mean of ``data`` as a new last row.

    ``data`` is a non-empty list of numeric rows; the number of columns is
    taken from the first row. Each mean is rounded to 3 decimals. The list is
    modified in place and the same list object is returned.
    """
    row_count = len(data)
    averages = []
    for column in range(len(data[0])):
        # Accumulate explicitly, in row order, starting from 0.
        column_total = 0
        for row in data:
            column_total += row[column]
        averages.append(round(column_total / row_count, 3))
    data.append(averages)
    return data

In [5]:
def add_std_var_row(data):
    """Append the per-column standard deviation of ``data`` as a new last row.

    The current last row of ``data`` is used as the mean of every column, and
    all rows before it are the samples. For each column the squared distances
    to that mean are summed, divided by the number of sample rows
    (``len(data) - 1``), square-rooted and rounded to 3 decimals. The list is
    modified in place and the same list object is returned.
    """
    mean_row = data[-1]
    sample_rows = data[:-1]
    sample_count = len(data) - 1
    deviations = []
    for column in range(len(data[0])):
        squared_error = 0
        for row in sample_rows:
            squared_error += abs(row[column] - mean_row[column]) ** 2
        deviations.append(round((squared_error / sample_count) ** (1 / 2), 3))
    data.append(deviations)
    return data

In [6]:
def get_data(model, columns):
    """Collect the table body for one model: fold rows plus summary rows.

    For every fold index ``i`` below ``len(model)`` the entry ``"fold <i>"``
    is looked up in ``model`` and the metrics named in ``columns`` are read
    from it, in order. The average row and then the standard-deviation row
    are appended by ``add_avg_row`` and ``add_std_var_row``.

    A missing fold entry falls back to ``""`` and a missing metric to ``""``;
    neither is handled further here.
    """
    data = []
    for fold_index in range(len(model)):
        metrics = model.get(f"fold {fold_index}", "")
        data.append([metrics.get(metric, "") for metric in columns])
    return add_std_var_row(add_avg_row(data))

In [7]:
def get_tables(model,fold_name,model_name=""):
    """Render the metrics table (per fold, average, std) of one model.

    Args:
        model: Mapping with one "fold <i>" entry per fold, each holding the
            "accuracy", "precision", "recall" and "f1-score" values.
        fold_name: Sub-path appended to ``config.PLOTS_PATH`` +
            ``config.MODELS_PATH`` to form the output directory.
        model_name: Used in the output file name and in the table title.
    """
    row_labels = get_rows_label(model)
    columns_label = [
        "accuracy",
        "precision",
        "recall",
        "f1-score",
    ]
    data = get_data(model, columns_label)
    path = f"{config.PLOTS_PATH}{config.MODELS_PATH}{fold_name}/"
    # exist_ok avoids the check-then-create race of testing os.path.exists
    # first, and keeps repeated runs of the notebook idempotent.
    os.makedirs(path, exist_ok=True)
    plot_table(
        data,
        columns=columns_label,
        rows=row_labels,
        path=f"{path}{model_name} metrics",
        title=f"Metrics Table {model_name}",
    )

In [8]:
def get_confusion_matrix(model,fold_name,model_name=""):
    """Plot the confusion matrix of one model, averaged over its folds.

    The "tp" and "tn" values of every fold's "confusion matrix" entry are
    averaged; "fn" and "fp" are then derived as ``1 - tp`` and ``1 - tn``.

    NOTE(review): deriving fn/fp this way presumably relies on tp/tn being
    stored as rates in [0, 1] rather than raw counts — confirm against the
    code that writes the report.

    Args:
        model: Mapping with one "fold <i>" entry per fold.
        fold_name: Sub-path appended to ``config.PLOTS_PATH`` +
            ``config.MODELS_PATH`` to form the output directory.
        model_name: Used in the output file name and passed to the plot.
    """
    fold_count = len(model)
    tp = 0
    tn = 0
    for i in range(fold_count):
        matrix = model.get("fold "+str(i),"").get("confusion matrix","")
        tp += float(matrix.get("tp",""))
        tn += float(matrix.get("tn",""))
    tp = tp / fold_count
    tn = tn / fold_count
    fn = 1 - tp
    fp = 1 - tn
    cm = [[tp, fp], [fn, tn]]
    path = f"{config.PLOTS_PATH}{config.MODELS_PATH}{fold_name}/"
    # exist_ok avoids the check-then-create race of testing os.path.exists
    # first, and keeps repeated runs of the notebook idempotent.
    os.makedirs(path, exist_ok=True)
    plot_confusion_matrix(cm,path=f"{path}{model_name} confusion matrix",model_name=model_name)

# 4. Reading Each Dataset Model Report

In [9]:
# The report path does not depend on the dataset, so models.json is parsed
# once instead of once per dataset. The list still has one entry per dataset;
# every entry refers to the same parsed object, so treat it as read-only.
# NOTE(review): if each dataset was meant to have its own report file, the
# path below needs the dataset name — confirm.
models_reports = []
if config.DATASETS:
    with open(config.REPORT_PATH+'/models.json', 'r', encoding='utf-8') as fp:
        shared_report = json.load(fp)
    models_reports = [shared_report for _ in config.DATASETS]

# 5. Getting Each Dataset's Model Info

In [10]:
# For every dataset, take its section of the loaded report and produce the
# metrics table and the confusion matrix of each configured model.
for models_report, dataset in zip(models_reports, config.DATASETS):
    dataset_models = models_report.get(dataset)
    for model_name in config.MODELS:
        model = dataset_models.get(model_name, "")
        get_tables(model, f"metrics/{dataset}", model_name)
        get_confusion_matrix(model, f"confusion-matrix/{dataset}", model_name)