# URLs para obtenção dos datasets (obtidos em 07/06/2021)
- ### Explicação dos datasets: https://repositoriodatasharingfapesp.uspdigital.usp.br/
- ### Dataset do hospital Albert Einstein: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/98 - (2020-06-30)
- ### Dataset do hospital Sírio-Libanês: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/97 - (2020-06-30)
- ### Dataset do hospital Beneficência Portuguesa: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/101 - (2021-04-28)
- ### Dataset do grupo Fleury: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/99 - (2020-06-30)
- ### Dataset do Hospital das Clínicas da Faculdade de Medicina da Universidade de São Paulo: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/100 - (2021-02-17)

# 1. Importing Libraries

In [43]:
import json
import os
import sys

# Make the "libs" directory (one level above the current working directory)
# importable. The path is derived from os.getcwd(), so it depends on where the
# notebook/script is launched from, not on where this file lives.
current_path = os.path.abspath(os.getcwd())
sys.path.append(f"{current_path}/../libs")

from plot import plot_confusion_matrix, plot_table

# 2. Defining Constants and Globals

In [44]:
# Directory from which the complete-dataset "models.json" report is read.
COMPLETE_DATASET_REPORT_PATH = "../reports/complete-dataset/"
# Directory holding one sub-folder per entry of DATASETS, each with its own
# "models.json" report.
EACH_DATASET_REPORT_PATH = "../reports/each-dataset/"
# Root directory under which the table and confusion-matrix plots are written.
PLOTS_MODELS_PATH = "../plots/models/"
# NOTE(review): not referenced by the code visible in this file.
REPORT_PATH = "../reports/"
# Dataset identifiers; used both as report sub-folder names and as plot
# output folder names.
DATASETS = [
    "albert-einstein",
    "beneficencia-portuguesa",
    "hospital-de-clinicas",
    "sirio-libanes",
    "grupo-fleury"
    ]

# Model names looked up as keys of the "models" entry in each report.
# Commented-out entries are skipped when generating plots.
MODELS = [
    "Decision Tree",
    "Random Forest",
    "KNN",
    # "Logistic Regression",
    # "SVM"
]

# 3. Auxiliary Functions

In [45]:
def get_rows_label(model):
    """Build the row labels for a metrics table.

    One ``"fold <i>"`` label is produced for every entry of ``model`` (only
    its length is used), followed by the two summary labels ``"average"`` and
    ``"standard deviation"``.
    """
    fold_labels = [f"fold {index}" for index in range(len(model))]
    return fold_labels + ["average", "standard deviation"]


In [46]:
def add_avg_row(data):
    """Append a row of per-column averages to ``data`` and return it.

    ``data`` is a non-empty list of rows; the number of columns is taken from
    the first row. Each average is rounded to 3 decimal places. The list is
    modified in place and the same object is returned.
    """
    n_rows = len(data)
    averages = []
    for col, _ in enumerate(data[0]):
        # Accumulate explicitly, row by row, starting from integer zero.
        running = 0
        for record in data:
            running += record[col]
        averages.append(round(running / n_rows, 3))
    data.append(averages)
    return data

In [47]:
def add_std_var_row(data):
    """Append a row of per-column standard deviations to ``data`` and return it.

    The last row of ``data`` is used as the per-column mean and every row
    before it as a sample. The squared deviations are averaged over the
    number of samples and the square root is rounded to 3 decimal places.
    The list is modified in place and the same object is returned.
    """
    mean_row = data[-1]
    samples = data[:-1]
    n_samples = len(samples)
    deviations = []
    for col in range(len(data[0])):
        squared_sum = 0
        for sample in samples:
            squared_sum += abs(sample[col] - mean_row[col]) ** 2
        deviations.append(round((squared_sum / n_samples) ** (1 / 2), 3))
    data.append(deviations)
    return data

In [48]:
def get_data(model, columns):
    """Collect the per-fold metric values of ``model`` into a table.

    For each index ``i`` below ``len(model)`` the entry ``"fold <i>"`` is
    looked up in ``model`` and the values named in ``columns`` are read from
    it, in order (a missing metric yields ``""``). The average row and then
    the standard-deviation row are appended before the table is returned.
    """
    table = []
    for fold_index in range(len(model)):
        fold_metrics = model.get(f"fold {fold_index}", "")
        table.append([fold_metrics.get(name, "") for name in columns])
    return add_std_var_row(add_avg_row(table))

In [49]:
def get_tables(model,fold_name,model_name=""):
    """Render the per-fold metrics table of one model and save it as a plot.

    Args:
        model: Mapping with one ``"fold <i>"`` entry per fold, each holding
            the metrics listed below.
        fold_name: Sub-folder (relative to ``PLOTS_MODELS_PATH``) in which the
            plot is stored; created if it does not exist yet.
        model_name: Name used in the output file name and the table title.
    """
    columns_label = [
        "accuracy",
        "precision",
        "recall",
        "f1-score",
    ]
    row_labels = get_rows_label(model)
    data = get_data(model, columns_label)

    path = f"{PLOTS_MODELS_PATH}{fold_name}/"
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(path, exist_ok=True)

    plot_table(
        data,
        columns=columns_label,
        rows=row_labels,
        path=path+model_name+" metrics",
        title=("Metrics Table " + model_name))

In [50]:
def get_confusion_matrix(model,fold_name,model_name=""):
    """Plot the confusion matrix of one model averaged over its folds.

    The ``"tp"`` and ``"tn"`` values found under ``"confusion matrix"`` in
    every ``"fold <i>"`` entry are averaged; the remaining two cells are
    derived as ``1 - tp`` and ``1 - tn``.
    NOTE(review): this derivation presumes the stored values are rates in
    [0, 1] rather than raw counts -- confirm against the report producer.

    Args:
        model: Mapping with one ``"fold <i>"`` entry per fold.
        fold_name: Sub-folder (relative to ``PLOTS_MODELS_PATH``) in which the
            plot is stored; created if it does not exist yet.
        model_name: Name used in the output file name and passed to the plot.

    Raises:
        ZeroDivisionError: If ``model`` is empty.
    """
    n_folds = len(model)
    tp = 0
    tn = 0
    for i in range(n_folds):
        matrix = model.get(f"fold {i}","").get("confusion matrix","")
        tp += float(matrix.get("tp",""))
        tn += float(matrix.get("tn",""))
    tp = tp/n_folds
    tn = tn/n_folds
    fn = 1 - tp
    fp = 1 - tn
    cm = [[tp, fp], [fn, tn]]

    path = f"{PLOTS_MODELS_PATH}{fold_name}/"
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(path, exist_ok=True)
    plot_confusion_matrix(cm,path=path+model_name+" confusion matrix",model_name=model_name)

# 4. Reading Complete Dataset Models Report

In [51]:
# Load the JSON report of the complete dataset into the global `models_report`.
with open(COMPLETE_DATASET_REPORT_PATH+'models.json', 'r') as fp:
    models_report = json.load(fp)

# 5. Getting Complete Dataset Models Info

In [52]:
# For every model listed in MODELS, save its metrics table and its
# fold-averaged confusion matrix under "complete-dataset/...".
models_dict = models_report.get("models","")
for model_name in MODELS:
    # A model missing from the report yields "" here, which the helpers
    # below cannot process (they call .get on the per-fold entries).
    model = models_dict.get(model_name,"")
    get_tables(model,"complete-dataset/metrics",model_name)
    get_confusion_matrix(model,"complete-dataset/confusion-matrix",model_name)


# 6. Reading Each Dataset Model Report

In [53]:
# Load one "models.json" report per dataset, keeping the same order as
# DATASETS so the two lists can be paired later.
models_reports = []
for dataset in DATASETS:
    with open(EACH_DATASET_REPORT_PATH+dataset+'/models.json', 'r') as fp:
        models_reports.append(json.load(fp))

# 7. Getting Each Dataset Models Info

In [54]:
# Produce the metrics table and confusion matrix of every model for every
# dataset, storing them under "each-dataset/<dataset>/...".
# Note: the loop variable rebinds the global `models_report` loaded from the
# complete-dataset report earlier in the file.
for models_report,dataset in zip(models_reports,DATASETS):
    models_dict = models_report.get("models","")
    for model_name in MODELS:
        model = models_dict.get(model_name,"")
        get_tables(model,f"each-dataset/{dataset}/metrics",model_name)
        get_confusion_matrix(model,f"each-dataset/{dataset}/confusion-matrix",model_name)
