# URLs para obtenção dos datasets (Obtido dia 07/06/2021)
- ### Explicação dos datasets: https://repositoriodatasharingfapesp.uspdigital.usp.br/
- ### Dataset do hospital Albert Einstein: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/98 - (2020-06-30)
- ### Dataset do hospital Sirio Libanes: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/97 (2020-06-30)
- ### Dataset do hospital Beneficencia Portuguesa: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/101 (2021-04-28)
- ### Dataset do grupo Fleury: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/99 - (2020-06-30)
- ### Dataset do hospital das clinicas da faculdade de medicina da Universidade de São Paulo: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/100 - (2021-02-17)

# 1. Importing Libraries

In [1]:
import json
from operator import itemgetter
import os
import sys

current_path = os.path.abspath(os.getcwd())
sys.path.append(f"{current_path}/../libs")

from plot import plot_confusion_matrix, plot_table

# 2. Defining Constants and Globals

In [2]:
COMPLETE_DATASET_REPORT_PATH = "../reports/complete-dataset/"
EACH_DATASET_REPORT_PATH = "../reports/each-dataset/"
INTERIM_REPORT_PATH = "../reports/interim/"
PLOTS_FEATURE_SELECTION_PATH = "../plots/feature_selection/"
REPORT_PATH = "../reports/"
DATASETS = [
    "albert-einstein",
    "beneficencia-portuguesa",
    "hospital-de-clinicas",
    "sirio-libanes",
    "grupo-fleury"
    ]

FEATURE_SELECTION_MODELS = [
    "select-k-best"
]
N_FEATURES = 15

report_interim = {}

# 3. Auxiliar Functions

# 4. Reading Complete Dataset Feature Selection Report

In [5]:
with open(INTERIM_REPORT_PATH+'feature-selection-results.json', 'r') as fp:
    feature_selection_report = json.load(fp)

In [7]:
models_names = [
    "Decision Tree",
    "Random Forest",
    "KNN" ,
    # "Logistic Regression",
    # "SVM"
]

In [31]:
def get_row(ml_results):
    n_hospitals = len(ml_results)
    accuracy = 0
    precision = 0
    recall = 0
    f1_score = 0
    tp = 0
    tn = 0
    for hospital, metrics in ml_results.items():
        accuracy += float(metrics["accuracy"])
        precision += float(metrics["precision"])
        recall += float(metrics["recall"])
        f1_score += float(metrics["f1-score"])
        tp += float(metrics["confusion matrix"]["tp"])
        tn += float(metrics["confusion matrix"]["tn"])
    return [
        round(accuracy/n_hospitals,3),
        round(precision/n_hospitals,3),
        round(recall/n_hospitals,3),
        round(f1_score/n_hospitals,3),
        round(tp/n_hospitals,3),
        round(tn/n_hospitals,3),
        round(1 - tp/n_hospitals,3),
        round(1 - tn/n_hospitals,3),
    ]


In [35]:
feature_selection_results = {}
for model,results in feature_selection_report.items():
    data = []
    columns = [
        "accuracy","precision","recall","f1-score",
        "tp","tn","fp","fn"]
    feature_selection_results[model] = {}
    for ml_model, ml_results in results.items(): 
        

        if ml_model in models_names:
            data.append(get_row(ml_results))



    path = f"{PLOTS_FEATURE_SELECTION_PATH}"
    if not os.path.exists(path):
        os.makedirs(path)
    print(data)
    plot_table(
        data,
        columns=columns, 
        rows=models_names,
        path=path+"result/metrics",
        title = (model+" Results"))
            


[[0.774, 0.684, 0.622, 0.65, 0.622, 0.764, 0.378, 0.236], [0.826, 0.776, 0.664, 0.712, 0.664, 0.772, 0.336, 0.228], [0.79, 0.726, 0.59, 0.644, 0.59, 0.72, 0.41, 0.28]]


In [36]:
feature_selection_results

{'select-k-best': {}}