# URLs para obtenção dos datasets (obtidos em 07/06/2021)
- ### Explicação dos datasets: https://repositoriodatasharingfapesp.uspdigital.usp.br/
- ### Dataset do hospital Albert Einstein: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/98 - (2020-06-30)
- ### Dataset do hospital Sírio-Libanês: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/97 - (2020-06-30)
- ### Dataset do hospital Beneficência Portuguesa: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/101 - (2021-04-28)
- ### Dataset do grupo Fleury: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/99 - (2020-06-30)
- ### Dataset do Hospital das Clínicas da Faculdade de Medicina da Universidade de São Paulo: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/100 - (2021-02-17)

# 1. Importing Libraries

In [17]:
import json
from operator import itemgetter
import os
import sys

# Append the sibling ``libs`` directory (resolved from the current working
# directory) to the module search path; presumably this is where the ``plot``
# module imported right after lives.
current_path = os.path.abspath(os.getcwd())
sys.path.append(f"{current_path}/../libs")

from plot import plot_confusion_matrix, plot_table

# 2. Defining Constants and Globals

In [18]:
# Directory holding the feature-selection report of the complete dataset.
COMPLETE_DATASET_REPORT_PATH = "../reports/complete-dataset/"
# Directory with one sub-directory per dataset, each holding its own report.
EACH_DATASET_REPORT_PATH = "../reports/each-dataset/"
# Directory where the aggregated ``features.json`` report is written.
INTERIM_REPORT_PATH = "../reports/interim/"
# Root directory under which the feature-selection tables are plotted.
PLOTS_FEATURE_SELECTION_PATH = "../plots/features/"
# Root directory of the reports.
REPORT_PATH = "../reports/"
# Dataset identifiers; each names a sub-directory of EACH_DATASET_REPORT_PATH.
DATASETS = [
    "albert-einstein",
    "beneficencia-portuguesa",
    "hospital-de-clinicas",
    "sirio-libanes",
    "grupo-fleury"
    ]

# Keys looked up inside the "feature selection" section of every JSON report.
FEATURE_SELECTION_MODELS = [
    "select-k-best"
]
# Maximum number of rows shown in the final results table.
N_FEATURES = 15

# Accumulator filled by get_table/get_results:
# dataset -> model name -> {feature: count}.
report_interim = {}

# 3. Auxiliary Functions

In [19]:
def get_data(model):
    """Count how often each feature appears across all lists in ``model``.

    Args:
        model: Mapping whose values are iterables of feature names.

    Returns:
        A list of ``[feature, times]`` pairs ordered by descending count;
        ties are broken by descending feature name.
    """
    counts = {}
    for selected in model.values():
        for name in selected:
            counts[name] = counts.get(name, 0) + 1
    # itemgetter(1, 0) yields the (times, feature) tuple used as the sort key.
    return sorted(
        ([name, total] for name, total in counts.items()),
        key=itemgetter(1, 0),
        reverse=True,
    )

In [20]:
def get_table(model, fold_name, model_name, dataset=None):
    """Plot the feature-count table of one model and record it in ``report_interim``.

    Args:
        model: Mapping whose values are iterables of feature names, as
            consumed by ``get_data``.
        fold_name: Sub-directory of ``PLOTS_FEATURE_SELECTION_PATH`` that
            receives the plot.
        model_name: Name of the feature-selection model; used as the plot
            file name, in the plot title and as key in ``report_interim``.
        dataset: Optional dataset name; when truthy it is appended to the
            output directory. It is always used as key in ``report_interim``
            (``None`` included).
    """
    # Raw string: ``\ `` is not a valid Python escape sequence, so the plain
    # literal triggers an invalid-escape warning. The runtime text is unchanged.
    columns_label = ["", r"Times\ on\ Top\ 10"]
    data = get_data(model)
    report_interim.setdefault(dataset, {})[model_name] = dict(data)

    path = f"{PLOTS_FEATURE_SELECTION_PATH}{fold_name}/"
    if dataset:
        path += f"{dataset}/"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(path, exist_ok=True)
    plot_table(data, columns=columns_label, path=path+model_name, title="Feature Selection "+model_name)
    

In [21]:
def get_results(report_interim):
    """Sum the per-dataset feature counts into a ``"results"`` entry.

    Every top-level entry except ``"complete-dataset"`` and ``"results"`` is
    treated as a dataset. Its counts are added up per model and feature,
    sorted by descending count (ties by descending feature name) and stored
    under ``report_interim["results"][model_name]``.

    Args:
        report_interim: Mapping ``dataset -> model -> {feature: count}``;
            it is updated in place.

    Returns:
        The sorted ``[feature, times]`` rows of the last model processed, or
        an empty list when there was nothing to aggregate.
    """
    skipped = ("complete-dataset", "results")
    totals = {}
    for dataset, models in report_interim.items():
        if dataset in skipped:
            continue
        for model, features in models.items():
            model_totals = totals.setdefault(model, {})
            for feature, times in features.items():
                model_totals[feature] = model_totals.get(feature, 0) + times

    # Start with an empty table so that an input without any dataset entry
    # returns [] instead of raising UnboundLocalError.
    data = []
    for model_name, counts in totals.items():
        data = sorted(
            ([feature, times] for feature, times in counts.items()),
            key=lambda row: (row[1], row[0]),
            reverse=True,
        )
        report_interim.setdefault("results", {})[model_name] = dict(data)
    return data

    

# 4. Reading Complete Dataset Feature Selection Report

In [22]:
# Load the feature-selection report computed on the complete dataset.
with open(f"{COMPLETE_DATASET_REPORT_PATH}feature-selection.json") as report_file:
    feature_selection_report = json.load(report_file)

# 5. Getting Complete Dataset Feature Selection Table 

In [23]:
# Build one table per feature-selection model of the complete-dataset report.
# The fallbacks are empty dicts rather than "": both results are used as
# mappings (``.get`` here, ``.values()`` inside get_data), so a str default
# would raise AttributeError whenever a key is missing.
models_dict = feature_selection_report.get("feature selection", {})
for model_name in FEATURE_SELECTION_MODELS:
    model = models_dict.get(model_name, {})
    get_table(model, "complete-dataset", model_name, "complete-dataset")


# 6. Reading Each Dataset Feature Selection Report

In [24]:
# Load the feature-selection report of every dataset, in DATASETS order.
models_reports = []
for dataset in DATASETS:
    with open(f"{EACH_DATASET_REPORT_PATH}{dataset}/feature-selection.json") as report_file:
        models_reports.append(json.load(report_file))

# 7. Getting Each Dataset Feature Selection Table

In [25]:
# Build one table per dataset and per feature-selection model.
for models_report, dataset in zip(models_reports, DATASETS):
    # The fallbacks are empty dicts rather than "": both results are used as
    # mappings (``.get`` here, ``.values()`` inside get_data), so a str
    # default would raise AttributeError whenever a key is missing.
    models_dict = models_report.get("feature selection", {})
    for model_name in FEATURE_SELECTION_MODELS:
        model = models_dict.get(model_name, {})
        get_table(model, "each-dataset", model_name, dataset)


# 8. Getting Results Feature Selection Table

In [26]:
# Aggregate the per-dataset counts and plot the first N_FEATURES rows.
data = get_results(report_interim)
# Raw string: ``\ `` is not a valid Python escape sequence, so the plain
# literal triggers an invalid-escape warning. The runtime text is unchanged.
columns_label = ["", r"Times\ on\ Top\ 10"]
path = f"{PLOTS_FEATURE_SELECTION_PATH}result/"
# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(path, exist_ok=True)
plot_table(data[:N_FEATURES], columns=columns_label, path=path+"results", title="Feature Selection Result")

# 9. Saving Results Report

In [27]:
# Persist the accumulated report as JSON.
# The directory that is created and the directory that is written to both
# come from INTERIM_REPORT_PATH, so they cannot drift apart if a constant changes.
path = INTERIM_REPORT_PATH
os.makedirs(path, exist_ok=True)

with open(path+'features.json', 'w') as f:
    json.dump(report_interim, f)