# URLs para obtenção dos datasets (obtidos em 07/06/2021)
- ### Explicação dos datasets: https://repositoriodatasharingfapesp.uspdigital.usp.br/
- ### Dataset do hospital Albert Einstein: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/98 - (2020-06-30)
- ### Dataset do hospital Sírio-Libanês: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/97 - (2020-06-30)
- ### Dataset do hospital Beneficência Portuguesa: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/101 - (2021-04-28)
- ### Dataset do grupo Fleury: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/99 - (2020-06-30)
- ### Dataset do Hospital das Clínicas da Faculdade de Medicina da Universidade de São Paulo: https://repositoriodatasharingfapesp.uspdigital.usp.br/handle/item/100 - (2021-02-17)

# 1. Importing Libraries

In [10]:
import json
from operator import itemgetter
import os
import sys

current_path = os.path.abspath(os.getcwd())
sys.path.append(f"{current_path}/../libs")

from plot import plot_confusion_matrix, plot_table
from config import load_config


# 2. Reading Configs and Defining Globals

In [11]:
# Load the project configuration from ../config.yaml. The cells below read
# PLOTS_PATH, FEATURES_PATH, REPORT_PATH, DATASETS and N_FEATURES from it.
config = load_config("../config.yaml")

# 3. Auxiliary Functions

In [12]:
def get_data(model):
    """Count how often each feature appears across the lists in ``model``.

    Args:
        model: Mapping whose values are iterables of (hashable) features.
            The keys are ignored.

    Returns:
        A list of ``[feature, count]`` rows ordered by count descending,
        with ties broken by feature in descending order.
    """
    occurrences = {}
    for ranked_features in model.values():
        for name in ranked_features:
            occurrences[name] = occurrences.get(name, 0) + 1
    return sorted(
        ([name, count] for name, count in occurrences.items()),
        key=lambda row: (row[1], row[0]),
        reverse=True,
    )

In [13]:
def get_table(model,model_name, dataset=None):
    """Plot the feature-count table for one model/dataset pair and record it.

    The counts produced by ``get_data(model)`` are stored in the module-level
    ``features_result`` dict under ``features_result[model_name][dataset]``
    and rendered with ``plot_table`` into
    ``{PLOTS_PATH}{FEATURES_PATH}/{model_name}/{dataset}``.

    Args:
        model: Mapping passed straight to ``get_data``.
        model_name: Name of the feature-selection model; used as the
            ``features_result`` key, the output sub-directory and in the
            plot title.
        dataset: Dataset identifier; used as the inner ``features_result``
            key and as the output file name.

    Returns:
        None.
    """
    # Raw string: "\ " is not a valid escape sequence in a normal literal.
    # The backslashes must reach plot_table unchanged.
    columns_label = ["", r"Times\ on\ Top\ 10"]
    data = get_data(model)
    features_result.setdefault(model_name, {})[dataset] = dict(data)
    path = f"{config.PLOTS_PATH}{config.FEATURES_PATH}/{model_name}/"
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(path, exist_ok=True)
    plot_table(data, columns=columns_label, path=f"{path}{dataset}", title=f"Feature Selection {model_name}")
    

In [14]:
def get_results(fs_model):
    """Sum the per-dataset feature counts of one feature-selection model.

    Entries stored under the keys ``"concatenated-dataset"`` and
    ``"features-result"`` are left out of the sum.

    Args:
        fs_model: Mapping of dataset name to a ``{feature: count}`` mapping.

    Returns:
        A list of ``[feature, total]`` rows ordered by total descending,
        with ties broken by feature in descending order.
    """
    totals = {}
    for dataset, counts in fs_model.items():
        if dataset == "concatenated-dataset" or dataset == "features-result":
            continue
        for feature, times in counts.items():
            try:
                totals[feature] += times
            except KeyError:
                totals[feature] = times
    return sorted(
        ([feature, total] for feature, total in totals.items()),
        key=lambda row: (row[1], row[0]),
        reverse=True,
    )

# 4. Reading Each Dataset Feature Selection Report

In [15]:
# Load the feature-selection report. The cells below treat it as
# {model name: {dataset name: {key: [features, ...]}}}.
# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default locale encoding.
with open(f"{config.REPORT_PATH}features.json", 'r', encoding="utf-8") as fp:
    features_reports = json.load(fp)

# 5. Getting Each Dataset Feature Selection Table

In [16]:
# Build one feature-count table per (feature-selection model, dataset) pair.
# get_table() fills features_result[model_name][dataset] as a side effect.
features_result = {}
for fs_model_name, fs_model in features_reports.items():
    for dataset in config.DATASETS:
        features_dict = fs_model.get(dataset)
        if features_dict is None:
            # The report has no entry for this dataset. Passing a placeholder
            # such as "" would fail inside get_data() on .values(), so skip it.
            continue
        get_table(features_dict, fs_model_name, dataset)

# 6. Getting Results Feature Selection Table

In [17]:
# For each feature-selection model, sum the per-dataset counts, plot the first
# N_FEATURES rows and store the full totals under the "features-result" key.
# Raw string: "\ " is not a valid escape sequence in a normal literal.
columns_label = ["", r"Times\ on\ Top\ 10"]
for fs_model_name, fs_model in features_result.items():
    data = get_results(fs_model)
    # Same directory layout as get_table(): the "/" between FEATURES_PATH and
    # the model name keeps the results next to the per-dataset tables.
    path = f"{config.PLOTS_PATH}{config.FEATURES_PATH}/{fs_model_name}/"
    os.makedirs(path, exist_ok=True)
    plot_table(data[:config.N_FEATURES], columns=columns_label, path=f"{path}results", title=f"Feature Selection - {fs_model_name}")
    # Only the inner dict is modified, so iterating features_result is safe.
    features_result[fs_model_name]["features-result"] = dict(data)

# 7. Saving Results Report

In [18]:
# Write the collected per-dataset and aggregated counts to
# {REPORT_PATH}features-result.json.
path = f"{config.REPORT_PATH}"
# exist_ok avoids the check-then-create race of os.path.exists().
os.makedirs(path, exist_ok=True)

with open(f"{path}features-result.json", 'w', encoding="utf-8") as f:
    json.dump(features_result, f)