In [6]:
import json
from operator import itemgetter
import os
import sys
from tqdm.notebook import tqdm

current_path = os.path.abspath(os.getcwd())
sys.path.append(f"{current_path}/../libs")

from config import load_config
from dataframe import read_dataset
from stability import get_jaccard_index
from feature_selection import get_best_features


### 1. Configs

In [7]:
# Load the project configuration and create the report containers
config = load_config("../config.yaml")

# Both reports start empty; later cells fill them in and the final cell
# serialises them to JSON.
stability_report = dict()
best_features_report = dict()

### 2. Reports

In [8]:
# Load the feature-selection report (JSON) from the configured report folder.
# The file name is appended directly to config.REPORT_PATH, so that value is
# expected to already end with a path separator.
report_file = f"{config.REPORT_PATH}best-features.json"
with open(report_file, 'r') as fp:
    features_reports = json.load(fp)

# Dataset loading is currently disabled:
# df = read_dataset("concatenated-dataset").drop("ID_PACIENTE",axis=1)

### 3. Feature Selection

In [11]:
# For every (selection model, dataset) pair: score how stable the selected
# features are across folds, then keep the best features of that pair.
#
# The report is walked as {fs_model: {dataset: {fold: fold_result}}}.
# NOTE(review): each fold_result is sliced as fold_result[:n], so it is
# presumably an ordered list of features - confirm against the report writer.
between_folds = {}
stability_report["between-folds"] = between_folds
best_between_datasets = {}  # fs_model -> best features of each dataset
best_between_models = {}    # dataset  -> best features of each fs_model

for fs_model, per_dataset in features_reports.items():
    between_folds[fs_model] = {}
    best_features_report[fs_model] = {}
    best_between_datasets[fs_model] = []

    for dataset, per_fold in per_dataset.items():
        fold_scores = {}
        between_folds[fs_model][dataset] = fold_scores
        best_between_models.setdefault(dataset, [])

        # One entry per fold, in the order the folds appear in the report.
        features = list(per_fold.values())

        # Compare the first n entries of every fold, for n = 1 .. length of
        # the first fold's result.
        for top_n in range(1, len(features[0]) + 1):
            prefixes = [ranking[:top_n] for ranking in features]
            fold_scores[f"{top_n} feature(s)"] = get_jaccard_index(prefixes)

        best_features = get_best_features(features)
        best_features_report[fs_model][dataset] = best_features
        best_between_datasets[fs_model].append(best_features)
        best_between_models[dataset].append(best_features)

In [14]:
# Calculate Stability Between Datasets and Between Models
def _prefix_stability(rankings):
    """Score the agreement of the leading entries of several feature lists.

    For n = 1 .. (length of the shortest list in ``rankings``) the first n
    entries of every list are passed to ``get_jaccard_index``.

    The depth is derived from the lists that are actually compared rather
    than from a variable left over from an earlier cell, so the result does
    not depend on cell execution order and re-running the cell is idempotent.

    Args:
        rankings: sequence of best-feature lists to compare.
            NOTE(review): each entry is assumed to support ``len()`` and
            slicing (e.g. a list) - confirm against ``get_best_features``.

    Returns:
        dict mapping ``"<n> feature(s)"`` to the value returned by
        ``get_jaccard_index`` for that depth; empty when ``rankings`` is empty.
    """
    # default=0 keeps an empty group from raising ValueError in min().
    depth = min((len(ranking) for ranking in rankings), default=0)
    return {
        f"{n} feature(s)": get_jaccard_index([ranking[:n] for ranking in rankings])
        for n in range(1, depth + 1)
    }


# Same selection model, compared across datasets.
stability_report["between-datasets"] = {
    fs_model: _prefix_stability(rankings)
    for fs_model, rankings in best_between_datasets.items()
}

# Same dataset, compared across selection models.
stability_report["between-methods"] = {
    dataset: _prefix_stability(rankings)
    for dataset, rankings in best_between_models.items()
}


### 4. Save

In [15]:
# Save Reports
path = f"{config.REPORT_PATH}"

# exist_ok=True creates the folder only when needed, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(path, exist_ok=True)

# File names are appended directly to the report path, as done when reading.
reports_to_save = (
    ("best-features-result.json", best_features_report),
    ("stability-result.json", stability_report),
)
for filename, report in reports_to_save:
    with open(f"{path}{filename}", 'w') as f:
        json.dump(report, f)