In [None]:
import os
import re
import json
import gc
import dill
import pickle
import warnings
import urllib.request
from functools import reduce
from collections import defaultdict

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import patchworklib as pw
from tqdm import tqdm

from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV, GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import (
    roc_auc_score, roc_curve,
    average_precision_score, precision_recall_curve,
    confusion_matrix, ConfusionMatrixDisplay,
    classification_report,
    recall_score, precision_score,
    PrecisionRecallDisplay
)

from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.inspection import permutation_importance

from plotnine import *

import joblib
from sklearn.metrics import f1_score

warnings.filterwarnings('ignore')


### get metrics

In [None]:
import warnings
warnings.filterwarnings("ignore", message="X has feature names, but DecisionTreeClassifier was fitted without feature names")

def preprocess(xtrain, xtest ):
    """Standardize the ``age_at_prediction_window`` column, fitting on the training frame.

    Both frames are modified in place and also returned.

    Parameters
    ----------
    xtrain : DataFrame used to fit the scaler and then transformed.
    xtest : DataFrame transformed with the fitted scaler, or ``None`` to skip.

    Returns
    -------
    (xtrain, xtest, scaler); ``scaler`` is ``None`` when the column is absent,
    in which case the frames are returned untouched.
    """
    target = "age_at_prediction_window"
    scaled_cols = list(filter(lambda name: name == target, xtrain.columns))

    if not scaled_cols:
        print('\tpreprocess column', None)
        return xtrain, xtest, None

    # Fit on training data only so no test statistics leak into the scaler.
    scaler = StandardScaler().fit(xtrain[scaled_cols])
    print('\tpreprocess column', scaled_cols)

    for frame in (xtrain, xtest):
        if frame is not None:
            frame[scaled_cols] = scaler.transform(frame[scaled_cols])

    return xtrain, xtest, scaler

   
def preprocess_scalar(xtrain, xtest, scalar ):
    """Apply an already-fitted scaler to the ``age_at_prediction_window`` column.

    Both frames are modified in place and also returned.

    Parameters
    ----------
    xtrain : DataFrame to transform.
    xtest : second DataFrame to transform, or ``None`` to skip.
    scalar : fitted object exposing ``transform``; not used when the column
        is absent (so it may be ``None`` in that case).

    Returns
    -------
    (xtrain, xtest) -- always a 2-tuple, including when the column is absent.
    """
    cols_to_scale = [col for col in xtrain.columns if col == "age_at_prediction_window"]

    if len(cols_to_scale) == 0:
        print('\tpreprocess column', None)
        # Same return shape as the scaled path: callers unpack two values.
        return xtrain, xtest

    xtrain[cols_to_scale] = scalar.transform(xtrain[cols_to_scale])
    if xtest is not None:
        xtest[cols_to_scale] = scalar.transform(xtest[cols_to_scale])

    return xtrain, xtest

# Six summary metrics: AUC, average precision, sensitivity, specificity, PPV, NPV.
def calculate_metrics(y_true, y_pred_prob, y_pred):
    """Compute ranking metrics and, when hard labels are given, threshold metrics.

    Parameters
    ----------
    y_true : true binary labels.
    y_pred_prob : predicted score/probability of the positive class.
    y_pred : predicted hard labels, or ``None`` when none are available.

    Returns
    -------
    (auc, average_precision, sensitivity, specificity, ppv, npv)

    When ``y_pred`` is ``None`` the last four entries are the legacy
    placeholder ``0.1`` (NOT real measurements), kept so callers that only
    have probabilities get the same tuple they always got.
    """
    auc = roc_auc_score(y_true, y_pred_prob)
    avpre = average_precision_score(y_true, y_pred_prob)

    if y_pred is None:
        placeholder = 0.1  # sentinel, not a measurement
        return auc, avpre, placeholder, placeholder, placeholder, placeholder

    # labels=[0, 1] keeps the matrix 2x2 even if a class is missing from a fold.
    # Assumes labels are encoded 0 (control) / 1 (case) -- TODO confirm.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    def _ratio(numerator, denominator):
        # An undefined rate (empty denominator) is reported as NaN, not an error.
        return float(numerator) / float(denominator) if denominator else float("nan")

    sensitivity = _ratio(tp, tp + fn)
    specificity = _ratio(tn, tn + fp)
    ppv = _ratio(tp, tp + fp)
    npv = _ratio(tn, tn + fn)
    return auc, avpre, sensitivity, specificity, ppv, npv

# Sensitivity and PPV at fixed specificity operating points.
def ppv_sensitivity(specificity_levels, _y_true, _y_pred_proba):
    """Report sensitivity and PPV at each requested specificity level.

    For every level, the last ROC point whose false-positive rate is at most
    ``1 - level`` is selected; its threshold is used to binarize the scores.

    Parameters
    ----------
    specificity_levels : iterable of target specificities (e.g. ``[0.9, 0.95]``).
    _y_true : true binary labels.
    _y_pred_proba : positive-class scores; must support ``>=`` against a scalar
        and ``.astype`` (i.e. a numpy array or similar).

    Returns
    -------
    dict with keys ``'Sensitivity'`` and ``'PPV'``, each a list aligned with
    ``specificity_levels``.
    """
    fpr, tpr, thresholds = roc_curve(_y_true, _y_pred_proba)

    sens_at_level = []
    ppv_at_level = []
    for level in specificity_levels:
        idx = np.where(fpr <= (1 - level))[0][-1]
        cutoff = thresholds[idx]

        # Sensitivity is the true-positive rate at the chosen ROC point.
        sens_at_level.append(tpr[idx])

        # PPV is the precision of the hard calls made at that cutoff.
        hard_calls = (_y_pred_proba >= cutoff).astype(int)
        ppv_at_level.append(precision_score(_y_true, hard_calls))

    return {'Sensitivity': sens_at_level, 'PPV': ppv_at_level}


### get pipeline

In [None]:

from joblib import Parallel, delayed
import numpy as np
import gc

from sklearn.ensemble import RandomForestClassifier
import numpy as np
import joblib
import os
from sklearn.metrics import f1_score
from sklearn.utils.validation import check_is_fitted

def evaluate_base_model(X_test, y_test, model, CP_num, prediction_window, N, feature_map, model_name, plot=True):
    """Score a fitted coefficient-based classifier on a test set and optionally plot it.

    Parameters
    ----------
    X_test : DataFrame of test features (its ``columns`` are used for plot labels).
    y_test : true labels for ``X_test``.
    model : fitted classifier exposing ``predict_proba`` and ``predict``; when
        ``plot`` is true it must also expose ``classes_`` and ``coef_``.
    CP_num, prediction_window : only used in printed text and the plot title.
    N : number of largest-magnitude coefficients to show.
    feature_map : dict mapping feature column names to display names.
    model_name : label used in the coefficient plot title.
    plot : when true, build a precision-recall curve, a confusion-matrix
        heatmap and a top-N coefficient bar chart.

    Returns
    -------
    (ax_all, results_list) where ``ax_all`` is the combined patchworklib layout
    (``None`` when ``plot`` is false) and ``results_list`` is
    ``[auc, pre, sensitivity, specificity, ppv, npv,
    sensitivity_90, sensitivity_95, ppv_90, ppv_95]``.
    """
    best_mod = model

    # Column 1 of predict_proba is used as the positive-class score.
    predicted_proba = best_mod.predict_proba(X_test)[:, 1]
    predicted_labels = best_mod.predict(X_test)

    auc, pre,sensitivity , specificity , ppv, npv = calculate_metrics(y_test, predicted_proba, predicted_labels)
    
    # Sensitivity / PPV at the 90% and 95% specificity operating points.
    add_results = ppv_sensitivity([0.9, 0.95], y_test, predicted_proba)
    sensitivity_90, sensitivity_95 = add_results['Sensitivity']
    ppv_90, ppv_95 = add_results['PPV']
    
    if plot:
        print(f"\nDisplaying performance for CP {CP_num} with a {prediction_window}-year prediction window:\n")

        # Panel 1: precision-recall curve with the positive-class prevalence as reference line.
        prec, rec, threshold = precision_recall_curve(y_test, predicted_proba)
        prc_df = pd.DataFrame({"Recall": rec, "Precision": prec})
        ap_score = average_precision_score(y_test, predicted_proba)
        base_ap_score = np.mean(y_test)
        prc_plot = (
            ggplot(prc_df, aes("Recall", "Precision")) + 
            geom_line(color="#3C5488B2") +
            theme_bw() +
            theme() +
            coord_fixed() +
            geom_hline(yintercept=base_ap_score, linetype="dashed") +
            labs(title="Precision-Recall Curve") +
            annotate("text", x=0.15, y=1, label=f"AP={ap_score:.2f}", size=8) +
            annotate("text", x=0.3, y=0.95, label=f"Chance Level AP={base_ap_score:.2f}", size=8)
            )
        ax1 = pw.load_ggplot(prc_plot, figsize=(2.5, 2.5))
    
    
        # Panel 2: confusion matrix of the model's hard predictions.
        ax2 = pw.Brick(figsize=(2.5, 2.5))
        cm = confusion_matrix(y_test, predicted_labels, labels=best_mod.classes_)
        sns.heatmap(cm, annot=True, linewidth=1, cmap="GnBu", fmt="g",
                    yticklabels=["Control", "Case"], xticklabels=["Control", "Case"], ax=ax2)
        ax2.set_title("Confusion Matrix")
        ax2.set_xlabel("Predicted Label")
        ax2.set_ylabel("True Label")
    
        # Panel 3: the N coefficients with the largest absolute value.
        coefs = best_mod.coef_[0]
        top_N_feature_index = np.argsort(abs(coefs))[-N:]
        top_N_feature_names = X_test.columns[top_N_feature_index]

        def get_name(x):
            # Map a raw column name to its display name; unknown names pass through unchanged.
            if isinstance(x, str):
                return feature_map.get(x.strip(), x)
            else:
                return feature_map.get(x, x)
        top_N_feature_names = pd.Series(top_N_feature_names).apply(get_name)


        top_N_coefs_abs = abs(coefs)[top_N_feature_index]
        top_N_coefs = coefs[top_N_feature_index]
        coef_plot_df = pd.DataFrame({"feature_name": top_N_feature_names,
                                "abs_coef": top_N_coefs_abs,
                                "coef": top_N_coefs})
        # Ordered categorical so the bars are sorted by absolute coefficient.
        coef_plot_df['feature_name'] = pd.Categorical(coef_plot_df['feature_name'], categories=coef_plot_df.sort_values('abs_coef')['feature_name'])

        feature_importance_plot = (
            ggplot(coef_plot_df, aes("feature_name", "coef")) +
                geom_bar(stat="identity", fill="#91D1C2B2", color="black") +
                coord_flip() +
                theme_bw() +
                labs(x="", y="Feature Coefficients", title=f"{model_name} Model (CP {CP_num} with a {prediction_window}-yr prediction window)")
            )
        ax3 = pw.load_ggplot(feature_importance_plot, figsize=(5, 4))
        # Layout: PR curve and confusion matrix side by side, coefficient chart underneath.
        ax_all = (ax1 | ax2)/ax3
    else:
        ax_all = None
    results_list = [auc , pre, sensitivity , specificity , ppv, npv , sensitivity_90, sensitivity_95, ppv_90, ppv_95]

    return ax_all, results_list


def evaluate_ensemble_model( X_test,  y_test, model, CP_num, prediction_window, N, feature_map, model_name='', plot=True, pretrained_model=None, X_test_format=None):
    """Score a fitted tree-ensemble classifier on a test set and optionally plot it.

    Parameters
    ----------
    X_test : DataFrame of test features; ``.values`` is fed to the model and
        ``.columns`` labels the importance plot.
    y_test : true labels for ``X_test``.
    model : fitted classifier exposing ``predict_proba`` and ``classes_``
        (and ``feature_importances_`` when ``plot`` is true).
    CP_num, prediction_window : only used in printed text and the plot title.
    N : number of most important features to show.
    feature_map : dict mapping feature column names to display names.
    model_name : label used in the importance plot title.
    plot : when true, build the precision-recall curve, confusion matrix and
        feature-importance chart. The flag is honoured (it used to be
        overridden to ``False`` inside the function).
    pretrained_model : optional second fitted ensemble; when given, predictions
        are made jointly with ``model``.
    X_test_format : DataFrame of features laid out for ``pretrained_model``;
        required when ``pretrained_model`` is given.

    Returns
    -------
    (ax_all, results_list) where ``ax_all`` is the combined patchworklib layout
    (``None`` when ``plot`` is false) and ``results_list`` is
    ``[auc, pre, sensitivity, specificity, ppv, npv,
    sensitivity_90, sensitivity_95, ppv_90, ppv_95]``.
    """
    best_mod = model
    X_test_input = X_test.values

    if pretrained_model is not None:
        X_test_format_input = X_test_format.values
        # NOTE(review): predict_proba_joint is not defined in the visible part of
        # this notebook -- confirm it exists before using the joint path.
        predicted_proba = predict_proba_joint(model, pretrained_model, X_test_input, X_test_format_input )[:, 1]
        predicted_labels = predict_joint(model, pretrained_model, X_test_input, X_test_format_input)
    else:
        class_proba = best_mod.predict_proba(X_test_input)
        predicted_proba = class_proba[:, 1]
        # Hard labels = class with the highest probability. Derived from the
        # matrix we already have, so no second pass over the ensemble is needed.
        predicted_labels = np.asarray(best_mod.classes_)[np.argmax(class_proba, axis=1)]

    auc, pre, sensitivity, specificity, ppv, npv = calculate_metrics(y_test, predicted_proba, predicted_labels)

    # Sensitivity / PPV at the 90% and 95% specificity operating points.
    add_results = ppv_sensitivity([0.9, 0.95], y_test, predicted_proba)
    sensitivity_90, sensitivity_95 = add_results['Sensitivity']
    ppv_90, ppv_95 = add_results['PPV']

    if plot:
        print(f"\nDisplaying performance for CP {CP_num} with a {prediction_window}-year prediction window:\n")

        # Panel 1: precision-recall curve with the positive-class prevalence as reference line.
        prec, rec, _ = precision_recall_curve(y_test, predicted_proba)
        prc_df = pd.DataFrame({"Recall": rec, "Precision": prec})
        ap_score = average_precision_score(y_test, predicted_proba)
        base_ap_score = np.mean(y_test)

        prc_plot = (
            ggplot(prc_df, aes("Recall", "Precision")) +
            geom_line(color="#3C5488B2") +
            theme_bw() +
            theme() +
            coord_fixed() +
            geom_hline(yintercept=base_ap_score, linetype="dashed") +
            labs(title="Precision-Recall Curve") +
            annotate("text", x=0.15, y=1, label=f"AP={ap_score:.2f}", size=8) +
            annotate("text", x=0.3, y=0.95, label=f"Chance Level AP={base_ap_score:.2f}", size=8)
            )
        ax1 = pw.load_ggplot(prc_plot, figsize=(2.5, 2.5))

        # Panel 2: confusion matrix of the hard predictions.
        ax2 = pw.Brick(figsize=(2.5, 2.5))
        cm = confusion_matrix(y_test, predicted_labels, labels=best_mod.classes_)
        sns.heatmap(cm, annot=True, linewidth=1, cmap="GnBu", fmt="g",
                    yticklabels=["Control", "Case"], xticklabels=["Control", "Case"], ax=ax2)
        ax2.set_title("Confusion Matrix")
        ax2.set_xlabel("Predicted Label")
        ax2.set_ylabel("True Label")

        # Panel 3: the N features with the highest model-reported importance.
        feature_importances = best_mod.feature_importances_
        top_N_feature_index = np.argsort(feature_importances)[-N:]
        top_N_feature_names = X_test.columns[top_N_feature_index]

        def get_name(x):
            # Map a raw column name to its display name; unknown names pass through unchanged.
            if isinstance(x, str):
                return feature_map.get(x.strip(), x)
            return feature_map.get(x, x)

        top_N_feature_names = pd.Series(top_N_feature_names).apply(get_name)
        top_N_importances = feature_importances[top_N_feature_index]

        coef_plot_df = pd.DataFrame({
            "feature_name": top_N_feature_names,
            "importance": top_N_importances
        })
        # Ordered categorical so the bars are sorted by importance.
        coef_plot_df['feature_name'] = pd.Categorical(coef_plot_df['feature_name'], categories=coef_plot_df.sort_values('importance')['feature_name'])

        feature_importance_plot = (
            ggplot(coef_plot_df, aes("feature_name", "importance")) +
                geom_bar(stat="identity", fill="#91D1C2B2", color="black") +
                coord_flip() +
                theme_bw() +
                labs(x="", y="Feature Importance", title=f"{model_name} Model (CP {CP_num} with a {prediction_window}-yr prediction window)")
            )
        ax3 = pw.load_ggplot(feature_importance_plot, figsize=(5, 4))
        # Layout: PR curve and confusion matrix side by side, importance chart underneath.
        ax_all = (ax1 | ax2)/ax3
    else:
        ax_all = None

    results_list = [auc , pre, sensitivity , specificity , ppv, npv , sensitivity_90, sensitivity_95, ppv_90, ppv_95]
    print(results_list)
    return ax_all , results_list


def format_input(_current_feature, prediction_window, f_reference):
    """Re-lay-out a feature frame to match a saved model's reference column list.

    Parameters
    ----------
    _current_feature : DataFrame holding the currently available features.
    prediction_window : value used to build the lookup key
        ``'CP_1_<prediction_window>_yr'``.
    f_reference : mapping from that key to the reference columns, given as a
        DataFrame (its columns are used), a list, or any other iterable of
        column names (e.g. a pandas Index or Series).

    Returns
    -------
    DataFrame with exactly the reference columns (minus ``'person_id'``), in
    reference order; columns missing from ``_current_feature`` are filled with 0.

    Raises
    ------
    KeyError : if the lookup key is absent from ``f_reference``.
    AssertionError : if ``'patid'`` ends up among the output columns.
    """
    cp = 1  # the reference columns are always looked up under CP 1
    reference = f_reference[f'CP_{cp}_{prediction_window}_yr']

    # Explicit type dispatch instead of a bare ``except:`` so genuine errors
    # (e.g. a missing key) are not masked by the fallback path.
    if isinstance(reference, pd.DataFrame):
        reference_cols = list(reference.columns)
    elif isinstance(reference, list):
        reference_cols = reference
    else:
        reference_cols = list(reference)
    saved_model_features = [col for col in reference_cols if col != 'person_id']

    current_features = _current_feature.columns
    overlapping_cols = set(saved_model_features).intersection(set(current_features))
    missing_cols = set(saved_model_features) - set(current_features)
    print('Current cols\t', len(current_features))
    print('Reference cols\t', len(saved_model_features))
    print('Overalaping cols\t', len(overlapping_cols))
    print('Missing_cols cols\t', len(missing_cols), missing_cols)

    # reindex drops columns the model does not know and adds the missing ones as 0.
    f_input = _current_feature.reindex(columns=saved_model_features, fill_value=0)
    assert 'patid' not in f_input.columns
    return f_input

# def parallel_forest_predict_proba(forest, X, n_jobs=-1):
#     def predict_proba_tree(tree, X_subset):
#         leaf_ids = tree.apply(X_subset)
        
#         leaf_values = tree.tree_.value  # Shape: (n_nodes, n_classes)
        
#         probas = leaf_values[leaf_ids][:, 0]   
        
#         probas = probas / probas.sum(axis=1, keepdims=True)
#         return probas

#     tree_probas = Parallel(n_jobs=n_jobs)(
#         delayed(predict_proba_tree)(tree, X) for tree in forest.estimators_
#     )
#     # print('tree', len(tree_probas))
#     # avg_probas = np.mean(tree_probas, axis=0)  # Shape: (n_samples, n_classes)
#     all_probas = np.sum(tree_probas, axis=0)  # Shape: (n_samples, n_classes)
#     # print(all_probas.shape)
#     # avg_probas = all_probas / len(forest.estimators_)
#     return all_probas


def predict_joint(_model, _loadmodel, input_np, input_np_format):
    """Majority-vote prediction over the trees of two ensembles combined.

    Parameters
    ----------
    _model : fitted ensemble whose ``estimators_`` predict on ``input_np``.
    _loadmodel : previously saved ensemble whose ``estimators_`` predict on
        ``input_np_format``.
    input_np, input_np_format : feature arrays for the two ensembles
        (presumably the same samples in each model's column layout -- TODO confirm).

    Returns
    -------
    Array holding, per sample, the most frequent integer vote across all trees,
    or ``None`` (after printing a message) when ``_model`` fails the fitted check.
    """
    try:
        check_is_fitted(_model, "estimators_")
        new_trees = _model.estimators_
    except Exception as e:
        print("Model is not fitted or estimators_ not initialized:", e)
        return None

    loaded_trees = _loadmodel.estimators_
    # Give loaded trees a ``monotonic_cst`` attribute if they lack one.
    for member in loaded_trees:
        if not hasattr(member, "monotonic_cst"):
            member.monotonic_cst = None

    # Loaded trees vote first, then the new ones (row order of the vote matrix).
    votes = [member.predict(input_np_format) for member in loaded_trees]
    votes.extend(member.predict(input_np) for member in new_trees)
    votes = np.array(votes)

    # bincount needs integer input.
    if votes.dtype != np.int64:
        votes = votes.astype(int)

    def _most_common(column):
        return np.bincount(column).argmax()

    return np.apply_along_axis(_most_common, axis=0, arr=votes)


def evaluate_params(random_params, inner_cv, X_train, y_train, pretrained_model=None, X_train_format=None, random_seed=42):
    """Cross-validate one random-forest hyper-parameter setting with macro F1.

    Parameters
    ----------
    random_params : dict of ``RandomForestClassifier`` keyword arguments.
    inner_cv : splitter exposing ``split(X, y)`` that yields positional indices.
    X_train : DataFrame of training features.
    y_train : training labels (array-like; indexed positionally).
    pretrained_model : optional saved ensemble; when given, validation
        predictions are made jointly with it via ``predict_joint``.
    X_train_format : DataFrame of features laid out for ``pretrained_model``;
        required when ``pretrained_model`` is given.
    random_seed : seed passed to every forest.

    Returns
    -------
    (mean macro-F1 over the inner folds, random_params)
    """
    # Split indices are positional; converting to an array guarantees positional
    # lookup even if y_train is a pandas Series with a non-default index.
    y_values = np.asarray(y_train)

    inner_scores = []
    for inner_train_idx, inner_valid_idx in inner_cv.split(X_train, y_values):
        X_inner_train = X_train.iloc[inner_train_idx]
        X_inner_valid = X_train.iloc[inner_valid_idx]
        y_inner_train = y_values[inner_train_idx]
        y_inner_valid = y_values[inner_valid_idx]

        model = RandomForestClassifier(random_state=random_seed, n_jobs=-1, **random_params)
        model.fit(X_inner_train, y_inner_train)

        X_inner_valid_np = X_inner_valid.values
        if pretrained_model is None:
            y_pred = model.predict(X_inner_valid_np)
        else:
            # X_train_format must stay alive for every fold: it used to be
            # ``del``-ed here, which broke the second iteration.
            X_inner_valid_format_np = X_train_format.iloc[inner_valid_idx].values
            y_pred = predict_joint(model, pretrained_model, X_inner_valid_np, X_inner_valid_format_np)

        # Model selection criterion: macro-averaged F1 on the validation fold.
        inner_scores.append(f1_score(y_inner_valid, y_pred, average="macro"))

    avg_inner_score = np.mean(inner_scores)
    print('Done evaluating params...', random_params)

    gc.collect()
    return avg_inner_score, random_params

import numpy as np
import multiprocessing
import math


def build_RF_model(random_seed):
    """Return an unfitted class-balanced random forest and its search grid.

    Parameters
    ----------
    random_seed : seed for the forest's ``random_state``.

    Returns
    -------
    (estimator, param_dict) where ``param_dict`` maps hyper-parameter names to
    the candidate values to try.
    """
    # Five integer depths evenly spread between 3 and 20 (inclusive).
    depth_choices = np.linspace(3, 20, 5, dtype=int).tolist()
    print('max_depth option:', depth_choices)

    search_space = {}
    search_space["n_estimators"] = [1000, 3000, 5000]
    search_space["max_depth"] = depth_choices
    search_space["min_samples_split"] = [5, 10, 40]
    search_space["max_features"] = ['sqrt']

    forest = RandomForestClassifier(
        class_weight='balanced',
        random_state=random_seed,
        warm_start=False,
    )
    return forest, search_space


def nested_cv_pipeline_parallel(
    X_input,
    y,
    model_type,
    cp,
    prediction_window,
    featuremap,
    random_seed=99,
    score=None,
    pretrained_model=None,
    pretrained_scalar=None,
    reference_cols=None,
    n_param_settings=30,
):
    """Nested 5x5 stratified cross-validation for a random forest.

    For each outer fold: standardize with ``preprocess``, score the candidate
    hyper-parameter settings on inner folds with ``evaluate_params``, refit the
    best setting on the full outer-training split and evaluate it on the
    outer-test split with ``evaluate_ensemble_model``.

    Parameters
    ----------
    X_input : DataFrame of features.
    y : labels aligned with ``X_input`` (array-like; indexed positionally).
    model_type : must be ``"rf"``.
    cp, prediction_window : forwarded to the evaluation step.
    featuremap : dict mapping feature names to display names.
    random_seed : seed for the splitters and the forests.
    score, pretrained_model, pretrained_scalar, reference_cols : accepted for
        interface compatibility; not used by this function.
    n_param_settings : how many grid points to evaluate. They are taken in
        ``itertools.product`` order from the start of the grid (NOT sampled
        at random), capped at the grid size.

    Returns
    -------
    (outer_results, outer_best_models, standardizer_models): a DataFrame with
    one row per outer fold plus ``"mean"`` and ``"std"`` rows, the refit model
    of each fold, and the scaler of each fold (``None`` where nothing was scaled).
    """
    import itertools

    assert model_type == "rf"

    outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_seed)
    _, param_grid = build_RF_model(random_seed)
    X = X_input
    # Split indices are positional; an array guarantees positional lookup even
    # if y is a pandas Series with a non-default index.
    y_values = np.asarray(y)
    outer_cv_splits = list(outer_cv.split(X, y_values))

    result_columns = ["cv", "auc", "pre", "sensitivity", "specificity", "ppv", "npv", "sensitivity_90", "sensitivity_95", "ppv_90", "ppv_95"]
    fold_rows = []
    outer_best_models = []
    standardizer_models = []

    # Informational only: the inner search below runs with n_jobs=1, while each
    # forest parallelizes internally (n_jobs=-1).
    cores = multiprocessing.cpu_count()
    set_n_jobs = max(1, math.floor(cores*0.05))
    print('set_njobs', set_n_jobs)

    param_names = ('n_estimators', 'max_depth', 'min_samples_split', 'max_features')
    all_combi = list(itertools.product(*(param_grid[name] for name in param_names)))
    # Slicing (rather than indexing up to a fixed count) cannot overrun a small grid.
    candidates = [dict(zip(param_names, combo)) for combo in all_combi[:n_param_settings]]

    for cv_, (train_idx, test_idx) in enumerate(outer_cv_splits):
        print(f"---Outer CV Fold {cv_}---")

        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y_values[train_idx], y_values[test_idx]

        # Scaler is fitted on the outer-training split only.
        X_train, X_test, scalar = preprocess(X_train, X_test)

        standardizer_models.append(scalar)
        print('Input shape', X_train.shape, X_test.shape)

        inner_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_seed)

        results = Parallel(n_jobs=1)(
            delayed(evaluate_params)(
                dict(params),
                inner_cv,
                X_train.copy(),
                y_train,
                random_seed=random_seed,
            )
            for params in candidates
        )

        # Best setting = highest mean inner-CV score.
        best_score, best_params = max(results, key=lambda x: x[0])
        print(f"Best Parameters for Outer Fold {cv_}: {best_params}")
        print(f"Best Inner CV Score for Outer Fold {cv_}: {best_score:.4f}")

        # NOTE(review): the class_weight='balanced' estimator returned by
        # build_RF_model is discarded; this forest uses default class weights.
        final_model = RandomForestClassifier(random_state=random_seed, n_jobs=-1, **best_params)
        final_model.fit(X_train, y_train)

        axall, results_list = evaluate_ensemble_model(
            X_test,
            y_test,
            final_model,
            cp,
            prediction_window,
            N=30,
            feature_map=featuremap,
            model_name="Random Forest",
        )
        display(axall)
        fold_rows.append([int(cv_)] + results_list)
        outer_best_models.append(final_model)

    # Build the frame once so the metric columns are numeric.
    outer_results = pd.DataFrame(fold_rows, columns=result_columns)
    mean_values = outer_results.mean().tolist()
    std_values = outer_results.std().tolist()
    # The "cv" column also holds the "-" marker of the summary rows.
    outer_results["cv"] = outer_results["cv"].astype(object)
    outer_results.loc["mean"] = ["-"] + mean_values[1:]
    outer_results.loc["std"] = ["-"] + std_values[1:]
    display(outer_results)

    return outer_results, outer_best_models, standardizer_models


%config Application.warn_no_config=True
import warnings
warnings.filterwarnings("ignore")
def run_retrain_matched_pipeline(X, y, all_map, model_type, cps=[1], years=[0], score=None, pretrained_model=None, pretrained_scalar=None, reference_cols=None):
    """Run the nested-CV training pipeline for every CP / prediction window.

    Parameters
    ----------
    X : mapping from prediction window to a feature DataFrame.
    y : mapping from prediction window to the matching labels.
    all_map : dict mapping feature names to display names.
    model_type : forwarded to ``nested_cv_pipeline_parallel``.
    cps : CP identifiers to iterate over (the default list is never mutated).
    years : prediction windows; processed in reverse order.
    score, pretrained_model, pretrained_scalar, reference_cols : forwarded
        unchanged to ``nested_cv_pipeline_parallel``.

    Returns
    -------
    (cp_year_results, cp_year_models, cp_year_scalars), each keyed by
    prediction window only -- with several ``cps`` a later CP overwrites an
    earlier one for the same window.
    """
    # Local import so the function does not depend on a later cell importing time.
    import time

    cp_year_results = {}
    cp_year_models = {}
    cp_year_scalars = {}

    for cp in cps:
        for prediction_window in reversed(years):
            start = time.time()
            print(f"\nRunning pipeline for CP {cp} with {prediction_window}-year prediction window...\n")
            xinput = X[prediction_window]

            # Drop the identifier column when present; frames without it are
            # trained as-is instead of being silently skipped.
            if 'patid' in xinput.columns:
                print('drop patid')
                f_input = xinput.drop('patid', axis=1)
            else:
                f_input = xinput

            outer_results, outer_best_models, standardizer_models = nested_cv_pipeline_parallel(
                f_input, y[prediction_window], model_type, cp, prediction_window, all_map,
                score=score, pretrained_model=pretrained_model, pretrained_scalar=pretrained_scalar,
                reference_cols=reference_cols)
            cp_year_results[prediction_window] = outer_results
            cp_year_models[prediction_window] = outer_best_models
            cp_year_scalars[prediction_window] = standardizer_models

            print('Time eplapse', time.time() - start )
    return cp_year_results, cp_year_models, cp_year_scalars
 

# Feature-name lookup. pickle.load can execute arbitrary code, so only load a
# trusted all_map.pkl. The context manager closes the file handle.
with open('all_map.pkl', 'rb') as map_file:
    all_map = pickle.load(map_file)
# Prefix every display name with its own key: "<key> <name>".
for i, v in all_map.items():
    all_map[i] = i + ' ' + v




#### retrain on matched training data



In [9]:
hold_out_portion =0.5
ratio = 10
# File names carry the digits after the decimal point of the hold-out portion
# (0.5 -> "5"). Computed up front because reusing the same quote character
# inside an f-string is a SyntaxError before Python 3.12.
portion_tag = str(hold_out_portion).split('.')[-1]

# pickle.load can execute arbitrary code; only load trusted artifacts.
with open('./MiddleFeatures/demo_matched_fs.pkl', 'rb') as fh:
    matched_f = pickle.load(fh)
with open(f'./MiddleFeatures/matched_t_drop_portion_{portion_tag}_ratio_{ratio}.pkl', 'rb') as fh:
    matched_t = pickle.load(fh)
with open(f'./MiddleFeatures/matched_e_drop_portion_{portion_tag}_ratio_{ratio}.pkl', 'rb') as fh:
    matched_e = pickle.load(fh)

# Feature-name lookup, each display name prefixed with its own key.
with open('all_map.pkl', 'rb') as fh:
    all_map = pickle.load(fh)
for i, v in all_map.items():
    all_map[i] = i + ' ' + v


In [None]:
# A list (not a one-shot reversed() iterator) so it can be both looped over
# and printed meaningfully.
windows = list(reversed([ 0,1,2,5,10]))
import time

output_dir = 'rf_chunks/model_test_rf_all_feature_retrain'
os.makedirs(output_dir, exist_ok=True)

all_rs_years, all_model_test_years, all_model_scalar_years = {},{},{}
for window in windows:
    print(window)
    all_rs_years[window], all_model_test_years[window], all_model_scalar_years[window]  = run_retrain_matched_pipeline(matched_f, matched_t, all_map, 'rf', cps=[1], years=[window],  score=None,\
                    pretrained_model=None, pretrained_scalar=None, reference_cols=None)

    print('Save all year models together: ', windows)
    # Checkpoint everything trained so far after each window; the file written
    # for the last window holds all windows.
    # NOTE(review): the hold-out cell loads 'all_uncommon_model_test_years.pkl'
    # (no 'demo_' prefix, no '_till<window>' suffix) -- confirm which artifact
    # is meant to be loaded there.
    for stem, payload in (
        ('model_test', all_model_test_years),
        ('model_scalar', all_model_scalar_years),
        ('rs', all_rs_years),
    ):
        with open(f'{output_dir}/demo_all_uncommon_{stem}_years_till{window}.pkl', 'wb') as fh:
            pickle.dump(payload, fh)
                    

### hold-out testing 

In [8]:


import time
def run_retrain_evaluate_pipeline(X, y, all_map, model_type,  years=[0,1,2,5,10], pre_trained_model=None, score=None, f_reference=None, model_name=None, pre_trained_scalar=None, plot=False, finetuned_scalars=None, finetuned_models=None):
    """Evaluate the per-fold retrained models on a hold-out set.

    Parameters
    ----------
    X : mapping from prediction window to a feature DataFrame.
    y : mapping from prediction window to the matching labels.
    all_map : dict mapping feature names to display names.
    model_type : ``'rf'`` or ``'xgb'``; also used in the result keys.
    years : prediction windows; processed in reverse order (the default list
        is never mutated).
    pre_trained_model, score, f_reference, pre_trained_scalar : accepted for
        interface compatibility; not used by this function.
    model_name : label forwarded to the evaluation step.
    plot : forwarded to the evaluation step; figures are displayed when true.
    finetuned_scalars, finetuned_models : indexed as
        ``[prediction_window][cv]`` to get the scaler / model of each fold.

    Returns
    -------
    (show_results_dict, cp_year_results): per-window DataFrames of fold
    metrics with ``'mean'`` and ``'std'`` rows, and a flat dict of the raw
    metric lists keyed ``"<window>_<model_type>_<cv>"``.

    Raises
    ------
    ValueError : if ``model_type`` is not ``'rf'`` or ``'xgb'``.
    """
    if model_type not in ('rf', 'xgb'):
        # Fail early instead of hitting an undefined results_list later.
        raise ValueError(f"unsupported model_type: {model_type!r}")

    metric_names = ['auc', 'pre', 'sensitivity', 'specificity', 'ppv', 'npv', 'sensitivity_90', 'sensitivity_95', 'ppv_90', 'ppv_95']
    cp_year_results = {}
    show_results_dict = {}

    for cp in [1]:
        for prediction_window in reversed(years):
            cv_results = []
            cv_num = 5  # number of per-fold models evaluated for each window
            xinput = X[prediction_window]
            y_input = y[prediction_window]

            for cv in range(cv_num):
                print('CV: ', cv, '| Prediction window: ', prediction_window, '| Model type: ', model_type)
                scalar = finetuned_scalars[prediction_window][cv]
                model = finetuned_models[prediction_window][cv]

                # Identifier column is dropped when present.
                f_input = xinput.drop(columns='patid', errors='ignore')
                print('ori f-input', f_input.shape)
                # Align to the model's training layout; unseen features become 0.
                f_input =  f_input.reindex(columns=model.feature_names_in_, fill_value=0)
                print('after reindex', f_input.shape)
                # Accept either a (train, test) tuple or a bare frame from
                # preprocess_scalar, so a missing scaled column cannot break unpacking.
                scaled = preprocess_scalar(f_input, None, scalar)
                f_input = scaled[0] if isinstance(scaled, tuple) else scaled
                print('after scalar', f_input.shape)

                axall, results_list = evaluate_ensemble_model(f_input, y_input, model, 1, prediction_window, 30, all_map, model_name=model_name, plot=plot, pretrained_model=None, X_test_format=None)

                if plot:
                    display(axall)

                cp_year_results[f"{str(prediction_window)}_{model_type}_{str(cv)}"] = results_list
                cv_results.append(results_list)

            showresults = pd.DataFrame(cv_results, columns=metric_names)
            # Compute both summaries BEFORE appending either row, so the std is
            # taken over the folds only and not over folds + mean row.
            fold_mean = showresults.mean()
            fold_std = showresults.std()
            showresults.loc['mean'] = fold_mean
            showresults.loc['std'] = fold_std
            display('Show results of cvs', showresults)
            show_results_dict[f"{str(cp)}_{str(prediction_window)}_{model_type}"] = showresults

    return show_results_dict, cp_year_results



####  test on uncommon

In [9]:
# Feature-name lookup. pickle.load can execute arbitrary code, so only load a
# trusted all_map.pkl. The context manager closes the file handle.
with open('all_map.pkl', 'rb') as map_file:
    all_map = pickle.load(map_file)
# Prefix every display name with its own key: "<key> <name>".
for i, v in all_map.items():
    all_map[i] = i + ' ' + v


In [10]:
hold_out_portion = 0.5
# File names carry the digits after the decimal point of the hold-out portion
# (0.5 -> "5"). Computed up front because reusing the same quote character
# inside an f-string is a SyntaxError before Python 3.12.
holdout_tag = str(hold_out_portion).split('.')[-1]

# pickle.load can execute arbitrary code; only load trusted artifacts.
with open(f'./MiddleFeatures/test_f_portion_{holdout_tag}.pkl', 'rb') as fh:
    unmatched_f = pickle.load(fh)
with open(f'./MiddleFeatures/test_t_portion_{holdout_tag}.pkl', 'rb') as fh:
    unmatched_t = pickle.load(fh)
with open(f'./MiddleFeatures/test_e_portion_{holdout_tag}.pkl', 'rb') as fh:
    unmatched_e = pickle.load(fh)


In [None]:


import joblib

# Per-window fold models and scalers from the retraining step.
# pickle.load can execute arbitrary code; only load trusted artifacts.
with open('rf_chunks/model_test_rf_all_feature_retrain/all_uncommon_model_test_years.pkl', 'rb') as fh:
    model_test_years_save = pickle.load(fh)
with open('rf_chunks/model_test_rf_all_feature_retrain/all_uncommon_model_scalar_years.pkl', 'rb') as fh:
    model_scalar_years_save = pickle.load(fh)

for i in [10, 5, 2, 1, 0]:
    model_test_years = model_test_years_save[i]
    model_scalar_years = model_scalar_years_save[i]
    showresults, results = run_retrain_evaluate_pipeline(unmatched_f, unmatched_t, all_map, 'rf',  years=[i],  pre_trained_model=None,\
                                score='f1_macro' , model_name='all feature', pre_trained_scalar=None, f_reference= None, finetuned_models=model_test_years, finetuned_scalars=model_scalar_years)

    # One pair of result files per prediction window; each message names the
    # file that is actually written next.
    note = str(i)
    rs_path = f'rf_chunks/model_test_rf_all_feature_retrain/all_uncommon_rs_years_unmatched_year{note}.pkl'
    showrs_path = f'rf_chunks/model_test_rf_all_feature_retrain/all_uncommon_showrs_years_unmatched_year{note}.pkl'

    print('save to ', rs_path)
    with open(rs_path, 'wb') as fh:
        pickle.dump(results, fh)
    print('save to ', showrs_path)
    with open(showrs_path, 'wb') as fh:
        pickle.dump(showresults, fh)
        


In [None]:

# NOTE(review): `results` and `showresults` are reassigned on every iteration
# of the evaluation loop in the previous cell, so at this point they hold the
# last window only -- confirm that is what these "all years" files should contain.
rs_path = 'rf_chunks/model_test_rf_all_feature_retrain/all_uncommon_rs_years_unmatched.pkl'
showrs_path = 'rf_chunks/model_test_rf_all_feature_retrain/all_uncommon_showrs_years_unmatched.pkl'

# Each message names the file that is actually written next.
print('save to ', rs_path)
with open(rs_path, 'wb') as fh:
    pickle.dump(results, fh)
print('save to ', showrs_path)
with open(showrs_path, 'wb') as fh:
    pickle.dump(showresults, fh)
