### 1. Imports and File Paths

In [None]:
import os
import sys
import joblib
import numpy as np
from keras.models import load_model
from sklearn import metrics
import pandas as pd
import shutil
from pathlib import Path
from tqdm import tqdm
import itertools
from TL_BaseModel import *

# dirpath = "./../../../Output_Results/TL_BaseModel_Results/"
# if os.path.exists(dirpath) and os.path.isdir(dirpath):
#     shutil.rmtree(dirpath)
# os.makedirs(os.path.dirname(dirpath), exist_ok=True)

In [None]:
#"#../../../../Output_Results/"

In [None]:
# Folders (relative to this notebook) that the cells below read from and write to.
# Every value keeps its trailing slash: later code builds file paths from these
# strings by plain concatenation.
TL_path = "../../../../Output_Results/Freeze/"
Base_path = "../../../../Output_Results/classification_results_2023_base_tl/"
output_path = "../../../../Output_Results/TL_BaseModel_Results/"

# Echo the configured locations so they can be checked before any file is copied.
print(f"TL_path {TL_path}")
print(f"Base_path {Base_path}")
print(f"output_path {output_path}")

### 2. Patients Target Group

In [None]:
# IDs of the target patients whose TL models are collected further down.
target_grp = [
    "SB-001", "SB-003", "SB-008", "SB-011", "SB-012", "SB-022",
    "SB-023", "SB-025", "SB-028", "SB-037", "SB-043", "SB-056",
    "SB-059", "SB-060", "SB-070", "SB-071", "SB-073", "SB-078",
    "SB-079", "SB-080", "SB-082", "SB-083", "SB-112",
]
# Quick sanity check on the number of patients.
print(len(target_grp))

### 3. Find the Best TL Model

In [None]:
# Get all TL models
def find_best_TL_model(target_grp, TL_path, output_path, epoch_sizes = None):
    """Collect the transfer-learning (TL) model files and copy them under ``output_path``.

    For every epoch setting, patient and sub-folder of ``TL_path``, the folder
    ``{TL_path}/{sub-folder}/diagrams/{patient}/`` is scanned for files that
    end in ``.hdf5``, contain ``"IND_FOLD"`` and contain the epoch setting in
    their name. Each match is copied to ``{output_path}{epoch}/{patient}/``.

    Entries of ``TL_path`` that are not folders, and sub-folders that have no
    ``diagrams/{patient}/`` folder, are skipped instead of raising.

    Args:
        target_grp: iterable of patient IDs (folder names below ``diagrams``).
        TL_path: folder whose sub-folders each hold ``diagrams/<patient>/``.
        output_path: destination root. It must end with a path separator
            because the destination is built by string concatenation.
        epoch_sizes: epoch settings to look for in the file names. Defaults to
            every ``"<i>_<j>"`` combination of 100, 300, 500 and 1000.

    Returns:
        list of the copied file names (names only), one entry per copy.
    """
    if epoch_sizes is None:
        epoch_sizes = [f"{i}_{j}" for i in [100, 300, 500, 1000] for j in [100, 300, 500, 1000]]

    Model_List = []
    TL_folder_List = os.listdir(TL_path)

    # Expected layout: <TL_path>/<sub-folder>/diagrams/<patient>/<model>.hdf5
    for epoch_size in epoch_sizes:
        for target_id in target_grp:
            destination_dir = f"{output_path}{epoch_size}/{target_id}/"
            for folder_name in TL_folder_List:
                TL_model_path = f"{TL_path}/{folder_name}/diagrams/{target_id}/"
                # Stray files in TL_path or runs without this patient would
                # otherwise make os.listdir raise.
                if not os.path.isdir(TL_model_path):
                    continue
                for file_name in os.listdir(TL_model_path):
                    # "IND_FOLD" marks the models to be considered for TL+Base modelling.
                    wanted = (
                        file_name.endswith(".hdf5")
                        and "IND_FOLD" in file_name
                        and epoch_size in file_name
                    )
                    if not wanted:
                        continue
                    Model_List.append(file_name)
                    # Created lazily so that no empty folders are left behind.
                    os.makedirs(destination_dir, exist_ok=True)
                    shutil.copyfile(TL_model_path + file_name, destination_dir + file_name)
    return Model_List

# Only the epoch setting that performed best during the research is collected.
epoch_sizes = ["100_300"]
TL_Models = find_best_TL_model(target_grp, TL_path, output_path, epoch_sizes=epoch_sizes)
# Preview both ends of the collected file-name list.
display(TL_Models[:5], TL_Models[-5:])

### 4. Find the Best Base Model

In [None]:
# Get all Base models
def find_best_Base_model(Base_path, output_path, epoch_sizes = None):
    """Collect the base-model files (``.joblib``) and copy them under ``output_path``.

    Every sub-folder of ``Base_path`` is treated as one patient. The
    sub-folders of the first patient define which model folders are visited for
    all patients. Each ``.joblib`` file found in
    ``{Base_path}{patient}/{model folder}/diagrams/`` is copied to
    ``{output_path}{epoch}/{patient}/`` for every requested epoch setting.
    The file names are not filtered by epoch setting.

    Entries that are not folders, and model folders without a ``diagrams``
    folder, are skipped instead of raising. An empty ``Base_path`` yields an
    empty list.

    Args:
        Base_path: root folder with one sub-folder per patient. It must end
            with a path separator because paths are built by concatenation.
        output_path: destination root; must also end with a path separator.
        epoch_sizes: epoch settings used as destination folder names. Defaults
            to every ``"<i>_<j>"`` combination of 100, 300, 500 and 1000.

    Returns:
        list of the copied file names (names only), one entry per copy.
    """
    if epoch_sizes is None:
        epoch_sizes = [f"{i}_{j}" for i in [100, 300, 500, 1000] for j in [100, 300, 500, 1000]]

    Model_List = []

    # Only real folders are patients; stray files (notes, OS metadata, ...) are ignored.
    target_grp = [
        entry for entry in os.listdir(Base_path)
        if os.path.isdir(f"{Base_path}{entry}")
    ]
    if not target_grp:
        return Model_List

    # The first patient's sub-folders serve as the list of model folders for everyone.
    Base_model_Folder_List = os.listdir(f"{Base_path}{target_grp[0]}/")

    for epoch_size in epoch_sizes:
        for target_id in target_grp:
            destination_dir = f"{output_path}{epoch_size}/{target_id}/"
            for folder_name in Base_model_Folder_List:
                diagrams_dir = f"{Base_path}{target_id}/{folder_name}/diagrams/"
                # A patient may lack a model folder that the first patient has.
                if not os.path.isdir(diagrams_dir):
                    continue
                for file_name in os.listdir(diagrams_dir):
                    if not file_name.endswith(".joblib"):
                        continue
                    Model_List.append(file_name)
                    os.makedirs(destination_dir, exist_ok=True)
                    shutil.copyfile(diagrams_dir + file_name, destination_dir + file_name)
    return Model_List

# Only the epoch setting that performed best during the research is collected.
epoch_sizes = ["100_300"]
Base_Models = find_best_Base_model(Base_path, output_path, epoch_sizes=epoch_sizes)
# Preview both ends of the collected file-name list.
display(Base_Models[:5], Base_Models[-5:])

### 5. TL + Base Model

#### 5a. Data Load

In [None]:
# Globals and file paths
# The quantile is kept as a string because it is embedded verbatim in the file name.
source_quantile = "0.3"
source_file_path = "all_pids_q=" + source_quantile + ".npy"
print(source_file_path)

In [None]:
# Key troubleshooting note:
# `np.load` hands back a numpy ndarray, not the object that was saved.
# NOTE(review): allow_pickle=True unpickles the file content, which can run
# arbitrary code - only load files that come from a trusted source.
data_ndarr = np.load(source_file_path, allow_pickle=True)
print(f"data_ndarr type : {type(data_ndarr)}")

# The array's `item()` method unwraps the stored object:
# a dictionary of key-values `p_id`:`DataFrame`.
datadict = data_ndarr.item()
print(f"datadict type : {type(datadict)}")

In [None]:
# Strip the non-feature columns from every patient's DataFrame and record how
# many rows each patient has.
p_ids = datadict.keys()
sample_size_dict={}
print("patients: n =", len(p_ids), end='\n\n')

for p_id in p_ids:
    df = datadict[p_id]
    # Dropped in place on purpose: the frames stored in datadict are the ones
    # handed to the modelling function later on.
    # errors="ignore" keeps this cell re-runnable; without it a second run
    # raises KeyError because the columns are already gone.
    df.drop(['id','date','am_pef_org','BMI','sex', 'age'], axis=1, inplace=True, errors="ignore")
    # The float32 conversion doubles as a check that only numeric columns remain.
    df = np.asarray(df).astype(np.float32)
    print(p_id, "shape:", df.shape)#, end="\t\t")
    sample_size_dict[p_id] = df.shape[0]

# Preview the last patient's cleaned frame; skipped when there are no patients
# (p_id would be undefined).
if sample_size_dict:
    display(datadict[p_id].head(5))
display(sample_size_dict)

#### 5b. TL and Base Models Function

In [None]:
def TL_and_Base_Model(target_id, TL_Models, Base_Models, model_path, datadict, sample_size_dict, EVALUATION_METRICS, tl_arch_list, Base_model_Category_list, epoch_sizes = None):
    """Evaluate "TL feature extractor + base classifier" pipelines for one patient.

    For every combination of epoch setting, base-model category and TL
    architecture, each matching base model (``.joblib``) is paired with each
    matching TL model (``.hdf5``). For each of the three data splits the TL
    network's second-to-last layer output is used as feature vector: the base
    model is fitted on the training features and evaluated on the test
    features. The per-split results are averaged into one row. All rows of a
    base model are appended to
    ``{model_path}{epoch}/{target_id}/{category}/{target_id}_TL_Base_total_evaluation_result_{category}.csv``
    and the per-split confusion matrices are passed to ``save_confusion_matrix``.

    Args:
        target_id: patient ID. It must be a key of ``sample_size_dict``, TL
            file names must start with it and base file names must contain it.
        TL_Models: file names of the TL models (only ``.hdf5`` names are used).
        Base_Models: file names of the base models (only ``.joblib`` names are used).
        model_path: root folder holding ``{epoch}/{target_id}/<model file>``.
            It must end with a path separator because paths are built by
            concatenation. Results are written below the same folder.
        datadict: patient data, passed unchanged to ``split_data``.
        sample_size_dict: maps patient ID to a sample count; used only to
            label the result rows.
        EVALUATION_METRICS: column names of the result table. Must match the
            number of values returned by ``evaluate``.
        tl_arch_list: substrings that identify a TL architecture in a file name.
        Base_model_Category_list: substrings that identify a base-model
            category in a file name; also used as result folder names.
        epoch_sizes: epoch settings to process. Defaults to every
            ``"<i>_<j>"`` combination of 100, 300, 500 and 1000.

    Returns:
        None. All results are written to disk.
    """
    # Mark: EXPLICITLY set binary outcome variable for supervised learning
    CLASS_VAR = 'class'
    # Number of train/test splits taken from split_data's result.
    N_FOLDS = 3
    sample_size = sample_size_dict[target_id]

    # load target patient data
    dataset_list = split_data(target=target_id, data=datadict, class_var=CLASS_VAR, balance=True)

    if epoch_sizes is None:
        epoch_sizes = [f"{i}_{j}" for i in [100, 300, 500, 1000] for j in [100, 300, 500, 1000]]

    for epoch_size in epoch_sizes:
        patient_dir = f"{model_path}{epoch_size}/{target_id}/"
        for Base_model_Category, tl_arch_type in itertools.product(Base_model_Category_list, tl_arch_list):
            # Creating Category directory for the patient
            category_dir = f"{patient_dir}{Base_model_Category}/"
            os.makedirs(category_dir, exist_ok=True)

            for Base_Model in Base_Models:
                is_matching_base = (
                    Base_Model.endswith(".joblib")
                    and target_id in Base_Model
                    and Base_model_Category in Base_Model
                )
                if not is_matching_base:
                    continue

                Base_model_path_file = f"{patient_dir}{Base_Model}"
                # One averaged row per TL model evaluated with this base model.
                result_rows = []

                for TL_model in TL_Models:
                    is_matching_tl = (
                        TL_model.endswith(".hdf5")
                        and TL_model.startswith(target_id)
                        and tl_arch_type in TL_model
                        and epoch_size in TL_model
                    )
                    if not is_matching_tl:
                        continue

                    TL_model_path_file = f"{patient_dir}{TL_model}"
                    # The TL network is only used for inference, so it is loaded
                    # once per model file instead of once per split.
                    model = load_model(TL_model_path_file, compile=False)
                    # Feature extractor: same input, output of the second-to-last layer.
                    tl_model_for_base = Model(inputs=model.input, outputs=model.layers[-2].output)

                    # setup for k-fold cross-validation
                    kfold_evaluation_results_list = []
                    kfold_confusion_matrix_list = []

                    for i in range(N_FOLDS):
                        print(f'================================================{i}================================================')
                        X_train, X_test, Y_train, Y_test = dataset_list[i]

                        print("---------------TL_Model---------------")
                        y_hat = model.predict(X_test)
                        # Adding 0.5 before truncating rounds the scores to 0/1 labels.
                        y_pred = (y_hat+0.5).astype(int)
                        # The TL-only result is not stored; the call is kept in
                        # case evaluate reports something itself.
                        evaluate(y_pred, Y_test)

                        print("---------------feature extract for Base Model---------------")
                        # Training features from the neural network, flattened to 2-D for sklearn.
                        train_features = tl_model_for_base.predict(X_train)
                        X_train_for_base = train_features.reshape(train_features.shape[0], -1)

                        print("---------------Base_Model---------------")
                        # Reloaded per split so every fit starts from the stored estimator.
                        Base_model = joblib.load(Base_model_path_file)

                        # Train the model on training data (sklearn: no one-hot encoding)
                        Base_model.fit(X_train_for_base, Y_train.ravel())

                        # Send test data through the same feature extractor process
                        test_features = tl_model_for_base.predict(X_test)
                        X_test_features = test_features.reshape(test_features.shape[0], -1)

                        # Predict on the flattened test features, i.e. the same
                        # layout the base model was fitted on.
                        y_test_prediction = Base_model.predict(X_test_features)
                        TL_Base_results, c_matrix = evaluate(y_test_prediction.reshape(y_test_prediction.shape[0], -1), Y_test)

                        kfold_evaluation_results_list.append(TL_Base_results)
                        kfold_confusion_matrix_list.append(c_matrix)

                    # Average the splits into a single row labelled "<model name>_<sample size>".
                    avg_results = pd.DataFrame(kfold_evaluation_results_list, columns=EVALUATION_METRICS).mean().values
                    avg_results = avg_results.reshape(1, len(EVALUATION_METRICS))
                    avg_results_df = pd.DataFrame(avg_results, columns=EVALUATION_METRICS)
                    avg_results_df.index = [f"{TL_model[:-5]}_{sample_size}"]
                    result_rows.append(avg_results_df)

                    confusion_matrix_file = f"{category_dir}{TL_model[:-5]}_CM"
                    save_confusion_matrix(kfold_confusion_matrix_list, confusion_matrix_file)

                if result_rows:
                    df_evaluation_results = pd.concat(result_rows)
                else:
                    df_evaluation_results = pd.DataFrame(columns=EVALUATION_METRICS)

                evaluation_results_save_path = f"{category_dir}{target_id}_TL_Base_total_evaluation_result_{Base_model_Category}.csv"
                # Append to an existing file; write the header only when creating it.
                write_header = not os.path.isfile(evaluation_results_save_path)
                df_evaluation_results.to_csv(evaluation_results_save_path, mode='a', header=write_header)

#### 5c. Parameter Setup for the Function

In [None]:
# Column names of the result tables written by the evaluation function.
EVALUATION_METRICS = [
    "Weighted Accuracy",
    "Sensitivity/Recall",
    "Specificity",
    "Precision_class0",
    "Precision_class1",
    "Precision_avg",
    "F1_class0",
    "F1_class1",
    "F1_avg",
    "auc_roc_score",
    "False_Discovery_Rate",
    "False_Negative_Rate",
    "False_Omission_Rate",
    "False_Positive_Rate",
    "Jaccard",
]
# Substrings that identify the TL architecture inside a model file name.
tl_arch_list = ["1_Frozen", "2_Frozen", "3_Frozen", "All_Frozen", "All_Unfrozen"]
# Substrings that identify the base-model category ("DecisionTree" - Not Working).
Base_model_Category_list = ['K-NN', 'LogisticRegression', 'NaiveBayes', 'SVM']
epoch_sizes = ["100_300"]

### 6. Function Execution

In [None]:
# Run the TL + Base evaluation for every patient, in the same order as the
# former one-cell-per-patient layout.
# SB-033, SB-059, SB-070 and SB-089 had their call commented out in that layout
# and therefore stay excluded; the reason was not recorded.
patients_to_evaluate = [
    "SB-001", "SB-003", "SB-008", "SB-011", "SB-012", "SB-022", "SB-023",
    "SB-025", "SB-028", "SB-037", "SB-043", "SB-056", "SB-060", "SB-071",
    "SB-073", "SB-078", "SB-079", "SB-080", "SB-082", "SB-083", "SB-112",
]

for target_id in patients_to_evaluate:
    TL_and_Base_Model(target_id, TL_Models, Base_Models, output_path, datadict, sample_size_dict, EVALUATION_METRICS, tl_arch_list, Base_model_Category_list, epoch_sizes)