# Data Processing

In [1]:
# Import library
import pandas as pd
import numpy as np
import joblib
import os
import sys

fp_project_folder = "../../"
sys.path.append(fp_project_folder)

from src.configs.config import input_cols, intermediate_col_dict, tcu_col, \
    le_label, re_label, num_input_cols # change this
    
from src.configs.config import fp_checkpoint_folder, fp_data_folder, fp_actual_data_file

# File Paths
fp_col_info_file = os.path.join(fp_data_folder, "col_info.joblib")
fp_data_dfs_file = os.path.join(fp_data_folder, "data_dfs.joblib")

seed_no = 2024

## Load Data

In [2]:
df = pd.read_csv(fp_actual_data_file, index_col=0)
df

Unnamed: 0,Vision Test LE,VA LE,IOP LE,Gradable LE,Cup Disc Ratio LE,OCT RNFL_Abnormal (Progressing) LE,OCT RNFL_Abnormal (Stable) LE,OCT RNFL_Normal LE,OCT RNFL_Unreliable LE,OCT RNFL_nan LE,...,DRC_MildNPDR RE,DRC_ModerateNPDR RE,DRC_SevereNPDR RE,DRC_ProliferativeDR RE,DRC_Unreadable RE,DRC_NA RE,GSC_G0 RE,GSC_G1 RE,GSC_NA RE,Total_Time
0,1.0,0.1,13.0,0.0,0.3,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,52.0
1,1.0,0.0,18.0,1.0,0.4,0,0,0,0,1,...,1,0,0,0,0,0,1,0,0,26.0
2,0.0,0.3,12.0,1.0,0.4,0,0,0,0,1,...,1,0,0,0,0,0,1,0,0,26.0
3,0.0,0.0,17.0,1.0,0.4,0,0,0,0,1,...,1,0,0,0,0,0,1,0,0,26.0
4,1.0,0.2,18.0,1.0,0.2,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,52.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
863,0.0,0.0,20.0,1.0,0.4,0,0,1,0,0,...,1,0,0,0,0,0,1,0,0,26.0
864,0.0,0.0,18.0,1.0,0.7,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,52.0
865,1.0,0.1,15.0,1.0,0.4,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,52.0
866,1.0,0.3,13.0,1.0,0.4,0,0,0,0,1,...,0,1,0,0,0,0,1,0,0,13.0


In [3]:
def get_feat_col_types(df, input_cols, le_label, re_label):
    bin_cols = []
    cont_cols = []
    all_input_cols = [col+" "+le_label for col in input_cols] + [col+" "+re_label for col in input_cols]
    for col in all_input_cols:
        # If binary
        if (set(df[col]) == {0, 1}):
            bin_cols.append(col)
        else:
            cont_cols.append(col)
    return bin_cols, cont_cols
bin_input_cols, cont_input_cols = get_feat_col_types(df, input_cols, le_label, re_label)
print(f"Binary Columns: {bin_input_cols}")
print(f"Continuous Columns: {cont_input_cols}")

Binary Columns: ['Vision Test LE', 'Gradable LE', 'OCT RNFL_Abnormal (Progressing) LE', 'OCT RNFL_Abnormal (Stable) LE', 'OCT RNFL_Normal LE', 'OCT RNFL_Unreliable LE', 'OCT RNFL_nan LE', 'MAC GCA_Abnormal (Progressing) LE', 'MAC GCA_Abnormal (Stable) LE', 'MAC GCA_Normal LE', 'MAC GCA_Unreliable LE', 'MAC GCA_nan LE', 'HVF_Abnormal (Stable) LE', 'HVF_Normal LE', 'HVF: flat, no IRF/SRF. LE', 'HVF_Unreliable LE', 'HVF_nan LE', 'DRF_DH/MA LE', 'DRF_CWS LE', 'DRF_BH LE', 'DRF_FH LE', 'DRF_NVE LE', 'DRF_IRMA LE', 'DRF_PRH LE', 'DRF_10M LE', 'DMF_DH/MA LE', 'DMF_BH LE', 'DMF_Inner LE', 'DMF_Better LE', 'DMF_HE LE', 'AMDF_DDin LE', 'AMDF_GT125 LE', 'AMDF_PA LE', 'AMDF_GA LE', 'AMDF_PED LE', 'AMDF_SR/subRPE LE', 'AMDF_CNVM LE', 'GSF_RT LE', 'GSF_Notch LE', 'GSF_CDR LE', 'GSF_DA LE', 'GSF_DH LE', 'OCTM_IRF LE', 'OCTM_Normal LE', 'OCTM_Atrophy LE', 'OCTM_ERMpreservedFC LE', 'OCTM_Others LE', 'OCTM_ISOSloss LE', 'OCTM_VRtraction LE', 'OCTM_Drusen LE', 'OCTM_ERMlossFC LE', 'OCTM_SRF LE', 'OCTM_Un

### Generate TCU Label

In [4]:
def convert_tcu_label_bilateral(df, tcu_col):
    df = df.copy()
    unique_tcus = df[tcu_col].unique().tolist()
    unique_tcus.sort()
    new_tcu_cols = []
    for tcu in unique_tcus:
        new_tcu_col = f"{tcu_col} >= {tcu}"
        df[new_tcu_col] = (df[tcu_col] >= tcu).astype(int)
        new_tcu_cols.append(new_tcu_col)
    return df, new_tcu_cols

df, output_cols_bilateral = convert_tcu_label_bilateral(df, tcu_col)
display(df)

Unnamed: 0,Vision Test LE,VA LE,IOP LE,Gradable LE,Cup Disc Ratio LE,OCT RNFL_Abnormal (Progressing) LE,OCT RNFL_Abnormal (Stable) LE,OCT RNFL_Normal LE,OCT RNFL_Unreliable LE,OCT RNFL_nan LE,...,DRC_NA RE,GSC_G0 RE,GSC_G1 RE,GSC_NA RE,Total_Time,Total_Time >= 1.0,Total_Time >= 4.33,Total_Time >= 13.0,Total_Time >= 26.0,Total_Time >= 52.0
0,1.0,0.1,13.0,0.0,0.3,0,0,0,0,1,...,0,1,0,0,52.0,1,1,1,1,1
1,1.0,0.0,18.0,1.0,0.4,0,0,0,0,1,...,0,1,0,0,26.0,1,1,1,1,0
2,0.0,0.3,12.0,1.0,0.4,0,0,0,0,1,...,0,1,0,0,26.0,1,1,1,1,0
3,0.0,0.0,17.0,1.0,0.4,0,0,0,0,1,...,0,1,0,0,26.0,1,1,1,1,0
4,1.0,0.2,18.0,1.0,0.2,0,0,0,0,1,...,0,1,0,0,52.0,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
863,0.0,0.0,20.0,1.0,0.4,0,0,1,0,0,...,0,1,0,0,26.0,1,1,1,1,0
864,0.0,0.0,18.0,1.0,0.7,0,0,1,0,0,...,0,1,0,0,52.0,1,1,1,1,1
865,1.0,0.1,15.0,1.0,0.4,0,0,0,0,1,...,0,1,0,0,52.0,1,1,1,1,1
866,1.0,0.3,13.0,1.0,0.4,0,0,0,0,1,...,0,1,0,0,13.0,1,1,1,0,0


In [5]:
def convert_tcu_label_baseline(df, tcu_col): # This is for baseline 
    # Instead of our encoding method just use one-hot
    dummies = pd.get_dummies(df[tcu_col], prefix=tcu_col).astype("int32")
    df = pd.concat([df, dummies], axis=1)
    return df, dummies.columns.tolist()
    
df, output_cols_baseline = convert_tcu_label_baseline(df, tcu_col)
display(df)

Unnamed: 0,Vision Test LE,VA LE,IOP LE,Gradable LE,Cup Disc Ratio LE,OCT RNFL_Abnormal (Progressing) LE,OCT RNFL_Abnormal (Stable) LE,OCT RNFL_Normal LE,OCT RNFL_Unreliable LE,OCT RNFL_nan LE,...,Total_Time >= 1.0,Total_Time >= 4.33,Total_Time >= 13.0,Total_Time >= 26.0,Total_Time >= 52.0,Total_Time_1.0,Total_Time_4.33,Total_Time_13.0,Total_Time_26.0,Total_Time_52.0
0,1.0,0.1,13.0,0.0,0.3,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
1,1.0,0.0,18.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
2,0.0,0.3,12.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
3,0.0,0.0,17.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
4,1.0,0.2,18.0,1.0,0.2,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
863,0.0,0.0,20.0,1.0,0.4,0,0,1,0,0,...,1,1,1,1,0,0,0,0,1,0
864,0.0,0.0,18.0,1.0,0.7,0,0,1,0,0,...,1,1,1,1,1,0,0,0,0,1
865,1.0,0.1,15.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
866,1.0,0.3,13.0,1.0,0.4,0,0,0,0,1,...,1,1,1,0,0,0,0,1,0,0


## Split Data Set

In [6]:
def split_data_set(df, valid_prop, test_prop, seed=seed_no):
    from numpy import random
    random.seed(seed=seed)
    df_size = len(df)
    train_prop = 1 - valid_prop - test_prop
    train_size, valid_size = round(df_size*train_prop), round(df_size*(train_prop+valid_prop))
    indices = np.arange(df_size)
    np.random.shuffle(indices)
    train_indices, valid_indices, test_indicies = indices[:train_size], indices[train_size:valid_size], indices[valid_size:]
    return df.iloc[train_indices], df.iloc[valid_indices], df.iloc[test_indicies]

train_df, valid_df, test_df = split_data_set(df, valid_prop=0.1, test_prop=0.1, seed=seed_no)
display(train_df)
display(valid_df)
display(test_df)

Unnamed: 0,Vision Test LE,VA LE,IOP LE,Gradable LE,Cup Disc Ratio LE,OCT RNFL_Abnormal (Progressing) LE,OCT RNFL_Abnormal (Stable) LE,OCT RNFL_Normal LE,OCT RNFL_Unreliable LE,OCT RNFL_nan LE,...,Total_Time >= 1.0,Total_Time >= 4.33,Total_Time >= 13.0,Total_Time >= 26.0,Total_Time >= 52.0,Total_Time_1.0,Total_Time_4.33,Total_Time_13.0,Total_Time_26.0,Total_Time_52.0
274,1.0,0.0,18.0,1.0,0.7,1,0,0,0,0,...,1,1,1,0,0,0,0,1,0,0
126,0.0,0.0,12.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
261,1.0,0.0,14.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
472,0.0,0.2,14.0,1.0,0.6,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
648,0.0,0.3,17.0,1.0,0.3,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40,1.0,0.1,14.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
23,1.0,0.1,16.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
164,0.0,0.2,14.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
572,0.0,0.0,17.0,1.0,0.5,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0


Unnamed: 0,Vision Test LE,VA LE,IOP LE,Gradable LE,Cup Disc Ratio LE,OCT RNFL_Abnormal (Progressing) LE,OCT RNFL_Abnormal (Stable) LE,OCT RNFL_Normal LE,OCT RNFL_Unreliable LE,OCT RNFL_nan LE,...,Total_Time >= 1.0,Total_Time >= 4.33,Total_Time >= 13.0,Total_Time >= 26.0,Total_Time >= 52.0,Total_Time_1.0,Total_Time_4.33,Total_Time_13.0,Total_Time_26.0,Total_Time_52.0
776,1.0,0.0,21.0,1.0,0.5,0,0,0,0,1,...,1,1,1,0,0,0,0,1,0,0
703,0.0,0.0,21.0,1.0,0.3,0,0,0,0,1,...,1,1,1,0,0,0,0,1,0,0
599,0.0,0.3,18.0,1.0,0.4,0,0,0,0,1,...,1,1,0,0,0,0,1,0,0,0
548,1.0,0.3,19.0,1.0,0.3,0,0,0,0,1,...,1,1,1,0,0,0,0,1,0,0
354,1.0,0.0,10.0,1.0,0.5,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
381,0.0,0.0,15.0,1.0,0.5,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
254,0.0,0.1,12.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
743,0.0,0.1,15.0,1.0,0.6,0,0,1,0,0,...,1,1,1,1,1,0,0,0,0,1
702,0.0,0.1,17.0,1.0,0.6,0,0,1,0,0,...,1,1,1,1,1,0,0,0,0,1


Unnamed: 0,Vision Test LE,VA LE,IOP LE,Gradable LE,Cup Disc Ratio LE,OCT RNFL_Abnormal (Progressing) LE,OCT RNFL_Abnormal (Stable) LE,OCT RNFL_Normal LE,OCT RNFL_Unreliable LE,OCT RNFL_nan LE,...,Total_Time >= 1.0,Total_Time >= 4.33,Total_Time >= 13.0,Total_Time >= 26.0,Total_Time >= 52.0,Total_Time_1.0,Total_Time_4.33,Total_Time_13.0,Total_Time_26.0,Total_Time_52.0
180,1.0,0.0,19.0,1.0,0.2,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
7,0.0,0.4,15.0,1.0,0.3,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
808,0.0,0.0,16.0,1.0,0.5,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
399,0.0,0.0,21.0,1.0,0.2,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
64,0.0,0.3,10.0,1.0,0.4,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
549,0.0,0.0,20.0,1.0,0.5,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
652,0.0,0.1,14.0,1.0,0.7,0,0,1,0,0,...,1,1,1,0,0,0,0,1,0,0
620,0.0,0.0,12.0,0.0,0.4,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
516,0.0,0.3,15.0,1.0,0.5,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0


## Normalise Data

In [7]:
def normalise_data(train_df, valid_df, test_df, cont_input_cols):
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    scaler.fit(train_df[cont_input_cols])
    train_df.loc[:,cont_input_cols] = scaler.transform(train_df.loc[:,cont_input_cols])
    valid_df.loc[:,cont_input_cols] = scaler.transform(valid_df.loc[:,cont_input_cols])
    test_df.loc[:,cont_input_cols] = scaler.transform(test_df.loc[:,cont_input_cols])
    return train_df, valid_df, test_df, scaler
train_df, valid_df, test_df, scaler = normalise_data(train_df, valid_df, test_df, cont_input_cols)
display(train_df)
display(valid_df)
display(test_df)

Unnamed: 0,Vision Test LE,VA LE,IOP LE,Gradable LE,Cup Disc Ratio LE,OCT RNFL_Abnormal (Progressing) LE,OCT RNFL_Abnormal (Stable) LE,OCT RNFL_Normal LE,OCT RNFL_Unreliable LE,OCT RNFL_nan LE,...,Total_Time >= 1.0,Total_Time >= 4.33,Total_Time >= 13.0,Total_Time >= 26.0,Total_Time >= 52.0,Total_Time_1.0,Total_Time_4.33,Total_Time_13.0,Total_Time_26.0,Total_Time_52.0
274,1.0,0.000000,0.769231,1.0,0.894737,1,0,0,0,0,...,1,1,1,0,0,0,0,1,0,0
126,0.0,0.000000,0.307692,1.0,0.736842,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
261,1.0,0.000000,0.461538,1.0,0.736842,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
472,0.0,0.222222,0.461538,1.0,0.842105,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
648,0.0,0.333333,0.692308,1.0,0.684211,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40,1.0,0.111111,0.461538,1.0,0.736842,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
23,1.0,0.111111,0.615385,1.0,0.736842,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
164,0.0,0.222222,0.461538,1.0,0.736842,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
572,0.0,0.000000,0.692308,1.0,0.789474,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0


Unnamed: 0,Vision Test LE,VA LE,IOP LE,Gradable LE,Cup Disc Ratio LE,OCT RNFL_Abnormal (Progressing) LE,OCT RNFL_Abnormal (Stable) LE,OCT RNFL_Normal LE,OCT RNFL_Unreliable LE,OCT RNFL_nan LE,...,Total_Time >= 1.0,Total_Time >= 4.33,Total_Time >= 13.0,Total_Time >= 26.0,Total_Time >= 52.0,Total_Time_1.0,Total_Time_4.33,Total_Time_13.0,Total_Time_26.0,Total_Time_52.0
776,1.0,0.000000,1.000000,1.0,0.789474,0,0,0,0,1,...,1,1,1,0,0,0,0,1,0,0
703,0.0,0.000000,1.000000,1.0,0.684211,0,0,0,0,1,...,1,1,1,0,0,0,0,1,0,0
599,0.0,0.333333,0.769231,1.0,0.736842,0,0,0,0,1,...,1,1,0,0,0,0,1,0,0,0
548,1.0,0.333333,0.846154,1.0,0.684211,0,0,0,0,1,...,1,1,1,0,0,0,0,1,0,0
354,1.0,0.000000,0.153846,1.0,0.789474,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
381,0.0,0.000000,0.538462,1.0,0.789474,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
254,0.0,0.111111,0.307692,1.0,0.736842,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
743,0.0,0.111111,0.538462,1.0,0.842105,0,0,1,0,0,...,1,1,1,1,1,0,0,0,0,1
702,0.0,0.111111,0.692308,1.0,0.842105,0,0,1,0,0,...,1,1,1,1,1,0,0,0,0,1


Unnamed: 0,Vision Test LE,VA LE,IOP LE,Gradable LE,Cup Disc Ratio LE,OCT RNFL_Abnormal (Progressing) LE,OCT RNFL_Abnormal (Stable) LE,OCT RNFL_Normal LE,OCT RNFL_Unreliable LE,OCT RNFL_nan LE,...,Total_Time >= 1.0,Total_Time >= 4.33,Total_Time >= 13.0,Total_Time >= 26.0,Total_Time >= 52.0,Total_Time_1.0,Total_Time_4.33,Total_Time_13.0,Total_Time_26.0,Total_Time_52.0
180,1.0,0.000000,0.846154,1.0,0.631579,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
7,0.0,0.444444,0.538462,1.0,0.684211,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
808,0.0,0.000000,0.615385,1.0,0.789474,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
399,0.0,0.000000,1.000000,1.0,0.631579,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
64,0.0,0.333333,0.153846,1.0,0.736842,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
549,0.0,0.000000,0.923077,1.0,0.789474,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0
652,0.0,0.111111,0.461538,1.0,0.894737,0,0,1,0,0,...,1,1,1,0,0,0,0,1,0,0
620,0.0,0.000000,0.307692,0.0,0.736842,0,0,0,0,1,...,1,1,1,1,1,0,0,0,0,1
516,0.0,0.333333,0.538462,1.0,0.789474,0,0,0,0,1,...,1,1,1,1,0,0,0,0,1,0


In [8]:
data_dfs = dict(
    train_df=train_df, valid_df=valid_df, test_df=test_df
)
col_info = dict(
    input_cols=input_cols, intermediate_col_dict=intermediate_col_dict, 
    output_cols_bilateral=output_cols_bilateral,
    output_cols_baseline=output_cols_baseline,
    le_label=le_label, re_label=re_label
)
col_info

{'input_cols': ['Vision Test',
  'VA',
  'IOP',
  'Gradable',
  'Cup Disc Ratio',
  'OCT RNFL_Abnormal (Progressing)',
  'OCT RNFL_Abnormal (Stable)',
  'OCT RNFL_Normal',
  'OCT RNFL_Unreliable',
  'OCT RNFL_nan',
  'MAC GCA_Abnormal (Progressing)',
  'MAC GCA_Abnormal (Stable)',
  'MAC GCA_Normal',
  'MAC GCA_Unreliable',
  'MAC GCA_nan',
  'HVF_Abnormal (Stable)',
  'HVF_Normal',
  'HVF: flat, no IRF/SRF.',
  'HVF_Unreliable',
  'HVF_nan',
  'DRF_DH/MA',
  'DRF_CWS',
  'DRF_BH',
  'DRF_FH',
  'DRF_NVE',
  'DRF_IRMA',
  'DRF_PRH',
  'DRF_10M',
  'DMF_DH/MA',
  'DMF_BH',
  'DMF_Inner',
  'DMF_Better',
  'DMF_HE',
  'AMDF_DDin',
  'AMDF_GT125',
  'AMDF_PA',
  'AMDF_GA',
  'AMDF_PED',
  'AMDF_SFS',
  'AMDF_SR/subRPE',
  'AMDF_CNVM',
  'GSF_RT',
  'GSF_Notch',
  'GSF_CDR',
  'GSF_DA',
  'GSF_DH',
  'OCTM_IRF',
  'OCTM_Normal',
  'OCTM_Atrophy',
  'OCTM_ERMpreservedFC',
  'OCTM_Others',
  'OCTM_ISOSloss',
  'OCTM_VRtraction',
  'OCTM_Drusen',
  'OCTM_ERMdetVA',
  'OCTM_ERMlossFC',
  'OCTM

In [9]:
joblib.dump(data_dfs, fp_data_dfs_file)
joblib.dump(col_info, fp_col_info_file)

['../data\\actual\\col_info.joblib']