In [None]:
import os

import numpy as np
from docx import Document

from custom_functions import cross_val_prediction
from STARE_manager import STARE_manager

In [None]:
###### To select a dataset to analyze, set SELECTED_DATASET to one of the
###### keys of DATASET_CONFIGS below.
SELECTED_DATASET = "STARE1"

# Per-dataset settings. "data_set" is the identifier handed to STARE_manager;
# the directories say where the provided masks live and where results go.
DATASET_CONFIGS = {
    ### STARE Expert 1
    "STARE1": {
        "data_set": "stare",
        "image_dir": "../Data/Dataset_1/Provided_masks/",
        "results_dir": "../Results/Dataset_1/",
    },
    ### STARE Expert 2
    "STARE2": {
        "data_set": "stare",
        "image_dir": "../Data/Dataset_1/Provided_masks_VK/",
        "results_dir": "../Results/Dataset_1_VK/",
    },
    ### HRF (the only dataset that also defines a retinal image folder)
    "HRF": {
        "data_set": "HRF",
        "image_dir": "../Data/HRF_Dataset_1/Provided_masks/",
        "results_dir": "../Results/HRF_Dataset_1/",
        "retinal_image_folder": "../Data/HRF_Dataset_1/Provided_retinal_images/*.png",
    },
    ### all
    "all": {
        "data_set": "all",
        "image_dir": "../Data/all/Provided_masks/",
        "results_dir": "../Results/all/",
    },
}

if SELECTED_DATASET not in DATASET_CONFIGS:
    raise KeyError(
        f"Unknown dataset {SELECTED_DATASET!r}; "
        f"choose one of {sorted(DATASET_CONFIGS)}"
    )

selected_config = DATASET_CONFIGS[SELECTED_DATASET]

# Variables read by the cells below.
data_set = selected_config["data_set"]
image_dir = selected_config["image_dir"]
file_name = "im"      # identical for every dataset
data_name = "DS1_"    # identical for every dataset
results_dir = selected_config["results_dir"]
# None for every dataset that does not define a retinal image folder.
retinal_image_folder = selected_config.get("retinal_image_folder")

In [None]:
# Load the topological descriptor vectors (computed in TDA_pipeline.ipynb)
# together with their disease classifications for the selected dataset.

manager_settings = {
    "image_dir": image_dir,
    "file_name": file_name,
    "data_name": data_name,
    "results_dir": results_dir,
    "data_set": data_set,
}
stare = STARE_manager(**manager_settings)

# data_type="PI" selects which kind of descriptor vector is loaded.
ID, data, diag = stare.obtain_diagnoses(data_type="PI")

# Plot the loaded diagnosis labels.
stare.plot_labelings(diag)

## Perform cross-validation over each filtration

In [None]:
# Perform cross validation for each type of descriptor vector and print the
# mean score and its standard deviation as percentages.
# NOTE(review): the fold count (five, per the original comment) is decided
# inside cross_val_prediction, not in this cell -- confirm there.

filtrations = data.keys()

# Binary target: 1 for every row of `diag` containing at least one 0 entry,
# 0 otherwise.
y = 1*(np.any(diag==0,axis=1))

# Labels for the first three entries returned by PCA_reduction.
features = ["b0","b1","b0 & b1"]

for filtration in filtrations:

    print(f"Results using {filtration} filtration:")

    X = stare.PCA_reduction(data[filtration],comp=2,data_type="PI")

    for i, feature in enumerate(features):
        mean, std = cross_val_prediction(X[i],y)
        # Scale to percent *before* rounding: multiplying an already rounded
        # float by 100 reintroduces binary noise (e.g. 71.39999999999999).
        print(f"{feature}: {np.round(100*mean,1)}, SD: {np.round(100*std,1)}")

    print("")

## Summarize all datasets in one table and export it to a .docx file

In [None]:
# Build one summary table covering every dataset and save it as a .docx file.
#
# Layout: a header row with the dataset names, then two rows per filtration
# (the first two entries returned by PCA_reduction, labelled "CC" and
# "loops"); the leftmost column holds the row labels.

# Column heading -> settings needed to load that dataset's computations.
# "data_set" is the identifier handed to STARE_manager: both STARE annotation
# sets use "stare", the value the dataset-selection cell pairs with these
# same directories (the heading "STARE1"/"STARE2" is only a display name).
TABLE_DATASETS = {
    "STARE1": {
        "data_set": "stare",
        "image_dir": "../Data/Dataset_1/Provided_masks/",
        "results_dir": "../Results/Dataset_1/",
    },
    "STARE2": {
        "data_set": "stare",
        "image_dir": "../Data/Dataset_1/Provided_masks_VK/",
        "results_dir": "../Results/Dataset_1_VK/",
    },
    "HRF": {
        "data_set": "HRF",
        "image_dir": "../Data/HRF_Dataset_1/Provided_masks/",
        "results_dir": "../Results/HRF_Dataset_1/",
    },
    "all": {
        "data_set": "all",
        "image_dir": "../Data/all/Provided_masks/",
        "results_dir": "../Results/all/",
    },
}
datasets = list(TABLE_DATASETS)

ROW_SUFFIXES = ["CC", "loops"]
TABLE_PATH = '../Results/Docx_tables/TDA_table.docx'

###
### Compute the cell texts of every dataset column
###
# Loop-local names carry a `table_` prefix so that the notebook-wide
# data_set / image_dir / stare / data / diag / y of the cells above are
# not overwritten by this cell.
table_filtrations = None   # row order, taken from the first dataset
table_columns = {}         # column heading -> list of cell texts

for table_name, table_spec in TABLE_DATASETS.items():
    print(table_name)

    ###
    ### Load in dataset computations
    ###
    table_manager = STARE_manager(image_dir = table_spec["image_dir"],
                                  file_name = "im",
                                  data_name = "DS1_",
                                  results_dir = table_spec["results_dir"],
                                  data_set = table_spec["data_set"])
    table_ids, table_data, table_diag = table_manager.obtain_diagnoses(data_type="PI")
    # 1 for every row of the diagnoses containing at least one 0 entry.
    table_y = 1*(np.any(table_diag==0,axis=1))

    if table_filtrations is None:
        table_filtrations = list(table_data.keys())
    elif set(table_data.keys()) != set(table_filtrations):
        # Rows are shared by all columns, so every dataset must provide the
        # same filtrations; otherwise the row labels would be wrong.
        raise ValueError(
            f"Dataset {table_name!r} has filtrations {list(table_data.keys())}, "
            f"expected {table_filtrations}"
        )

    cell_texts = []
    for key in table_filtrations:
        X_table = table_manager.PCA_reduction(table_data[key],comp=2,data_type="PI")

        for j in range(len(ROW_SUFFIXES)):
            mean_score, sd_score = cross_val_prediction(X_table[j],table_y)

            mean_pct = int(np.round(100*mean_score,0))
            sd_pct   = np.round(100*sd_score,1)

            cell_texts.append(f"{mean_pct} ({sd_pct}) %")

    table_columns[table_name] = cell_texts

###
### Assemble the table (sized from the data instead of a fixed row count)
###
row_labels = [f"{key} ({suffix})"
              for key in (table_filtrations or [])
              for suffix in ROW_SUFFIXES]

document = Document()
table = document.add_table(rows=1+len(row_labels), cols=1+len(datasets))

# Fill in left column
for row_idx, row_label in enumerate(row_labels, start=1):
    table.cell(row_idx, 0).text = row_label

# Fill in one column per dataset
for col_idx, table_name in enumerate(datasets, start=1):
    table.cell(0, col_idx).text = table_name
    for row_idx, cell_text in enumerate(table_columns[table_name], start=1):
        table.cell(row_idx, col_idx).text = cell_text

# Make sure the output directory exists before saving.
os.makedirs(os.path.dirname(TABLE_PATH), exist_ok=True)
document.save(TABLE_PATH)