In [None]:
import glob
import sys

# Put the parent directory on the import path before importing the local
# helper module (custom_functions is presumably located there).
sys.path.insert(0, '..')

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from sklearn.decomposition import PCA

from custom_functions import FD_calculation, cross_val_prediction, network_descriptors

In [None]:
###### To select a dataset to analyze, set DATASET_KEY to one of the keys below.
# Each entry holds:
#   nefi_output_folder -- glob-style folder pattern containing the NEFI graph files
#   image_folder       -- folder containing the provided mask images
#   write_folder       -- folder that per-image results are written to
DATASET_CONFIGS = {
    ### Expert #1
    "STARE1": {
        "nefi_output_folder": "../Data/Dataset_1/NEFI_graphs/*/",
        "image_folder": "../Data/Dataset_1/Provided_masks/",
        "write_folder": "../Results/Dataset_1/",
    },
    ### Expert #2
    "STARE2": {
        "nefi_output_folder": "../Data/Dataset_1/NEFI_graphs_VK/*/",
        "image_folder": "../Data/Dataset_1/Provided_masks_VK/",
        "write_folder": "../Results/Dataset_1_VK/",
    },
    ### HRF
    "HRF": {
        "nefi_output_folder": "../Data/HRF_Dataset_1/NEFI_graphs/*/",
        "image_folder": "../Data/HRF_Dataset_1/Provided_masks/",
        "write_folder": "../Results/HRF_Dataset_1/",
    },
    ### all
    "all": {
        "nefi_output_folder": "../Data/all/NEFI_graphs/*/",
        "image_folder": "../Data/all/Provided_masks/",
        "write_folder": "../Results/all/",
    },
}

DATASET_KEY = "HRF"

# Expose the selected configuration under the names the later cells read.
_selected = DATASET_CONFIGS[DATASET_KEY]
dataset = DATASET_KEY
nefi_output_folder = _selected["nefi_output_folder"]
image_folder = _selected["image_folder"]
write_folder = _selected["write_folder"]

## Record the fractal dimension and standard network descriptors of each image

In [None]:
# For every image of the selected dataset: compute the fractal dimension and
# box-counting vector from the mask image, read the matching NEFI graph and
# extract its network descriptors, look up the class label, and save the
# per-image fractal-dimension results to `write_folder`.
file_name = "im"
nefi_outputs = glob.glob(f"{nefi_output_folder}*.txt")

# Per-image results, appended in the order of `nums`.
FDs = []
log_Ns = []
nodes = []
edges = []
edge_lengths = []
y = []

# Choose the image identifiers and the diagnosis lookup for the dataset.
# NOTE(review): np.load(..., allow_pickle=True) unpickles the file contents;
# only load diagnosis files from a trusted source.
if dataset == "HRF":
    nums = np.arange(1,46)
    mat = np.load("../Data/Diagnoses/image_diagnoses_HRF.npy",allow_pickle=True).item()
elif "STARE" in dataset:
    nums = np.array([1,2,3,4,5,44,77,81,82,139,162, 163, 235, 236, 239, 240, 255, 291, 319, 324])
    mat = np.load("../Data/Diagnoses/image_diagnoses.npy",allow_pickle=True).item()
elif "all" in dataset:
    mat = np.load("../Data/Diagnoses/image_diagnoses_all.npy",allow_pickle=True).item()
    nums = list(mat['image_diagnoses'].keys())
else:
    # Fail loudly instead of continuing with undefined (or stale) nums/mat.
    raise ValueError(
        f"Unknown dataset {dataset!r}; expected 'HRF', a name containing 'STARE', or 'all'."
    )

for num in nums:

    print(num)

    # The "all" dataset uses the diagnosis keys directly as identifiers;
    # the other datasets use zero-padded four-digit image numbers.
    if "all" in dataset:
        num_str = num
    else:
        num_str = f"{str(num).zfill(4)}"

    #load in image
    image_loc = f"{image_folder}{file_name}{num_str}.png"
    image = mpimg.imread(image_loc)

    #compute Fractal dimension
    # Pass a single 2-D channel: images read with a third (channel) axis are
    # reduced to channel 0. This replaces a bare try/except retry so that
    # genuine errors raised by FD_calculation are no longer masked.
    # `image` itself is left untouched because later cells reuse it.
    image_2d = image[:,:,0] if image.ndim == 3 else image
    FD, log_N, Hs = FD_calculation(image_2d)

    #compute network descriptors

    #find nefi output file (substring match of the identifier against the path)
    nefi_output = [s for s in nefi_outputs if num_str in s]
    #ensure there is only one location in this list
    assert len(nefi_output)==1, (
        f"expected exactly one NEFI output matching {num_str!r}, found {len(nefi_output)}"
    )
    #read in graph
    graph_in = nx.read_multiline_adjlist(nefi_output[0],delimiter='|')
    edges_tmp, nodes_tmp, edge_lengths_tmp = network_descriptors(graph_in)

    #find classification: 1 when diagnosis code 0 is among the image's
    #diagnoses (later cells report y == 1 as "Normal"), otherwise 0
    classification = 1*(0 in mat['image_diagnoses'][num_str])

    #record each entry
    FDs.append(FD)
    log_Ns.append(log_N)
    edges.append(edges_tmp)
    nodes.append(nodes_tmp)
    edge_lengths.append(edge_lengths_tmp)
    y.append(classification)

    #save FD, box-counting for later use
    data = {}
    data['FD'] = FD
    data['Boxcounting'] = log_N
    save_filename = f"{write_folder}DS1_im{num_str}_FD_results.npy"
    np.save(save_filename,data)

#convert lists to arrays; scalar descriptors get a trailing axis so they can
#be stacked column-wise later
FDs = np.array(FDs)[:,None]
log_Ns = np.array(log_Ns)
edges = np.array(edges)[:,None]
nodes = np.array(nodes)[:,None]
edge_lengths = np.array(edge_lengths)[:,None]
y = np.array(y)

## Compute average fractal dimension values

In [None]:
# Mean and standard deviation of the fractal dimension per class
# (y == 1 is reported as "Normal", y == 0 as "Diseased").
normal_fd = np.mean(FDs[y==1])
normal_sd = np.std(FDs[y==1])

dis_fd = np.mean(FDs[y==0])
dis_sd = np.std(FDs[y==0])

# The fractal dimension is not a percentage, and "\p" is an invalid escape in
# a normal string literal, so the summary is printed as "mean ± std".
print(f"Normal FD mean: {normal_fd} ± {normal_sd}")
print(f"Diseased FD mean: {dis_fd} ± {dis_sd}")

In [None]:
# Per-class mean and standard deviation of each network descriptor
# (y == 1 is reported as "Normal", y == 0 as "Diseased").
descriptors = [edges, edge_lengths, nodes]
descriptor_strings = ["edges", "lengths", "nodes"]

for d,v in zip(descriptors,descriptor_strings):
    # Dedicated names: the FD statistics (normal_fd, dis_fd, ...) computed
    # earlier are no longer overwritten with descriptor values.
    normal_mean = np.mean(d[y==1])
    normal_std = np.std(d[y==1])

    dis_mean = np.mean(d[y==0])
    dis_std = np.std(d[y==0])

    # Values are not percentages; print as "mean ± std" without the invalid
    # "\p" escape sequence.
    print(f"Normal {v} mean: {normal_mean} ± {normal_std}")
    print(f"Diseased {v} mean: {dis_mean} ± {dis_std}")
    print("")

## Classification by scalar values

In [None]:
# Display labels, listed in the same order as the columns stacked into X.
names = ["Df", "Avg. Edge Length", "# Nodes", "# Edges"]

# One row per image, one column per scalar descriptor.
X = np.hstack([FDs, edge_lengths, nodes, edges])

# Standardize each column: subtract its mean, then divide by its standard deviation.
column_means = X.mean(axis=0)
column_stds = X.std(axis=0)
X_norm = (X - column_means) / column_stds

In [None]:
# Evaluate each of the four standardized descriptors on its own; the slice
# keeps the feature matrix two-dimensional with a single column.
for col in range(4):
    single_feature = X_norm[:, col:col + 1]
    print(cross_val_prediction(single_feature, y))

## Classification by log_N (box-counting vector)

In [None]:
# Classify images from their box-counting vectors, reduced to two PCA scores.
X = log_Ns

#normalize (mean-centering only; the columns are not rescaled)
X_norm = X - X.mean(axis=0)
#perform PCA, keeping the first two components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_norm)

#perform cross validation (5-fold per the original note -- the fold count is
#set inside cross_val_prediction; TODO confirm)

print(cross_val_prediction(X_pca,y))

# Optional quick look at the raw box-counting vectors per class. Kept as
# comments rather than a string literal so the cell does not echo it as output.
# plt.figure()
# plt.plot(log_Ns[y==0,:].T,"r")
# plt.plot(log_Ns[y==1,:].T,"b")

## Box counting vector figure depiction (tailored to HRF)

In [None]:
# Two-panel figure of all box-counting vectors coloured by class: the full
# curves on the left and a zoom on the small-box region on the right.
fontsize = 18

# Box side lengths: 10 log-spaced values from 1 up to half of the shorter
# image side. Only the first two axes are used so that a trailing channel
# axis (image read as rows x cols x channels) is not mistaken for the shorter side.
min_side = np.min(image.shape[:2])
scales = np.logspace(0,
                     np.log2(min_side/2),
                     num=10,
                     endpoint=True,
                     base=2)
#ensure integer valued box sizes
scales = np.floor(scales).astype(int)
#ensure 1 doesn't repeat itself (same correction as in the Df figure scales)
if scales[1] == 1:
    scales[1] = 2

fig = plt.figure(figsize=(15,5))
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

# Same curves on both axes; the axes differ only in their limits.
for ax in [ax1,ax2]:
    ax.loglog(scales,np.exp(log_Ns[y==0,:].T),"r--")
    ax.loglog(scales,np.exp(log_Ns[y==1,:].T),"b")

# Off-screen dummy lines so the legend shows one entry per class rather than
# one per image.
ax1.plot([1e7,1e7],[1e6,1e6],"b",label="Normal")
ax1.plot([1e7,1e7],[1e6,1e6],"r--",label="Diseased")
ax1.legend(fontsize=fontsize,loc=1)
ax1.set_xlim([.8, 2e3])
ax1.set_ylim([1e0, 2e6])

# Rectangle marking the region shown in the zoomed panel (its extent matches
# the ax2 limits set below).
c = plt.Rectangle((.9,5e4), 4e0-.9, 1e6-5e4, facecolor='None', edgecolor = "k", linewidth=2)
ax1.add_patch(c)

ax1.set_xlabel("Box side length $(s)$", fontsize=fontsize)
ax1.set_ylabel("Nonzero box count $(N(s))$", fontsize=fontsize)
ax1.set_title("HRF box-counting vectors by disease",fontsize=fontsize)

ax2.set_title("Zoomed in",fontsize=fontsize)
ax2.set_xlim([.9, 4e0])
ax2.set_ylim([5e4, 1e6])
ax2.set_xticks([])
ax2.set_yticks([])
plt.minorticks_off()

plt.savefig(f"../Figures/box_counting_hist_{dataset}.pdf",format="pdf")

## Make Df computation figure

In [None]:
# Illustrates the fractal-dimension computation for the last processed image:
# a log-log plot of N(s) with its linear fit, plus five of the box-count grids
# (`Hs` and `log_Ns[-1]` both come from the final iteration of the image loop).
fontsize=10

# Work on a single 2-D channel.
if len(image.shape) == 3:
    image = image[:,:,0]

#image dimensions
im_shape = image.shape
im_min_shape = np.min(im_shape)

#create logscale for s -- starts with s=1 and
#ends with (smaller length of image) / 2.
scales = np.logspace(0,
                     np.log2(im_min_shape/2),
                     num=10,
                     endpoint=True,
                     base=2)
#ensure integer valued box sizes
scales = np.array([np.floor(s) for s in scales],dtype=int)
#ensure 1 doesn't repeat itself
if scales[1] == 1:
    scales[1] = 2

# Linear fit of log N(s) against log s: coeffs[0] is the slope, coeffs[1] the intercept.
coeffs=np.polyfit(np.log(scales), log_Ns[-1], 1)


fig = plt.figure(figsize=(10,5))

# Panels 2-6: every second box-count grid (scale indices 0, 2, 4, 6, 8).
for i in np.arange(5):

    H = Hs[2*i]
    Hy, Hx = H.shape

    ax = fig.add_subplot(2,3,i+2)
    ax.imshow(1*(H>0),vmin=0,vmax=1,aspect="auto",cmap="bone")

    # Round to the nearest integer: exp(log(N)) can land just below N, and a
    # plain int() would then truncate to N - 1.
    box_count = int(np.rint(np.exp(log_Ns[-1][2*i])))
    ax.set_title(f"$s$ = {scales[2*i]}, N(s) = {box_count}",fontsize=fontsize)

    # Draw the box grid only on the coarser panels.
    if i >=2:
        ax.hlines(y = np.arange(0.5,Hy), xmin=-0.5, xmax = Hx, alpha=0.5, linewidth=0.5)
        ax.vlines(x = np.arange(0.5,Hx), ymin=-0.5, ymax = Hy, alpha=0.5, linewidth=0.5)

    ax.set_xlim([-.5,Hx-.5])
    ax.set_ylim([-.5,Hy-.5])
    ax.set_xticks([])
    ax.set_yticks([])
ax = fig.add_subplot(2,3,1)

# Legend text for the fit: explicit sign between intercept and slope, and
# escaped backslashes so "\ln" reaches mathtext without an invalid escape.
slope_sign = "+" if coeffs[0] >= 0 else "-"
fit_label = (
    f"$\\ln(N(s)) = {round(coeffs[1],1)} {slope_sign} "
    f"{abs(round(coeffs[0],1))}\\ln(s)$"
)

ax.loglog(scales,np.exp(log_Ns[-1]),".",label = "N(s)")
ax.loglog(scales,np.exp(coeffs[1] + coeffs[0]*np.log(scales)), label = fit_label)

ax.set_xlabel("$s$",fontsize=fontsize)
ax.set_ylabel("$N(s)$",fontsize=fontsize)
ax.set_title("Fractal dimension calculation",fontsize=fontsize)
ax.legend(fontsize=8)
plt.axis("tight")