In [3]:
#automatically reload stuff
%load_ext autoreload
%autoreload 2

import simplejson
import Utils
import matplotlib.pyplot as plt
from SpatialPreprocessing import *
from Autoencoders import autoencode

from multiprocessing import Pool, cpu_count
import os

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#read in the organ info and save it
# load_spatial_files, OrganData, np_dict_to_json and Const are not defined in this
# notebook; presumably they come from `from SpatialPreprocessing import *` -- TODO confirm
spatial_files = load_spatial_files()
od = OrganData()
pdict = od.process_cohort_spatial_dict(spatial_files)
# persist the processed dict to the path held in Const.processed_organ_json
np_dict_to_json(pdict,Const.processed_organ_json)
# NOTE(review): this prints the entire dict, which may produce a very large cell output
print(pdict)
# drop the in-memory copy; the next cell reloads the data through load_pdict()
del pdict

In [None]:
#make sure the dict is being saved properly
# presumably load_pdict reads back the JSON written by the previous cell -- TODO confirm
pdata = load_pdict()
# bare last expression so the notebook displays the loaded object
pdata 

In [None]:
#get a dict of denoised arrays for each requested feature (distances, volume, ...)
# autoencode is imported from the project-local Autoencoders module; train=False
# presumably reuses already-trained models instead of fitting new ones -- TODO confirm
data = autoencode(pdata, ['distances','volume','centroids','mean_dose'], train=False)
print()
# data is walked as a two-level dict: outer key -> {inner key -> array};
# only summary statistics and the shape of each array are printed, not its contents
for k,v in data.items():
    print(k)
    for kk,vv in v.items():
        print(kk,'array->', 'mean:', np.round(vv.mean(),5),'std', np.round(vv.std(),5), 'shape:',vv.shape)
    print()

In [None]:
# pull the 'denoised' arrays for the distance and volume features out of the
# autoencoder results; these are the inputs to get_similarity_matrix below
dists = data['distances']['denoised']
vols = data['volume']['denoised']
# display the shape of the distance array as a quick sanity check
dists.shape

In [None]:
def local_tssim(x,y,v = None, w = None):
    """Compute an SSIM-style similarity between two vectors.

    The score is the product of a mean-comparison term and a
    covariance-comparison term:

        (2*mean_x*mean_y + c1) * (2*cov_xy + c2)
        ---------------------------------------------------
        (mean_x**2 + mean_y**2 + c1) * (var_x + var_y + c2)

    When both ``v`` and ``w`` are given, numerator and denominator are further
    multiplied by the same mean-comparison term computed on ``v`` and ``w``.
    The internal structure (ordering) of the window is otherwise ignored.

    The covariance and the variances are both normalised by N (``ddof=0``).
    Mixing ``np.cov``'s default N-1 normalisation with ``np.var``'s N
    normalisation makes the similarity of a vector with itself n/(n-1)
    instead of 1, and yields NaN for one-element windows.

    Parameters
    ----------
    x, y : array-like
        Base variables (distances) for patients 1 and 2. Intended to be
        1-dimensional and of equal length.
    v, w : array-like or scalar, optional
        Volumes for patients 1 and 2. They are only used when both are
        provided.

    Returns
    -------
    float
        The similarity score, or 0 (after printing a message) when the
        denominator is not strictly positive, e.g. because it is NaN.
    """
    # small stabilising constants that keep both ratios defined when the
    # means or the variances are all zero
    c1 = .000001
    c2  = .000001
    mean_x = np.mean(x)
    mean_y = np.mean(y)
    # ddof=0 so the covariance uses the same normalisation as np.var below
    cov_xy = np.cov(x, y, ddof=0)[0, 1]
    numerator = (2*mean_x*mean_y + c1) * (2*cov_xy + c2)
    denominator = (mean_x**2 + mean_y**2 + c1)*(np.var(x) + np.var(y) + c2)
    if v is not None and w is not None:
        mean_v = np.mean(v)
        mean_w = np.mean(w)
        numerator *= (2*mean_v*mean_w + c1)
        denominator *= (mean_v**2 + mean_w**2 + c1)
    if denominator > 0:
        return numerator/denominator
    else:
        print('error, zero denomiator in ssim function')
        return 0
    
def get_adjacency_list(dist_array, window_size = 50, gtvname = "GTV"):
    """Build, for every row of the mean distance matrix, the indices of nearby organs.

    The distances are averaged over the first axis with ``np.nanmean`` (NaNs
    are ignored), and each row of the resulting matrix is thresholded against
    ``window_size``.

    Parameters
    ----------
    dist_array : numpy.ndarray
        Distance array. Per the original author's notes the intended layout is
        (n_patients) x (n_organs + gtv) x (n_organs), but the exact dimensions
        are flexible.
    window_size : number, optional
        An organ counts as adjacent when its mean distance is strictly below
        this value.
    gtvname : str, optional
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    list of numpy.ndarray
        One index array per row of the mean distance matrix, usable as
        ``dist_array[:, row_idx, adjacent_organs]``.
    """
    # use the argument, not the notebook-global `dists`, so the function works
    # on whatever array the caller passes in
    mean_dists = np.nanmean(dist_array, axis=0)
    adjacency_list = []
    for organ_row in mean_dists:
        # indices of the organs whose mean distance falls inside the window
        adjacent_organs = np.argwhere(organ_row < window_size)
        adjacency_list.append(adjacent_organs.ravel())
    return adjacency_list

def pairwise_mean_local_similarity(d1, d2, v1, v2, adjacency_list, local_sim_func = None):
    """Average a local similarity score over every adjacency window.

    For each position ``i`` in ``adjacency_list`` the entries of row ``i`` of
    ``d1`` and ``d2`` selected by that window are compared, together with the
    volumes ``v1[i]`` and ``v2[i]``, and the mean of all scores is returned.

    Parameters
    ----------
    d1, d2 : numpy.ndarray
        Minimum-distance arrays for the two patients, intended shape
        (n_organs + <optional gtv>) x (n_organs). They must have the same
        number of dimensions.
    v1, v2 : array-like
        Volume arrays for the two patients, indexed by row.
    adjacency_list : sequence
        One set of column indices per row.
    local_sim_func : callable, optional
        Called as ``f(dists_1, dists_2, volume_1, volume_2)`` and expected to
        return a scalar. Defaults to ``local_tssim``.

    Returns
    -------
    float
        Mean of the per-window similarity scores.
    """
    assert d1.ndim == d2.ndim
    score_fn = local_tssim if local_sim_func is None else local_sim_func
    window_scores = np.empty((len(adjacency_list),))
    for row, neighbours in enumerate(adjacency_list):
        # compare the same window of both patients, plus their volume for this row
        window_scores[row] = score_fn(
            d1[row, neighbours],
            d2[row, neighbours],
            v1[row],
            v2[row],
        )
    return window_scores.mean()
        
def pairwise_tssim_job(p1, p2, dist_array, vol_array, adjacency_list):
    """Score one pair of patients; the unit of work handed to the worker pool.

    Kept as a plain module-level function taking indices and the full arrays
    because it has to be pickled to be sent to a worker.

    Parameters
    ----------
    p1, p2 : int
        Indices of the two patients along the first axis of the arrays.
    dist_array, vol_array : numpy.ndarray
        Distance and volume arrays indexed by patient.
    adjacency_list : sequence
        Per-row index windows forwarded to pairwise_mean_local_similarity.

    Returns
    -------
    tuple
        ``(p1, p2, similarity)`` so the caller can place the score in its matrix.
    """
    score = pairwise_mean_local_similarity(
        dist_array[p1],
        dist_array[p2],
        vol_array[p1],
        vol_array[p2],
        adjacency_list,
    )
    return p1, p2, score

    
def get_similarity_matrix(dist_array, vol_array, sim_func = None, max_jobs = 8):
    """Compute a symmetric n_items x n_items similarity matrix in parallel.

    Every unordered pair ``(p1, p2)`` with ``p1 < p2`` is scored in a
    ``multiprocessing.Pool`` and the result is mirrored across the diagonal.
    The diagonal itself is never computed and stays 0.

    Parameters
    ----------
    dist_array : numpy.ndarray
        Distance array whose first axis indexes the items (patients).
    vol_array : numpy.ndarray
        Volume array indexed the same way.
    sim_func : callable, optional
        Called as ``f(idx_1, idx_2, dist_array, vol_array, adjacency_list)``
        and must return ``(idx_1, idx_2, similarity)``. It is sent to worker
        processes, so it has to be picklable. Defaults to
        ``pairwise_tssim_job``.
    max_jobs : int, optional
        Upper bound on the number of worker processes.

    Returns
    -------
    numpy.ndarray
        The (n_items, n_items) similarity matrix.

    Raises
    ------
    multiprocessing.TimeoutError
        If a single result is not available within the timeout passed to
        ``AsyncResult.get``.
    """
    if sim_func is None:
        sim_func = pairwise_tssim_job

    x_items = dist_array.shape[0]
    adjacency_list = get_adjacency_list(dist_array)
    similarity_array = np.zeros((x_items, x_items))

    #get the number of available cpus
    try:
        available_cpus = len(os.sched_getaffinity(0))
    except (AttributeError, OSError):
        # os.sched_getaffinity only exists on some Unix platforms; catch just
        # that failure instead of a bare except, which would also swallow
        # KeyboardInterrupt and SystemExit
        available_cpus = cpu_count() - 1
    # always use at least one worker, even if the fallback above yields 0
    n_jobs = max(min(max_jobs, available_cpus), 1)
    # NOTE(review): the full arrays are pickled once per submitted pair; this
    # may be costly for large cohorts
    with Pool(n_jobs) as pool:
        score_results = []
        for p1 in range(x_items):
            for p2 in range(p1 + 1,x_items):
                score_result = pool.apply_async(sim_func,
                                               args = (p1,p2,
                                                      dist_array,
                                                      vol_array,
                                                      adjacency_list)
                                               )
                score_results.append(score_result)
        # collect inside the with-block: leaving it terminates the pool
        for res in score_results:
            (p1,p2,sim) = res.get(10000)
            similarity_array[p1,p2] = sim
    # only the upper triangle was filled; mirror it into the lower triangle
    similarity_array += similarity_array.transpose()
    return similarity_array
    
# compute and display the pairwise similarity matrix for the denoised distance and
# volume arrays extracted in the earlier cell (default sim_func and max_jobs)
get_similarity_matrix(dists, vols)

In [None]:
# inspect the first entry of the denoised volume array
vols[0]