In [1]:
import numpy as np
import pandas as pd
import holoviews as hv
from holoviews import opts
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

%matplotlib inline


hv.extension('plotly')

ModuleNotFoundError: No module named 'pandas'

## File Loading

In [None]:
def load_IF(file):
    """
    Read an interaction-frequency (IF) matrix with np.load.

    file: whatever np.load accepts as its first argument (here: a .npy path)

    Returns the loaded array. An AssertionError is raised when the sizes of
    its first two dimensions differ, i.e. when the matrix is not square.
    """
    if_matrix = np.load(file)
    n_rows = if_matrix.shape[0]
    n_cols = if_matrix.shape[1]
    assert n_rows == n_cols
    return if_matrix

## Initializations

In [None]:
def compute_cube_side(mat_mean, fact=10):
    """
    Side length of the sampling/simulation cube; all points will be drawn
    inside of this cube.

    mat_mean: value that gets scaled to obtain the side length
    fact: multiplicative factor applied to mat_mean (default 10)

    Returns fact * mat_mean.

    Note: this method was deprecated in the java code.
    """
    return fact * mat_mean

In [None]:
def random_connected_structure(start_x, start_y, start_z, n_points, radius):
    """
    Build a random-walk chain of 3D points.

    start_x, start_y, start_z: where the random walk starts in 3D
    n_points: number of points of the walk; the start point is always part of
              the result, followed by n_points - 1 perturbed points
    radius: step size handed to perturb_randomly for every step

    Returns a numpy array with one (x, y, z) row per point.
    """
    # The start position is stored as a plain tuple, every later point as an array.
    walk = [(start_x, start_y, start_z)]
    position = np.array((start_x, start_y, start_z))

    remaining_steps = n_points - 1
    for _ in range(remaining_steps):
        # Each new point is the previous one moved by perturb_randomly.
        step_x, step_y, step_z = perturb_randomly(position, radius)
        position = np.array((step_x, step_y, step_z))
        walk.append(position)

    return np.array(walk)

In [None]:
def create_pts_indices_of_interest(IF_mat):
    """
    Build the filter of IF entries that is used in all calculations.

    IF_mat: Interaction Frequency matrix

    Returns the element-wise result of IF_mat > 0, i.e. True wherever the
    entry is strictly positive.
    """
    positive_entries = IF_mat > 0
    return positive_entries

In [None]:
def random_connected_circle(center_x, center_y, center_z, n_points, radius, noise=0):
    """
    Creates a circular structure of points lying in the plane z = center_z.

    center_x, center_y, center_z: center of the circle in 3D space
    n_points: number of points, evenly spaced (in angle) along the circle
    radius: radius of the circle (NOT the distance between the points)
    noise: not implemented and currently ignored; the idea is to apply
           perturb_randomly() to each point.

    Returns an (n_points, 3) float array of (x, y, z) rows; the array is
    empty, with shape (0, 3), when n_points is not positive.
    """
    # Guard the empty case explicitly: it avoids a division by zero below and
    # keeps the result two-dimensional.
    if n_points <= 0:
        return np.empty((0, 3), dtype=float)

    angles = np.arange(n_points) * (2 * np.pi / n_points)

    circle = np.empty((n_points, 3), dtype=float)
    # Offset by the requested center so the circle is not pinned to the origin.
    circle[:, 0] = center_x + radius * np.cos(angles)
    circle[:, 1] = center_y + radius * np.sin(angles)
    circle[:, 2] = center_z
    return circle

## Perturbations

In [None]:
def perturb_randomly(pts_arr, radius):
    """
    Move a single point by a step of length radius in a random direction.

    pts_arr: numpy array, representing the x, y, z of the point
    radius: how much to move that point

    Two angles are drawn uniformly from [0, 2*pi) with np.random.uniform and
    turned into a displacement of length radius. Returns a new numpy array
    with the displaced x, y, z; pts_arr itself is not modified.
    """
    first_angle = np.random.uniform(low=0, high=np.pi*2)
    second_angle = np.random.uniform(low=0, high=np.pi*2)

    # Displacement along each axis.
    shift_x = np.cos(first_angle) * np.cos(second_angle) * radius
    shift_y = np.sin(second_angle) * radius
    shift_z = np.sin(first_angle) * np.cos(second_angle) * radius

    old_x, old_y, old_z = pts_arr[0], pts_arr[1], pts_arr[2]
    return np.array([shift_x + old_x, shift_y + old_y, shift_z + old_z])

## Computations

In [None]:
def convert_IF_to_distance(if_mat, c=1,  alpha=2):
    """
    Convert interaction frequencies into distances as c / if_mat**alpha.

    if_mat: interaction frequency values
    c, alpha: constants of the conversion

    There will be infinity where if_mat holds zeros.
    """
    powered = np.power(if_mat, alpha)
    return c / powered

In [None]:
def convert_distance_to_IF(distance_mat, c=1,  alpha=2):
    """
    Simulate an IF matrix from point coordinates as c / distance**alpha.

    distance_mat: despite its name, the array of point coordinates (one row
                  per point); the pairwise distances are computed here with
                  cdist(distance_mat, distance_mat)
    c, alpha: constants of the conversion

    Returns the square matrix of simulated interaction frequencies. Entries
    whose distance is zero (always the case on the diagonal) would be
    infinite and are set to 0.0 instead.
    """
    pairwise = cdist(distance_mat, distance_mat)

    # The zero distances are expected, so silence the divide-by-zero warning
    # that would otherwise be emitted on every call.
    with np.errstate(divide='ignore'):
        simulated_if = c / np.power(pairwise, alpha)

    # isinf covers +inf as well as -inf (the latter shows up for negative c).
    simulated_if[np.isinf(simulated_if)] = 0.0
    return simulated_if

In [None]:
def get_dists(pt, arr):
    """
    Euclidean distance between ONE point and every point of an array.

    pt: np array 1x3, the single point
    arr: nx3 array of points

    Returns the norm of (pt - arr) taken along axis 1, one value per row.
    """
    offsets = pt - arr
    return np.linalg.norm(offsets, axis=1)

## TODO

In [None]:
def compute_pt_ll(pt_coords, other_pts, IF_matrix_vals, sigma=1, c=1, alpha=2):
    """
    Squared error between the IF implied by the geometry and the given IF.

    pt_coords: a 1x3 array, representing x, y and z of the (perturbed) point
    other_pts: nx3 array of the points pt_coords is compared against
    IF_matrix_vals: IF values from the original IF matrix, expected to line
                    up with the rows of other_pts
    sigma: accepted but not used by the current computation
    c, alpha: constants of the distance-to-IF conversion c / distance**alpha

    Returns an array of squared differences (simulated IF minus given IF).
    """
    # Distance to IF, i.e. the inverse of the IF-to-distance conversion.
    distances = get_dists(pt_coords, other_pts)
    simulated_IF = c / np.power(distances, alpha)

    residuals = simulated_IF - IF_matrix_vals
    return residuals ** 2

In [None]:
def compute_pts_ll_array(pts_loc, prox_matrix, IF_matrix, sigma=1):
    """
    Computes the initial likelihoods of all points. This initial value
    will be used in the start of optimization to compare if the new
    perturbation resulted in a more probable structure.

    pts_loc: nx3 array with the coordinates of all points
    prox_matrix: boolean matrix; row p selects the points (and IF entries)
                 that are taken into account for point p
    IF_matrix: the original IF matrix
    sigma: forwarded to compute_pt_ll

    Returns a 1D array with one value per point: the sum of what
    compute_pt_ll returns for that point and its selected points.
    """
    n_pts = pts_loc.shape[0]
    lls = np.zeros(n_pts)
    for idx in range(n_pts):
        row_filter = prox_matrix[idx, :]
        # Pass sigma along instead of silently falling back to the default.
        point_ll = compute_pt_ll(pts_loc[idx],
                                 pts_loc[row_filter],
                                 IF_matrix[idx, row_filter],
                                 sigma=sigma)
        lls[idx] = point_ll.sum()

    return lls

# Main

## Load the Data and Initialize everything

In [None]:
#Change here for the path of files
#IF_FILE: .npy file with the interaction-frequency matrix (read by load_IF)
IF_FILE = './data/simulate_data_400.npy'
#ground_truth_file: .npy file with the ground-truth 3D curve (read with np.load)
ground_truth_file = './data/ground_truth_curves/double_spiral_400.npy'

In [None]:
#Load the data: the IF matrix and the ground-truth structure
if_mat = load_IF(IF_FILE)
#The ground truth is transposed right after loading; the cells below index it
#as gtruth_3d[:, 0], gtruth_3d[:, 1], gtruth_3d[:, 2] (presumably x, y, z)
gtruth_3d = np.load(ground_truth_file).T

In [None]:
#3D viz of the ground-truth structure: a path through the points plus the points themselves
path = hv.Path3D(gtruth_3d)
scatter = hv.Scatter3D((gtruth_3d[:,0].flat, gtruth_3d[:,1].flat, gtruth_3d[:,2].flat))

#Combine both elements; being the last expression of the cell, the result is displayed
path * scatter

In [None]:
#Get some basic stats
#Number of points = number of rows of the IF matrix
n_points = if_mat.shape[0]
#NOTE(review): radius is assigned again (radius = 0.1) in the main-loop cell before it is used there
radius = 0.001

In [None]:
#Initialize our structure, before optimization we start from this random structure.
#The number of points is taken from the IF matrix (n_points): the boolean filters
#built from if_mat are used to index simulated_pts, so both sizes have to agree.
simulated_pts = random_connected_structure(0,0,0,n_points,0.05)
#simulated_pts = random_connected_circle(0,0,0,n_points,1)

In [None]:
#Visualize the randomly initialized structure: a path through the points plus the points themselves
path = hv.Path3D(simulated_pts)
scatter = hv.Scatter3D((simulated_pts[:,0].flat, simulated_pts[:,1].flat, simulated_pts[:,2].flat))

#Combine both elements; being the last expression of the cell, the result is displayed
path * scatter

In [None]:
#Compute the simulated IF of the randomly generated points
simulated_IF = convert_distance_to_IF(simulated_pts)
#Show the resulting matrix as the cell output
simulated_IF

In [None]:
#Visualize the simulated IF as a heatmap, on a figure created with figsize=(8, 8)
plt.figure(figsize=(8, 8))

sns.heatmap(simulated_IF)

In [None]:
#Get a filter of points to compute. This filter represents which points had an IF value
#for a given point. This is better than having an array per point, since we can then vectorize
#the operations using the same arrays
pts_proximity = create_pts_indices_of_interest(if_mat)

In [None]:
#Compute the initial likelihoods (one value per point), for comparison in the optimization:
#the main loop accepts a perturbation only when its value is lower than the one stored here
pts_likelihoods = compute_pts_ll_array(simulated_pts, pts_proximity, if_mat, sigma=1)

## Main Loop

In [None]:
#Radius is the radius of the perturbation, how far should the point be moved.
radius = 0.1
#Number of perturbation attempts. It has to be an int: range() raises a
#TypeError when it is handed a float such as 1e5.
counter = 100000

#Loop counter times
for i in range(counter):
    #Progress report every 10000 iterations
    if i%10000 == 0:
        print(i)

    #Pick a random point within the structure
    random_point_index = np.random.randint(n_points)
    random_point = simulated_pts[random_point_index]

    #Filter its neighbouring points who had an IF value in the Hi-C data
    neighbouring_points_indices = pts_proximity[random_point_index, :]
    neighbouring_points = simulated_pts[neighbouring_points_indices]
    original_likelihood = pts_likelihoods[random_point_index]

    #Perturb that point
    new_coords = perturb_randomly(random_point, radius)

    #Compute likelihood of new point compared to the remaining points,
    #reusing the filter computed above to select the matching IF values
    pt_IF = if_mat[random_point_index, neighbouring_points_indices]
    new_distances = compute_pt_ll(new_coords, neighbouring_points, pt_IF)
    new_likelihood = new_distances.sum() #TODO: Change this!

    #If we have a better structure (lower value), update the point's coordinates
    #and its stored likelihood; otherwise the perturbation is discarded
    if new_likelihood < original_likelihood:
        #Update the point
        simulated_pts[random_point_index, :] = new_coords
        pts_likelihoods[random_point_index] = new_likelihood

In [None]:
#Visualize the structure after the main loop: a path through the points plus the points themselves
path = hv.Path3D(simulated_pts)
scatter = hv.Scatter3D((simulated_pts[:,0].flat, simulated_pts[:,1].flat, simulated_pts[:,2].flat))

#Combine both elements; being the last expression of the cell, the result is displayed
path * scatter

# NOTES

For the inverse distance-to-IF conversion, I think the one used in the paper is correct (judging just by looking at the numbers).

I will create a circular initialization. Notice how in this version (`convert_distance_to_IF_v2`, defined below), closer points have lower values, whereas closer points should have a higher interaction frequency.

In [None]:
def convert_distance_to_IF_v2(distance_mat, c=1,  alpha=2):
    """
    Alternative conversion: distance**alpha / c instead of c / distance**alpha.

    distance_mat: array of point coordinates (one row per point); the
                  pairwise distances are computed with cdist
    c, alpha: constants of the conversion

    Returns the square matrix of converted values. With this formula closer
    points get lower values and zero distances give zero; any entry that
    still ends up equal to +infinity is set to 0.0.
    """
    pairwise = cdist(distance_mat, distance_mat)
    scaled = np.power(pairwise, alpha) / c

    infinite_entries = scaled == np.inf
    scaled[infinite_entries] = 0.0
    return scaled

In [None]:
#Rebind simulated_pts to a circular structure: 400 points, radius 1, (0,0,0) passed as the center.
#NOTE(review): this replaces the structure produced by the main loop above
simulated_pts = random_connected_circle(0,0,0,400,1)

In [None]:
#Visualize the circular structure: a path through the points plus the points themselves
path = hv.Path3D(simulated_pts)
scatter = hv.Scatter3D((simulated_pts[:,0].flat, simulated_pts[:,1].flat, simulated_pts[:,2].flat))

#Combine both elements; being the last expression of the cell, the result is displayed
path * scatter

In [None]:

#Convert the circular structure with the v2 formula (distance**alpha / c) and show it as a heatmap
simulated_IF_v2 = convert_distance_to_IF_v2(simulated_pts)
plt.figure(figsize=(8, 8))

sns.heatmap(simulated_IF_v2)

But here is the other one, `convert_distance_to_IF` (just swapping IF with distance):

In [None]:
#Convert the same circular structure with convert_distance_to_IF (c / distance**alpha) and show it as a heatmap
simulated_IF = convert_distance_to_IF(simulated_pts)
plt.figure(figsize=(8, 8))

sns.heatmap(simulated_IF)