## Code for automated selection of a user-defined number of probes 
The probes are as evenly distributed across the target sequence as possible. This is work for MK's paper.

In [99]:
import numpy as np
import itertools

#### NOTES

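Reference for computing pairwise distances with `scipy.spatial.distance.cdist`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html

Tutorial on Python's `zip()` function (used below to pair consecutive sites): https://realpython.com/python-zip-function/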

In [100]:
# Candidate coordinates, in ascending order, at which a probe may be inserted.
target_position = (1, 8, 10, 13, 23, 25, 82, 92, 96, 97, 98)
# Number of probe sites to choose from the candidates.
no_probe_sites = 5

# Every possible way of choosing no_probe_sites candidates.
# itertools.combinations keeps the input order, so each tuple is ascending too.
all_combinations = list(itertools.combinations(target_position, no_probe_sites))

# Use this to constrain where probes must be inserted.
# Here only combinations with a probe at the first AND the last available
# candidate are kept. Negative indices are used (instead of the literal
# positions 4 and 10) so the filter stays correct when no_probe_sites or
# target_position is changed.
selected_combinations = [
    combo
    for combo in all_combinations
    if combo[0] == target_position[0] and combo[-1] == target_position[-1]
]

In [101]:
# Convert the list of combination tuples into a NumPy array (one combination
# per row) so that it can be processed row by row below.
selected_combinations = np.asarray(selected_combinations)

In [102]:
def conseq_point_dist(array):
    """Return the gaps between consecutive points of ``array``.

    For points ``p0, p1, ..., pn`` the result holds ``p1 - p0, p2 - p1, ...``,
    i.e. one entry fewer than the input. The first point itself is not part
    of the result.

    Args:
        array: Sliceable sequence (list, tuple or 1-D array) of positions.

    Returns:
        A flat NumPy array of the successive differences.
    """
    gaps = []
    for left, right in zip(array, array[1:]):
        gaps.append(right - left)
    # Appending to an empty list yields a flat array (float64 for integer
    # input), exactly as the differences are laid out in ``gaps``.
    return np.append([], gaps)

In [103]:
# Apply conseq_point_dist to every row (axis 1) of selected_combinations.
# Each resulting row holds the gaps between neighbouring insertion sites of
# the corresponding combination.
dist_between_probe_sites_array = np.apply_along_axis(conseq_point_dist, 1, selected_combinations)

In [104]:
def func_ideal_length(target_position, no_probe_sites):
    """Return the gap that perfectly even probe spacing would produce.

    The span between the first and the last candidate position is divided by
    the number of intervals between probes (one fewer than the number of
    probes). This is the spacing that would result if the positions available
    for probes were continuous rather than restricted to the candidates.

    Args:
        target_position: Sequence of candidate coordinates; only its first
            and last entries are used.
        no_probe_sites: Number of probe sites to place; must be at least 2.

    Returns:
        The ideal distance between neighbouring probe sites.

    Raises:
        ValueError: If ``no_probe_sites`` is smaller than 2, because there is
            then no interval between probes to measure.
    """
    if no_probe_sites < 2:
        raise ValueError(
            "no_probe_sites must be at least 2 to define an interval, "
            f"got {no_probe_sites}"
        )
    span = target_position[-1] - target_position[0]  # length available for probe sites
    no_intervals = no_probe_sites - 1  # number of intervals between probe sites
    return span / no_intervals

In [105]:
# Gap between neighbouring probes that perfectly even spacing would give
# for the chosen candidates and number of probe sites.
ideal_length = func_ideal_length(target_position, no_probe_sites)

In [106]:
def my_func_dist(array, ideal_length):
    """Score each row by its summed squared deviation from the ideal gap.

    For every row the criterion is ``sum((ideal_length - gap) ** 2)`` over the
    gaps in that row. This is the squared Euclidean distance between the row
    and a vector filled with ``ideal_length`` (no square root is taken).
    Smaller values mean more evenly spaced probe sites.

    Args:
        array: 2-D array-like with one row of gaps per candidate combination.
        ideal_length: The gap that perfectly even spacing would produce.

    Returns:
        1-D NumPy array with one criterion value per row of ``array``.
    """
    # Vectorised over the whole table; np.asarray also admits nested lists.
    deviation = ideal_length - np.asarray(array)
    return np.sum(deviation * deviation, axis=1)

In [107]:
# Evaluate the criterion once for every candidate combination.
criterion_per_combination = my_func_dist(dist_between_probe_sites_array, ideal_length)
min_distance = np.min(criterion_per_combination)  # smallest value of the criterion
loc = np.where(criterion_per_combination == min_distance)  # indices of the rows attaining the minimum
# Several combinations may tie for the minimum; the first one is reported.
print(selected_combinations[loc[0][0]])

[ 1 13 25 82 98]
