In [78]:
#Dependencies
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider, Button, RadioButtons
#Useful functions
def imshow3d(Im, axis=0, **kwargs):
    """
    Display a 3d ndarray with a slider to move along one of its dimensions.

    Parameters
    ----------
    Im : array-like
        Volume to display; it is converted with ``np.array``.
    axis : int
        Dimension the slider steps through (default 0). Negative values
        count from the last dimension.
    **kwargs
        Extra keyword arguments are passed to imshow.

    The colour limits are fixed to the min/max of the whole volume so that
    colours stay comparable while moving the slider. The function ends with
    ``plt.show()`` and returns nothing.
    """
    im = np.array(Im)
    # normalise so that negative axes select a plane as well
    axis_ = axis % im.ndim

    def plane(ind):
        # NumPy requires a *tuple* of slices for multi-dimensional indexing;
        # indexing with a list of slices is no longer accepted.
        s = tuple(slice(ind, ind + 1) if i == axis_ else slice(None)
                  for i in range(im.ndim))
        return im[s].squeeze()

    # generate figure, leaving room below/left of the image for the slider
    f, ax = plt.subplots()
    f.subplots_adjust(left=0.25, bottom=0.25)

    # display first image
    l = ax.imshow(plane(0), **kwargs)
    l.set_clim(vmin=np.min(im), vmax=np.max(im))

    # define slider
    axcolor = 'lightgoldenrodyellow'
    slider_ax = f.add_axes([0.25, 0.1, 0.65, 0.03])
    # the `axisbg` keyword was replaced by `facecolor` in matplotlib 2.0;
    # fall back to the old setter on older installations
    if hasattr(slider_ax, 'set_facecolor'):
        slider_ax.set_facecolor(axcolor)
    else:
        slider_ax.set_axis_bgcolor(axcolor)

    slider = Slider(slider_ax, 'Axis %i index' % axis, 0, im.shape[axis] - 1,
                    valinit=0, valfmt='%i')

    def update(val):
        # the slider value is a float; truncate it to a plane index
        l.set_data(plane(int(slider.val)))
        f.canvas.draw()

    slider.on_changed(update)

    plt.show()

def chromosomes(nuc_dia=10000,pixel_sz=100,plt_val=False):
    """
    Partition a spherical nucleus into 46 chromosome territories.

    The nucleus is sampled on a cubic grid of nuc_dia/pixel_sz points per
    side spanning [-1, 1] (unit-radius coordinates). Every grid point inside
    the unit sphere is assigned to the closest of 46 fixed sphere centers.

    nuc_dia is the nuclear diameter in nm
    pixel_sz is in nm
    plt_val: if True, also show the labelled volume with imshow3d
    This assumes 46 chromosomes

    Return list of chromosomes and pixels in their territory: a list with
    one entry per center, each a list of [x, y, z] positions (unit-radius
    coordinates) in x-major, then y, then z scan order.
    """
    #coordinates for 46 sphere centers
    #(equal size spheres in a unit sphere)
    #see:https://oeis.org/A084827/a084827.txt
    centers=[[-0.127724638717686,0.029283782782012,-0.763670872459570], 
            [0.302116854275886,0.146601789724809,-0.698281876003332], 
            [0.050116071438789,-0.375084565347080,-0.676139240788969], 
            [0.387404648096449,-0.300279722464142,-0.600095035492607],
            [-0.221565702064757,0.438003368581342,-0.599521487098418],
            [-0.536838502467010,0.121012629438513,-0.545458207564384],
            [0.470578557122151,-0.324839673302964,-0.522876020618661],
            [0.206821475639773,0.544767478949537,-0.510703040725137],
            [0.647737208453552,0.075787428022586,-0.418398254359731],
            [0.209291510617636,-0.653452063989750,-0.359946924370349],
            [-0.240428762326608,-0.655246890184877,-0.336466711372591],
            [0.027563278735129,0.169874066797150,-0.337139524778479],
            [-0.531122333361574,0.491550397468556,-0.276860250786947],
            [-0.125040038594464,0.718782537235944,-0.260923317520113],
            [-0.028222635427186,-0.267579430698296,-0.245896798982907],
            [0.559897837805783,0.479367416697336,-0.238925962888257],
            [-0.609344934400770,-0.421155893776354,-0.227356083644822],
            [-0.755792906627536,0.000918343779410,-0.170705973387576],
            [0.709453517788630,-0.276107684781292,-0.144237918782831],
            [0.338406350902039,-0.029318746498438,-0.079260341210368],
            [0.256184770042010,0.730938689442354,-0.021501641508632],
            [-0.268046158037773,0.223179830668424,-0.001615424109930],
            [0.463839024087979,-0.620577043697123,0.010090454994701],
            [0.761425580114896,0.142996856131315,0.012137124700828],
            [0.041055031342583,-0.772687639260906,0.040405708106847],
            [-0.343201070932800,-0.214763803705687,0.071596445689072],
            [-0.392969757022585,-0.662069840802751,0.087193008193199],
            [-0.377886422912343,0.667723934061050,0.108217022567140],
            [-0.686352373667351,0.339757482368351,0.117684310970756],
            [0.150619047600183,0.321066162828993,0.132327016008240],
            [0.137964450619487,-0.350718167453077,0.164313718413543],
            [0.559387984377712,0.492787670746059,0.211210130456054],
            [-0.717576062734593,-0.078536494382680,0.281568709115817],
            [0.643403410008865,-0.310581345960640,0.299892559603968],
            [0.002276767510746,0.692083481917933,0.348395549284496],
            [-0.069193117297735,-0.000826838519097,0.357871631431749],
            [-0.074584688024342,-0.626168415760149,0.450238341469810],
            [0.622296753862575,0.114447785021264,0.447227819362128],
            [-0.471682318226388,-0.413806749821993,0.454581223971127],
            [-0.434951569989064,0.423001400164857,0.481924550044267],
            [0.305007962363991,-0.417373667885278,0.577177346197253],
            [0.295340191120549,0.432541638100190,0.571004577504622],
            [-0.446844519125231,-0.001070128504388,0.633003282191707],
            [-0.094303907267779,-0.267030401770297,0.721225250748454],
            [0.281485705865138,0.008444506916010,0.721844036069634],
            [-0.091709170433872,0.260484226789782,0.723948700091940]]

    centers = np.array(centers)

    # Explicit floor division + int(): a plain `/` only yields an integer on
    # Python 2, and linspace/zeros below need a true integer count.
    arr_size = int(nuc_dia//pixel_sz)
    x_ = np.linspace(-1,1,arr_size)
    chrters = [[] for i in range(len(centers))]

    # (y, z) coordinates of one x-slab, flattened with y as the outer index
    # and z as the inner one so points are visited in x, y, z scan order.
    yy,zz = np.meshgrid(x_,x_,indexing='ij')
    yy = yy.ravel()
    zz = zz.ravel()

    for x in x_:
        #test if in sphere
        inside = x*x+yy*yy+zz*zz<=1
        n_inside = int(inside.sum())
        if n_inside==0:
            continue
        pts = np.empty((n_inside,3))
        pts[:,0] = x
        pts[:,1] = yy[inside]
        pts[:,2] = zz[inside]
        #compute the closest center index for every point of the slab at once
        sq_dists = np.sum((pts[:,None,:]-centers[None,:,:])**2,axis=-1)
        closest = np.argmin(sq_dists,axis=1)
        for chr_index,pt in zip(closest,pts.tolist()):
            chrters[chr_index].append(pt)
    if plt_val:
        # label volume: 0 outside the nucleus, i+1 inside territory i
        im = np.zeros([arr_size]*3)
        for i,chr_ in enumerate(chrters):
            # map coordinates in [-1, 1] onto voxel indices in [0, arr_size-1]
            for x,y,z in (np.array(chr_)+1)*(arr_size-1)/2:
                im[int(np.round(x)),int(np.round(y)),int(np.round(z))]=i+1
        imshow3d(im,interpolation='nearest')
    return chrters

def TAD_blur(xyzPos,pix_sz=100,nuc_dia=10000):
    """
    Jitter a position with isotropic Gaussian noise.

    Three independent zero-mean normal offsets are drawn with a standard
    deviation of half a pixel divided by the nuclear radius (pix_sz and
    nuc_dia share the same length unit) and added to xyzPos.
    """
    half_pixel = pix_sz/2.
    nuc_radius = nuc_dia/2.
    sigma = half_pixel/nuc_radius
    noise = np.random.normal(0,sigma,3)
    return np.add(noise,xyzPos)

def TAD_generator(xyzChr,noTADs=100,udist=-0.44276236166846844,sigmadist=0.57416477624326434,nuc_dia=10000,pix_sz=100):
    """
    Simulate a chain of TAD positions inside one chromosome territory.

    The first TAD sits on a uniformly chosen territory position. Each
    following TAD is placed on a territory position drawn with probability
    proportional to a Gaussian (mean udist, standard deviation sigmadist)
    evaluated on the log of its distance, in um, to the previous TAD.
    Every chosen position is jittered with TAD_blur.

    xyzChr is a list of positions belonging to a chromosome territory
        (unit-radius coordinates)
    noTADs is the number of TADs to generate
    udist, sigmadist are the mean and standard deviation of the log-distance
        (log um) between consecutive TADs
    nuc_dia is the nuclear diameter in nm, pix_sz the pixel size in nm

    Returns an array of dimensions noTADs x 3
    Raises ValueError if xyzChr is empty.
    """
    xyzChr_=np.array(xyzChr)
    if len(xyzChr_)==0:
        raise ValueError("xyzChr must contain at least one position")
    if noTADs<=0:
        return np.zeros((0,3))
    tads=[]
    # randint excludes the upper bound, so the lower bound must be 0 for the
    # first position to be eligible (and for one-position territories to work)
    first=xyzChr_[np.random.randint(0,len(xyzChr_))] #randomly choose location of first TAD
    first=TAD_blur(first,pix_sz=pix_sz,nuc_dia=nuc_dia)
    tads.append(first)
    for i_tad in range(noTADs-1):
        difs=xyzChr_-[tads[i_tad]]#unit radius
        dists=np.sqrt(np.sum(difs**2,axis=-1))
        dists=np.log(dists*nuc_dia/2000.)#unit log um
        weights = np.exp(-(dists-udist)**2/(2*sigmadist**2))
        # inverse-CDF sampling: normalise the cumulative weights to end at 1
        # and pick the first position whose cumulative weight reaches u
        weights = np.cumsum(weights)
        weights = weights/float(weights[-1])
        index_pj = np.sum(np.random.rand()-weights>0)
        pj=xyzChr_[index_pj]#unit radius
        pj=TAD_blur(pj,pix_sz=pix_sz,nuc_dia=nuc_dia)
        tads.append(pj)
    return np.array(tads)

In [57]:
# Build the per-chromosome territory position lists with the default nucleus and pixel sizes.
chrters=chromosomes()

In [80]:

# Rebuild the labelled territory volume (same voxel mapping as the plt_val
# branch of chromosomes) and overlay one simulated TAD chain on it.
# NOTE(review): arr_size is hard-coded; it matches chromosomes() defaults
# (10000/100) -- keep in sync if chrters is built with other sizes.
arr_size=100
im = np.zeros([arr_size]*3)
for i,chr_ in enumerate(chrters):
    # map coordinates in [-1, 1] onto voxel indices in [0, arr_size-1]
    for x,y,z in (np.array(chr_)+1)*(arr_size-1)/2:
        im[int(np.round(x)),int(np.round(y)),int(np.round(z))]=i+1
# simulate one TAD chain inside the first territory
x,y,z = TAD_generator(chrters[0]).T
# im==1 selects the first territory (labels are index+1); the max over the
# last axis projects it onto the first two axes
plt.imshow(np.max(im==1,axis=-1))
# imshow draws the first array axis vertically, hence the (y, x) order here
plt.plot((y+1)*(arr_size-1)/2,(x+1)*(arr_size-1)/2,'-o')

plt.show()

In [89]:
# Simulate `reals` independent cells, each with one TAD chain per chromosome
# territory, and store the result on disk.
reals=100 #number of realizations
tads=100 #number of TADs per chromosome
real_matrix=[]
for i_rel in range(reals):
    single_cell=[]
    for chrter in chrters:
        tads_=TAD_generator(chrter,tads)
        single_cell.append(tads_)
    real_matrix.append(single_cell)
# stacks to an array indexed as [realization, chromosome, TAD, xyz]
real_matrix=np.array(real_matrix)
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle  # Python 3 has no cPickle module
# the context manager closes (and flushes) the file even if dump() fails
with open('simulatedTads.pkl','wb') as pkl_file:
    pickle.dump(real_matrix,pkl_file)

In [90]:
# Reload the simulated TAD positions written by the simulation cell.
# Imported here as well so this cell runs without re-running the simulation.
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle  # Python 3 has no cPickle module
# NOTE: unpickling can execute arbitrary code; only load files this notebook wrote.
with open('simulatedTads.pkl','rb') as pkl_file:
    real_matrix = pickle.load(pkl_file)

In [92]:
# Overlay the x/y traces of the simulated TAD chains of the first realization.
for i,chr_ in enumerate(real_matrix[0]):
    x,y,z = chr_.T
    if i>5:
        break  # only chromosomes with index 0..5 are drawn
    plt.plot(x,y,'-o')
plt.show()

In [85]:
# Inspect the array currently bound to `chr_` (a loop variable reused by several cells above).
chr_

array([[ -2.60485395e-01,   4.45057923e-01,   6.66435619e-01],
       [ -2.85846974e-01,   2.35437111e-01,   6.92857530e-01],
       [ -2.19245380e-01,   3.32166951e-01,   6.12440404e-01],
       [ -1.47459193e-01,   3.74039565e-01,   4.69206394e-01],
       [ -3.50350039e-01,   3.77141451e-01,   8.15050441e-01],
       [  1.00904304e-01,   3.14148933e-01,   6.61485386e-01],
       [  3.03243923e-02,   3.75955003e-01,   7.55287036e-01],
       [ -1.39236766e-01,   1.31095028e-01,   7.19327637e-01],
       [  2.43363442e-02,   4.84112581e-01,   6.42653164e-01],
       [ -1.78864010e-01,   1.93868345e-01,   7.12881178e-01],
       [ -1.52103764e-01,   4.02911360e-01,   6.80603357e-01],
       [ -1.35346498e-01,   3.44894124e-01,   6.51682242e-01],
       [  2.93737584e-02,   2.63490375e-01,   7.04091363e-01],
       [ -2.23454932e-01,   3.31427586e-01,   6.38331657e-01],
       [ -8.90331552e-02,   1.18154590e-01,   7.47092776e-01],
       [ -8.34476185e-04,   3.33326188e-01,   4.7134181

In [44]:
import glob
import numpy as np
#files = glob.glob('*.csv')
# Input tables to analyse; hard-coded instead of globbing the working directory.
files= ['chr21.csv', 'chr22.csv']
# NOTE(review): file_ is not read by the calls below, which index `files` directly.
file_ =files[0]
def file_to_mat(file_):
    """
    Read a comma-separated numeric table into a 2-D float array.

    The first line is treated as a header and skipped. Empty fields become
    NaN; every other field must be parseable by float(). Blank lines are
    ignored.

    file_ is the path of the file to read
    Returns an array with one row per data line
    """
    def refine_line(ln):
        return [np.nan if field.strip()=='' else float(field)
                for field in ln.split(',')]
    # the context manager closes the file; stripping only line terminators
    # keeps the last character of a final line that has no trailing newline
    with open(file_,'r') as fh:
        lines = [ln.rstrip('\r\n') for ln in fh]
    # a list (not a lazy map object) so np.array builds a numeric 2-D array
    rows = [refine_line(ln) for ln in lines[1:] if ln.strip()]
    return np.array(rows)
def data_to_dists(data):
    """
    Euclidean distances between consecutive rows that share a cell id.

    data is an iterable of 5-element rows (cell id, TAD id, x, y, z). For
    every row whose cell id equals that of the row just before it, the
    distance between the two xyz positions is recorded. NaN distances
    (from missing coordinates) are removed from the result.

    Returns a 1-D array of distances
    """
    step_lengths = []
    last_cell = np.nan  # NaN never compares equal, so the first row is skipped
    last_pos = None
    for cell_id,_tad_id,px,py,pz in data:
        pos = np.array([px,py,pz])
        if cell_id==last_cell:
            step_lengths.append(np.sqrt(np.sum((pos-last_pos)**2)))
        last_cell,last_pos = cell_id,pos
    step_lengths = np.array(step_lengths)
    return step_lengths[~np.isnan(step_lengths)]

# Consecutive-row distances for each input file, then pooled into one sample.
dists0 = data_to_dists(file_to_mat(files[0]))
dists1 = data_to_dists(file_to_mat(files[1]))
dists = np.concatenate([dists0,dists1])


In [43]:
import matplotlib.pylab as plt
# Histogram of the pooled distances; the commented lines are per-file
# (raw and log-scaled) variants of the same plot.
#plt.hist(np.log(dists0),bins=30,alpha=0.7)
#plt.hist(np.log(dists1),bins=30,alpha=0.7)
#plt.hist(dists0,bins=30,alpha=0.7)
#plt.hist(dists1,bins=30,alpha=0.7)
plt.hist(dists,bins=40)
plt.show()

In [49]:
# Standard deviation of the pooled consecutive-row distances.
np.std(dists)

0.45172828742659688

In [48]:
# Mean of the pooled consecutive-row distances.
np.mean(dists)

0.75397658663865952