# Libraries and functions 

In [1]:
from pytadbit.modelling.structuralmodels import StructuralModels,load_structuralmodels
from pytadbit.utils.three_dim_stats import calc_eqv_rmsd
import sys
import os
import cPickle as pickle

# Parameters to modify 

In [2]:
# Base path where the folder branch is stored. Keep the trailing '/': the
# other paths in this notebook are built by plain string concatenation.
basePath = '/home/julen/TADdyn/TADdyn_tutorial/'
# Maximum number of models to be used. From ~1500 done, we will keep the best 1000
modelsKeep = 1000
# Define the order of the cells for the plotting
orderCell = ['Ery', 'nCD4', 'Mon']

# Whether to divide the analysis by model clusters
toCluster = False


# Run 

## Import additional libraries 

In [3]:
# Add <basePath>code to the module search path so the imports below can be
# resolved (presumably fileHandling and metrics live in that folder).
sys.path.append(basePath + 'code')
import fileHandling
import metrics

## Define new paths 

In [4]:
# Output path for pickles and plots
outdata = basePath + 'outData/modelAnalysis/model_comparisons/mergedClustering/'
#outplot = basePath + 'outPlot/modelAnalysis/model_comparisons/mergedClustering/'

# Create the output folder together with any missing parent folders.
# Done with the standard library instead of a shell call so it also works
# outside IPython and with paths that contain spaces. The isdir check keeps
# the cell re-runnable, since os.makedirs raises if the folder already exists.
if not os.path.isdir(outdata):
    os.makedirs(outdata)
# (the plot folder is currently not used, so it is not created)

## Get model files and regions

In [5]:
# Locate the stored models. The helper returns the cell names, the region
# names and the model paths (indexed further down as models[cell][region]).
cells, regionsAll, models = fileHandling.getModelsPaths(basePath, ending='models')

# Collect the info of every region. The first info seen for a region is kept;
# if another cell reports different info for the same region a warning is
# printed, because the rest of the notebook compares models across cells.
regiones = {}
for cell in cells:
    for regi in regionsAll:
        infot = fileHandling.getREgiInfo(basePath, regi, cell)
        if regi not in regiones:
            regiones[regi] = infot
        elif infot != regiones[regi]:
            # Name the offending region/cell in the message
            print('Region %s, cell %s has different parameters than previous cells'
                  % (regi, cell))
            print('This code is not prepared to compare models with different '
                  'length or resolution')

## Retrieve clustering data

In [6]:
# The clustering info is only needed when the analysis is divided by model
# clusters; in that case it is recovered from the pickle stored under the
# clustering output folder.
# Default to None so the name is always defined, even with clustering disabled.
allClusters = None
if toCluster:
    clustersPickle = basePath + 'outData/modelAnalysis/clustering/allClusterDict.pickle'
    # NOTE(review): unpickling can execute arbitrary code; only load pickle
    # files produced by this pipeline.
    with open(clustersPickle, "rb") as input_file:
        allClusters = pickle.load(input_file)
    print('Recovering previous clustering data from:')
    print(clustersPickle)

## Store xyz files of models 

In [7]:
# Maximum number of clusters to check for extracting models
maxCluster = 2
# Minimum number of models that must be in a cluster to store them
minModel = 50

# Forward the notebook settings instead of repeating literal values, so that
# editing the parameters above (or toCluster in the parameters cell) actually
# changes the call. The clustering dictionary only exists when clustering is
# enabled, hence the guarded lookup.
indexList, passFilter = metrics.writeModelCmm(
    regionsAll, outdata, orderCell, models, modelsKeep,
    toCluster=toCluster,
    allClusters=allClusters if toCluster else None,
    maxCluster=maxCluster, minModel=minModel)

In [8]:
# Dump the index list as a tab-separated table: one column per cell (sorted),
# one row per region (sorted), with an empty top-left corner in the header.
cells = sorted(indexList[indexList.keys()[0]])
header = '\t' + '\t'.join(map(str, cells)) + '\n'

rows = []
for regi in sorted(indexList):
    # the value written is the second element of each region/cell entry
    values = [indexList[regi][cel][1] for cel in cells]
    rows.append('%s\t%s\n' % (regi, '\t'.join(map(str, values))))

toWrite = header + ''.join(rows)

with open(outdata + 'indexes.tsv', 'w') as f:
    f.write(toWrite)

In [9]:
toWrite

'\tEry\tMon\tnCD4\nb-globin\t999\t2999\t1999\n'

## Get dRMSD values 

In [None]:
# For every region: load the models written under <outdata><region>/, compute
# the pairwise dRMSD between them and write one "model_i model_j value" line
# per pair to <outdata>model_distances_<region>.txt
for regi in sorted(regiones):
    print('#' * 30 + ' %s ' %regi + '#' * 30)
    # fourth field of the region info is passed on as the model resolution
    resol = regiones[regi][3]
    # The distance cutoff is parsed from the model file name of the first cell:
    # the number found between the last 'C' and the 'L' that follows it
    modelName = models[orderCell[0]][regi].split('/')[-1]
    dcutoff = float(modelName.split('C')[-1].split('L')[0])

    # folder holding the models of this region
    outpath2 = outdata + regi + '/'
    # load models
    models_loaded = metrics.load_all_models(dir_models=outpath2, resolution=resol)
    nloci = models_loaded.nloci

    print('loaded')
    # will check all loci
    zeros = [True] * nloci

    # Compute drmsd (not normalised) over the whole region
    drmsd = calc_eqv_rmsd(beg=0, end=nloci, models=models_loaded, zeros=zeros,
                          dcutoff=dcutoff, what="drmsd", normed=False)

    ## Write file
    # Opened only once the values are available and inside a context manager,
    # so a failure above neither truncates an existing result file nor leaves
    # an open handle behind.
    with open(outdata + "model_distances_%s.txt" %regi, "w") as outfile:
        # write header
        outfile.write("#%s %s %s\n" % ("Model_i","Model_j","dRMSD"))
        # write each drmsd value; keys are indexed as (model_i, model_j) pairs
        for pair in drmsd:
            outfile.write("%s %s %s\n" % (pair[0], pair[1], drmsd[pair]))

############################## b-globin ##############################
loaded
