In [None]:
import itertools
import os
import pickle
from collections import Counter
from collections import defaultdict

import networkx as nx


In [None]:
# import list of ligands to exclude (list pre-generated based on exclusion criteria)
# The first whitespace-separated token of every non-blank line is taken as a ligand ID.

ligs2excl = []

with open('ligands-to-exclude.txt','r') as file:
    for line in file:
        fields = line.split()
        # A blank (or whitespace-only) line has no tokens; skip it instead of
        # failing with IndexError on fields[0].
        if fields:
            ligs2excl.append(fields[0])


print(len(ligs2excl))

In [None]:
# get total number of residues
# (indexed below by viral protein name, e.g. total_res_dict['nsp5'])
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.

with open('total_res_dict.p','rb') as total_res_file:
    total_res_dict = pickle.load(total_res_file)


In [None]:
# set filters on PDBspheres data
# All values are strings because they are concatenated into the contacts
# file name and the output directory names further down.

# -- template selection --
datecut = 'current'       # to include all templates currently available: 'current'
resolutioncut = 'all'     # to include all resolutions: 'all'

# -- cut-offs that appear in the contacts file name --
Nccut = '15'
gdccut = '60'
N4cut = '4'
clcut = '0'
ligsizecut = '8'
            

In [None]:
# create ligand binding dictionary and calculate percentage of residues that each ligand binds
#
# Names produced by this cell (keyed by viral protein unless noted):
#   ligand_dict[prot][ligand]   -> list of distinct residues the ligand contacts (first-seen order)
#   template_dict[prot][res]    -> number of contact records for the residue, counting only
#                                  ligands that are not in the pre-generated exclusion list
#   fracres_dict[prot][ligand]  -> len(ligand_dict[prot][ligand]) / total_res_dict[prot]
#   ligs_leaveout[prot]         -> ligs2excl followed by every ligand whose fraction exceeds bind_thresh
#   all_ligs_remove             -> flat list (no protein key): ligs2excl plus every ligand left out
#                                  for at least one protein
# ligs_leaveout is also pickled to 'ligs_leaveout.p'.

ligand_dict = {}
template_dict = {}
fracres_dict = {}
ligs_leaveout = {}
all_ligs_remove = list(ligs2excl)
bind_thresh = 0.333

# The contacts file is the same for every protein, so read it once instead of
# re-opening it inside the protein loop.
with open('./CCC.confidence_centroid_contacts.'+Nccut+'_10_'+gdccut+'_'+N4cut+'_'+clcut+'.ligs_'+ligsizecut+'.nCoV.'+datecut+'.res'+resolutioncut,'r') as file:
    line_list = file.readlines()

# Group the records by viral protein in a single pass over the file.
records_by_protein = defaultdict(list)
for line in line_list:
    fields = line.split()
    if not fields:
        # blank line: nothing to parse
        continue
    name_parts = fields[0].split('.')
    prefix_parts = name_parts[0].split('_')
    # Only records whose first token starts with 'nCoV_<protein>' are used. Other
    # lines are skipped (as the contact-loading cell does) rather than being
    # attributed to whatever protein the previous line named.
    if prefix_parts[0] != 'nCoV':
        continue
    protein = prefix_parts[1]
    if protein == 'Spike':
        protein = 'S'
    records_by_protein[protein].append((name_parts, fields[-1]))

# Set copies give O(1) membership tests; the lists keep the original ordering.
pre_excluded = set(ligs2excl)
all_removed_seen = set(all_ligs_remove)

for protnow in ['E','S','ORF3a','nsp12','nsp13','nsp14','nsp15','nsp16','nsp3','nsp5','nsp7','nsp8','nsp9',\
                'nsp1','nsp2','ORF7a','nsp4','nsp10','N','ORF8']:

    ligand_dict[protnow] = {}
    template_dict[protnow] = {}
    ligs_leaveout[protnow] = list(ligs2excl)

    residues_seen = set()
    ligands_seen = set()

    for name_parts, residue_field in records_by_protein.get(protnow, []):
        # ligand
        ligand = name_parts[6]

        # residues
        binding_residues = residue_field.split(',')
        # drop the last item (presumably empty because the field ends with a
        # trailing comma - TODO confirm against the file format)
        del binding_residues[-1]

        if not binding_residues:
            continue

        ligands_seen.add(ligand)
        ligand_residues = ligand_dict[protnow].setdefault(ligand, [])
        # Ligands above bind_thresh are only added to ligs_leaveout after this
        # loop, so at this point "left out" means "in the pre-generated list".
        counts_toward_templates = ligand not in pre_excluded

        for residue in binding_residues:
            residues_seen.add(residue)

            if residue not in ligand_residues:
                ligand_residues.append(residue)

            if counts_toward_templates:
                template_dict[protnow][residue] = template_dict[protnow].get(residue, 0) + 1

    rlist = sorted(residues_seen)
    llist = sorted(ligands_seen)

    fracres_dict[protnow] = {}
    leaveout_seen = set(ligs_leaveout[protnow])

    for lig in llist:
        fracres_dict[protnow][lig] = float(len(ligand_dict[protnow][lig]))/float(total_res_dict[protnow])
        if fracres_dict[protnow][lig]>bind_thresh and lig not in leaveout_seen:
            ligs_leaveout[protnow].append(lig)
            leaveout_seen.add(lig)

    # ligands ranked by bound fraction; not used later in this cell, kept for inspection
    rfd_sorted = sorted(fracres_dict[protnow].items(), key=lambda x: x[1], reverse=True)

    for lig in ligs_leaveout[protnow]:
        if lig not in all_removed_seen:
            all_ligs_remove.append(lig)
            all_removed_seen.add(lig)


with open('ligs_leaveout.p','wb') as leaveout_file:
    pickle.dump(ligs_leaveout,leaveout_file)


In [None]:
# Load the contact ligand residues into data structures
# Filter ligands by SMILES strings and percentage of residues they bind
# Filter PDB templates by date available, resolution, GDC value

def findOccurrences(s, ch):
    """Return, in ascending order, the index of every item of `s` equal to `ch`."""
    positions = []
    for index, item in enumerate(s):
        if item == ch:
            positions.append(index)
    return positions

# Read every record of the contacts file selected by the filter settings.
with open('./CCC.confidence_centroid_contacts.'+Nccut+'_10_'+gdccut+'_'+N4cut+'_'+clcut+'.ligs_'+ligsizecut+'.nCoV.'+datecut+'.res'+resolutioncut) as M:
    wer=M.readlines()

# ncovdict[protein] is a pair of lists:
#   [0] residue pairs that occur together in one record (one entry per occurrence)
#   [1] residues, one entry per occurrence in a record
ncovdict=defaultdict(lambda: ([], [])) # first: contact pairs, second: residues
    
# Both are keyed by '<protein>.<residue>':
ligdict=defaultdict(set)    # -> set of ligand ids seen with that residue
filedict=defaultdict(set)   # -> set of record prefixes (text before the first ':')

# Running total of contact entries per protein.
all_contacts = {}
for protnow in ['E','S','ORF3a','nsp12','nsp13','nsp14','nsp15','nsp16','nsp3','nsp5','nsp7','nsp8','nsp9',\
                'nsp1','nsp2','ORF7a','nsp4','nsp10','N','ORF8']:
    all_contacts[protnow] = 0

for lin in wer:
    # Only records whose first token starts with 'nCoV' (before the first '_') are used.
    if lin.split()[0].split('.')[0].split('_')[0]=='nCoV':
        # ligand name: 7th dot-separated part of the first token
        ligand = lin.split()[0].split('.')[6]
        # protein name: the text between the first '_' after the 'nCoV_' match and the next '_'
        ncovfind=lin.find('nCoV_')
        underfind=findOccurrences(lin,'_')
        virprot=lin[(underfind[min(k for k,x in enumerate(underfind) if x>ncovfind)]+1):underfind[min(k for k,x in enumerate(underfind) if x>ncovfind)+1]]
        if virprot=='Spike':
            virprot='S'     
        # Skip ligands excluded for this protein (raises KeyError if the protein
        # has no entry in ligs_leaveout).
        if ligand not in ligs_leaveout[virprot]:  
            # ligand id: the text between the first '.' located after the start of
            # '.Sphere.' and the following '.'
            fins=findOccurrences(lin, '.')
            spherfind=lin.find('.Sphere.')
            ligid=lin[(fins[min(k for k,x in enumerate(fins) if x>spherfind)]+1):fins[1+min(k for k,x in enumerate(fins) if x>spherfind)]]
            # contacts: last whitespace-separated field, comma-separated, with any
            # trailing ',' / newline stripped from the line first
            contstr=lin.strip(',\n').split()[-1]
            conts=contstr.split(',')
            # NOTE(review): the total is updated before the validity check below, so it
            # also counts records that are then skipped - confirm this is intended.
            all_contacts[virprot] = all_contacts[virprot] + len(conts)
            fileSrc=lin[:lin.find(':')]
            # A contact is accepted only if its second-to-last character is '_'
            # (presumably '<residue>_<chain>' - TODO confirm); one bad contact
            # discards the whole record.
            nonodes=[1 if (not cont[-2]=='_') else 0 for cont in conts]
            if any(nonodes):
                continue
            ncovdict[virprot][1].extend(conts)
            for cont in conts:
                ligdict[virprot+'.'+cont].add(ligid)
                filedict[virprot+'.'+cont].add(fileSrc)
            # every unordered pair of contacts in this record
            for pair in itertools.combinations(conts,2):
                ncovdict[virprot][0].append(pair)

print(all_contacts)


In [None]:
# Load the data into weighted networkx graphs, one for each viral protein

from networkx.algorithms import community
import numpy as np

# Per-protein results of this cell
H_all = {}             # protein -> networkx graph
shared_int_dict = {}   # protein -> {(residue, residue): number of records containing both}
all_int_dict = {}      # protein -> {residue: number of contact occurrences}
nnodes_dict = {}       # protein -> number of nodes in the graph

# For each viral protein build one undirected graph:
#   nodes = residues that contact ligands, annotated with their contact count,
#           the ligands they contact and the files those contacts came from
#   edges = residue pairs seen in the same record, weighted by how often
#           (invweight is the reciprocal of that count)

for protnow in ['E','S','ORF3a','nsp12','nsp13','nsp14','nsp15','nsp16','nsp3','nsp5','nsp7','nsp8','nsp9']:
    H = nx.Graph()

    # most_common() yields (residue, count) ordered by descending count; the same
    # order is used for node insertion and for the bookkeeping dict.
    residue_counts = Counter(ncovdict[protnow][1]).most_common()
    all_int_dict[protnow] = dict(residue_counts)
    for residue, n_contacts in residue_counts:
        node_key = protnow + '.' + residue
        H.add_node(residue, contacts=n_contacts, ligands=ligdict[node_key], files=filedict[node_key])

    pair_counts = Counter(ncovdict[protnow][0]).most_common()
    shared_int_dict[protnow] = dict(pair_counts)
    for (res_a, res_b), n_shared in pair_counts:
        H.add_edge(res_a, res_b, weight=n_shared, invweight=1.0/n_shared) #np.exp(-float(conn[1])))

    H_all[protnow] = H
    nnodes_dict[protnow] = H.number_of_nodes()

In [None]:
# distribution of contacts per residue 

from matplotlib import pyplot as plt
from scipy.stats import poisson, gamma, expon, linregress
from scipy.optimize import curve_fit
from math import exp
import pickle

# Per-protein results of this cell
cntctsperres = {}   # protein -> list with the 'contacts' attribute of every graph node
area = {}           # protein -> sum of the histogram bin counts
histcounts = {}     # protein -> histogram bin counts
num_contacts = {}   # protein -> {node: 'contacts' attribute}

for protnow in ['nsp3','nsp5','nsp12','S','ORF3a','nsp13','nsp14','nsp15','nsp16','nsp9']:
    contributes = nx.get_node_attributes(H_all[protnow], 'contacts')
    graph_nodes = list(H_all[protnow])
    cntctsperres[protnow] = [contributes[nd] for nd in graph_nodes]
    num_contacts[protnow] = {nd: contributes[nd] for nd in graph_nodes}

    # histogram of contacts per residue for this protein
    plt.figure()
    histout = plt.hist(cntctsperres[protnow], bins=6500, range=(1,6500))
    plt.title(protnow)
    plt.xlabel('Contacts per residue')
    plt.ylabel('Count')
    plt.show()

    # plt.hist returns (counts, bin edges, patches)
    bincounts, binedges = histout[0], histout[1]
    histcounts[protnow] = bincounts
    area[protnow] = sum(bincounts)
    bincenters = 0.5 * (binedges[1:] + binedges[:-1])

    # summary statistics
    print('mean = '+str(np.mean(cntctsperres[protnow])))
    print('std = '+str(np.std(cntctsperres[protnow])))
    print('total = '+str(sum(cntctsperres[protnow])))
    print('total area = '+str(area[protnow]))
    

In [None]:
# distribution of contacts per residue (continued)

# Log-log scatter of the normalized contacts-per-residue histogram, one series per protein.
# sars2_normalized_counts_dict[protein] holds the full normalized histogram
# (bin count / total count), including the empty bins.

plt.figure()
labels = []

sars2_normalized_counts_dict = {}

for protnow in ['S','nsp3','nsp5','nsp9','nsp12','nsp13','nsp14','nsp16','nsp15','ORF3a']:
    normalized_counts = [float(num)/float(area[protnow]) for num in histcounts[protnow]]
    sars2_normalized_counts_dict[protnow] = normalized_counts
    labels.append(protnow)

    # Bin k (1-based) is plotted at x = log(k). Empty bins are left out of the
    # plot: log(0) is -inf and numpy emits a RuntimeWarning for it.
    contacts_axis = np.arange(1,len(normalized_counts)+1,1)
    counts_arr = np.asarray(normalized_counts)
    occupied = counts_arr > 0
    plt.scatter(np.log(contacts_axis[occupied]),np.log(counts_arr[occupied]))


plt.xlabel('Log(Contacts per residue)',fontsize=16)
plt.ylabel('Log(Normalized count)',fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(labels,fontsize=14,loc=(1.05,0.02))

#plt.savefig('figures/contacts_per_residue_plot_SARS2proteins.png')
#pickle.dump(sars2_normalized_counts_dict,open('sars2_normalized_counts_dict.p','wb'))

In [None]:
# find min and max number of residues in contact across all ligands

def res_contacts(prtn,filename):
    """Find the smallest and largest value of field 11 over the records of one protein.

    The cell that defines this function describes the value as the number of
    residues in contact with a ligand.

    Parameters
    ----------
    prtn : str
        Viral protein name as used in this notebook ('Spike' in the file is mapped to 'S').
    filename : str
        Path of the contacts file.

    Returns
    -------
    (min_res_contact, max_res_contact) : tuple of int
        The minimum only considers values greater than 0 and starts at 100, so it
        is never larger than 100; the maximum starts at 0. With no matching
        record the result is (100, 0).

    Raises
    ------
    IndexError / ValueError
        If a matching record has fewer than 12 whitespace-separated fields or a
        non-integer field 11.
    """
    min_res_contact = 100
    max_res_contact = 0

    # 'with' closes the file even if a record fails to parse
    with open(filename,'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # blank line
                continue

            # viral protein
            prefix_parts = fields[0].split('.')[0].split('_')
            # Lines without the 'nCoV_<protein>' prefix are skipped; previously they
            # inherited the protein of the preceding line (or raised
            # UnboundLocalError when they came first).
            if prefix_parts[0] != 'nCoV':
                continue
            protein = prefix_parts[1]
            if protein == 'Spike':
                protein = 'S'
            if protein != prtn:
                continue

            N4 = int(fields[11])
            if 0 < N4 < min_res_contact:
                min_res_contact = N4
            if N4 > max_res_contact:
                max_res_contact = N4

    return min_res_contact, max_res_contact


In [None]:
from networkx.algorithms import shortest_paths
from scipy.cluster.hierarchy import linkage,dendrogram
from scipy import cluster
from matplotlib import pyplot as plt
import pickle


In [None]:
# Find max value across series of lists

def max_nested(list_of_lists):
    """Return the largest element found in any of the inner sequences.

    Raises ValueError if the outer sequence or any inner sequence is empty.
    """
    return max(max(inner) for inner in list_of_lists)

In [None]:
# Cut tree at specific height and find relevant clusters - threshold based on number of contacts

def cut_res_clust(protnow,comout,ordered_list_of_res,cut_height,H_all,cluster_ligand_dict,min_cp_size,max_cp_size,all_contacts,min_contacts_per_res=90,min_lig_frac=0.75):
    """Cut the residue dendrogram at one height and keep the clusters that qualify.

    A cluster qualifies when its mean 'contacts' node attribute is greater than
    `min_contacts_per_res` and its size lies between `min_cp_size` and
    `max_cp_size` (both inclusive for integer sizes).

    Parameters
    ----------
    protnow : str
        Key of the protein graph in `H_all`.
    comout : linkage matrix
        Passed to scipy's `cut_tree`.
    ordered_list_of_res : list
        Residues in the order of the observations behind `comout`.
    cut_height : float
        Height at which the tree is cut; also the key written into `cluster_ligand_dict`.
    H_all : dict
        protein -> networkx graph whose nodes carry 'contacts' and 'ligands' attributes.
    cluster_ligand_dict : dict
        Updated in place: `cluster_ligand_dict[cut_height]` is replaced by
        {cluster index: (cluster size, [(ligand, fraction), ...])} for the qualifying clusters.
    min_cp_size, max_cp_size : int
        Allowed cluster size range.
    all_contacts : dict
        Not used; kept so existing calls keep working.
    min_contacts_per_res : float, optional
        Mean-contacts threshold (default 90).
    min_lig_frac : float, optional
        A ligand is listed for a cluster when it is attached to at least this
        fraction of the cluster's residues (default 0.75).

    Returns
    -------
    (cplist, cluster_ligand_dict, n_clusters)
        cplist: the qualifying clusters (lists of residues);
        cluster_ligand_dict: the dict passed in;
        n_clusters: total number of clusters at this height, qualifying or not.
    """
    cutree = cluster.hierarchy.cut_tree(comout,height=cut_height)

    # Group residues by cluster label in a single pass.
    labels = [int(cutree[k][0]) for k in range(len(ordered_list_of_res))]
    clustall = [[] for _ in range(max(labels)+1)]
    for res, label in zip(ordered_list_of_res, labels):
        clustall[label].append(res)

    contributes=nx.get_node_attributes(H_all[protnow],'contacts')
    ligributes=nx.get_node_attributes(H_all[protnow],'ligands')
    n_clusters=len(clustall)
    cplist=[]
    cluster_ligand_dict[cut_height]={}

    for m,clust in enumerate(clustall):
        if not clust:
            # a label with no residues cannot qualify (and would divide by zero)
            continue

        totalContacts=sum(contributes[res] for res in clust)
        contactsperres = totalContacts/float(len(clust))
        if not (contactsperres>min_contacts_per_res and len(clust)>(min_cp_size-1) and len(clust)<(max_cp_size+1)):
            continue

        # For each ligand, count the residues of the cluster it is attached to
        # and convert that count to a fraction of the cluster size.
        cout=Counter(itertools.chain.from_iterable(list(ligributes[res]) for res in clust))
        commadj=[(lig,cnt,len(clust),float(cnt)/len(clust)) for lig,cnt in cout.most_common()]
        commadj.sort(key=lambda tup: -tup[3])

        # commadj is sorted by descending fraction, so stop at the first ligand below the cut-off
        cluster_ligand_list=[]
        for entry in commadj:
            if entry[3]<min_lig_frac:
                break
            cluster_ligand_list.append((entry[0],entry[3]))

        cplist.append(clust)
        cluster_ligand_dict[cut_height][m]=(len(clust),cluster_ligand_list)

    return cplist, cluster_ligand_dict, n_clusters

In [33]:
# identify unique clusters - bottom to top of dendrogram (largest unique within size range)

def unique_clusters(qvec,cpdict):
    """Collapse the clusters found at successive cut heights into a unique set.

    Heights are visited in the order given by `qvec`. Each cluster of the
    current height replaces every previously kept cluster whose residues are a
    subset of (or equal to) its own, so walking the dendrogram bottom to top
    keeps the largest version of each cluster.

    Parameters
    ----------
    qvec : iterable
        Cut heights, used as keys into `cpdict`.
    cpdict : dict
        height -> list of clusters (each a list of residues).

    Returns
    -------
    list
        The retained clusters. A cluster that absorbed others (or re-appeared)
        sits at the position where it was last added.
    """
    cpfinal = []

    for q in qvec:
        for check_cluster in cpdict[q]:
            check_set = set(check_cluster)
            # Build a new list instead of calling cpfinal.remove() while iterating
            # over cpfinal: removing during iteration skips the element after each
            # removal, which left subset clusters in the result.
            cpfinal = [kept for kept in cpfinal if not set(kept).issubset(check_set)]
            if check_cluster not in cpfinal:
                cpfinal.append(check_cluster)

    return cpfinal



In [34]:
# Save final clusters to output file

def save_final_clusters(prtn,final_clusters,directory):
    """Write the final clusters of one protein to two text files in `directory`.

    `clusters_<prtn>.txt` has one line per cluster: '<n>:<TAB>' followed by
    every item and a ','. `clusters_<prtn>_formatted.txt` has the same layout
    but each item loses its last two characters and is followed by ', '.
    Clusters are numbered from 1.

    Parameters
    ----------
    prtn : str
        Protein name used in the file names.
    final_clusters : list of list
        Clusters to write; items must support slicing for the formatted file.
    directory : str
        Output directory; created (with parents) if it does not exist.

    Returns
    -------
    None
    """
    # os.makedirs replaces a shelled-out 'mkdir' inside a bare except: it creates
    # missing parent directories, does not fail when the directory already
    # exists and reports real errors (e.g. permissions) instead of hiding them.
    os.makedirs(directory, exist_ok=True)

    with open(directory+'/clusters_'+prtn+'.txt','w') as f:
        for i, clust in enumerate(final_clusters, start=1):
            f.write('%d:\t' % i)
            for item in clust:
                f.write(str(item)+',')
            f.write('\n')

    with open(directory+'/clusters_'+prtn+'_formatted.txt','w') as f:
        for i, clust in enumerate(final_clusters, start=1):
            f.write('%d:\t' % i)
            for item in clust:
                f.write(str(item[0:-2])+', ')
            f.write('\n')

    return
        

In [35]:
# Renumber clusters for current date for nsp12 and Spike

def renumber_final_clusters(prtn,final_clusters_temp):
    """Return the clusters in the hand-picked order used for nsp12 and Spike.

    The first four clusters are permuted (nsp12: 4th, 1st, 2nd, 3rd;
    S: 1st, 2nd, 4th, 3rd); any further clusters follow in their original order.
    For every other protein a copy of the list is returned unchanged.

    Parameters
    ----------
    prtn : str
        Protein name.
    final_clusters_temp : list
        Clusters in their original order.

    Returns
    -------
    list
        A new list; the input is not modified.

    Raises
    ------
    IndexError
        If `prtn` is 'nsp12' or 'S' and fewer than four clusters are given.
    """
    leading_order = {'nsp12': (3, 0, 1, 2), 'S': (0, 1, 3, 2)}.get(prtn)

    # Proteins without a renumbering rule keep their order (this used to fail
    # with UnboundLocalError).
    if leading_order is None:
        return list(final_clusters_temp)

    if len(final_clusters_temp) < len(leading_order):
        raise IndexError('renumbering %s needs at least %d clusters, got %d'
                         % (prtn, len(leading_order), len(final_clusters_temp)))

    final_clusters_renum = [final_clusters_temp[i] for i in leading_order]
    # Keep the clusters after the permuted ones; the nsp12 rule used to drop them.
    final_clusters_renum.extend(final_clusters_temp[len(leading_order):])

    return final_clusters_renum
        

In [36]:
# make dictionary with keys = residues, values = ligands they bind
# ligands filtered by SMILES string and percentage of residues they bind

def reslig_dict(prtn,filename,leaveout=None):
    """Map every residue of one protein to the ligands that contact it.

    Parameters
    ----------
    prtn : str
        Viral protein name as used in this notebook ('Spike' in the file is mapped to 'S').
    filename : str
        Path of the contacts file.
    leaveout : iterable of str, optional
        Ligands to ignore. When omitted, the module-level `ligs_leaveout[prtn]`
        is used (looked up only once a record of `prtn` is found).

    Returns
    -------
    dict
        residue -> list of distinct ligands, in the order they first appear in the file.
    """
    res_lig_dict = {}
    excluded = None if leaveout is None else set(leaveout)

    # 'with' closes the file even if a record fails to parse
    with open(filename,'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # blank line
                continue

            # viral protein
            name_parts = fields[0].split('.')
            prefix_parts = name_parts[0].split('_')
            # Lines without the 'nCoV_<protein>' prefix are skipped; previously they
            # inherited the protein of the preceding line (or raised
            # UnboundLocalError when they came first).
            if prefix_parts[0] != 'nCoV':
                continue
            protein = prefix_parts[1]
            if protein == 'Spike':
                protein = 'S'
            if protein != prtn:
                continue

            # ligand
            ligand = name_parts[6]
            if excluded is None:
                excluded = set(ligs_leaveout[prtn])
            if ligand in excluded:
                continue

            # residues
            binding_residues = fields[-1].split(',')
            # drop the last item (presumably empty because the field ends with a
            # trailing comma - TODO confirm against the file format)
            del binding_residues[-1]

            for residue in binding_residues:
                ligands_of_residue = res_lig_dict.setdefault(residue, [])
                if ligand not in ligands_of_residue:
                    ligands_of_residue.append(ligand)

    return res_lig_dict


In [37]:
# make dictionary with ligands that bind to residues in each cluster

def cluster_dict(final_clusters,reslig_dict):
    """List, for every cluster, the ligands bound to its residues.

    Parameters
    ----------
    final_clusters : list of list
        Clusters of residues.
    reslig_dict : dict
        str(residue) -> list of ligands bound to that residue.

    Returns
    -------
    dict
        1-based cluster number -> {'residues': the cluster,
        'ligands': [(ligand, fraction), ...]} where fraction is the share of the
        cluster's residues the ligand binds, sorted by descending fraction
        (ties keep first-seen order).

    Raises
    ------
    KeyError
        If a residue of a cluster is missing from `reslig_dict`.
    """
    clusters_out = {}

    for clind, clust in enumerate(final_clusters, start=1):
        # One pass over the cluster: count, per ligand, the residues it binds.
        # A plain dict keeps the ligands in first-seen order.
        residues_bound = {}
        for res in clust:
            # dict.fromkeys removes repeats so a residue counts once per ligand
            for lig in dict.fromkeys(reslig_dict[str(res)]):
                residues_bound[lig] = residues_bound.get(lig, 0) + 1

        cluster_size = float(len(clust))
        cluster_ligand_list = [(lig, float(n_present)/cluster_size)
                               for lig, n_present in residues_bound.items()]

        clusters_out[clind] = {
            'residues': clust,
            'ligands': sorted(cluster_ligand_list, key=lambda x: x[1], reverse=True),
        }

    return clusters_out


In [38]:
# Save final ligands to output file
# Filter ligands by SMILES strings and percentage of residues they bind

def save_final_ligands(prtn,cluster_dict,directory,leaveout=None):
    """Write the ligands of every cluster of one protein to `ligands_<prtn>.txt`.

    Each line is '<cluster key>:<TAB>' followed by str() of every ligand entry
    and a ','. Entries whose ligand (first element) is excluded are omitted.

    Parameters
    ----------
    prtn : str
        Protein name used in the file name.
    cluster_dict : dict
        integer cluster key -> dict with a 'ligands' list of (ligand, fraction) entries.
    directory : str
        Output directory; created (with parents) if it does not exist.
    leaveout : iterable of str, optional
        Ligands to omit. When omitted, the module-level `ligs_leaveout[prtn]`
        is used (looked up only once a ligand entry is found).

    Returns
    -------
    None
    """
    # os.makedirs replaces a shelled-out 'mkdir' inside a bare except: it creates
    # missing parent directories, does not fail when the directory already
    # exists and reports real errors instead of hiding them.
    os.makedirs(directory, exist_ok=True)

    excluded = None if leaveout is None else set(leaveout)

    with open(directory+'/ligands_'+prtn+'.txt','w') as f:
        for key,value in cluster_dict.items():
            f.write('%d:\t' % key)
            for item in value['ligands']:
                if excluded is None:
                    excluded = set(ligs_leaveout[prtn])
                if item[0] not in excluded:
                    f.write(str(item)+',')
            f.write('\n')

    return
        

In [39]:
# bar plot of consensus cluster info
# Filter ligands by SMILES strings and percentage of residues they bind

def pocket_info_plot(prot,clust_dict,lig_bound_frac):
    """Bar plot of cluster (pocket) size and ligand count for one protein.

    For every cluster two bars are drawn on twin y-axes: the number of residues
    (left axis) and the number of ligands (right axis). A ligand is counted when
    its fraction is at least `lig_bound_frac` and it is not in the module-level
    `ligs_leaveout[prot]`.

    Parameters
    ----------
    prot : str
        Protein name; used as the plot title and as key into `ligs_leaveout`.
    clust_dict : dict
        cluster key -> {'residues': [...], 'ligands': [(ligand, fraction), ...]}.
    lig_bound_frac : float
        Minimum fraction for a ligand to be counted.

    Returns
    -------
    None
    """
    # The x-axis always has room for at least this many pockets.
    min_slots = 13

    clsize = []
    nligs = []
    labels = []

    for key,value in clust_dict.items():
        clsize.append(len(value['residues']))
        labels.append(str(key))
        nligs.append(sum(1 for lig in value['ligands']
                         if lig[1]>=lig_bound_frac and lig[0] not in ligs_leaveout[prot]))

    # Pad to min_slots with empty pockets. The labels are padded as well so the
    # number of tick labels always matches the number of tick positions, and
    # more than min_slots clusters no longer leave x undefined.
    n_slots = max(min_slots, len(labels))
    padding = n_slots - len(labels)
    clsize.extend([0]*padding)
    nligs.extend([0]*padding)
    labels.extend(['']*padding)
    x = 1.25*np.arange(1,n_slots+1)

    width = 0.4  # width of bars

    fig = plt.figure()
    ax = fig.add_subplot(111) # Create matplotlib axes
    ax2 = ax.twinx() # create another set of axes that shares the same x-axis as ax

    rects1 = ax.bar(x - width/2, clsize, width, label='Residues', color='tab:blue')
    rects2 = ax2.bar(x + width/2, nligs, width, label='Ligands', color='tab:orange')

    ax.set_ylabel('Number of residues',fontsize=15)
    ax.set_ylim([0,45])
    ax2.set_ylabel('Number of ligands',fontsize=15)
    ax2.set_ylim([0,250])
    ax.yaxis.label.set_color('tab:blue')
    ax2.yaxis.label.set_color('tab:orange')
    ax.spines['left'].set_color('tab:blue')
    ax2.spines['right'].set_color('tab:orange')
    ax.tick_params(axis='y', colors='tab:blue')
    ax2.tick_params(axis='y', colors='tab:orange')
    ax.set_xlabel('Pocket',fontsize=15)
    ax.set_title(prot,fontsize=15)
    ax.set_xticks(x)
    ax.set_xticklabels(labels,fontsize=13)
    ax.set_yticks([0,5,10,15,20,25,30,35,40,45])
    ax.set_yticklabels(['0','5','10','15','20','25','30','35','40','45'],fontsize=13)
    ax2.set_yticks([0,25,50,75,100,125,150,175,200,225,250])
    ax2.set_yticklabels(['0','25','50','75','100','125','150','175','200','225','250'],fontsize=13)
    plt.show()
    #plt.savefig('figures/residue_clusters_bar_chart_'+prot+'.png')


    return

In [None]:
# Run clustering on viral proteins 

# Output locations and input file, all derived from the filter settings.
maindirectory = 'cluster-output-ncov-residues-shortestpath-CCC-'+Nccut+'-10-'+gdccut+'-'+N4cut+'-'+clcut+'.ligs_'+ligsizecut

directory = maindirectory+'/date_'+datecut+'_res'+resolutioncut

filename = './CCC.confidence_centroid_contacts.'+Nccut+'_10_'+gdccut+'_'+N4cut+'_'+clcut+'.ligs_'+ligsizecut+'.nCoV.'+datecut+'.res'+resolutioncut

# Create maindirectory and the per-date subdirectory up front. os.makedirs is a
# no-op when they already exist and raises on real failures, unlike a
# shelled-out 'mkdir' wrapped in a bare except.
os.makedirs(directory, exist_ok=True)

cpall=defaultdict(list)   # protein -> qualifying clusters found at every cut height
cldict={}                 # protein -> cluster/ligand dictionary of the final clusters
for protnow in ['nsp12','S','nsp5','nsp3','ORF3a','nsp13','nsp14','nsp15','nsp16','nsp9']:
    print(protnow)
    # Distance between residues = weighted shortest path using invweight
    # (reciprocal of the number of shared records).
    Q=dict(shortest_paths.shortest_path_length(H_all[protnow],weight='invweight'))
    contall=nx.get_node_attributes(H_all[protnow],'contacts')
    cont_thresh=1
    # Keep residues with more than cont_thresh contacts, ordered by the integer
    # obtained from the name without its first and last two characters.
    ordered_list_of_res=sorted([x for x in Q.keys() if contall[x]>cont_thresh],key=lambda qk: int(qk[1:-2]))
    # Condensed (upper-triangle) distance vector; residue pairs with no path get distance 1.
    pdistmat=[]
    for k,res in enumerate(ordered_list_of_res):
        pdistmat.extend([Q[res][ordered_list_of_res[x]] if ordered_list_of_res[x] in Q[res] else 1 for x in range(k+1,len(ordered_list_of_res))])

    # NOTE(review): this try also covers everything after linkage(), so any
    # ValueError raised further down is reported as 'Empty distance matrix'.
    try:
        comout=linkage(pdistmat,method='complete',optimal_ordering=True)

        #plt.figure(figsize=(100,25))
        #dendrogram(comout,labels=ordered_list_of_res,leaf_font_size=10)
        #plt.title(protnow)

        mrc = res_contacts(protnow,filename)
        min_cp_size=10

        max_cp_size=mrc[1]

        print('min',mrc[0],'max',mrc[1])
        cluster_ligand_dict={}
        cpdict={}
        n_clusters=100
        q=0.001
        qvec = []

        # Raise the cut height in steps of 0.001 until the tree collapses into a
        # single cluster, collecting the qualifying clusters at every height.
        while n_clusters > 1:
            crc=cut_res_clust(protnow,comout,ordered_list_of_res,q,H_all,cluster_ligand_dict,min_cp_size,max_cp_size,all_contacts)
            cplist=crc[0]
            cpdict[q]=crc[0]
            cpall[protnow].append(cplist)
            cluster_ligand_dict=crc[1]
            n_clusters=crc[2]
            qvec.append(q)
            q=q+0.001
        # the last height is the one where only a single cluster remained
        del qvec[-1]

        final_clusters_temp=unique_clusters(qvec,cpdict)

        if datecut=='current' and (protnow=='nsp12' or protnow=='S'):
            final_clusters = renumber_final_clusters(protnow,final_clusters_temp)
        else:
            final_clusters = final_clusters_temp

        save_final_clusters(protnow,final_clusters,directory)

        rldict = reslig_dict(protnow,filename)
        cldict[protnow] = cluster_dict(final_clusters,rldict)

        save_final_ligands(protnow,cldict[protnow],directory)
        lig_bound_frac = 0.5
        pocket_info_plot(protnow,cldict[protnow],lig_bound_frac)

        # rewritten after every protein so the file always holds the results so far
        with open(directory+'/cldict.p', 'wb') as cldict_file:
            pickle.dump(cldict,cldict_file)

    except ValueError:
        print('Empty distance matrix')

    print('---:::::---:::::::---')
    
    