In [1]:
import os, time, pickle, sys, math
import subprocess
from collections import defaultdict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pymol import cmd
from sklearn.metrics import pairwise_distances
from sklearn.cluster import AgglomerativeClustering
import pymesh

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import DataStructs
from rdkit.ML.Cluster import Butina
from scipy.cluster.hierarchy import fcluster, linkage, single
from scipy.spatial.distance import pdist

In [2]:
import pymol

### Step 0: load PDBbind data

In [3]:
# Load the PDBbind index file: the first 6 lines are skipped and fields are
# separated by two spaces.
# A separator longer than one character is interpreted as a regular expression,
# which only the python parser engine supports. Requesting that engine
# explicitly avoids the ParserWarning emitted when pandas falls back to it.
pdbbind_table = pd.read_csv('index/INDEX_general_PL_name.2020', sep='  ', skiprows=6,
                            header=None, engine='python')
pdbbind_table.columns = ['pdbid', 'year', 'uniprot', 'name']
pdbbind_table.head(3)

  """Entry point for launching an IPython kernel.


Unnamed: 0,pdbid,year,uniprot,name
0,6mu1,2018,P29994,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1"
1,3t8s,2011,P29994,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1"
2,1n4k,2002,P11881,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1"


### get protein sequence

In [4]:
def get_seq(pdbid, chains):
    """Return the sequence of the chain with the most atoms near the ligand.

    The benchmark PDB for ``pdbid``/``chains`` and the PDBbind ligand SDF are
    loaded into a re-initialised PyMOL session. Atoms matched by the selection
    '<pdbid>_ligand expand 8 and <pdbid>_<chains>' are counted per chain,
    chains whose FASTA sequence is more than 50% '?' are discarded, and the
    sequence of the remaining chain with the highest atom count is returned.

    Returns None when no sequence is stored under the chosen chain's name.
    """
    cmd.reinitialize()
    cmd.load("../outputPDB/01-benchmark_pdbs/{}_{}.pdb".format(pdbid, chains))
    cmd.load("../PDBbind2020/pdbbind/{}/{}_ligand.sdf".format(pdbid, pdbid))
    cmd.select('seq', '{}_ligand expand 8 and {}_{}'.format(pdbid, pdbid, chains))

    # Per-chain count of selected atoms; the iterate expression looks the
    # counter up under the name 'dict_info'.
    atom_counts = defaultdict(lambda :0)
    cmd.iterate('seq', 'dict_info[chain] += 1', space={'dict_info': atom_counts})
    atom_counts = dict(atom_counts)

    # Parse the FASTA text of everything loaded into {record name: sequence}.
    sequences = {}
    record = ""
    residues = ""
    for fasta_line in cmd.get_fastastr('all').split('\n'):
        if not fasta_line:
            continue
        if fasta_line.startswith('>'):
            if record != "":
                sequences[record] = residues
            record = fasta_line[1:].strip()
            residues = ""
        else:
            residues += fasta_line
    sequences[record] = residues

    # Discard chains whose sequence is mostly unknown ('?') residues.
    mostly_unknown = []
    for chain_id in atom_counts:
        record = '{}_{}_{}'.format(pdbid, chains, chain_id)
        residues = sequences[record]
        if (residues.count("?") / len(residues)) > 0.50:
            sequences.pop(record)
            mostly_unknown.append(chain_id)
    for chain_id in mostly_unknown:
        atom_counts.pop(chain_id)

    # Counts are always >= 1, so max() picks the same (first) winner as a
    # strict "greater than" scan; with no chain left the name is empty.
    best_chain = max(atom_counts, key=atom_counts.get) if atom_counts else ""
    return sequences.get('{}_{}_{}'.format(pdbid, chains, best_chain))

In [5]:
# Load the pickled task list. Each entry is '<pdbid>_<chains>' followed by a
# space and three underscore-separated numbers.
# NOTE: pickle.load executes arbitrary code; only load files you trust.
with open('../list_task', 'rb') as task_file:
    list_task = pickle.load(task_file)
list_task[:10]

['3ehx_A -2.292_-4.242_-5.148',
 '1zz2_A 33.006_74.208_18.441',
 '5m6m_A -14.204_-18.755_-5.404',
 '3ds6_A 31.704_26.103_62.549',
 '2wks_ABCDEF 3.953_14.697_-22.198',
 '4mho_A 35.105_24.185_31.026',
 '3t6y_ABCF -7.532_26.641_-3.662',
 '5ito_A 19.472_-7.039_-5.581',
 '5fnq_A 17.216_64.595_27.844',
 '1gi4_A 42.854_-2.139_29.450']

In [6]:
# Extract one pocket sequence per task entry, keyed by PDB id.
# Entries for which no sequence can be obtained are skipped.
dict_seq = {}
for i, item in enumerate(list_task):
    print(i, '\r', end="")
    pdbid_chains, x = item.split(' ')
    pdbid, chains = pdbid_chains.split('_')
    try:
        seq = get_seq(pdbid, chains)
    except Exception:
        # Best effort: a structure that cannot be processed is skipped.
        # Unlike a bare `except:`, this still lets Ctrl-C interrupt the loop.
        continue
    if seq is None:
        continue
    # Unknown residues are reported as '?'; store them as 'X'.
    dict_seq[pdbid] = seq.replace('?', 'X')

 PyMOL not running, entering library mode (experimental)
19442 

In [7]:
# Number of PDB ids for which a sequence was extracted.
len(dict_seq)

19393

In [8]:
# Cache the extracted sequences; the context manager guarantees the file is
# flushed and closed.
with open('dict_seq', 'wb') as f:
    pickle.dump(dict_seq, f)

In [9]:
# Reload the cached sequences (closes the file handle when done).
# NOTE: pickle.load executes arbitrary code; only load files you trust.
with open('dict_seq', 'rb') as f:
    dict_seq = pickle.load(f)

In [10]:
# Per-sequence length and number of 'X' characters, in dict_seq order.
all_seqs = list(dict_seq.values())
seq_len = np.array([len(s) for s in all_seqs])
seq_X = np.array([s.count("X") for s in all_seqs])

# Deduplicate the sequences; unique_idx maps every entry to its unique sequence.
unique_seq, unique_idx = np.unique(all_seqs, return_inverse=True)
print(len(unique_seq), len(unique_idx))

# Sequence -> position in unique_seq.
seq_2_id = dict(zip(unique_seq, range(len(unique_seq))))

12097 19393


In [14]:
# Sanity checks: shortest sequence length and the largest fraction of 'X'
# characters found in any single sequence.
print('min len', seq_len.min())
print('most X', (seq_X / seq_len).max())

min len 24
most X 0.1390728476821192


In [12]:
# PDB ids whose sequence consists of more than 10% 'X' characters.
np.array(list(dict_seq.keys()))[(seq_X / seq_len) > 0.1]

array(['6d6t'], dtype='<U4')

In [13]:
# The unique sequences that consist of more than 10% 'X' characters.
unique_seq[np.array([(seq.count('X') / len(seq)) > 0.1 for seq in unique_seq])]

array(['GDVTVILNNLLEGYDNKLRPDIGVKPTLIHTDMYVNSIGPVNAINMEYTIDIFFAQTWYDRRLKFNSTIKVLRLNSNMVGKIWIPDTFFRNSKKADAHWITTPNRMLRIWNDGRVLYTLRLTIDAECQLQLHNDEHSCPLEFSSYGYPREEIVYQWKRSSVEVGDTRSWRLYQFSFVGLRNTTEVVKTTSGDYVVMSVYFDLSRRMGYFTIQTYIPCTLIVVLSWVSFWINKDAVPARTSLGITTVLTMTTLSTIARKSLXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'],
      dtype='<U1718')

In [76]:
# np.array(list(dict_seq.keys()))[np.array(list(dict_seq.values())) == 'GDQNATXG']

In [17]:
# Number of 350-sequence chunks for the 12097 unique sequences printed above;
# the result (about 34.6) is just under the 35 chunk files written below.
12097  / 350

34.56285714285714

In [18]:
# Write every unique sequence to one FASTA file, using its position in
# unique_seq as the record id. The context manager closes the file even if a
# write fails.
with open('./intermediate_outputs/represent_pocket_seqs.fasta', 'w') as fw:
    for i, seq in enumerate(unique_seq):
        fw.write('>'+str(i)+'\n'+seq+'\n')

In [19]:
# Split the unique sequences into 35 FASTA chunk files of up to 350 records
# each; record ids are the positions in unique_seq. One file is open at a time
# and is closed even if a write fails.
chunk_size = 350
n_chunks = 35  # the alignment launch cell also iterates over range(35)
if len(unique_seq) > chunk_size * n_chunks:
    # Fail before writing anything instead of hitting an IndexError half-way.
    raise ValueError('{} sequences do not fit into {} chunks of {}'.format(
        len(unique_seq), n_chunks, chunk_size))
for l in range(n_chunks):
    with open('./intermediate_outputs/represent_pocket_seqs_{}.fasta'.format(l), 'w') as f:
        for i in range(l * chunk_size, min((l + 1) * chunk_size, len(unique_seq))):
            f.write('>'+str(i)+'\n'+unique_seq[i]+'\n')

In [20]:
# Write the per-chunk alignment script. It takes the chunk number as $1,
# creates a marker file under ./tmp_sh while running, aligns the chunk against
# the full FASTA file with ssw_test and removes the marker afterwards.
with open('sh05_align.sh', 'w') as f:
    f.write('L=$1\ntouch ./tmp_sh/sh05_running_$L\n\
export LD_LIBRARY_PATH=$PWD/smith-waterman-src/:$LD_LIBRARY_PATH\n\
./smith-waterman-src/ssw_test -p ./intermediate_outputs/represent_pocket_seqs_$L.fasta ./intermediate_outputs/represent_pocket_seqs.fasta > ./intermediate_outputs/align_$L.out\n\
rm  ./tmp_sh/sh05_running_$L\n')

In [21]:
# Launch one alignment job per FASTA chunk in parallel and block until all of
# them have finished, so the following cells never read partial output when
# the notebook is executed with "Run All".
align_jobs = [subprocess.Popen(['sh', './sh05_align.sh', str(l)]) for l in range(35)]
failed_chunks = [l for l, job in enumerate(align_jobs) if job.wait() != 0]
if failed_chunks:
    # Only report: a non-zero status may come from the marker-file commands
    # rather than from the alignment itself.
    print('sh05_align.sh exited with a non-zero status for chunks:', failed_chunks)

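# Wait until every sh05_align.sh job has finished (no ./tmp_sh/sh05_running_* marker files remain) before running the next cell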

In [22]:
# Concatenate the per-chunk alignment outputs into a single file; the value
# displayed below is the exit status returned by os.system.
os.system('cat ./intermediate_outputs/align_* > ./intermediate_outputs/align.out')

0

In [23]:
def check_symmetric(a, tol=1e-3):
    """Return whether `a` matches its own transpose.

    The comparison is element-wise via np.allclose with absolute tolerance
    `tol` (np.allclose's default relative tolerance also applies).
    """
    transposed = a.T
    return np.allclose(a, transposed, atol=tol)

def get_protein_vocabulary_dict(fasta_name):
    """Map every FASTA record id in `fasta_name` to its 0-based record position.

    A record id is the text after '>' on a header line, without the trailing
    newline. Returns a dict {id: position}; if an id occurs more than once the
    position of its last occurrence is kept.
    """
    protein_vocabulary_dict = {}
    # The context manager closes the file even if reading fails.
    with open(fasta_name) as f:
        i = 0
        for line in f:
            if line.startswith('>'):
                # Strip only the newline, so a header on a final line without
                # a trailing newline does not lose its last character.
                protein_vocabulary_dict[line[1:].rstrip('\n')] = i
                i += 1
    print('protein_vocabulary_dict', len(protein_vocabulary_dict))
    return protein_vocabulary_dict

def sim_aquire(target_name, query_name, output_name):
    """Build a normalized pairwise similarity matrix from alignment output.

    Parameters
    ----------
    target_name, query_name : str
        FASTA files whose record ids define the rows (target) and columns
        (query) of the matrix.
    output_name : str
        Alignment output made of four-line records. The last space-separated
        token of the first and second line is the target and query id, the
        second whitespace-separated token of the third line is the integer
        score, and the fourth line is ignored.

    Returns
    -------
    (p_simi, target_list)
        p_simi[i, j] is score(i, j) / (sqrt(score(i, i)) * sqrt(score(j, j)));
        target_list holds the record ids in row order.

    Raises AssertionError if the raw or normalized matrix is not symmetric, or
    if the target and query id lists differ.
    """
    target_dict = get_protein_vocabulary_dict(target_name)
    query_dict = get_protein_vocabulary_dict(query_name)
    p_simi = np.zeros((len(target_dict), len(query_dict)))

    # parse results
    count = 0
    with open(output_name, 'r') as f:
        line = f.readline()
        while line:
            count += 1
            if count %100000 == 0:
                print('processed lines', count // 10000, '\r', end='')
            a = line.strip('\n').split(' ')[-1]
            b = f.readline().strip('\n').split(' ')[-1]
            c = float(int(f.readline().strip('\n').split( )[1]))
            f.readline()  # fourth line of the record carries nothing we need
            p_simi[target_dict[a], query_dict[b]] = c
            line = f.readline()

    assert check_symmetric(p_simi)

    # normalize
    # Row-wise vectorized form of
    #   p[i, j] / (sqrt(p[i, i]) * sqrt(p[j, j]) + 1e-12)   for i != j
    #   p[i, i] / (sqrt(p[i, i]) * sqrt(p[i, i]))           on the diagonal
    # The raw diagonal is saved first because every row update needs it.
    self_score = p_simi.diagonal().copy()
    self_root = np.sqrt(self_score)
    for i in range(p_simi.shape[0]):
        p_simi[i, :] /= (self_root[i] * self_root + 1e-12)
    diag_idx = np.arange(p_simi.shape[0])
    p_simi[diag_idx, diag_idx] = self_score / (self_root * self_root)
    print('p_simi', p_simi.shape)
    assert check_symmetric(p_simi)

    target_list = ['']*len(target_dict)
    for pid, idx in target_dict.items():
        target_list[idx] = pid
    print('target_list', len(target_list))

    query_list = ['']*len(query_dict)
    for pid, idx in query_dict.items():
        query_list[idx] = pid
    print('query_list', len(query_list))
    assert target_list == query_list
    return p_simi, target_list

# All-vs-all similarity: the same FASTA file is used as target and as query.
fasta_path = './intermediate_outputs/represent_pocket_seqs.fasta'
prot_sim_mat, pid_list = sim_aquire(fasta_path, fasta_path, './intermediate_outputs/align.out')

# Cap similarities at 1.
prot_sim_mat[prot_sim_mat > 1] = 1

protein_vocabulary_dict 12097
protein_vocabulary_dict 12097
p_simi (12097, 12097) 0 1140   3350      7710  7970    9910   13570  
target_list 12097
query_list 12097


In [24]:
def protein_clustering(pid_list, sim_mat, thresholds=(0.3, 0.4, 0.5, 0.6)):
    """Single-linkage cluster proteins on the distance 1 - similarity.

    Parameters
    ----------
    pid_list : sequence
        Protein ids, in the row/column order of `sim_mat`.
    sim_mat : 2-D array
        Pairwise similarity matrix; only the part above the diagonal is read.
    thresholds : iterable of float, optional
        Distance cut-offs at which the linkage tree is cut.

    Returns
    -------
    dict
        {threshold: {protein id: cluster label}}.
    """
    cluster_dict = {}
    print('start protein clustering...')
    # Condensed distance vector: the upper triangle, row by row. Concatenating
    # array slices avoids building a Python list with one float per pair.
    P_dist = np.concatenate([1 - sim_mat[i, (i+1):] for i in range(sim_mat.shape[0])])
    P_link = single(P_dist)
    for thre in thresholds:
        P_clusters = fcluster(P_link, thre, 'distance')
        # np.bincount counts every label in one pass instead of one full scan
        # of the label list per cluster.
        cluster_sizes = np.bincount(P_clusters)
        print('thre', thre, 'total num of proteins', len(pid_list), 'num of clusters', P_clusters.max(), 'max length', cluster_sizes.max())
        P_cluster_dict = {pid_list[i]:P_clusters[i] for i in range(len(pid_list))}
        cluster_dict[thre] = P_cluster_dict
    return cluster_dict

# Cluster the unique sequences; the result maps threshold -> {sequence id: cluster label}.
p_cluster_dict = protein_clustering(pid_list, prot_sim_mat)

start protein clustering...
thre 0.3 total num of proteins 12097 num of clusters 3225 max length 202
thre 0.4 total num of proteins 12097 num of clusters 2977 max length 206
thre 0.5 total num of proteins 12097 num of clusters 2698 max length 256
thre 0.6 total num of proteins 12097 num of clusters 2274 max length 786


In [25]:
# Attach a protein cluster label per threshold to every table row.
for thre in [0.3, 0.4, 0.5, 0.6]:
    thre_clusters = p_cluster_dict[thre]
    # Rows without an extracted sequence all share one extra label, placed
    # after the largest real cluster label (computed once per threshold).
    no_seq_cluster = max(thre_clusters.values())+1
    count_error = 0
    cluster_list = []
    for pdbid in pdbbind_table['pdbid']:
        try:
            pid = str(seq_2_id[dict_seq[pdbid]])
            cluster_list.append(thre_clusters[pid])
        except KeyError:
            # Only a missing lookup means "no sequence"; any other error is a
            # real problem and is allowed to surface.
            print('no sequence error:', pdbid)
            count_error += 1
            cluster_list.append(no_seq_cluster)
    print('cluster_list', thre, len(cluster_list))
    print('count_error', count_error)
    pdbbind_table['p_group_'+str(thre)] = cluster_list
pdbbind_table[:3]

no sequence error: 6mu1
no sequence error: 4qsh
no sequence error: 1bxr
no sequence error: 3t09
no sequence error: 5yij
no sequence error: 2z5o
no sequence error: 6mck
no sequence error: 2f2h
no sequence error: 2x6k
no sequence error: 3buw
no sequence error: 2x6x
no sequence error: 6gg4
no sequence error: 4o2a
no sequence error: 1p0y
no sequence error: 4u91
no sequence error: 5bjt
no sequence error: 3m17
no sequence error: 5n70
no sequence error: 3bux
no sequence error: 3bun
no sequence error: 3bum
no sequence error: 4j8g
no sequence error: 4z90
no sequence error: 6hv7
no sequence error: 6hvw
no sequence error: 4qls
no sequence error: 5v13
no sequence error: 4uu7
no sequence error: 5mlw
no sequence error: 5mlo
no sequence error: 5trr
no sequence error: 5ts0
no sequence error: 5try
no sequence error: 5trg
no sequence error: 6ocz
no sequence error: 4bv2
no sequence error: 2hug
no sequence error: 4u2w
no sequence error: 5fsb
no sequence error: 5fsc
no sequence error: 2r1w
no sequence erro

Unnamed: 0,pdbid,year,uniprot,name,p_group_0.3,p_group_0.4,p_group_0.5,p_group_0.6
0,6mu1,2018,P29994,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1",3226,2978,2699,2275
1,3t8s,2011,P29994,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1",152,141,133,114
2,1n4k,2002,P11881,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1",151,141,133,114


In [26]:
# (rows, columns) of the table, including the four p_group_* columns.
pdbbind_table.shape

(19443, 8)

In [27]:
# Save the table with the protein cluster labels as a tab-separated file.
pdbbind_table.to_csv('./intermediate_outputs/pdbbind_table_1.1.tsv', sep='\t', index=None)

### get mol

In [28]:
def get_mol(pdbid):
    """Load the PDBbind ligand of `pdbid` as an RDKit molecule.

    Readers are tried in this order until one yields a molecule: MOL2
    (sanitized), SDF (sanitized), MOL2 (unsanitized), SDF (unsanitized).

    Returns None when the SDF file does not exist or no reader succeeds.
    """
    sdf_path = "../PDBbind2020/pdbbind/{}/{}_ligand.sdf".format(pdbid, pdbid)
    mol2_path = "../PDBbind2020/pdbbind/{}/{}_ligand.mol2".format(pdbid, pdbid)

    # An entry without an SDF ligand is treated as missing.
    if not os.path.exists(sdf_path):
        return None
    # The MOL2 file is read first, so its existence must be checked too.
    has_mol2 = os.path.exists(mol2_path)

    def _from_mol2(sanitize):
        # Read the MOL2 file, or return None when it is absent.
        if not has_mol2:
            return None
        return Chem.MolFromMol2File(mol2_path, sanitize=sanitize)

    def _from_sdf(sanitize):
        # Return the first SDF record, or None when the file has no records.
        supplier = Chem.SDMolSupplier(sdf_path, sanitize=sanitize)
        return supplier[0] if len(supplier) > 0 else None

    for reader, sanitize in ((_from_mol2, True), (_from_sdf, True),
                             (_from_mol2, False), (_from_sdf, False)):
        mol = reader(sanitize)
        if mol is not None:
            return mol
    return None

In [29]:
# Load the ligand of every table row. Index labels of rows whose ligand loaded
# go to success_list, the others to failed_list; molecules are keyed by PDB id.
success_list, failed_list = [], []
mol_dict = {}
for i in pdbbind_table.index:
    pdbid = pdbbind_table.loc[i, 'pdbid']
    mol = get_mol(pdbid)
    if mol is None:
        failed_list.append(i)
        continue
    success_list.append(i)
    mol_dict[pdbid] = mol

RDKit ERROR: [23:38:55] Explicit valence for atom # 25 C, 5, is greater than permitted
RDKit ERROR: [23:38:55] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 7 8 9 10 11 12 14 15 16
RDKit ERROR: 
RDKit ERROR: [23:38:55] ERROR: Could not sanitize molecule ending on line 140
RDKit ERROR: [23:38:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 7 8 9 10 11 12 14 15 16
RDKit ERROR: 
RDKit ERROR: [23:38:55] Can't kekulize mol.  Unkekulized atoms: 36 37 41 42 43
RDKit ERROR: 
RDKit ERROR: [23:38:55] ERROR: Could not sanitize molecule ending on line 153
RDKit ERROR: [23:38:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 36 37 41 42 43
RDKit ERROR: 
RDKit ERROR: [23:38:55] Can't kekulize mol.  Unkekulized atoms: 35 36 37 38 39
RDKit ERROR: 
RDKit ERROR: [23:38:55] ERROR: Could not sanitize molecule ending on line 383
RDKit ERROR: [23:38:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 35 36 37 38 39
RDKit ERROR: 
RDKit ERROR: [23:38:55] Can't kekulize mol.  Unkekulized a

RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Post-condition Violation
RDKit ERROR: Element 'Du' not found
RDKit ERROR: Violation occurred on line 91 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/PeriodicTable.h
RDKit ERROR: Failed Expression: anum > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:38:55] ERROR: Element 'Du' not found
RDKit ERROR: [23:38:55] ERROR: moving to the beginning of the next molecule
RDKit ERROR: [23:38:55] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Post-condition Violation
RDKit ERROR: Element 'Du' not found
RDKit ERROR: Violation occurred on line 91 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/PeriodicTable.h
RDKit ERROR: Failed Expression: anum > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:38:55] ERROR: Element 'Du' not found
RDKit ERROR: [23:38:55] ERROR: moving to the beginning of the next molecule
RDKit ERROR: [23:38:56] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ER

RDKit ERROR: [23:38:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 2
RDKit ERROR: 
RDKit ERROR: [23:38:56] Can't kekulize mol.  Unkekulized atoms: 3 4 5 15 17
RDKit ERROR: 
RDKit ERROR: [23:38:56] Can't kekulize mol.  Unkekulized atoms: 20 21 22 25 29
RDKit ERROR: 
RDKit ERROR: [23:38:56] ERROR: Could not sanitize molecule ending on line 97
RDKit ERROR: [23:38:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 20 21 22 25 29
RDKit ERROR: 
RDKit ERROR: [23:38:57] Can't kekulize mol.  Unkekulized atoms: 0 1 3 4 5 6 8 9 10
RDKit ERROR: 
RDKit ERROR: [23:38:57] Can't kekulize mol.  Unkekulized atoms: 0 3 4 6 7 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [23:38:57] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:38:57] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:38:57] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:38:57] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERR

RDKit ERROR: [23:38:58] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:38:58] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:38:58] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:38:58] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:38:59] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:38:59] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:38:59] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:38:59] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:38:59] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:38:59] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:38:59] Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR

RDKit ERROR: [23:38:59] Can't kekulize mol.  Unkekulized atoms: 23 24 25 26 27
RDKit ERROR: 
RDKit ERROR: [23:38:59] ERROR: Could not sanitize molecule ending on line 198
RDKit ERROR: [23:38:59] ERROR: Can't kekulize mol.  Unkekulized atoms: 23 24 25 26 27
RDKit ERROR: 
RDKit ERROR: [23:38:59] Can't kekulize mol.  Unkekulized atoms: 23 24 25 26 27
RDKit ERROR: 
RDKit ERROR: [23:38:59] ERROR: Could not sanitize molecule ending on line 198
RDKit ERROR: [23:38:59] ERROR: Can't kekulize mol.  Unkekulized atoms: 23 24 25 26 27
RDKit ERROR: 
RDKit ERROR: [23:38:59] non-ring atom 0 marked aromatic
RDKit ERROR: [23:38:59] ERROR: Could not sanitize molecule ending on line 170
RDKit ERROR: [23:38:59] ERROR: non-ring atom 0 marked aromatic
RDKit ERROR: [23:38:59] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:38:59] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:38:59] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit

RDKit ERROR: [23:39:00] ERROR: Can't kekulize mol.  Unkekulized atoms: 2 4 5 8 9
RDKit ERROR: 
RDKit ERROR: [23:39:00] Can't kekulize mol.  Unkekulized atoms: 9 10 27 30 32
RDKit ERROR: 
RDKit ERROR: [23:39:00] Can't kekulize mol.  Unkekulized atoms: 36 37 41 42 43
RDKit ERROR: 
RDKit ERROR: [23:39:00] ERROR: Could not sanitize molecule ending on line 153
RDKit ERROR: [23:39:00] ERROR: Can't kekulize mol.  Unkekulized atoms: 36 37 41 42 43
RDKit ERROR: 
RDKit ERROR: [23:39:00] Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
RDKit ERROR: 
RDKit ERROR: [23:39:00] ERROR: Could not sanitize molecule ending on line 81
RDKit ERROR: [23:39:00] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
RDKit ERROR: 
RDKit ERROR: [23:39:00] Can't kekulize mol.  Unkekulized atoms: 10 11 17 18 19 20 21 22 23
RDKit ERROR: 
RDKit ERROR: [23:39:00] ERROR: Could not sanitize molecule ending on line 119
RDKit ERROR: [23:39:00] ERROR: Can't kekulize mol.  Unkekulized atoms: 10 11 17 18 19 20 

RDKit ERROR: [23:39:01] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18
RDKit ERROR: 
RDKit ERROR: [23:39:01] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:01] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:39:01] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:01] Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:01] ERROR: Could not sanitize molecule ending on line 105
RDKit ERROR: [23:39:01] ERROR: Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:01] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:01] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:39:01] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:01] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERRO

RDKit ERROR: [23:39:02] ERROR: Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 16
RDKit ERROR: 
RDKit ERROR: [23:39:02] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:02] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:39:02] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:02] Can't kekulize mol.  Unkekulized atoms: 6 7 8 11 15
RDKit ERROR: 
RDKit ERROR: [23:39:02] ERROR: Could not sanitize molecule ending on line 128
RDKit ERROR: [23:39:02] ERROR: Can't kekulize mol.  Unkekulized atoms: 6 7 8 11 15
RDKit ERROR: 
RDKit ERROR: [23:39:02] Can't kekulize mol.  Unkekulized atoms: 36 37 38 41 45
RDKit ERROR: 
RDKit ERROR: [23:39:02] ERROR: Could not sanitize molecule ending on line 152
RDKit ERROR: [23:39:02] ERROR: Can't kekulize mol.  Unkekulized atoms: 36 37 38 41 45
RDKit ERROR: 
RDKit ERROR: [23:39:02] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
R

RDKit ERROR: [23:39:04] ERROR: Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:04] Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:04] ERROR: Could not sanitize molecule ending on line 101
RDKit ERROR: [23:39:04] ERROR: Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:04] Can't kekulize mol.  Unkekulized atoms: 0 2 3 4 5
RDKit ERROR: 
RDKit ERROR: [23:39:04] Can't kekulize mol.  Unkekulized atoms: 0 1 3 4 5
RDKit ERROR: 
RDKit ERROR: [23:39:04] Can't kekulize mol.  Unkekulized atoms: 3 4 6 8 11
RDKit ERROR: 
RDKit ERROR: [23:39:04] ERROR: Could not sanitize molecule ending on line 170
RDKit ERROR: [23:39:04] ERROR: Can't kekulize mol.  Unkekulized atoms: 3 4 6 8 11
RDKit ERROR: 
RDKit ERROR: [23:39:04] Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:04] ERROR: Could not sanitize molecule ending on line 103
RDKit ERROR: [23:39

RDKit ERROR: [23:39:05] ERROR: Can't kekulize mol.  Unkekulized atoms: 27 28 29 30 31
RDKit ERROR: 
RDKit ERROR: [23:39:05] Explicit valence for atom # 1 C, 6, is greater than permitted
RDKit ERROR: [23:39:05] Explicit valence for atom # 0 B, 5, is greater than permitted
RDKit ERROR: [23:39:05] ERROR: Could not sanitize molecule ending on line 149
RDKit ERROR: [23:39:05] ERROR: Explicit valence for atom # 0 B, 5, is greater than permitted
RDKit ERROR: [23:39:05] Explicit valence for atom # 1 C, 6, is greater than permitted
RDKit ERROR: [23:39:05] Explicit valence for atom # 0 B, 5, is greater than permitted
RDKit ERROR: [23:39:05] ERROR: Could not sanitize molecule ending on line 149
RDKit ERROR: [23:39:05] ERROR: Explicit valence for atom # 0 B, 5, is greater than permitted
RDKit ERROR: [23:39:05] Explicit valence for atom # 0 B, 4, is greater than permitted
RDKit ERROR: [23:39:05] ERROR: Could not sanitize molecule ending on line 136
RDKit ERROR: [23:39:05] ERROR: Explicit valence fo

RDKit ERROR: [23:39:06] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 22 25 26 27
RDKit ERROR: 
RDKit ERROR: [23:39:07] Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:07] ERROR: Could not sanitize molecule ending on line 147
RDKit ERROR: [23:39:07] ERROR: Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:07] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:07] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:39:07] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:07] Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:07] ERROR: Could not sanitize molecule ending on line 103
RDKit ERROR: [23:39:07] ERROR: Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:07] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Post-condition Vio

RDKit ERROR: [23:39:08] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:08] Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:08] ERROR: Could not sanitize molecule ending on line 147
RDKit ERROR: [23:39:08] ERROR: Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:08] Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:08] ERROR: Could not sanitize molecule ending on line 147
RDKit ERROR: [23:39:08] ERROR: Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:08] Can't kekulize mol.  Unkekulized atoms: 13
RDKit ERROR: 
RDKit ERROR: [23:39:08] ERROR: Could not sanitize molecule ending on line 76
RDKit ERROR: [23:39:08] ERROR: Can't kekulize mol.  Unkekulized atoms: 13
RDKit ERROR: 
RDKit ERROR: [23:39:08] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 5
RDKit ERROR: 
RDKit ERROR: [23:39:08] 

RDKit ERROR: [23:39:08] ERROR: Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:08] Can't kekulize mol.  Unkekulized atoms: 17 18 21 24 28
RDKit ERROR: 
RDKit ERROR: [23:39:08] ERROR: Could not sanitize molecule ending on line 99
RDKit ERROR: [23:39:08] ERROR: Can't kekulize mol.  Unkekulized atoms: 17 18 21 24 28
RDKit ERROR: 
RDKit ERROR: [23:39:08] Can't kekulize mol.  Unkekulized atoms: 21 22 25 28 32
RDKit ERROR: 
RDKit ERROR: [23:39:08] ERROR: Could not sanitize molecule ending on line 109
RDKit ERROR: [23:39:08] ERROR: Can't kekulize mol.  Unkekulized atoms: 21 22 25 28 32
RDKit ERROR: 
RDKit ERROR: [23:39:09] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:09] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:39:09] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:09] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERRO

RDKit ERROR: [23:39:09] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 23
RDKit ERROR: 
RDKit ERROR: [23:39:09] Can't kekulize mol.  Unkekulized atoms: 16 17 18 19 20 21 22 23 24
RDKit ERROR: 
RDKit ERROR: [23:39:09] ERROR: Could not sanitize molecule ending on line 666
RDKit ERROR: [23:39:09] ERROR: Can't kekulize mol.  Unkekulized atoms: 16 17 18 19 20 21 22 23 24
RDKit ERROR: 
RDKit ERROR: [23:39:09] Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:09] ERROR: Could not sanitize molecule ending on line 77
RDKit ERROR: [23:39:09] ERROR: Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:10] Can't kekulize mol.  Unkekulized atoms: 13 14 15 16 17
RDKit ERROR: 
RDKit ERROR: [23:39:10] ERROR: Could not sanitize molecule ending on line 322
RDKit ERROR: [23:39:10] ERROR: Can't kekulize mol.  Unkekulized atoms: 13 14 15 16 17
RDKit ERROR: 
RDKit ERROR: [23:39:10] Can't kekulize mol.  Unkekulized atoms: 

RDKit ERROR: [23:39:10] ERROR: Can't kekulize mol.  Unkekulized atoms: 18
RDKit ERROR: 
RDKit ERROR: [23:39:10] Can't kekulize mol.  Unkekulized atoms: 4 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [23:39:10] ERROR: Could not sanitize molecule ending on line 82
RDKit ERROR: [23:39:10] ERROR: Can't kekulize mol.  Unkekulized atoms: 4 8 9 10 11
RDKit ERROR: 
RDKit ERROR: [23:39:10] Can't kekulize mol.  Unkekulized atoms: 14 18 21 22 23
RDKit ERROR: 
RDKit ERROR: [23:39:10] ERROR: Could not sanitize molecule ending on line 90
RDKit ERROR: [23:39:10] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 18 21 22 23
RDKit ERROR: 
RDKit ERROR: [23:39:10] Can't kekulize mol.  Unkekulized atoms: 18 22 25 26 27
RDKit ERROR: 
RDKit ERROR: [23:39:10] ERROR: Could not sanitize molecule ending on line 102
RDKit ERROR: [23:39:10] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 22 25 26 27
RDKit ERROR: 
RDKit ERROR: [23:39:10] Can't kekulize mol.  Unkekulized atoms: 17 18 19 20 24
RDKit ERROR: 
RDKit ERROR: [

RDKit ERROR: [23:39:10] ERROR: Can't kekulize mol.  Unkekulized atoms: 24
RDKit ERROR: 
RDKit ERROR: [23:39:11] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:11] ERROR: Could not sanitize molecule ending on line 91
RDKit ERROR: [23:39:11] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:11] Can't kekulize mol.  Unkekulized atoms: 18 19 20 21 22 23 24 25 26
RDKit ERROR: 
RDKit ERROR: [23:39:11] ERROR: Could not sanitize molecule ending on line 119
RDKit ERROR: [23:39:11] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 21 22 23 24 25 26
RDKit ERROR: 
RDKit ERROR: [23:39:11] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [23:39:11] ERROR: Could not sanitize molecule ending on line 309
RDKit ERROR: [23:39:11] ERROR: Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9
RDKit ERROR: 
RDKit ERROR: [23:39:11] Can't kekulize mol.  Unkekulized atoms: 2 6 7 8 9
RDKit ERROR:

RDKit ERROR: [23:39:11] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:11] ERROR: Could not sanitize molecule ending on line 111
RDKit ERROR: [23:39:11] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
RDKit ERROR: 
RDKit ERROR: [23:39:11] Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:11] ERROR: Could not sanitize molecule ending on line 101
RDKit ERROR: [23:39:11] ERROR: Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:11] Can't kekulize mol.  Unkekulized atoms: 16 17 23 24 25
RDKit ERROR: 
RDKit ERROR: [23:39:11] ERROR: Could not sanitize molecule ending on line 100
RDKit ERROR: [23:39:11] ERROR: Can't kekulize mol.  Unkekulized atoms: 16 17 23 24 25
RDKit ERROR: 
RDKit ERROR: [23:39:11] Explicit valence for atom # 18 C, 5, is greater than permitted
RDKit ERROR: [23:39:11] Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit E

RDKit ERROR: [23:39:12] ERROR: Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8 9 10 11 12
RDKit ERROR: 
RDKit ERROR: [23:39:12] Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
RDKit ERROR: 
RDKit ERROR: [23:39:12] ERROR: Could not sanitize molecule ending on line 81
RDKit ERROR: [23:39:12] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
RDKit ERROR: 
RDKit ERROR: [23:39:12] Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
RDKit ERROR: 
RDKit ERROR: [23:39:12] ERROR: Could not sanitize molecule ending on line 81
RDKit ERROR: [23:39:12] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
RDKit ERROR: 
RDKit ERROR: [23:39:12] non-ring atom 7 marked aromatic
RDKit ERROR: [23:39:12] ERROR: Could not sanitize molecule ending on line 66
RDKit ERROR: [23:39:12] ERROR: non-ring atom 7 marked aromatic
RDKit ERROR: [23:39:13] Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
RDKit ERROR: 
RDKit ERROR: [23:39:13] ERROR: Could not sanitize molecule ending

RDKit ERROR: [23:39:13] ERROR: Can't kekulize mol.  Unkekulized atoms: 15
RDKit ERROR: 
RDKit ERROR: [23:39:13] Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:13] ERROR: Could not sanitize molecule ending on line 76
RDKit ERROR: [23:39:13] ERROR: Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
RDKit ERROR: 
RDKit ERROR: [23:39:13] Can't kekulize mol.  Unkekulized atoms: 154 155 156 157 158 159 160 161 162
RDKit ERROR: 
RDKit ERROR: [23:39:13] ERROR: Could not sanitize molecule ending on line 536
RDKit ERROR: [23:39:13] ERROR: Can't kekulize mol.  Unkekulized atoms: 154 155 156 157 158 159 160 161 162
RDKit ERROR: 
RDKit ERROR: [23:39:13] Can't kekulize mol.  Unkekulized atoms: 154 155 156 157 158 159 160 161 162
RDKit ERROR: 
RDKit ERROR: [23:39:13] ERROR: Could not sanitize molecule ending on line 536
RDKit ERROR: [23:39:13] ERROR: Can't kekulize mol.  Unkekulized atoms: 154 155 156 157 158 159 160 161 162
RDKit ERROR: 
RDKit ERROR: [23

RDKit ERROR: [23:39:14] ERROR: Can't kekulize mol.  Unkekulized atoms: 47 48 49 50 51 52 53 54 56
RDKit ERROR: 
RDKit ERROR: [23:39:14] Can't kekulize mol.  Unkekulized atoms: 15
RDKit ERROR: 
RDKit ERROR: [23:39:14] ERROR: Could not sanitize molecule ending on line 74
RDKit ERROR: [23:39:14] ERROR: Can't kekulize mol.  Unkekulized atoms: 15
RDKit ERROR: 
RDKit ERROR: [23:39:14] Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 16
RDKit ERROR: 
RDKit ERROR: [23:39:14] ERROR: Could not sanitize molecule ending on line 260
RDKit ERROR: [23:39:14] ERROR: Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 16
RDKit ERROR: 
RDKit ERROR: [23:39:14] Can't kekulize mol.  Unkekulized atoms: 3 4 19 20 22
RDKit ERROR: 
RDKit ERROR: [23:39:14] Can't kekulize mol.  Unkekulized atoms: 28
RDKit ERROR: 
RDKit ERROR: [23:39:14] ERROR: Could not sanitize molecule ending on line 111
RDKit ERROR: [23:39:14] ERROR: Can't kekulize mol.  Unkekulized atoms: 28
RDKit ERROR: 
RDKit ERROR: [23:39:15] Can't kek

RDKit ERROR: [23:39:15] ERROR: Can't kekulize mol.  Unkekulized atoms: 87 88 89 90 91
RDKit ERROR: 
RDKit ERROR: [23:39:15] Explicit valence for atom # 6 C, 5, is greater than permitted
RDKit ERROR: [23:39:15] Explicit valence for atom # 6 C, 5, is greater than permitted
RDKit ERROR: [23:39:15] ERROR: Could not sanitize molecule ending on line 300
RDKit ERROR: [23:39:15] ERROR: Explicit valence for atom # 6 C, 5, is greater than permitted
RDKit ERROR: [23:39:15] Explicit valence for atom # 9 C, 5, is greater than permitted
RDKit ERROR: [23:39:15] Explicit valence for atom # 9 C, 5, is greater than permitted
RDKit ERROR: [23:39:15] ERROR: Could not sanitize molecule ending on line 295
RDKit ERROR: [23:39:15] ERROR: Explicit valence for atom # 9 C, 5, is greater than permitted
RDKit ERROR: [23:39:15] Can't kekulize mol.  Unkekulized atoms: 41 42 43 44 45 46 47 48 49
RDKit ERROR: 
RDKit ERROR: [23:39:15] ERROR: Could not sanitize molecule ending on line 228
RDKit ERROR: [23:39:15] ERROR: 

RDKit ERROR: [23:39:15] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
RDKit ERROR: 
RDKit ERROR: [23:39:16] Can't kekulize mol.  Unkekulized atoms: 66 67 68 69 70
RDKit ERROR: 
RDKit ERROR: [23:39:16] ERROR: Could not sanitize molecule ending on line 658


In [30]:
# How many ligands loaded, how many failed, and how many molecules are stored.
print("success_list", len(success_list), 'failed_list', len(failed_list), 'mol_dict', len(mol_dict))

success_list 19439 failed_list 4 mol_dict 19439


In [143]:
# Persist the loaded ligand molecules (PDB id -> molecule) with pickle.
with open('casf_mol_dict', 'wb') as f:
    pickle.dump(mol_dict,f)

In [144]:
# Number of molecules that were pickled.
print('mol_dict', len(mol_dict))

mol_dict 19439


In [33]:
print("success_list", len(success_list), 'failed_list', len(failed_list), 'mol_dict', len(mol_dict))
# Keep only the rows whose ligand could be loaded. success_list holds index
# LABELS (collected from pdbbind_table.index), so select with .loc. Unlike
# positional .iloc this stays correct when the cell is re-run on the
# already-filtered table.
pdbbind_table = pdbbind_table.loc[success_list]
print(pdbbind_table.shape)
pdbbind_table[:3]

success_list 19439 failed_list 4 mol_dict 19439
(19439, 8)


Unnamed: 0,pdbid,year,uniprot,name,p_group_0.3,p_group_0.4,p_group_0.5,p_group_0.6
0,6mu1,2018,P29994,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1",3226,2978,2699,2275
1,3t8s,2011,P29994,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1",152,141,133,114
2,1n4k,2002,P11881,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1",151,141,133,114


In [34]:
def get_fps(mol_list):
    """Compute Morgan bit-vector fingerprints for a list of molecules.

    Each molecule is fingerprinted with radius 2, 1024 bits and chirality
    enabled. Molecules whose fingerprint cannot be computed are skipped.

    Parameters
    ----------
    mol_list : iterable
        Molecules to fingerprint, passed one by one to
        ``AllChem.GetMorganFingerprintAsBitVect``.

    Returns
    -------
    fps : list
        Fingerprints of the molecules that succeeded, in input order. This
        list is shorter than ``mol_list`` when any molecule failed.
    idx : list of bool
        One flag per input molecule: True if its fingerprint is in ``fps``,
        False if it was skipped. Use it to realign ``fps`` with the inputs.
    """
    fps = []
    idx = []
    for i, mol in enumerate(mol_list):
        # '\r' with end='' rewrites the same line on terminals that honour it.
        print('fingerprint:', i, '\r', end='')
        try:
            fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024, useChirality=True)
        except Exception:
            # Best-effort: skip molecules the fingerprinter rejects. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate so a long run can still be cancelled.
            idx.append(False)
        else:
            fps.append(fp)
            idx.append(True)
    print('fingerprint list',len(fps))
    n_failed = len(idx) - len(fps)
    if n_failed:
        # Make dropped molecules visible; otherwise fps silently ends up
        # shorter than the input list.
        print('fingerprint failed for', n_failed, 'molecules')
    return fps, idx


def calculate_sims(fps1,fps2,simtype='tanimoto'):
    """Build the pairwise similarity matrix between two fingerprint lists.

    Parameters
    ----------
    fps1, fps2 : sequence
        Fingerprints accepted by ``DataStructs.BulkTanimotoSimilarity`` /
        ``DataStructs.BulkDiceSimilarity``.
    simtype : {'tanimoto', 'dice'}
        Which bulk similarity function to use.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(len(fps1), len(fps2))`` where row ``i``
        holds the similarities of ``fps1[i]`` to every entry of ``fps2``.

    Raises
    ------
    ValueError
        If ``simtype`` is not one of the supported names.
    """
    # Validate up front: an unknown simtype used to surface as an
    # UnboundLocalError inside the loop (or, for empty fps1, as a silently
    # returned all-zero matrix).
    if simtype not in ('tanimoto', 'dice'):
        raise ValueError(
            "simtype must be 'tanimoto' or 'dice', got {!r}".format(simtype))
    if simtype == 'tanimoto':
        bulk_similarity = DataStructs.BulkTanimotoSimilarity
    else:
        bulk_similarity = DataStructs.BulkDiceSimilarity

    sim_mat = np.zeros((len(fps1),len(fps2)),dtype=np.float32)
    for i, fp_i in enumerate(fps1):
        if i%10000 == 0:
            print('similarity:', i)
        sim_mat[i,:] = bulk_similarity(fp_i,fps2)
    print('compound sim mat', sim_mat.shape)
    return sim_mat


def compound_clustering(fps, thresholds=(0.3, 0.4, 0.5, 0.6)):
    """Cluster compounds by single-linkage on Jaccard fingerprint distances.

    The condensed Jaccard distance matrix and the single-linkage tree are
    computed once; the tree is then cut at every requested distance threshold.

    Parameters
    ----------
    fps : sequence
        Fingerprints, one per compound, in a form ``scipy.spatial.distance.pdist``
        can convert to a 2-D array.
    thresholds : iterable of float, optional
        Distance cut-offs passed to ``fcluster`` with the 'distance' criterion.
        Defaults to the four cut-offs the notebook originally hard-coded.

    Returns
    -------
    dict
        Maps each threshold to the array of cluster labels returned by
        ``fcluster`` (one label per compound, same order as ``fps``).
    """
    cluster_dict = {}
    print ('start compound clustering...')
    C_dist = pdist(fps, 'jaccard')
    C_link = single(C_dist)
    for thre in thresholds:
        C_clusters = fcluster(C_link, thre, 'distance')
        # Cluster labels start at 1, so slot 0 of the bincount is dropped.
        # One pass over the labels replaces a list.count() scan per cluster.
        cluster_sizes = np.bincount(C_clusters)[1:]
        print('thre', thre, 'total num of compounds', len(fps), 'num of clusters', max(C_clusters), 'max length', int(cluster_sizes.max()))
        cluster_dict[thre] = C_clusters
    return cluster_dict

In [35]:
# Look up the molecule for every row of pdbbind_table, in table order.
mol_list = [mol_dict[pdbid] for pdbid in pdbbind_table['pdbid']]
print('mol_list', len(mol_list))

# NOTE(review): get_fps leaves failed molecules out of fps and only marks them
# False in idx, so fps (and the cluster labels derived from it) can be shorter
# than pdbbind_table. idx is not used in this cell - confirm that downstream
# code realigns the labels with the table rows.
fps, idx = get_fps(mol_list)
cluster_dict = compound_clustering(fps)

mol_list 19439
fingerprint: 913 

RDKit ERROR: [23:39:16] ERROR: Can't kekulize mol.  Unkekulized atoms: 66 67 68 69 70
RDKit ERROR: 
RDKit ERROR: [23:39:19] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:19] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:19] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitH

fingerprint: 1701 

RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:19] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:19] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [

fingerprint: 1702 fingerprint: 1703 fingerprint: 1704 fingerprint: 1705 fingerprint: 1706 fingerprint: 1707 fingerprint: 1708 fingerprint: 1709 fingerprint: 1710 fingerprint: 1711 fingerprint: 1712 fingerprint: 1713 fingerprint: 1714 fingerprint: 1715 fingerprint: 1716 fingerprint: 1717 fingerprint: 1718 fingerprint: 1719 fingerprint: 1720 fingerprint: 1721 fingerprint: 1722 fingerprint: 1723 fingerprint: 1724 fingerprint: 1725 fingerprint: 1726 fingerprint: 1727 fingerprint: 1728 fingerprint: 1729 fingerprint: 1730 fingerprint: 1731 fingerprint: 1732 fingerprint: 1733 fingerprint: 1734 fingerprint: 1735 fingerprint: 1736 fingerprint: 1737 fingerprint: 1738 fingerprint: 1739 fingerprint: 1740 fingerprint: 1741 fingerprint: 1742 fingerprint: 1743 fingerprint: 1744 fingerprint: 1745 fingerprint: 1746 fingerprint: 1747 fingerprint: 1748 fingerprint: 1749 fingerprint: 1750 fingerprint: 1751 fingerprint: 1752 fingerprint: 1753 fingerprint:

 fingerprint: 2245 fingerprint: 2246 fingerprint: 2247 fingerprint: 2248 fingerprint: 2249 fingerprint: 2250 fingerprint: 2251 fingerprint: 2252 fingerprint: 2253 fingerprint: 2254 fingerprint: 2255 fingerprint: 2256 fingerprint: 2257 fingerprint: 2258 fingerprint: 2259 fingerprint: 2260 fingerprint: 2261 fingerprint: 2262 fingerprint: 2263 fingerprint: 2264 fingerprint: 2265 fingerprint: 2266 fingerprint: 2267 fingerprint: 2268 fingerprint: 2269 fingerprint: 2270 fingerprint: 2271 fingerprint: 2272 fingerprint: 2273 fingerprint: 2274 fingerprint: 2275 fingerprint: 2276 fingerprint: 2277 fingerprint: 2278 fingerprint: 2279 fingerprint: 2280 fingerprint: 2281 fingerprint: 2282 fingerprint: 2283 fingerprint: 2284 fingerprint: 2285 fingerprint: 2286 fingerprint: 2287 fingerprint: 2288 fingerprint: 2289 fingerprint: 2290 fingerprint: 2291 fingerprint: 2292 fingerprint: 2293 fingerprint: 2294 fingerprint: 2295 fingerprint: 2296 fingerprin

fingerprint: 3714 

RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:19] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:19] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:19] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called wi

 3715 fingerprint: 3716 fingerprint: 3717 fingerprint: 3718 fingerprint: 3719 fingerprint: 3720 fingerprint: 3721 fingerprint: 3722 fingerprint: 3723 fingerprint: 3724 fingerprint: 3725 fingerprint: 3726 fingerprint: 3727 fingerprint: 3728 fingerprint: 3729 fingerprint: 3730 fingerprint: 3731 fingerprint: 3732 fingerprint: 3733 fingerprint: 3734 fingerprint: 3735 fingerprint: 3736 fingerprint: 3737 fingerprint: 3738 fingerprint: 3739 fingerprint: 3740 fingerprint: 3741 fingerprint: 3742 fingerprint: 3743 fingerprint: 3744 fingerprint: 3745 fingerprint: 3746 fingerprint: 3747 fingerprint: 3748 fingerprint: 3749 fingerprint: 3750 fingerprint: 3751 fingerprint: 3752 fingerprint: 3753 fingerprint: 3754 fingerprint: 3755 fingerprint: 3756 fingerprint: 3757 fingerprint: 3758 fingerprint: 3759 fingerprint: 3760 fingerprint: 3761 fingerprint: 3762 fingerprint: 3763 fingerprint: 3764 fingerprint: 3765 fingerprint: 3766 fingerprint: 3767 finge

fingerprint: 4196 fingerprint: 4197 fingerprint: 4198 fingerprint: 4199 fingerprint: 4200 fingerprint: 4201 fingerprint: 4202 fingerprint: 4203 fingerprint: 4204 fingerprint: 4205 fingerprint: 4206 fingerprint: 4207 fingerprint: 4208 fingerprint: 4209 fingerprint: 4210 fingerprint: 4211 fingerprint: 4212 fingerprint: 4213 fingerprint: 4214 fingerprint: 4215 fingerprint: 4216 fingerprint: 4217 fingerprint: 4218 fingerprint: 4219 fingerprint: 4220 fingerprint: 4221 fingerprint: 4222 fingerprint: 4223 fingerprint: 4224 fingerprint: 4225 fingerprint: 4226 fingerprint: 4227 fingerprint: 4228 fingerprint: 4229 fingerprint: 4230 fingerprint: 4231 fingerprint: 4232 fingerprint: 4233 fingerprint: 4234 fingerprint: 4235 fingerprint: 4236 fingerprint: 4237 fingerprint: 4238 fingerprint: 4239 fingerprint: 4240 fingerprint: 4241 fingerprint: 4242 fingerprint: 4243 fingerprint: 4244 fingerprint: 4245 fingerprint: 4246 fingerprint: 4247 fingerprint

RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:20] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:20] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:20] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called wi

 4674 fingerprint: 4675 fingerprint: 4676 fingerprint: 4677 fingerprint: 4678 fingerprint: 4679 fingerprint: 4680 fingerprint: 4681 fingerprint: 4682 fingerprint: 4683 fingerprint: 4684 fingerprint: 4685 fingerprint: 4686 fingerprint: 4687 fingerprint: 4688 fingerprint: 4689 fingerprint: 4690 fingerprint: 4691 fingerprint: 4692 fingerprint: 4693 fingerprint: 4694 fingerprint: 4695 fingerprint: 4696 fingerprint: 4697 fingerprint: 4698 fingerprint: 4699 fingerprint: 4700 fingerprint: 4701 fingerprint: 4702 fingerprint: 4703 fingerprint: 4704 fingerprint: 4705 fingerprint: 4706 fingerprint: 4707 fingerprint: 4708 fingerprint: 4709 fingerprint: 4710 fingerprint: 4711 fingerprint: 4712 fingerprint: 4713 fingerprint: 4714 fingerprint: 4715 fingerprint: 4716 fingerprint: 4717 fingerprint: 4718 fingerprint: 4719 fingerprint: 4720 fingerprint: 4721 fingerprint: 4722 fingerprint: 4723 fingerprint: 4724 fingerprint: 4725 fingerprint: 4726 finge

fingerprint: 5345 fingerprint: 5346 fingerprint: 5347 fingerprint: 5348 fingerprint: 5349 fingerprint: 5350 fingerprint: 5351 fingerprint: 5352 fingerprint: 5353 fingerprint: 5354 fingerprint: 5355 fingerprint: 5356 fingerprint: 5357 fingerprint: 5358 fingerprint: 5359 fingerprint: 5360 fingerprint: 5361 fingerprint: 5362 fingerprint: 5363 fingerprint: 5364 fingerprint: 5365 fingerprint: 5366 fingerprint: 5367 fingerprint: 5368 fingerprint: 5369 fingerprint: 5370 fingerprint: 5371 fingerprint: 5372 fingerprint: 5373 fingerprint: 5374 fingerprint: 5375 fingerprint: 5376 fingerprint: 5377 fingerprint: 5378 fingerprint: 5379 fingerprint: 5380 fingerprint: 5381 fingerprint: 5382 fingerprint: 5383 fingerprint: 5384 fingerprint: 5385 fingerprint: 5386 fingerprint: 5387 fingerprint: 5388 fingerprint: 5389 fingerprint: 5390 fingerprint: 5391 fingerprint: 5392 fingerprint: 5393 fingerprint: 5394 fingerprint: 5395 fingerprint: 5396 fingerprint

 5826 fingerprint: 5827 fingerprint: 5828 fingerprint: 5829 fingerprint: 5830 fingerprint: 5831 fingerprint: 5832 fingerprint: 5833 fingerprint: 5834 fingerprint: 5835 fingerprint: 5836 fingerprint: 5837 fingerprint: 5838 fingerprint: 5839 fingerprint: 5840 fingerprint: 5841 fingerprint: 5842 fingerprint: 5843 fingerprint: 5844 fingerprint: 5845 fingerprint: 5846 fingerprint: 5847 fingerprint: 5848 fingerprint: 5849 fingerprint: 5850 fingerprint: 5851 fingerprint: 5852 fingerprint: 5853 fingerprint: 5854 fingerprint: 5855 fingerprint: 5856 fingerprint: 5857 fingerprint: 5858 fingerprint: 5859 fingerprint: 5860 fingerprint: 5861 fingerprint: 5862 fingerprint: 5863 fingerprint: 5864 fingerprint: 5865 fingerprint: 5866 fingerprint: 5867 fingerprint: 5868 fingerprint: 5869 fingerprint: 5870 fingerprint: 5871 fingerprint: 5872 fingerprint: 5873 fingerprint: 5874 fingerprint: 5875 fingerprint: 5876 fingerprint: 5877 fingerprint: 5878 finge

fingerprint: 6368 fingerprint: 6369 fingerprint: 6370 fingerprint: 6371 fingerprint: 6372 fingerprint: 6373 fingerprint: 6374 fingerprint: 6375 fingerprint: 6376 fingerprint: 6377 fingerprint: 6378 fingerprint: 6379 fingerprint: 6380 fingerprint: 6381 fingerprint: 6382 fingerprint: 6383 fingerprint: 6384 fingerprint: 6385 fingerprint: 6386 fingerprint: 6387 fingerprint: 6388 fingerprint: 6389 fingerprint: 6390 fingerprint: 6391 fingerprint: 6392 fingerprint: 6393 fingerprint: 6394 fingerprint: 6395 fingerprint: 6396 fingerprint: 6397 fingerprint: 6398 fingerprint: 6399 fingerprint: 6400 fingerprint: 6401 fingerprint: 6402 fingerprint: 6403 fingerprint: 6404 fingerprint: 6405 fingerprint: 6406 fingerprint: 6407 fingerprint: 6408 fingerprint: 6409 fingerprint: 6410 fingerprint: 6411 fingerprint: 6412 fingerprint: 6413 fingerprint: 6414 fingerprint: 6415 fingerprint: 6416 fingerprint: 6417 fingerprint: 6418 fingerprint: 6419 fingerprint:

RDKit ERROR: [23:39:21] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:21] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:21] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 1

 6790 fingerprint: 6791 fingerprint: 6792 fingerprint: 6793 fingerprint: 6794 fingerprint: 6795 fingerprint: 6796 fingerprint: 6797 fingerprint: 6798 fingerprint: 6799 fingerprint: 6800 fingerprint: 6801 fingerprint: 6802 fingerprint: 6803 fingerprint: 6804 fingerprint: 6805 fingerprint: 6806 fingerprint: 6807 fingerprint: 6808 fingerprint: 6809 fingerprint: 6810 fingerprint: 6811 fingerprint: 6812 fingerprint: 6813 fingerprint: 6814 fingerprint: 6815 fingerprint: 6816 fingerprint: 6817 fingerprint: 6818 fingerprint: 6819 fingerprint: 6820 fingerprint: 6821 fingerprint: 6822 fingerprint: 6823 fingerprint: 6824 fingerprint: 6825 fingerprint: 6826 fingerprint: 6827 fingerprint: 6828 fingerprint: 6829 fingerprint: 6830 fingerprint: 6831 fingerprint: 6832 fingerprint: 6833 fingerprint: 6834 fingerprint: 6835 fingerprint: 6836 fingerprint: 6837 fingerprint: 6838 fingerprint: 6839 fingerprint: 6840 fingerprint: 6841 fingerprint: 6842 finge

 7356 fingerprint: 7357 fingerprint: 7358 fingerprint: 7359 fingerprint: 7360 fingerprint: 7361 fingerprint: 7362 fingerprint: 7363 fingerprint: 7364 fingerprint: 7365 fingerprint: 7366 fingerprint: 7367 fingerprint: 7368 fingerprint: 7369 fingerprint: 7370 fingerprint: 7371 fingerprint: 7372 fingerprint: 7373 fingerprint: 7374 fingerprint: 7375 fingerprint: 7376 fingerprint: 7377 fingerprint: 7378 fingerprint: 7379 fingerprint: 7380 fingerprint: 7381 fingerprint: 7382 fingerprint: 7383 fingerprint: 7384 fingerprint: 7385 fingerprint: 7386 fingerprint: 7387 fingerprint: 7388 fingerprint: 7389 fingerprint: 7390 fingerprint: 7391 fingerprint: 7392 fingerprint: 7393 fingerprint: 7394 fingerprint: 7395 fingerprint: 7396 fingerprint: 7397 fingerprint: 7398 fingerprint: 7399 fingerprint: 7400 fingerprint: 7401 fingerprint: 7402 fingerprint: 7403 fingerprint: 7404 fingerprint: 7405 fingerprint: 7406 fingerprint: 7407 fingerprint: 7408 finge

RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:21] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:21] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/

 7891 fingerprint: 7892 fingerprint: 7893 fingerprint: 7894 fingerprint: 7895 fingerprint: 7896 fingerprint: 7897 fingerprint: 7898 fingerprint: 7899 fingerprint: 7900 fingerprint: 7901 fingerprint: 7902 fingerprint: 7903 fingerprint: 7904 fingerprint: 7905 fingerprint: 7906 fingerprint: 7907 fingerprint: 7908 fingerprint: 7909 fingerprint: 7910 fingerprint: 7911 fingerprint: 7912 fingerprint: 7913 fingerprint: 7914 fingerprint: 7915 fingerprint: 7916 fingerprint: 7917 fingerprint: 7918 fingerprint: 7919 fingerprint: 7920 fingerprint: 7921 fingerprint: 7922 fingerprint: 7923 fingerprint: 7924 fingerprint: 7925 fingerprint: 7926 fingerprint: 7927 fingerprint: 7928 fingerprint: 7929 fingerprint: 7930 fingerprint: 7931 fingerprint: 7932 fingerprint: 7933 fingerprint: 7934 fingerprint: 7935 fingerprint: 7936 fingerprint: 7937 fingerprint: 7938 fingerprint: 7939 fingerprint: 7940 fingerprint: 7941 fingerprint: 7942 fingerprint: 7943 finge

 8493 fingerprint: 8494 fingerprint: 8495 fingerprint: 8496 fingerprint: 8497 fingerprint: 8498 fingerprint: 8499 fingerprint: 8500 fingerprint: 8501 fingerprint: 8502 fingerprint: 8503 fingerprint: 8504 fingerprint: 8505 fingerprint: 8506 fingerprint: 8507 fingerprint: 8508 fingerprint: 8509 fingerprint: 8510 fingerprint: 8511 fingerprint: 8512 fingerprint: 8513 fingerprint: 8514 fingerprint: 8515 fingerprint: 8516 fingerprint: 8517 fingerprint: 8518 fingerprint: 8519 fingerprint: 8520 fingerprint: 8521 fingerprint: 8522 fingerprint: 8523 fingerprint: 8524 fingerprint: 8525 fingerprint: 8526 fingerprint: 8527 fingerprint: 8528 fingerprint: 8529 fingerprint: 8530 fingerprint: 8531 fingerprint: 8532 fingerprint: 8533 fingerprint: 8534 fingerprint: 8535 fingerprint: 8536 fingerprint: 8537 fingerprint: 8538 fingerprint: 8539 fingerprint: 8540 fingerprint: 8541 fingerprint: 8542 fingerprint: 8543 fingerprint: 8544 fingerprint: 8545 finge

RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:22] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:22] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/

 9088 fingerprint: 9089 fingerprint: 9090 fingerprint: 9091 fingerprint: 9092 fingerprint: 9093 fingerprint: 9094 fingerprint: 9095 fingerprint: 9096 fingerprint: 9097 fingerprint: 9098 fingerprint: 9099 fingerprint: 9100 fingerprint: 9101 fingerprint: 9102 fingerprint: 9103 fingerprint: 9104 fingerprint: 9105 fingerprint: 9106 fingerprint: 9107 fingerprint: 9108 fingerprint: 9109 fingerprint: 9110 fingerprint: 9111 fingerprint: 9112 fingerprint: 9113 fingerprint: 9114 fingerprint: 9115 fingerprint: 9116 fingerprint: 9117 fingerprint: 9118 fingerprint: 9119 fingerprint: 9120 fingerprint: 9121 fingerprint: 9122 fingerprint: 9123 fingerprint: 9124 fingerprint: 9125 fingerprint: 9126 fingerprint: 9127 fingerprint: 9128 fingerprint: 9129 fingerprint: 9130 fingerprint: 9131 fingerprint: 9132 fingerprint: 9133 fingerprint: 9134 fingerprint: 9135 fingerprint: 9136 fingerprint: 9137 fingerprint: 9138 fingerprint: 9139 fingerprint: 9140 finge

 9732 fingerprint: 9733 fingerprint: 9734 fingerprint: 9735 fingerprint: 9736 fingerprint: 9737 fingerprint: 9738 fingerprint: 9739 fingerprint: 9740 fingerprint: 9741 fingerprint: 9742 fingerprint: 9743 fingerprint: 9744 fingerprint: 9745 fingerprint: 9746 fingerprint: 9747 fingerprint: 9748 fingerprint: 9749 fingerprint: 9750 fingerprint: 9751 fingerprint: 9752 fingerprint: 9753 fingerprint: 9754 fingerprint: 9755 fingerprint: 9756 fingerprint: 9757 fingerprint: 9758 fingerprint: 9759 fingerprint: 9760 fingerprint: 9761 fingerprint: 9762 fingerprint: 9763 fingerprint: 9764 fingerprint: 9765 fingerprint: 9766 fingerprint: 9767 fingerprint: 9768 fingerprint: 9769 fingerprint: 9770 fingerprint: 9771 fingerprint: 9772 fingerprint: 9773 fingerprint: 9774 fingerprint: 9775 fingerprint: 9776 fingerprint: 9777 fingerprint: 9778 fingerprint: 9779 fingerprint: 9780 fingerprint: 9781 fingerprint: 9782 fingerprint: 9783 fingerprint: 9784 finge

 fingerprint: 10352 fingerprint: 10353 fingerprint: 10354 fingerprint: 10355 fingerprint: 10356 fingerprint: 10357 fingerprint: 10358 fingerprint: 10359 fingerprint: 10360 fingerprint: 10361 fingerprint: 10362 fingerprint: 10363 fingerprint: 10364 fingerprint: 10365 fingerprint: 10366 fingerprint: 10367 fingerprint: 10368 fingerprint: 10369 fingerprint: 10370 fingerprint: 10371 fingerprint: 10372 fingerprint: 10373 fingerprint: 10374 fingerprint: 10375 fingerprint: 10376 fingerprint: 10377 fingerprint: 10378 fingerprint: 10379 fingerprint: 10380 fingerprint: 10381 fingerprint: 10382 fingerprint: 10383 fingerprint: 10384 fingerprint: 10385 fingerprint: 10386 fingerprint: 10387 fingerprint: 10388 fingerprint: 10389 fingerprint: 10390 fingerprint: 10391 fingerprint: 10392 fingerprint: 10393 fingerprint: 10394 fingerprint: 10395 fingerprint: 10396 fingerprint: 10397 fingerprint: 10398 fingerprint: 10399 fingerprint: 10400 fingerprint: 10401

RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:22] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:22] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: F

fingerprint: 10969 fingerprint: 10970 fingerprint: 10971 fingerprint: 10972 fingerprint: 10973 fingerprint: 10974 fingerprint: 10975 fingerprint: 10976 fingerprint: 10977 fingerprint: 10978 fingerprint: 10979 fingerprint: 10980 fingerprint: 10981 fingerprint: 10982 fingerprint: 10983 fingerprint: 10984 fingerprint: 10985 fingerprint: 10986 fingerprint: 10987 fingerprint: 10988 fingerprint: 10989 fingerprint: 10990 fingerprint: 10991 fingerprint: 10992 fingerprint: 10993 fingerprint: 10994 fingerprint: 10995 fingerprint: 10996 fingerprint: 10997 fingerprint: 10998 fingerprint: 10999 fingerprint: 11000 fingerprint: 11001 fingerprint: 11002 fingerprint: 11003 fingerprint: 11004 fingerprint: 11005 fingerprint: 11006 fingerprint: 11007 fingerprint: 11008 fingerprint: 11009 fingerprint: 11010 fingerprint: 11011 fingerprint: 11012 fingerprint: 11013 fingerprint: 11014 fingerprint: 11015 fingerprint: 11016 fingerprint: 11017 fingerprint: 11018 

 11515 fingerprint: 11516 fingerprint: 11517 fingerprint: 11518 fingerprint: 11519 fingerprint: 11520 fingerprint: 11521 fingerprint: 11522 fingerprint: 11523 fingerprint: 11524 fingerprint: 11525 fingerprint: 11526 fingerprint: 11527 fingerprint: 11528 fingerprint: 11529 fingerprint: 11530 fingerprint: 11531 fingerprint: 11532 fingerprint: 11533 fingerprint: 11534 fingerprint: 11535 fingerprint: 11536 fingerprint: 11537 fingerprint: 11538 fingerprint: 11539 fingerprint: 11540 fingerprint: 11541 fingerprint: 11542 fingerprint: 11543 fingerprint: 11544 fingerprint: 11545 fingerprint: 11546 fingerprint: 11547 fingerprint: 11548 fingerprint: 11549 fingerprint: 11550 fingerprint: 11551 fingerprint: 11552 fingerprint: 11553 fingerprint: 11554 fingerprint: 11555 fingerprint: 11556 fingerprint: 11557 fingerprint: 11558 fingerprint: 11559 fingerprint: 11560 fingerprint: 11561 fingerprint: 11562 fingerprint: 11563 fingerprint: 11564 fingerprint:

RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:23] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:23] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: F

12140 fingerprint: 12141 fingerprint: 12142 fingerprint: 12143 fingerprint: 12144 fingerprint: 12145 fingerprint: 12146 fingerprint: 12147 fingerprint: 12148 fingerprint: 12149 fingerprint: 12150 fingerprint: 12151 fingerprint: 12152 fingerprint: 12153 fingerprint: 12154 fingerprint: 12155 fingerprint: 12156 fingerprint: 12157 fingerprint: 12158 fingerprint: 12159 fingerprint: 12160 fingerprint: 12161 fingerprint: 12162 fingerprint: 12163 fingerprint: 12164 fingerprint: 12165 fingerprint: 12166 fingerprint: 12167 fingerprint: 12168 fingerprint: 12169 fingerprint: 12170 fingerprint: 12171 fingerprint: 12172 fingerprint: 12173 fingerprint: 12174 fingerprint: 12175 fingerprint: 12176 fingerprint: 12177 fingerprint: 12178 fingerprint: 12179 fingerprint: 12180 fingerprint: 12181 fingerprint: 12182 fingerprint: 12183 fingerprint: 12184 fingerprint: 12185 fingerprint: 12186 fingerprint: 12187 fingerprint: 12188 fingerprint: 12189 fingerprint: 

RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:23] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:23] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: F

 12780 fingerprint: 12781 fingerprint: 12782 fingerprint: 12783 fingerprint: 12784 fingerprint: 12785 fingerprint: 12786 fingerprint: 12787 fingerprint: 12788 fingerprint: 12789 fingerprint: 12790 fingerprint: 12791 fingerprint: 12792 fingerprint: 12793 fingerprint: 12794 fingerprint: 12795 fingerprint: 12796 fingerprint: 12797 fingerprint: 12798 fingerprint: 12799 fingerprint: 12800 fingerprint: 12801 fingerprint: 12802 fingerprint: 12803 fingerprint: 12804 fingerprint: 12805 fingerprint: 12806 fingerprint: 12807 fingerprint: 12808 fingerprint: 12809 fingerprint: 12810 fingerprint: 12811 fingerprint: 12812 fingerprint: 12813 fingerprint: 12814 fingerprint: 12815 fingerprint: 12816 fingerprint: 12817 fingerprint: 12818 fingerprint: 12819 fingerprint: 12820 fingerprint: 12821 fingerprint: 12822 fingerprint: 12823 fingerprint: 12824 fingerprint: 12825 fingerprint: 12826 fingerprint: 12827 fingerprint: 12828 fingerprint: 12829 fingerprint:

fingerprint: 13375 fingerprint: 13376 fingerprint: 13377 fingerprint: 13378 fingerprint: 13379 fingerprint: 13380 fingerprint: 13381 fingerprint: 13382 fingerprint: 13383 fingerprint: 13384 fingerprint: 13385 fingerprint: 13386 fingerprint: 13387 fingerprint: 13388 fingerprint: 13389 fingerprint: 13390 fingerprint: 13391 fingerprint: 13392 fingerprint: 13393 fingerprint: 13394 fingerprint: 13395 fingerprint: 13396 fingerprint: 13397 fingerprint: 13398 fingerprint: 13399 fingerprint: 13400 fingerprint: 13401 fingerprint: 13402 fingerprint: 13403 fingerprint: 13404 fingerprint: 13405 fingerprint: 13406 fingerprint: 13407 fingerprint: 13408 fingerprint: 13409 fingerprint: 13410 fingerprint: 13411 fingerprint: 13412 fingerprint: 13413 fingerprint: 13414 fingerprint: 13415 fingerprint: 13416 fingerprint: 13417 fingerprint: 13418 fingerprint: 13419 fingerprint: 13420 fingerprint: 13421 fingerprint: 13422 fingerprint: 13423 fingerprint: 13424 

RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:24] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:24] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [

fingerprint: 13921 fingerprint: 13922 fingerprint: 13923 fingerprint: 13924 fingerprint: 13925 fingerprint: 13926 fingerprint: 13927 fingerprint: 13928 fingerprint: 13929 fingerprint: 13930 fingerprint: 13931 fingerprint: 13932 fingerprint: 13933 fingerprint: 13934 fingerprint: 13935 fingerprint: 13936 fingerprint: 13937 fingerprint: 13938 fingerprint: 13939 fingerprint: 13940 fingerprint: 13941 fingerprint: 13942 fingerprint: 13943 fingerprint: 13944 fingerprint: 13945 fingerprint: 13946 fingerprint: 13947 fingerprint: 13948 fingerprint: 13949 fingerprint: 13950 fingerprint: 13951 fingerprint: 13952 fingerprint: 13953 fingerprint: 13954 fingerprint: 13955 fingerprint: 13956 fingerprint: 13957 fingerprint: 13958 fingerprint: 13959 fingerprint: 13960 fingerprint: 13961 fingerprint: 13962 fingerprint: 13963 fingerprint: 13964 fingerprint: 13965 fingerprint: 13966 fingerprint: 13967 fingerprint: 13968 fingerprint: 13969 fingerprint: 13970 

 14433 fingerprint: 14434 fingerprint: 14435 fingerprint: 14436 fingerprint: 14437 fingerprint: 14438 fingerprint: 14439 fingerprint: 14440 fingerprint: 14441 fingerprint: 14442 fingerprint: 14443 fingerprint: 14444 fingerprint: 14445 fingerprint: 14446 fingerprint: 14447 fingerprint: 14448 fingerprint: 14449 fingerprint: 14450 fingerprint: 14451 fingerprint: 14452 fingerprint: 14453 fingerprint: 14454 fingerprint: 14455 fingerprint: 14456 fingerprint: 14457 fingerprint: 14458 fingerprint: 14459 fingerprint: 14460 fingerprint: 14461 fingerprint: 14462 fingerprint: 14463 fingerprint: 14464 fingerprint: 14465 fingerprint: 14466 fingerprint: 14467 fingerprint: 14468 fingerprint: 14469 fingerprint: 14470 fingerprint: 14471 fingerprint: 14472 fingerprint: 14473 fingerprint: 14474 fingerprint: 14475 fingerprint: 14476 fingerprint: 14477 fingerprint: 14478 fingerprint: 14479 fingerprint: 14480 fingerprint: 14481 fingerprint: 14482 fingerprint:

RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:24] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:24] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [

fingerprint: 14993 fingerprint: 14994 fingerprint: 14995 fingerprint: 14996 fingerprint: 14997 fingerprint: 14998 fingerprint: 14999 fingerprint: 15000 fingerprint: 15001 fingerprint: 15002 fingerprint: 15003 fingerprint: 15004 fingerprint: 15005 fingerprint: 15006 fingerprint: 15007 fingerprint: 15008 fingerprint: 15009 fingerprint: 15010 fingerprint: 15011 fingerprint: 15012 fingerprint: 15013 fingerprint: 15014 fingerprint: 15015 fingerprint: 15016 fingerprint: 15017 fingerprint: 15018 fingerprint: 15019 fingerprint: 15020 fingerprint: 15021 fingerprint: 15022 fingerprint: 15023 fingerprint: 15024 fingerprint: 15025 fingerprint: 15026 fingerprint: 15027 fingerprint: 15028 fingerprint: 15029 fingerprint: 15030 fingerprint: 15031 fingerprint: 15032 fingerprint: 15033 fingerprint: 15034 fingerprint: 15035 fingerprint: 15036 fingerprint: 15037 fingerprint: 15038 fingerprint: 15039 fingerprint: 15040 fingerprint: 15041 fingerprint: 15042 

RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:24] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:24] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [

 15479 fingerprint: 15480 fingerprint: 15481 fingerprint: 15482 fingerprint: 15483 fingerprint: 15484 fingerprint: 15485 fingerprint: 15486 fingerprint: 15487 fingerprint: 15488 fingerprint: 15489 fingerprint: 15490 fingerprint: 15491 fingerprint: 15492 fingerprint: 15493 fingerprint: 15494 fingerprint: 15495 fingerprint: 15496 fingerprint: 15497 fingerprint: 15498 fingerprint: 15499 fingerprint: 15500 fingerprint: 15501 fingerprint: 15502 fingerprint: 15503 fingerprint: 15504 fingerprint: 15505 fingerprint: 15506 fingerprint: 15507 fingerprint: 15508 fingerprint: 15509 fingerprint: 15510 fingerprint: 15511 fingerprint: 15512 fingerprint: 15513 fingerprint: 15514 fingerprint: 15515 fingerprint: 15516 fingerprint: 15517 fingerprint: 15518 fingerprint: 15519 fingerprint: 15520 fingerprint: 15521 fingerprint: 15522 fingerprint: 15523 fingerprint: 15524 fingerprint: 15525 fingerprint: 15526 fingerprint: 15527 fingerprint: 15528 fingerprint:

RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:25] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:25] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [

 16098 fingerprint: 16099 fingerprint: 16100 fingerprint: 16101 fingerprint: 16102 fingerprint: 16103 fingerprint: 16104 fingerprint: 16105 fingerprint: 16106 fingerprint: 16107 fingerprint: 16108 fingerprint: 16109 fingerprint: 16110 fingerprint: 16111 fingerprint: 16112 fingerprint: 16113 fingerprint: 16114 fingerprint: 16115 fingerprint: 16116 fingerprint: 16117 fingerprint: 16118 fingerprint: 16119 fingerprint: 16120 fingerprint: 16121 fingerprint: 16122 fingerprint: 16123 fingerprint: 16124 fingerprint: 16125 fingerprint: 16126 fingerprint: 16127 fingerprint: 16128 fingerprint: 16129 fingerprint: 16130 fingerprint: 16131 fingerprint: 16132 fingerprint: 16133 fingerprint: 16134 fingerprint: 16135 fingerprint: 16136 fingerprint: 16137 fingerprint: 16138 fingerprint: 16139 fingerprint: 16140 fingerprint: 16141 fingerprint: 16142 fingerprint: 16143 fingerprint: 16144 fingerprint: 16145 fingerprint: 16146 fingerprint: 16147 fingerprint:

RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:25] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:25] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [

 16583 fingerprint: 16584 fingerprint: 16585 fingerprint: 16586 fingerprint: 16587 fingerprint: 16588 fingerprint: 16589 fingerprint: 16590 fingerprint: 16591 fingerprint: 16592 fingerprint: 16593 fingerprint: 16594 fingerprint: 16595 fingerprint: 16596 fingerprint: 16597 fingerprint: 16598 fingerprint: 16599 fingerprint: 16600 fingerprint: 16601 fingerprint: 16602 fingerprint: 16603 fingerprint: 16604 fingerprint: 16605 fingerprint: 16606 fingerprint: 16607 fingerprint: 16608 fingerprint: 16609 fingerprint: 16610 fingerprint: 16611 fingerprint: 16612 fingerprint: 16613 fingerprint: 16614 fingerprint: 16615 fingerprint: 16616 fingerprint: 16617 fingerprint: 16618 fingerprint: 16619 fingerprint: 16620 fingerprint: 16621 fingerprint: 16622 fingerprint: 16623 fingerprint: 16624 fingerprint: 16625 fingerprint: 16626 fingerprint: 16627 fingerprint: 16628 fingerprint: 16629 fingerprint: 16630 fingerprint: 16631 fingerprint: 16632 fingerprint:

fingerprint: 17015 fingerprint: 17016 fingerprint: 17017 fingerprint: 17018 fingerprint: 17019 fingerprint: 17020 fingerprint: 17021 fingerprint: 17022 fingerprint: 17023 fingerprint: 17024 fingerprint: 17025 fingerprint: 17026 fingerprint: 17027 fingerprint: 17028 fingerprint: 17029 fingerprint: 17030 fingerprint: 17031 fingerprint: 17032 fingerprint: 17033 fingerprint: 17034 fingerprint: 17035 fingerprint: 17036 fingerprint: 17037 fingerprint: 17038 fingerprint: 17039 fingerprint: 17040 fingerprint: 17041 fingerprint: 17042 fingerprint: 17043 fingerprint: 17044 fingerprint: 17045 fingerprint: 17046 fingerprint: 17047 fingerprint: 17048 fingerprint: 17049 fingerprint: 17050 fingerprint: 17051 fingerprint: 17052 fingerprint: 17053 fingerprint: 17054 fingerprint: 17055 fingerprint: 17056 fingerprint: 17057 fingerprint: 17058 fingerprint: 17059 fingerprint: 17060 fingerprint: 17061 fingerprint: 17062 fingerprint: 17063 fingerprint: 17064 

RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:25] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:25] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [

 17450 fingerprint: 17451 fingerprint: 17452 fingerprint: 17453 fingerprint: 17454 fingerprint: 17455 fingerprint: 17456 fingerprint: 17457 fingerprint: 17458 fingerprint: 17459 fingerprint: 17460 fingerprint: 17461 fingerprint: 17462 fingerprint: 17463 fingerprint: 17464 fingerprint: 17465 fingerprint: 17466 fingerprint: 17467 fingerprint: 17468 fingerprint: 17469 fingerprint: 17470 fingerprint: 17471 fingerprint: 17472 fingerprint: 17473 fingerprint: 17474 fingerprint: 17475 fingerprint: 17476 fingerprint: 17477 fingerprint: 17478 fingerprint: 17479 fingerprint: 17480 fingerprint: 17481 fingerprint: 17482 fingerprint: 17483 fingerprint: 17484 fingerprint: 17485 fingerprint: 17486 fingerprint: 17487 fingerprint: 17488 fingerprint: 17489 fingerprint: 17490 fingerprint: 17491 fingerprint: 17492 fingerprint: 17493 fingerprint: 17494 fingerprint: 17495 fingerprint: 17496 fingerprint: 17497 fingerprint: 17498 fingerprint: 17499 fingerprint:

 17973 fingerprint: 17974 fingerprint: 17975 fingerprint: 17976 fingerprint: 17977 fingerprint: 17978 fingerprint: 17979 fingerprint: 17980 fingerprint: 17981 fingerprint: 17982 fingerprint: 17983 fingerprint: 17984 fingerprint: 17985 fingerprint: 17986 fingerprint: 17987 fingerprint: 17988 fingerprint: 17989 fingerprint: 17990 fingerprint: 17991 fingerprint: 17992 fingerprint: 17993 fingerprint: 17994 fingerprint: 17995 fingerprint: 17996 fingerprint: 17997 fingerprint: 17998 fingerprint: 17999 fingerprint: 18000 fingerprint: 18001 fingerprint: 18002 fingerprint: 18003 fingerprint: 18004 fingerprint: 18005 fingerprint: 18006 fingerprint: 18007 fingerprint: 18008 fingerprint: 18009 fingerprint: 18010 fingerprint: 18011 fingerprint: 18012 fingerprint: 18013 fingerprint: 18014 fingerprint: 18015 fingerprint: 18016 fingerprint: 18017 fingerprint: 18018 fingerprint: 18019 fingerprint: 18020 fingerprint: 18021 fingerprint: 18022 fingerprint:

RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:26] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:26] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [

 18462 fingerprint: 18463 fingerprint: 18464 fingerprint: 18465 fingerprint: 18466 fingerprint: 18467 fingerprint: 18468 fingerprint: 18469 fingerprint: 18470 fingerprint: 18471 fingerprint: 18472 fingerprint: 18473 fingerprint: 18474 fingerprint: 18475 fingerprint: 18476 fingerprint: 18477 fingerprint: 18478 fingerprint: 18479 fingerprint: 18480 fingerprint: 18481 fingerprint: 18482 fingerprint: 18483 fingerprint: 18484 fingerprint: 18485 fingerprint: 18486 fingerprint: 18487 fingerprint: 18488 fingerprint: 18489 fingerprint: 18490 fingerprint: 18491 fingerprint: 18492 fingerprint: 18493 fingerprint: 18494 fingerprint: 18495 fingerprint: 18496 fingerprint: 18497 fingerprint: 18498 fingerprint: 18499 fingerprint: 18500 fingerprint: 18501 fingerprint: 18502 fingerprint: 18503 fingerprint: 18504 fingerprint: 18505 fingerprint: 18506 fingerprint: 18507 fingerprint: 18508 fingerprint: 18509 fingerprint: 18510 fingerprint: 18511 fingerprint:

 18906 fingerprint: 18907 fingerprint: 18908 fingerprint: 18909 fingerprint: 18910 fingerprint: 18911 fingerprint: 18912 fingerprint: 18913 fingerprint: 18914 fingerprint: 18915 fingerprint: 18916 fingerprint: 18917 fingerprint: 18918 fingerprint: 18919 fingerprint: 18920 fingerprint: 18921 fingerprint: 18922 fingerprint: 18923 fingerprint: 18924 fingerprint: 18925 fingerprint: 18926 fingerprint: 18927 fingerprint: 18928 fingerprint: 18929 fingerprint: 18930 fingerprint: 18931 fingerprint: 18932 fingerprint: 18933 fingerprint: 18934 fingerprint: 18935 fingerprint: 18936 fingerprint: 18937 fingerprint: 18938 fingerprint: 18939 fingerprint: 18940 fingerprint: 18941 fingerprint: 18942 fingerprint: 18943 fingerprint: 18944 fingerprint: 18945 fingerprint: 18946 fingerprint: 18947 fingerprint: 18948 fingerprint: 18949 fingerprint: 18950 fingerprint: 18951 fingerprint: 18952 fingerprint: 18953 fingerprint: 18954 fingerprint: 18955 fingerprint:

RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:26] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:26] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called without preceding call to calcImplicitValence()
RDKit ERROR: Violation occurred on line 187 in file /opt/conda/conda-bld/rdkit_1591929907781/work/Code/GraphMol/Atom.cpp
RDKit ERROR: Failed Expression: d_implicitValence > -1
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [23:39:26] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: getNumImplicitHs() called wi

fingerprint list 19137
start compound clustering...
thre 0.3 total num of compounds 19137 num of clusters 11159 max length 1085
thre 0.4 total num of compounds 19137 num of clusters 8562 max length 1886
thre 0.5 total num of compounds 19137 num of clusters 5834 max length 5022
thre 0.6 total num of compounds 19137 num of clusters 3141 max length 12526


In [36]:
# Subset the table with `idx`, which is built in an earlier cell.
# NOTE(review): presumably a boolean row mask marking the ligands that were
# fingerprinted successfully -- confirm against the cell that creates `idx`.
pdbbind_table = pdbbind_table[np.array(idx)]

In [37]:
# Sanity check: (rows, columns) of the table after the `idx` filter.
print(pdbbind_table.shape)

(19137, 8)


In [38]:
# Add one compound-cluster column per threshold; cluster_dict (filled by the
# clustering cell) is indexed by threshold and its value is stored as a column.
for thre in (0.3, 0.4, 0.5, 0.6):
    cluster_list = cluster_dict[thre]
    pdbbind_table['c_group_{}'.format(thre)] = cluster_list
pdbbind_table.head(3)

Unnamed: 0,pdbid,year,uniprot,name,p_group_0.3,p_group_0.4,p_group_0.5,p_group_0.6,c_group_0.3,c_group_0.4,c_group_0.5,c_group_0.6
0,6mu1,2018,P29994,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1",3226,2978,2699,2275,7453,5648,3530,1493
1,3t8s,2011,P29994,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1",152,141,133,114,6791,5247,3451,1493
2,1n4k,2002,P11881,"INOSITOL 1,4,5-TRISPHOSPHATE RECEPTOR TYPE 1",151,141,133,114,6791,5247,3451,1493


In [39]:
# Checkpoint: tab-separated dump of the clustered table, without the row index.
pdbbind_table.to_csv(
    './intermediate_outputs/pdbbind_table_1.2.tsv',
    sep='\t',
    index=False,
)

### remove multiple ligands

In [54]:
# Restart point: reload the tab-separated checkpoint from disk.
pdbbind_table = pd.read_csv('./intermediate_outputs/pdbbind_table_1.2.tsv', sep='\t')
print(pdbbind_table.shape)

(19137, 12)


In [55]:
# Keep only rows whose ligand SMILES has no '.' separator, i.e. the ligand is
# written as a single connected component.
success_list = [
    i
    for i in pdbbind_table.index
    if '.' not in Chem.MolToSmiles(mol_dict[pdbbind_table.loc[i, 'pdbid']])
]

print("success_list", len(success_list))
pdbbind_table = pdbbind_table.loc[success_list]

success_list 19131


In [56]:
# Number of rows left after the single-ligand filter.
print(len(pdbbind_table))

19131


### remove empty cases

In [57]:
# Index the task strings by PDB id. Each task is "<pdbid>_<chains> <x>"; the
# tuple unpacking doubles as a format check (exactly one ' ' and one '_').
pdbid2chains = {}
for i, item in enumerate(list_task):
    print(i, '\r', end="")  # in-place progress counter
    pdbid_chains, x = item.split(' ')
    pdbid, chains = pdbid_chains.split('_')
    pdbid2chains.update({pdbid: item})

19442   

In [107]:
# Keep rows whose anchor output folder contains both required artefacts.
required_outputs = (
    "../outputAnchor/{}_center_{}_aug_1/am_list.npy",
    "../outputAnchor/{}_center_{}_aug_1/atom_feature.pk",
)

success_list = []
for i in pdbbind_table.index:
    pdbid = pdbbind_table.loc[i, 'pdbid']
    pdbid_chains, x = pdbid2chains[pdbid].split(' ')
    _, chains = pdbid_chains.split('_')

    # all() short-circuits, so the files are probed in the listed order.
    if all(os.path.exists(template.format(pdbid_chains, x)) for template in required_outputs):
        success_list.append(i)

print("success_list", len(success_list))
print("total", pdbbind_table.shape[0])

success_list 19019
total 19021


In [108]:
# Drop the rows that are missing anchor outputs and report what is left.
pdbbind_table = pdbbind_table.loc[success_list]
print(len(pdbbind_table))

19019


In [109]:
# Checkpoint: tab-separated dump of the filtered table, without the row index.
pdbbind_table.to_csv(
    './intermediate_outputs/pdbbind_table_1.3.tsv',
    sep='\t',
    index=False,
)

# 🔥 2021-10-09 23:46

### step 1: get train-test table


In [110]:
# Read the CASF-2016 core-set ids: the first 4 characters of every line after
# the first one.
with open('../../CASF-2016/power_scoring/CoreSet.dat') as f:
    core_lines = f.readlines()[1:]

# `not in []` is always true; it is kept as the hook for a manual exclusion list.
casf16_pdbids = [line[:4] for line in core_lines if line[:4] not in []]
len(casf16_pdbids)

285

In [111]:
# Column-wise lookups keyed by PDB id. `.values` is iterated so the stored
# scalars keep their numpy types; for a repeated pdbid the last row wins.
pdbid_column = pdbbind_table['pdbid'].values
pdbid_to_protein = dict(zip(pdbid_column, pdbbind_table['uniprot'].values))
pdbid_to_group = dict(zip(pdbid_column, pdbbind_table['p_group_0.3'].values))
pdbid_to_year = dict(zip(pdbid_column, pdbbind_table['year'].values))

print(len(pdbid_to_protein))
print(len(pdbid_to_group))
print(len(pdbid_to_year))

19019
19019
19019


In [112]:
# Sorted distinct values of the 'year' field across all retained entries.
np.unique(list(pdbid_to_year.values()))

array([1982, 1984, 1986, 1987, 1989, 1990, 1991, 1992, 1993, 1994, 1995,
       1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
       2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
       2018, 2019])

In [113]:
# Collect the 'uniprot' value of every CASF entry present in the table;
# entries that are absent are printed and remembered in list_failed.
casf_proteins = set()
list_failed = []
for pdbid in casf16_pdbids:
    if pdbid not in pdbid_to_protein:
        list_failed.append(pdbid)
        print(pdbid)
        continue
    casf_proteins.add(pdbid_to_protein[pdbid])

In [114]:
# For each CASF id missing from the table, show its task string and whether
# its am_list.npy output exists.
for pdbid in list_failed:
    task = pdbid2chains[pdbid]
    pdbid_chains, x = task.split(' ')
    _, chains = pdbid_chains.split('_')
    am_list_path = "../outputAnchor/{}_center_{}_aug_1/am_list.npy".format(pdbid_chains, x)
    print(task, os.path.exists(am_list_path))

In [115]:
# Number of PDB ids that have a protein ('uniprot') annotation.
len(pdbid_to_protein)

19019

In [116]:
# Backfill: a pdbid without a group inherits the 'p_group_0.3' value of the
# first table row that has the same 'uniprot' id.
for pdbid, pid in pdbid_to_protein.items():
    if pdbid in pdbid_to_group:
        continue
    same_protein_groups = pdbbind_table.loc[pdbbind_table['uniprot'] == pid, 'p_group_0.3']
    pdbid_to_group[pdbid] = same_protein_groups.values[0]
print(len(pdbid_to_group))

19019


In [117]:
# CASF protein ids that do not occur in pdbbind_table's 'uniprot' column;
# an empty set means every CASF protein id is covered by the table.
casf_proteins - set(np.unique(pdbbind_table['uniprot'].values).tolist())

set()

In [118]:
# Number of distinct 'uniprot' ids among the CASF entries
# (for reference: the official CASF grouping has 57 groups).
print('casf_proteins', len(casf_proteins))

casf_proteins 68


In [119]:
# number of casf groups (should be 57)
# Each CASF protein contributes the 'p_group_0.3' value of its first table row.
casf_groups = {
    pdbbind_table.loc[pdbbind_table['uniprot'] == pid, 'p_group_0.3'].values[0]
    for pid in casf_proteins
}

print('casf_groups', len(casf_groups))

casf_groups 58


In [120]:
# Sorted union of the CASF ids and the ids kept in pdbbind_table.
total_pdbids = np.union1d(np.array(casf16_pdbids), pdbbind_table['pdbid'].values)
len(total_pdbids)

19019

In [121]:
# PDB ids (first 4 characters) listed in the refined-set index; the first
# 6 lines of the file are skipped.
with open('index/INDEX_refined_data.2020') as f:
    refined_set = {line[:4] for line in f.readlines()[6:]}
len(refined_set)

5316

In [122]:
# Split the general-set ids by the two-character affinity tag found at fixed
# columns 25-26 of each data line; unrecognised tags are printed.
kikd_set = set()
ic50_set = set()
with open('index/INDEX_general_PL_data.2020') as f:
    for line in f.readlines()[6:]:
        affinity_tag = line[25:27]
        if affinity_tag in ('Kd', 'Ki'):
            kikd_set.add(line[:4])
        elif affinity_tag == 'IC':
            ic50_set.add(line[:4])
        else:
            print(affinity_tag)
len(kikd_set), len(ic50_set)

(12253, 7190)

In [123]:
# Assign every pdbid exactly one split label. The first matching rule wins:
#   'test'    - pdbid is in the CASF-2016 core set
#   'np'      - its 'p_group_0.3' group also contains a CASF protein
#   'v2020'   - its 'year' is 2019
#   'refined' - listed in the refined-set index
#   'kikd'    - Kd/Ki measurement in the general-set index
#   'ic50'    - IC50 measurement in the general-set index
casf16_lookup = set(casf16_pdbids)  # O(1) membership instead of scanning the list

type_list = []
unlabeled_pdbids = []
for pdbid in total_pdbids:
    if pdbid in casf16_lookup:
        type_list.append('test')
    elif pdbid_to_group[pdbid] in casf_groups:
        type_list.append('np')
    elif pdbid_to_year[pdbid] == 2019:
        type_list.append('v2020')
    elif pdbid in refined_set:
        type_list.append('refined')
    elif pdbid in kikd_set:
        type_list.append('kikd')
    elif pdbid in ic50_set:
        type_list.append('ic50')
    else:
        print('no label', pdbid)
        unlabeled_pdbids.append(pdbid)

# type_list is later stored as a column next to total_pdbids, so a skipped id
# would shift every following label; fail here instead of downstream.
if unlabeled_pdbids:
    raise ValueError(
        '{} pdbid(s) received no label, type_list is misaligned with total_pdbids: {}'.format(
            len(unlabeled_pdbids), unlabeled_pdbids[:10]))
len(type_list)

19019

In [124]:
# Protein-cluster id of every entry, in total_pdbids order (KeyError if missing).
group_list = list(map(pdbid_to_group.__getitem__, total_pdbids))
len(group_list)

19019

In [164]:
def get_pdbid_to_affinity(index_path='index/INDEX_general_PL.2020'):
    """Parse a PDBbind index file into a ``{pdbid: p-affinity}`` dict.

    Every non-comment line is cut at the first '/', stripped and split on
    double spaces; field 0 is the pdbid and field 3 the affinity record,
    e.g. ``Kd=10nM`` or ``IC50<=5uM``. The numeric value is converted to
    ``-log10(value) + offset`` where the offset depends on the unit
    (mM: 3, uM: 6, nM: 9, pM: 12, fM: 15).

    Parameters
    ----------
    index_path : str, optional
        Path of the index file. Defaults to the location used by the notebook.

    Returns
    -------
    dict
        Maps pdbid (str) to the converted affinity (float). The relation
        symbol ('=', '<=', '~', ...) is parsed but not stored.

    Raises
    ------
    AssertionError
        If the affinity type is not one of 'IC50', 'Kd', 'Ki'.
    ValueError
        If the numeric part of a record cannot be converted to float.

    Notes
    -----
    Records with fewer than 4 fields, without a relation symbol or with an
    unknown unit are printed, counted in ``count_error`` and skipped, so a
    stale value from a previous line is never stored for them.
    """
    # Order matters: the two-character symbols must be tried before '=', '<', '>'.
    relations = ('<=', '>=', '=', '>', '<', '~')
    # Offset added to -log10(value) for each supported concentration unit.
    unit_offsets = {'mM': 3, 'uM': 6, 'nM': 9, 'pM': 12, 'fM': 15}

    pdbid_to_aff = {}
    count_error = 0
    with open(index_path) as f:
        for line in f:
            # Skip blank lines and '#' comment/header lines.
            if not line.strip() or line[0] == '#':
                continue
            lines = line.split('/')[0].strip().split('  ')
            pdbid = lines[0]
            if len(lines) < 4:
                print('malformed line', line.rstrip())
                count_error += 1
                continue
            record = lines[3]

            measure = next((symbol for symbol in relations if symbol in record), None)
            if measure is None:
                print(record, measure)
                count_error += 1
                continue

            parts = record.split(measure)
            afftype = parts[0]
            assert afftype in ['IC50', 'Kd', 'Ki']
            # The last two characters are the unit, everything before is the number.
            value = float(parts[1][:-2])
            unit = parts[1][-2:]
            if unit not in unit_offsets:
                print(unit)
                count_error += 1
                continue
            pvalue = -np.log10(value) + unit_offsets[unit]
#                 pdbid_to_aff[pdbid] = (afftype, measure, pvalue)
            pdbid_to_aff[pdbid] = pvalue
    print ('count_error not = measurement', count_error)
    return pdbid_to_aff
pdbid_to_aff = get_pdbid_to_affinity()
print('pdbid_to_aff', len(pdbid_to_aff))

# Overwrite/add the affinities of the CASF core-set entries with the value
# stored at fixed columns 23-28 of CoreSet.dat (first line skipped).
# NOTE(review): absolute path, whereas an earlier cell reads CoreSet.dat through
# a relative path -- confirm both point at the same file.
with open('/data/lishuya/silexon/doghouse2021/CASF-2016/power_scoring/CoreSet.dat') as f:
    core_lines = f.readlines()[1:]
pdbid_to_aff.update((line[:4], float(line[23:29].strip())) for line in core_lines)
len(pdbid_to_aff)

count_error not = measurement 0
pdbid_to_aff 19443


19443

In [165]:
# Affinity label of every entry, in total_pdbids order (KeyError if missing).
label_list = list(map(pdbid_to_aff.__getitem__, total_pdbids))
len(label_list)

19019

In [166]:
# Assemble the train/test table in one go; dict order fixes the column order.
casf_table = pd.DataFrame({
    'pdbid': total_pdbids,
    'type': type_list,
    'label': label_list,
    'p_group_0.3': group_list,
})

In [167]:
# Entries per split label. Uses the Series method because the top-level
# pd.value_counts function is deprecated in recent pandas releases.
casf_table['type'].value_counts()

kikd       5080
ic50       4780
np         4631
refined    2952
v2020      1291
test        285
Name: type, dtype: int64

In [168]:
# Persist the split table as TSV, without the row index.
casf_table.to_csv(
    'casf2016_table_new_protein.tsv',
    sep='\t',
    index=False,
)

### step 2: define anchors

In [145]:
# All pdbids of the split table, plus a set copy for fast membership tests
# in the per-task loops below.
list_pdbid = casf_table['pdbid'].values
set_pdbid = set(list_pdbid)
len(list_pdbid)

19019

In [146]:
# batch_process
# Load the anchors.npy array of every task whose pdbid is in the split table.
result_dict = {}
for item in list_task:
    pdbid_chains, center = item.split()
    pdbid, chains = pdbid_chains.split('_')
    if pdbid in set_pdbid:
        anchor_path = '../outputAnchor/{}_center_{}_aug_1/anchors.npy'.format(pdbid_chains, center)
        result_dict[pdbid] = np.load(anchor_path)

print(len(result_dict))

19019


In [147]:
# Persist the {pdbid: anchors array} mapping as a single pickle file.
with open('casf2016_anchor_dict_thre4', 'wb') as f:
    pickle.dump(result_dict, f)

In [148]:
# def save_centers(pdbid, refid, coords, centers):
#     cmd.reinitialize()
#     cmd.load('../pdb_file/{}_aligned_to_{}.pdb'.format(pdbid, refid))
    
#     for x,y,z in coords:
#         cmd.pseudoatom('pseudo', pos=[x,y,z], vdw=0.01)
        
#     for x,y,z in centers:
#         cmd.pseudoatom('center', pos=[x,y,z], vdw=1)
#     cmd.show('spheres', 'center')
#     cmd.color('yellow', 'center')
#     cmd.save('./check/check2.2_{}_anchors.pse'.format(pdbid))

### Step 3: get compound mol (crossed out — no longer used)

In [149]:
# with open('../pdbbind_food/mol_dict', 'rb') as f:
#     mol_dict = pickle.load(f)
# len(mol_dict)

In [150]:
# def get_mol(pdbid):
#     sdf_path = '../../CASF-2016/coreset/{}/{}_ligand.sdf'.format(pdbid, pdbid)
#     mol2_path = '../../CASF-2016/coreset/{}/{}_ligand.mol2'.format(pdbid, pdbid)
    
#     if os.path.exists(sdf_path):
#         mol = Chem.MolFromMol2File(mol2_path)
#         if mol is None:
#             mol = Chem.SDMolSupplier(sdf_path)[0]
#         if mol is None:
#             mol = Chem.MolFromMol2File(mol2_path, sanitize=False)
#         if mol is None:
#             mol = Chem.SDMolSupplier(sdf_path, sanitize=False)[0]
#     else:
#         mol = None
#     return mol

In [151]:
# for pdbid in casf_table['pdbid']:
#     if pdbid not in mol_dict:
#         mol = get_mol(pdbid)
#         if mol is None:
#             print(pdbid)
#         else:
#             mol_dict[pdbid] = mol
# len(mol_dict)

In [152]:
# with open('casf_mol_dict', 'wb') as f:
#     pickle.dump(mol_dict, f)

## step 4 atom features

In [153]:
# Collect element 0 of each task's atom_feature.pk for every pdbid that is in
# the split table.
feature_dict = {}

for item in list_task:
    pdbid_chains, center = item.split()
    pdbid, chains = pdbid_chains.split('_')
    if pdbid not in set_pdbid:
        continue

    feature_path = '../outputAnchor/{}_center_{}_aug_1/atom_feature.pk'.format(pdbid_chains, center)
    # Context manager closes each file right away; the former bare open() left
    # one handle per task open until garbage collection.
    # NOTE: pickle.load can execute arbitrary code -- only read trusted files.
    with open(feature_path, 'rb') as f:
        atom_dict = pickle.load(f)
#     print(pdbid, atom_dict[0][0].shape)
    feature_dict[pdbid] = atom_dict[0]
len(feature_dict)

19019

In [154]:
# Save the whole {pdbid: atom features} mapping as one pickle file.
with open("casf2016_atom_feature_coord_nei_dict_thre4", "wb") as f:
    pickle.dump(feature_dict, f)