In [31]:
from glob import glob
import pandas as pd
import numpy as np
from Bio import PDB
import os
os.environ["PATH"] += os.pathsep + "/opt/conda/envs/team05/bin"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from rdkit import Chem
from rdkit.Chem import MolToSmiles, MolFromMol2File
from rdkit.Chem.rdmolfiles import MolFromSmiles, MolToSmiles
from Bio.PDB import Select, PDBIO
from Bio.PDB.PDBParser import PDBParser

In [32]:
pdb_path = os.path.abspath("/workspace/datasets/PDBbind/refined-set")
info_path = os.path.abspath("/workspace/datasets/PDBbind/")

In [33]:
complex_list = [
    f for f in os.listdir(pdb_path)
    if os.path.isdir(os.path.join(pdb_path, f)) and len(f) == 4
]

In [35]:
from Bio.PDB import PDBParser, PDBIO, Select
import os

def remove_HETATM_PDBbind(input_list, pdb_path):

    class NonHetSelect(Select):
        def accept_residue(self, residue):
            return 1 if residue.id[0] == " " else 0
    
    for pdb in input_list:
        src_file = f"{pdb_path}/{pdb}/{pdb}_protein.pdb"
        des_file = f"{pdb_path}/{pdb}/{pdb}_remove_HEATM_protein.pdb"

        if os.path.exists(des_file):
            print(f"[SKIP] {des_file} already exists")
            continue

        if not os.path.exists(src_file):
            print(f"[WARN] source file not found: {src_file}")
            continue

        structure = PDBParser().get_structure(pdb, src_file)
        io = PDBIO()
        io.set_structure(structure)
        io.save(des_file, NonHetSelect())
        print(f"[OK] saved: {des_file}")


In [36]:
# import os
# import glob

# def delete_remove_HETATM_files(pdb_path):
#     pattern = os.path.join(pdb_path, "*", "*_remove_HEATM_protein.pdb")
#     targets = glob.glob(pattern)
#     for file in targets:
#         try:
#             os.remove(file)
#             print(f"삭제됨: {file}")
#         except Exception as e:
#             print(f"삭제 실패: {file} → {e}")

# delete_remove_HETATM_files(pdb_path)


In [37]:
remove_HETATM_PDBbind(complex_list, pdb_path)

[SKIP] /workspace/datasets/PDBbind/refined-set/3cl0/3cl0_remove_HEATM_protein.pdb already exists
[SKIP] /workspace/datasets/PDBbind/refined-set/1wcq/1wcq_remove_HEATM_protein.pdb already exists
[SKIP] /workspace/datasets/PDBbind/refined-set/4u73/4u73_remove_HEATM_protein.pdb already exists
[SKIP] /workspace/datasets/PDBbind/refined-set/6dj1/6dj1_remove_HEATM_protein.pdb already exists
[SKIP] /workspace/datasets/PDBbind/refined-set/4zip/4zip_remove_HEATM_protein.pdb already exists
[SKIP] /workspace/datasets/PDBbind/refined-set/1ec3/1ec3_remove_HEATM_protein.pdb already exists
[SKIP] /workspace/datasets/PDBbind/refined-set/3b92/3b92_remove_HEATM_protein.pdb already exists
[SKIP] /workspace/datasets/PDBbind/refined-set/5xg5/5xg5_remove_HEATM_protein.pdb already exists
[SKIP] /workspace/datasets/PDBbind/refined-set/2b7d/2b7d_remove_HEATM_protein.pdb already exists
[SKIP] /workspace/datasets/PDBbind/refined-set/5am6/5am6_remove_HEATM_protein.pdb already exists
[SKIP] /workspace/datasets/PDB

In [38]:
pdb_parser = PDB.PDBParser(QUIET = True)

In [39]:
amino_acids_short = {
    "ALA":"A", "ARG":"R", "ASN":"N", "ASP":"D", "CYS":"C",
    "GLU":"E", "GLN":"Q", "GLY":"G", "HIS":"H", "ILE":"I",
    "LEU":"L", "LYS":"K", "MET":"M", "PHE":"F", "PRO":"P",
    "SER":"S", "THR":"T", "TRP":"W", "TYR":"Y", "VAL":"V",
    "SEC":"U", "PYL":"O"
}

In [40]:
data_df = pd.DataFrame({"PDB":complex_list})

In [41]:
def get_info(pdb):
    try:
        """ Load protein info """
        structure = pdb_parser.get_structure(pdb, f"{pdb_path}/{pdb}/{pdb}_remove_HEATM_protein.pdb")
        chain_name_list, pdb_sequence_list, seq_lengths_list, protein_atom_coords, protein_atom_residue_list, reindex = list(), list(), list(), list(), list(), 0
        
        """ Extract protein info """
        for chain_name in list(structure[0].child_dict.keys()):
            chain = structure[0][chain_name]

            pdb_sequence = ""
            for residue in chain.get_residues():
                if residue.resname in amino_acids_short.keys():
                    pdb_sequence += amino_acids_short[residue.resname]

                    for atom in residue:
                        protein_atom_coords.append(atom.get_coord())
                        protein_atom_residue_list.append(reindex)
                    reindex += 1     

            if len(pdb_sequence) != 0:
                chain_name_list.append(chain_name)
                pdb_sequence_list.append(pdb_sequence)
                seq_lengths_list.append(len(pdb_sequence))

        """ Load pocket info """      
        protein_atom_coords, binding_index = np.array(protein_atom_coords), list()
        pocket_structure = pdb_parser.get_structure(pdb, f"{pdb_path}/{pdb}/{pdb}_pocket.pdb")

        pocket_coordi = list()

        for chain_name in list(pocket_structure[0].child_dict.keys()):
            chain = pocket_structure[0][chain_name]
            for residue in chain.get_residues():
                if residue.resname in amino_acids_short.keys():
                    for atom in residue:
                        pocket_coordi.append(atom.get_coord())
        
        """ Matching pocket info """
        pocket_coordi = np.array(pocket_coordi)
        bi_x, bi_y, bi_z = pocket_coordi[:, 0], pocket_coordi[:, 1], pocket_coordi[:, 2]

        for i, j, k in zip(bi_x, bi_y, bi_z):
            tmp_coordi = np.array([i, j, k], dtype = np.float32)
            ind = np.where((protein_atom_coords == tmp_coordi).all(axis = 1))[0][0]
            binding_index.append(protein_atom_residue_list[ind])

        binding_index = sorted(list(set(binding_index)))
        binding_index = list(map(str, binding_index))

        total_seq_lengths = np.sum(np.array(seq_lengths_list))
        seq_lengths_list = list(map(str, seq_lengths_list))

        return ",".join(chain_name_list), ",".join(pdb_sequence_list), total_seq_lengths, ",".join(seq_lengths_list), ",".join(binding_index)

    except Exception as e:
        print(pdb, e)
        return None

In [42]:
from multiprocessing import Process, Queue, Pool

def parallelize_dataframe(df, func, num_partitions=5):
    df_split = np.array_split(df, num_partitions)
    pool = Pool(num_partitions)
    results = pool.map(func, df_split)
    pool.close()
    pool.join()
    return results

In [43]:
def get_pdb_info_bulk(df):
    return df.PDB.map(get_info)

In [None]:
info_results = parallelize_dataframe(data_df, get_pdb_info_bulk, 5)

In [15]:
info_results = pd.concat(info_results)

In [112]:
data_df["Chain"] = info_results.map(lambda a: a[0] if a is not None else None)

In [113]:
data_df["Sequence"] = info_results.map(lambda a: a[1] if a is not None else None)

In [114]:
data_df["Total_seq_lengths"] = info_results.map(lambda a: a[2] if a is not None else None)

In [115]:
data_df["Chain_seq_lengths"] = info_results.map(lambda a: a[3] if a is not None else None)

In [116]:
data_df["BS"] = info_results.map(lambda a: a[4] if a is not None else None)

In [117]:
data_df = data_df.loc[data_df.Sequence.isna()==False].reset_index(drop=True)
data_df = data_df.loc[data_df.Chain != " "].reset_index(drop=True)

In [118]:
data_df

Unnamed: 0,PDB,Chain,Sequence,Total_seq_lengths,Chain_seq_lengths,BS
0,3cl0,"A,B,C,D",VKLAGNSSLCPINGWAVYSKDNSIRIGSKGDVFVIREPFISCSHLE...,1540,385385385385,"35,36,51,53,66,67,68,69,70,73,96,97,98,112,116..."
1,1wcq,A,GEPLYTEQDLAVNGREGFPNYRIPALTVTPDGDLLASYDGRPTGID...,601,601,"21,22,23,38,40,44,45,46,47,48,50,83,84,85,100,..."
2,4u73,A,YRYTGKLRPHYPLMPTRPVPSYIQRPDYADHPLGMSESEQALKGTS...,304,304,"27,100,101,102,103,104,105,106,108,110,111,112..."
3,6dj1,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,22,23,24,25,26,27,28,29,30,31,44,45,46,47,48..."
4,4zip,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4..."
...,...,...,...,...,...,...
5311,6fnj,A,AKEIDVSYVKIEEVIGAGEFGEVCRGRLKAPGKKESCVAIKTLKGG...,262,262,"14,15,16,17,18,21,22,23,24,36,37,38,39,40,41,5..."
5312,5zc5,U,IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATH...,245,245,"1,29,30,43,45,46,86,90,91,93,96,136,141,143,14..."
5313,4a7i,"A,B",RKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYP...,287,53234,"53,54,94,98,132,133,134,135,136,137,138,140,17..."
5314,1kui,A,QRFPQRYIELAIVVDHGMYTKYSSNFKKIRKRVHQMVSNINEMCRP...,201,201,"42,70,71,102,103,104,105,106,107,108,109,110,1..."


In [119]:
import pandas as pd
import re
from math import log10

core_path = "/workspace/datasets/PDBbind/refined-set/index/INDEX_refined_data.2020"
out_csv   = "/workspace/binding_affinity/datasets/validation2020/validation_Afinity.csv"

def to_nM(value: float, unit: str) -> float:
    u = unit.lower()
    if u == "pm":  return value * 1e-3
    if u == "nm":  return value
    if u == "um":  return value * 1e3
    if u == "mm":  return value * 1e6
    if u == "m":   return value * 1e9
    return None

pat_meas = re.compile(r"(Ki|Kd|IC50)=([0-9]*\.?[0-9]+)(pM|nM|uM|mM|M)", re.IGNORECASE)

rows = []
with open(core_path) as f:
    for line in f:
        if line.startswith("#") or not line.strip():
            continue
        parts = line.split()
        pdb_id = parts[0]
        logKa  = float(parts[3])  
        Ka_str = parts[4]

        affinity_nM, pAff = None, None
        m = pat_meas.search(Ka_str)
        if m:
            val, unit = float(m.group(2)), m.group(3)
            affinity_nM = to_nM(val, unit)
            if affinity_nM:
                pAff = 9 - log10(affinity_nM)

        rows.append((pdb_id, affinity_nM, pAff, logKa))

df_aff = pd.DataFrame(rows, columns=["PDB", "Affinity_nM", "pAff", "loaKa"])

df_aff.to_csv(out_csv, index=False)
print(f"✅ Saved parsed affinities to {out_csv}")
df_aff.head()

df_aff = df_aff.where(pd.notnull(df_aff), None)


✅ Saved parsed affinities to /workspace/binding_affinity/datasets/validation/validation_Afinity.csv


In [120]:
data_df = pd.merge(data_df, df_aff, on="PDB", how="inner")

In [121]:
data_df

Unnamed: 0,PDB,Chain,Sequence,Total_seq_lengths,Chain_seq_lengths,BS,Affinity_nM,pAff,loaKa
0,3cl0,"A,B,C,D",VKLAGNSSLCPINGWAVYSKDNSIRIGSKGDVFVIREPFISCSHLE...,1540,385385385385,"35,36,51,53,66,67,68,69,70,73,96,97,98,112,116...",84.800,7.071604,7.07
1,1wcq,A,GEPLYTEQDLAVNGREGFPNYRIPALTVTPDGDLLASYDGRPTGID...,601,601,"21,22,23,38,40,44,45,46,47,48,50,83,84,85,100,...",550.000,6.259637,6.26
2,4u73,A,YRYTGKLRPHYPLMPTRPVPSYIQRPDYADHPLGMSESEQALKGTS...,304,304,"27,100,101,102,103,104,105,106,108,110,111,112...",6600.000,5.180456,5.18
3,6dj1,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,22,23,24,25,26,27,28,29,30,31,44,45,46,47,48...",0.031,10.508638,10.51
4,4zip,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.210,9.677781,9.68
...,...,...,...,...,...,...,...,...,...
5311,6fnj,A,AKEIDVSYVKIEEVIGAGEFGEVCRGRLKAPGKKESCVAIKTLKGG...,262,262,"14,15,16,17,18,21,22,23,24,36,37,38,39,40,41,5...",142.000,6.847712,6.85
5312,5zc5,U,IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATH...,245,245,"1,29,30,43,45,46,86,90,91,93,96,136,141,143,14...",88.000,7.055517,7.06
5313,4a7i,"A,B",RKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYP...,287,53234,"53,54,94,98,132,133,134,135,136,137,138,140,17...",2.000,8.698970,8.70
5314,1kui,A,QRFPQRYIELAIVVDHGMYTKYSSNFKKIRKRVHQMVSNINEMCRP...,201,201,"42,70,71,102,103,104,105,106,107,108,109,110,1...",169000.000,3.772113,3.77


In [122]:
lengths = data_df.Total_seq_lengths.values

In [123]:
data_df = data_df[lengths <= 1500].reset_index(drop=True)
data_df

Unnamed: 0,PDB,Chain,Sequence,Total_seq_lengths,Chain_seq_lengths,BS,Affinity_nM,pAff,loaKa
0,1wcq,A,GEPLYTEQDLAVNGREGFPNYRIPALTVTPDGDLLASYDGRPTGID...,601,601,"21,22,23,38,40,44,45,46,47,48,50,83,84,85,100,...",550.000,6.259637,6.26
1,4u73,A,YRYTGKLRPHYPLMPTRPVPSYIQRPDYADHPLGMSESEQALKGTS...,304,304,"27,100,101,102,103,104,105,106,108,110,111,112...",6600.000,5.180456,5.18
2,6dj1,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,22,23,24,25,26,27,28,29,30,31,44,45,46,47,48...",0.031,10.508638,10.51
3,4zip,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.210,9.677781,9.68
4,1ec3,"A,B",PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...,198,9999,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.920,9.036212,9.04
...,...,...,...,...,...,...,...,...,...
5163,6fnj,A,AKEIDVSYVKIEEVIGAGEFGEVCRGRLKAPGKKESCVAIKTLKGG...,262,262,"14,15,16,17,18,21,22,23,24,36,37,38,39,40,41,5...",142.000,6.847712,6.85
5164,5zc5,U,IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATH...,245,245,"1,29,30,43,45,46,86,90,91,93,96,136,141,143,14...",88.000,7.055517,7.06
5165,4a7i,"A,B",RKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYP...,287,53234,"53,54,94,98,132,133,134,135,136,137,138,140,17...",2.000,8.698970,8.70
5166,1kui,A,QRFPQRYIELAIVVDHGMYTKYSSNFKKIRKRVHQMVSNINEMCRP...,201,201,"42,70,71,102,103,104,105,106,107,108,109,110,1...",169000.000,3.772113,3.77


In [124]:
data_df = data_df[data_df["BS"] != ""].reset_index(drop=True)
data_df

Unnamed: 0,PDB,Chain,Sequence,Total_seq_lengths,Chain_seq_lengths,BS,Affinity_nM,pAff,loaKa
0,1wcq,A,GEPLYTEQDLAVNGREGFPNYRIPALTVTPDGDLLASYDGRPTGID...,601,601,"21,22,23,38,40,44,45,46,47,48,50,83,84,85,100,...",550.000,6.259637,6.26
1,4u73,A,YRYTGKLRPHYPLMPTRPVPSYIQRPDYADHPLGMSESEQALKGTS...,304,304,"27,100,101,102,103,104,105,106,108,110,111,112...",6600.000,5.180456,5.18
2,6dj1,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,22,23,24,25,26,27,28,29,30,31,44,45,46,47,48...",0.031,10.508638,10.51
3,4zip,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.210,9.677781,9.68
4,1ec3,"A,B",PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...,198,9999,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.920,9.036212,9.04
...,...,...,...,...,...,...,...,...,...
5163,6fnj,A,AKEIDVSYVKIEEVIGAGEFGEVCRGRLKAPGKKESCVAIKTLKGG...,262,262,"14,15,16,17,18,21,22,23,24,36,37,38,39,40,41,5...",142.000,6.847712,6.85
5164,5zc5,U,IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATH...,245,245,"1,29,30,43,45,46,86,90,91,93,96,136,141,143,14...",88.000,7.055517,7.06
5165,4a7i,"A,B",RKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYP...,287,53234,"53,54,94,98,132,133,134,135,136,137,138,140,17...",2.000,8.698970,8.70
5166,1kui,A,QRFPQRYIELAIVVDHGMYTKYSSNFKKIRKRVHQMVSNINEMCRP...,201,201,"42,70,71,102,103,104,105,106,107,108,109,110,1...",169000.000,3.772113,3.77


In [125]:
def convert_smiles(row):
    pdb = row.PDB
    
    mol = f"{pdb_path}/{pdb}/{pdb}_ligand.mol2"
    command = f'obabel -imol2 "{mol}" -osmi -xC | obabel -ismi -osmi -xk -O tmp.smi'
    os.system(command)
    
    smiles = read_file(open("tmp.smi"))[0].split('\t')[0].strip()
    
    try:
        smiles = MolToSmiles(MolFromSmiles(smiles),isomericSmiles = False, kekuleSmiles = True)
        return smiles
    
    except Exception as e:
        print(pdb, e)
        return None

In [126]:
def read_file(file):
    return [i.strip() for i in file.readlines()]

In [127]:
SMILES = data_df.apply(convert_smiles, axis = 1)

1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4u73_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4u73_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2b7d_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2b7d_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekuli

3uxl Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5lwd_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5lwd_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3bxf_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3bxf_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4zek_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4zek_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4djx_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4djx_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 

5tef Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


  Failed to kekulize aromatic bonds in MOL2 file (title is 5jgq_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5jgq_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1lpz_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1lpz_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule convert

2v8w Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5ja0_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5ja0_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1a4w_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1a4w_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1bxq_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1bxq_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule

3daz Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4uoh_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4uoh_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1h4w_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1h4w_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5tmp_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5tmp_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4zeb_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4zeb_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 

4q8x Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1o36_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1o36_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2wzm_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2wzm_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6h8s_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6h8s_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed t

4yha Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6jdi_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6jdi_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1z6s_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1z6s_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1rnm_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1rnm_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1f5l_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1f5l_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 

5afv Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)
8a3h Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
[10:21:43] Explicit valence for atom # 3 N, 4, is greater than permitted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4zcs_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4zcs_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1m7y_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1m7y_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4u70_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4u70_ligand)



3u6h Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1x1z_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1x1z_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1qb1_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1qb1_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3aau_ligand)

1 molecule converted
  Failed

3exh Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5u8c_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5u8c_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1fpc_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1fpc_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5ey0_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5ey0_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1o0h_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1o0h_ligand)

1 molecule converted
1 

4br3 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4jyc_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4jyc_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2yay_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2yay_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule convert

1tkb Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1qbq_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1qbq_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4xxh_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4xxh_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1gno_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1gno_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed t

1uho Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


  Failed to kekulize aromatic bonds in MOL2 file (title is 4bam_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4bam_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4p5d_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4p5d_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1o2o_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1o2o_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1fzq_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1fzq_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 

5vij Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1sln_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1sln_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4r73_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4r73_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4ha5_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4ha5_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule

1v11 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6d1a_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6d1a_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3c56_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3c56_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5

4emf Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)
5j9x Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


  Failed to kekulize aromatic bonds in MOL2 file (title is 5n3v_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5n3v_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1o30_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1o30_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5sxm_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5sxm_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4nku_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (titl

5efa Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2xbp_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2xbp_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1o33_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1o33_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4j48_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4j48_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1d4p_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1d4p_ligand)

1 molecule converted
1 

4cg9 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6cze_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6cze_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4o9w_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4o9w_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule convert

5ftg Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1lpk_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1lpk_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1o2q_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1o2q_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4w97_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4w97_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule

1e6q Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1xka_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1xka_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6d1d_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6d1d_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5irr_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5irr_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (ti

5nk8 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2zq2_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2zq2_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3q6z_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3q6z_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6cvv_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6cvv_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed t

5fh7 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2wjg_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2wjg_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5za7_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5za7_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4wop_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4wop_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule

3rlb Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2v88_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2v88_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3nkk_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3nkk_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2bok_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2bok_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5uxf_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5uxf_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3cfn_l

3t0x Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


  Failed to kekulize aromatic bonds in MOL2 file (title is 6mln_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6mln_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5bv3_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5bv3_ligand)

1 molecule converted
[10:24:10] Explicit valence for atom # 15 N, 4, is greater than permitted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted


5bv3 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3qx9_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3qx9_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3h8b_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3h8b_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in 

1olx Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5jxq_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5jxq_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2vnp_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2vnp_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1w0z_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1w0z_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5u0w_ligand)

1 molecule converted


4o6w Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1ppl_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1ppl_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4mo4_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4mo4_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6d19_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6d19_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (ti

1v1m Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1ew9_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1ew9_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1ppm_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1ppm_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekuli

5ei3 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2rcn_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2rcn_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3g3r_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3g3r_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule convert

4cga Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


  Failed to kekulize aromatic bonds in MOL2 file (title is 4fsl_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4fsl_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6nco_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6nco_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1owe_ligand)

1 molecule converted
  Failed

1xk5 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1pgp_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1pgp_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1y3x_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1y3x_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1lpg_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1lpg_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6eqx_ligand)

1 molecule converted


2vwm Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2fxv_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2fxv_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1phw_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1phw_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule convert

4n7m Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2oc2_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2oc2_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5d6j_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5d6j_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3iqu_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3iq

5vih Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4k5p_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4k5p_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1v2u_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1v2u_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2clh_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2clh_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1wc1_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1wc1_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 

1ols Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3zhx_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3zhx_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3ahn_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3ahn_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4

4cpw Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4x8o_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4x8o_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1qb6_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1qb6_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3t01_ligand)

1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2nsl_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2nsl_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted


4or4 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1ucn_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1ucn_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6cvf_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6cvf_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2q8m_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2q8m_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3e12_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3e12_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4gah_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4gah_ligand)

1 molecule conv

6r4k Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1k1o_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1k1o_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4cwq_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4cwq_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1olu_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1olu_ligand)

1 molecule converted
[10:25:57] Explicit valence for atom # 7 N, 4, is greater than permitted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5evz_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5evz_l

1olu Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 6n3y_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 6n3y_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3po1_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3po1_ligand)

1 molecule converted
[10:25:58] Explicit valence for atom # 5 N, 4, is greater than permitted
1 molecule converted
1 molecule converted


3po1 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4bao_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4bao_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1afk_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1afk_ligand

4emr Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3ip8_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3ip8_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4bb9_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4bb9_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5m25_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5m25_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1j14_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1j14_ligand)

1 molecule converted
1 

4cg8 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 3ljo_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 3ljo_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1dqn_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1dqn_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 2wos_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 2wos_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (ti

1v16 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1qka_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1qka_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1ghz_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1ghz_ligand)

1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 1bjv_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 1bjv_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed t

4gq4 Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
[10:26:41] Explicit valence for atom # 15 N, 4, is greater than permitted


4mme Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)


1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 4na9_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 4na9_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
  Failed to kekulize aromatic bonds in MOL2 file (title is 5zc5_ligand)

1 molecule converted
  Failed to kekulize aromatic SMILES (title is 5zc5_ligand)

1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule converted
1 molecule convert

In [128]:
data_df["SMILES"] = SMILES

In [129]:
data_df = data_df.loc[data_df.SMILES.isna()==False].reset_index(drop=True)
data_df

Unnamed: 0,PDB,Chain,Sequence,Total_seq_lengths,Chain_seq_lengths,BS,Affinity_nM,pAff,loaKa,SMILES
0,1wcq,A,GEPLYTEQDLAVNGREGFPNYRIPALTVTPDGDLLASYDGRPTGID...,601,601,"21,22,23,38,40,44,45,46,47,48,50,83,84,85,100,...",550.000,6.259637,6.26,CC(O)[NH2+]C1C(O)C=C(C(=O)[O-])OC1C(O)C(O)CO
1,4u73,A,YRYTGKLRPHYPLMPTRPVPSYIQRPDYADHPLGMSESEQALKGTS...,304,304,"27,100,101,102,103,104,105,106,108,110,111,112...",6600.000,5.180456,5.18,[NH3+]C(C1=CC=CC=C1)P([O])([O])=O
2,6dj1,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,22,23,24,25,26,27,28,29,30,31,44,45,46,47,48...",0.031,10.508638,10.51,CC1=C(OCC(=O)NC(CC2=CC=CC=C2)C(O)CC(CC2=CC=CC=...
3,4zip,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.210,9.677781,9.68,COC1=CC=C(S(=O)(=O)N(CC(C)C)CC(O)C(CC2=CC=CC=C...
4,1ec3,"A,B",PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...,198,9999,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.920,9.036212,9.04,CC(C)C(NC(=O)C(OCC1=CC=CC=C1)C(O)C(O)C(OCC1=CC...
...,...,...,...,...,...,...,...,...,...,...
5117,6fnj,A,AKEIDVSYVKIEEVIGAGEFGEVCRGRLKAPGKKESCVAIKTLKGG...,262,262,"14,15,16,17,18,21,22,23,24,36,37,38,39,40,41,5...",142.000,6.847712,6.85,CC1=CC=C(C(=O)NC2=CC=CC(C(F)(F)F)=C2)C=C1NC1=N...
5118,5zc5,U,IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATH...,245,245,"1,29,30,43,45,46,86,90,91,93,96,136,141,143,14...",88.000,7.055517,7.06,N[C](N)NC(=O)C1=NC(C2=CC3=C(C=CC=C3F)O2)=C(N2C...
5119,4a7i,"A,B",RKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYP...,287,53234,"53,54,94,98,132,133,134,135,136,137,138,140,17...",2.000,8.698970,8.70,CC(C)[NH+]1CCC(NS(=O)(=O)CCNC(=O)C2=CC=C(Cl)S2...
5120,1kui,A,QRFPQRYIELAIVVDHGMYTKYSSNFKKIRKRVHQMVSNINEMCRP...,201,201,"42,70,71,102,103,104,105,106,107,108,109,110,1...",169000.000,3.772113,3.77,NC(=O)CCC(NC(=O)C1CCC(=O)N1)C(=O)NC(CC1=CNC2=C...


In [130]:
def get_SMILES_length(df):
    index = [True if len(smi) <= 150 else False for smi in df.SMILES.values]
    return index

In [131]:
smiles_index = get_SMILES_length(data_df)

In [132]:
data_df = data_df.loc[smiles_index].reset_index(drop=True)

In [133]:
data_df

Unnamed: 0,PDB,Chain,Sequence,Total_seq_lengths,Chain_seq_lengths,BS,Affinity_nM,pAff,loaKa,SMILES
0,1wcq,A,GEPLYTEQDLAVNGREGFPNYRIPALTVTPDGDLLASYDGRPTGID...,601,601,"21,22,23,38,40,44,45,46,47,48,50,83,84,85,100,...",550.000,6.259637,6.26,CC(O)[NH2+]C1C(O)C=C(C(=O)[O-])OC1C(O)C(O)CO
1,4u73,A,YRYTGKLRPHYPLMPTRPVPSYIQRPDYADHPLGMSESEQALKGTS...,304,304,"27,100,101,102,103,104,105,106,108,110,111,112...",6600.000,5.180456,5.18,[NH3+]C(C1=CC=CC=C1)P([O])([O])=O
2,6dj1,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,22,23,24,25,26,27,28,29,30,31,44,45,46,47,48...",0.031,10.508638,10.51,CC1=C(OCC(=O)NC(CC2=CC=CC=C2)C(O)CC(CC2=CC=CC=...
3,4zip,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,198,9999,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.210,9.677781,9.68,COC1=CC=C(S(=O)(=O)N(CC(C)C)CC(O)C(CC2=CC=CC=C...
4,1ec3,"A,B",PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...,198,9999,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.920,9.036212,9.04,CC(C)C(NC(=O)C(OCC1=CC=CC=C1)C(O)C(O)C(OCC1=CC...
...,...,...,...,...,...,...,...,...,...,...
5117,6fnj,A,AKEIDVSYVKIEEVIGAGEFGEVCRGRLKAPGKKESCVAIKTLKGG...,262,262,"14,15,16,17,18,21,22,23,24,36,37,38,39,40,41,5...",142.000,6.847712,6.85,CC1=CC=C(C(=O)NC2=CC=CC(C(F)(F)F)=C2)C=C1NC1=N...
5118,5zc5,U,IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATH...,245,245,"1,29,30,43,45,46,86,90,91,93,96,136,141,143,14...",88.000,7.055517,7.06,N[C](N)NC(=O)C1=NC(C2=CC3=C(C=CC=C3F)O2)=C(N2C...
5119,4a7i,"A,B",RKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYP...,287,53234,"53,54,94,98,132,133,134,135,136,137,138,140,17...",2.000,8.698970,8.70,CC(C)[NH+]1CCC(NS(=O)(=O)CCNC(=O)C2=CC=C(Cl)S2...
5120,1kui,A,QRFPQRYIELAIVVDHGMYTKYSSNFKKIRKRVHQMVSNINEMCRP...,201,201,"42,70,71,102,103,104,105,106,107,108,109,110,1...",169000.000,3.772113,3.77,NC(=O)CCC(NC(=O)C1CCC(=O)N1)C(=O)NC(CC1=CNC2=C...


In [134]:
data_df.to_csv("/workspace/binding_affinity/datasets/validation2020/validations2020.tsv", sep="\t", index=False)
data_dff = data_df.iloc[:, [0, 1, 2, 5, 6, 7, 9]]
data_dff

Unnamed: 0,PDB,Chain,Sequence,BS,Affinity_nM,pAff,SMILES
0,1wcq,A,GEPLYTEQDLAVNGREGFPNYRIPALTVTPDGDLLASYDGRPTGID...,"21,22,23,38,40,44,45,46,47,48,50,83,84,85,100,...",550.000,6.259637,CC(O)[NH2+]C1C(O)C=C(C(=O)[O-])OC1C(O)C(O)CO
1,4u73,A,YRYTGKLRPHYPLMPTRPVPSYIQRPDYADHPLGMSESEQALKGTS...,"27,100,101,102,103,104,105,106,108,110,111,112...",6600.000,5.180456,[NH3+]C(C1=CC=CC=C1)P([O])([O])=O
2,6dj1,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,"7,22,23,24,25,26,27,28,29,30,31,44,45,46,47,48...",0.031,10.508638,CC1=C(OCC(=O)NC(CC2=CC=CC=C2)C(O)CC(CC2=CC=CC=...
3,4zip,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.210,9.677781,COC1=CC=C(S(=O)(=O)N(CC(C)C)CC(O)C(CC2=CC=CC=C...
4,1ec3,"A,B",PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.920,9.036212,CC(C)C(NC(=O)C(OCC1=CC=CC=C1)C(O)C(O)C(OCC1=CC...
...,...,...,...,...,...,...,...
5117,6fnj,A,AKEIDVSYVKIEEVIGAGEFGEVCRGRLKAPGKKESCVAIKTLKGG...,"14,15,16,17,18,21,22,23,24,36,37,38,39,40,41,5...",142.000,6.847712,CC1=CC=C(C(=O)NC2=CC=CC(C(F)(F)F)=C2)C=C1NC1=N...
5118,5zc5,U,IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATH...,"1,29,30,43,45,46,86,90,91,93,96,136,141,143,14...",88.000,7.055517,N[C](N)NC(=O)C1=NC(C2=CC3=C(C=CC=C3F)O2)=C(N2C...
5119,4a7i,"A,B",RKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYP...,"53,54,94,98,132,133,134,135,136,137,138,140,17...",2.000,8.698970,CC(C)[NH+]1CCC(NS(=O)(=O)CCNC(=O)C2=CC=C(Cl)S2...
5120,1kui,A,QRFPQRYIELAIVVDHGMYTKYSSNFKKIRKRVHQMVSNINEMCRP...,"42,70,71,102,103,104,105,106,107,108,109,110,1...",169000.000,3.772113,NC(=O)CCC(NC(=O)C1CCC(=O)N1)C(=O)NC(CC1=CNC2=C...


In [135]:
df = data_dff[data_df["pAff"].notna()].reset_index(drop=True)
df

Unnamed: 0,PDB,Chain,Sequence,BS,Affinity_nM,pAff,SMILES
0,1wcq,A,GEPLYTEQDLAVNGREGFPNYRIPALTVTPDGDLLASYDGRPTGID...,"21,22,23,38,40,44,45,46,47,48,50,83,84,85,100,...",550.000,6.259637,CC(O)[NH2+]C1C(O)C=C(C(=O)[O-])OC1C(O)C(O)CO
1,4u73,A,YRYTGKLRPHYPLMPTRPVPSYIQRPDYADHPLGMSESEQALKGTS...,"27,100,101,102,103,104,105,106,108,110,111,112...",6600.000,5.180456,[NH3+]C(C1=CC=CC=C1)P([O])([O])=O
2,6dj1,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,"7,22,23,24,25,26,27,28,29,30,31,44,45,46,47,48...",0.031,10.508638,CC1=C(OCC(=O)NC(CC2=CC=CC=C2)C(O)CC(CC2=CC=CC=...
3,4zip,"A,B",PQITLWKRPLVTIKIGGQLKEALLDTGADDTVIEEMSLPGRWKPKM...,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.210,9.677781,COC1=CC=C(S(=O)(=O)N(CC(C)C)CC(O)C(CC2=CC=CC=C...
4,1ec3,"A,B",PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM...,"7,8,9,22,23,24,25,26,27,28,29,30,31,44,45,46,4...",0.920,9.036212,CC(C)C(NC(=O)C(OCC1=CC=CC=C1)C(O)C(O)C(OCC1=CC...
...,...,...,...,...,...,...,...
5115,6fnj,A,AKEIDVSYVKIEEVIGAGEFGEVCRGRLKAPGKKESCVAIKTLKGG...,"14,15,16,17,18,21,22,23,24,36,37,38,39,40,41,5...",142.000,6.847712,CC1=CC=C(C(=O)NC2=CC=CC(C(F)(F)F)=C2)C=C1NC1=N...
5116,5zc5,U,IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATH...,"1,29,30,43,45,46,86,90,91,93,96,136,141,143,14...",88.000,7.055517,N[C](N)NC(=O)C1=NC(C2=CC3=C(C=CC=C3F)O2)=C(N2C...
5117,4a7i,"A,B",RKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYP...,"53,54,94,98,132,133,134,135,136,137,138,140,17...",2.000,8.698970,CC(C)[NH+]1CCC(NS(=O)(=O)CCNC(=O)C2=CC=C(Cl)S2...
5118,1kui,A,QRFPQRYIELAIVVDHGMYTKYSSNFKKIRKRVHQMVSNINEMCRP...,"42,70,71,102,103,104,105,106,107,108,109,110,1...",169000.000,3.772113,NC(=O)CCC(NC(=O)C1CCC(=O)N1)C(=O)NC(CC1=CNC2=C...


In [136]:
df.to_csv("/workspace/binding_affinity/datasets/validation2020/validation2020.tsv", sep="\t", index=False)

In [4]:
import pandas as pd

core_path = "/workspace/binding_affinity/datasets/CSAR-HiQ/CSAR47.tsv"
validation_path = "/workspace/binding_affinity/datasets/validation2020/validation_wo_CASF-2016andcore2016and36.tsv"
# output_path = "/workspace/binding_affinity/datasets/validation2020/validation_wo_core2016.tsv"  
output_path = "/workspace/binding_affinity/datasets/validation2020/validation_wo_CASF-2016andcore2016and3647.tsv"  

core_df = pd.read_csv(core_path, sep='\t')
validation_df = pd.read_csv(validation_path, sep='\t')

compare_column = 'PDB'

core_ids = set(core_df[compare_column].astype(str).str.upper())
validation_df['PDB_upper'] = validation_df[compare_column].astype(str).str.upper()

deduplicated_df = validation_df[~validation_df['PDB_upper'].isin(core_ids)].drop(columns=['PDB_upper'])

deduplicated_df.to_csv(output_path, sep='\t', index=False)

removed_count = len(validation_df) - len(deduplicated_df)
print(f"제거된 중복 항목 수: {removed_count}")
print(f"중복 제거된 검증 세트가 저장되었습니다: {output_path}")


제거된 중복 항목 수: 1
중복 제거된 검증 세트가 저장되었습니다: /workspace/binding_affinity/datasets/validation2020/validation_wo_CASF-2016andcore2016and3647.tsv


In [5]:
import pandas as pd

df = pd.read_csv("/workspace/binding_affinity/datasets/validation2020/validation_wo_CASF-2016andcore2016and3647.tsv", sep="\t")

val_df = df.sample(n=2000, random_state=42)

val_df.to_csv("/workspace/binding_affinity/datasets/validation2020/validation2020.tsv", sep="\t", index=False)

print("Total samples:", len(df))
print("Validation samples:", len(val_df))


Total samples: 4852
Validation samples: 2000


In [6]:
import pandas as pd

val = pd.read_csv("/workspace/binding_affinity/datasets/validation2020/validation_wo_CASF-2016andcore2016and3647.tsv", sep="\t")
val2000 = pd.read_csv("/workspace/binding_affinity/datasets/validation2020/validation2020.tsv", sep="\t")

result = val.merge(val2000, how="outer", indicator=True)
result = result[result["_merge"] == "left_only"].drop(columns=["_merge"])

result.to_csv("/workspace/binding_affinity/datasets/train2020/training_2.tsv", sep="\t", index=False)

print(f"원래 {len(val)}개 → 남은 {len(result)}개")


원래 4852개 → 남은 2852개
