## PADIF - test
### Docking the molecules

In [1]:
from ccdc.docking import Docker
from ccdc.io import MoleculeReader, EntryReader, MoleculeWriter
from ccdc.protein import Protein
from ccdc.io import EntryWriter
import os
from ccdc.entry import Entry
import pandas as pd
from ccdc.molecule import Molecule
from ccdc import conformer
from ccdc import io

Could not find the CSD database in the CSD Software installation.  Data dependent features will not be available.

For further help with installing and configuring data please visit
the support page at https://www.ccdc.cam.ac.uk/csds_install_help



In [2]:
### Select the path 
parentDir = os.getcwd()
dirName = "ada"
path = os.path.join(parentDir,dirName)
os.chdir(path)

In [2]:
### Protein preparation

### Open the protein and prepare
prot1 = "1ndw.pdb"
prot = Protein.from_file(prot1)

### Remove waters
prot.remove_all_waters()

### select only one chain if prot has more than one
if len(prot.chains) >= 2:
    identifiers = []
    for idx, chain in enumerate(prot.chains):
        if idx > 0:
            identifiers.append(chain.identifier)
    for id in identifiers:
        prot.remove_chain(id)

### Reference folder
ref_dir = os.path.join(path, "ref_folder")
os.makedirs(ref_dir, exist_ok=True)

### Save the biggest ligand
weight = []
for lig in prot.ligands:
    weight.append(lig.molecular_weight)
for lig in prot.ligands:
   if lig.molecular_weight == max(weight):
        with MoleculeWriter(f"{dirName}_ligand.mol2") as mol_writer:
            mol_writer.write(lig)
        with MoleculeWriter(f"{ref_dir}/{dirName}_ligand.mol2") as mol_writer:
            mol_writer.write(lig)

### Remove the ligands
ligands = prot.ligands
for l in ligands:
    prot.remove_ligand(l.identifier)

### Add hydrogens
prot.add_hydrogens()

### save the protein
with MoleculeWriter(f"{dirName}_prep.mol2") as proteinWriter:
    proteinWriter.write(prot)

In [3]:
### Open Ligands
actives =  pd.read_csv("actives_final.ism", sep = " ", header=None)
actives.columns = ["SMILES", "num", "ID"]
actives = actives[["SMILES", "ID"]]

decoys = pd.read_csv("decoys_final.ism", sep = " ", header=None)
decoys.columns = ["SMILES", "ID"]
decoys.head()

Unnamed: 0,SMILES,ID
0,Cc1ccc(cc1I)C(=O)NC(=S)NNC(=S)N,C06503852
1,C[C@H]1CCC[C@H]([C@@H]1C)NC(=O)[C@@H](C)Sc2nc(...,C07692711
2,CC[C@@H](C)[C@H](C(=O)NNC(=O)CC12CC3CC(C1)CC(C...,C07978054
3,C[C@@H](CCC(=O)NN)[C@H]1CC[C@@H]2[C@@]1(CC[C@@...,C08497416
4,C[C@@H](C(=O)Nc1cc(ccc1Cl)C(F)(F)F)Sc2nc(cc(n2...,C08723910


In [8]:
print(actives.shape)
decoys.shape

(93, 2)


(5450, 2)

In [4]:
import glob
### Prepare reference ligands

### cahnge to reference directory
os.chdir(ref_dir)
### Open all molecules
all_ref = glob.glob("*.mol2")
counter = 0
### Prepare and save molecules as references
for mol in all_ref:
    mol = MoleculeReader(mol)
    ### Prepare entries, I desactivate protonation protocol, beacuse this is uncompatible with protonation_states
    ligand_prep = Docker.LigandPreparation()
    ligand_prep.settings.protonate = True
    ligand_prep.standardise_bond_types = True
    prep_lig = ligand_prep.prepare(Entry.from_molecule(mol[0]))
    counter += 1
    ### Write molecule 
    with MoleculeWriter(f"{ref_dir}/ref_{counter}.mol2") as mol_writer:
            mol_writer.write(prep_lig.molecule)
os.chdir(path)


In [5]:
### do the function to preparte the molecules

def prep_ligand_from_smiles(smiles, id, dir):
    ### molecule from smiles
    lig_molecule = Molecule.from_string(smiles, format="smiles")
    ### Pass ligands to molecule format for GOLD, generating 3d coordinates
    con_gen = conformer.ConformerGenerator()
    con_gen.settings.max_conformers = 1
    lig_mol_3d = con_gen.generate(lig_molecule)
    ### Prepare entries, I desactivate protonation protocol, beacuse this is uncompatible with protonation_states
    ligand_prep = Docker.LigandPreparation()
    ligand_prep.settings.protonate = False
    ligand_prep.standardise_bond_types = True
    prep_lig = ligand_prep.prepare(Entry.from_molecule(lig_mol_3d[0].molecule))
    ### Write molecule 
    with MoleculeWriter(f"{dir}/{id}.mol2") as mol_writer:
            mol_writer.write(prep_lig.molecule)
    return prep_lig

In [6]:
### Create active directory
act_dir = os.path.join(path, "actives")
os.makedirs(act_dir, exist_ok=True)

In [7]:
### Prepare actives
if __name__ == "__main__":
    from multiprocessing import Pool
    pool = Pool(processes=25)
    for row in actives.iloc:
        pool.apply_async(prep_ligand_from_smiles, (row["SMILES"], row["ID"], act_dir))
    pool.close()
    pool.join()

INFO: INFO: INFO: INFO: INFO: INFO: INFO: INFO: INFO: INFO: INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogulINFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogulINFO: INFO: INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogulINFO: INFO: INFO: 
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul

/appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogul

INFO: 
INFO: 
/appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogul
/appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
/appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogul
/appl/ccdc/CSDS2020/CSD_2020/bin/mogul/appl/ccdc/CSDS2020/CSD_2020/bin/mogul
/appl/c

In [9]:
### Create decoys directory
dec_dir = os.path.join(path, "decoys")
os.makedirs(dec_dir, exist_ok=True)

### Prepare decoys
if __name__ == "__main__":
    from multiprocessing import Pool
    pool = Pool(processes=25)
    for row in decoys.iloc:
        pool.apply_async(prep_ligand_from_smiles, (row["SMILES"], row["ID"], dec_dir))
    pool.close()
    pool.join()

INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /appl/ccdc/CSDS2020/CSD_2020/bin/mogul
INFO: /app

In [20]:
### create the function to do the configuration file

def gold_config(protein, ref_ligand):
    ### Change the directory and work here
    os.chdir(path)

    ### call the functions to dock
    docker = Docker()
    settings = docker.settings
    
    ### call protein and native ligand for select the binding site
    prot_1 = protein
    settings.add_protein_file(prot_1)
    native_ligand = ref_ligand
    native_ligand_mol = MoleculeReader(native_ligand)[0]
    prot_dock = settings.proteins[0]

    ### Select parameters to dock: binding site, fitness_function, autoscale, and others
    settings.binding_site = settings.BindingSiteFromLigand(prot_dock, native_ligand_mol, 8.0)
    settings.fitness_function = "PLP"
    settings.autoscale = 100
    settings.early_termination = False
    settings.write_options = "NO_LOG_FILES NO_LINK_FILES NO_RNK_FILES NO_BESTRANKING_LST_FILE NO_GOLD_PROTEIN_MOL2_FILENO_LGFNAME_FILE NO_PID_FILENO_SEED_LOG_FILE NO_GOLD_ERR_FILE NO_FIT_PTS_FILES NO_GOLD_LIGAND_MOL2_FILE"
    settings.flip_amide_bonds = True
    settings.flip_pyramidal_nitrogen = True
    settings.flip_free_corners = True
    settings.diverse_solutions = (True, 10, 1.5)

    ### save the configuration file to modify
    Docker.Settings.write(settings,"gold.conf")

    ### Add to config file "per_atom_scores"
    with open("gold.conf", "r") as inFile:
        text = inFile.readlines()

    with open("gold.conf", "w") as outFile:
        for line in text:
            if line == "  SAVE OPTIONS\n":
                line = line + "per_atom_scores = 1\n"
            outFile.write(line)
    
    with open("gold.conf", "r") as inFile:
        text = inFile.readlines()

    with open("gold.conf", "w") as outFile:
        for line in text:
            outFile.write(line.replace("make_subdirs = 0\n", "make_subdirs = 1\n"))

In [21]:
### Do for the first ligand and protein
gold_config(f"{dirName}_prep.mol2", f"{dirName}_ligand.mol2")

In [17]:
### create the function to do the docking
def dock_mol(confFile, id, dir):
    conf = confFile
    settings = Docker.Settings.from_file(conf)
    ligand = f"{id}.mol2"
    settings.add_ligand_file(ligand, 100)
    settings.output_directory = dir
    settings.output_file = f"{id}_sln.mol2"
    docker = Docker(settings = settings).dock(f"{dir}/{id}.conf")
    return docker

In [22]:
### Dock the references
if __name__ == "__main__":
        from multiprocessing import Pool
        pool = Pool(processes=25)
        for num in range(counter):
            pool.apply_async(dock_mol, ("gold.conf", f"ref_{num+1}", ref_dir))
        pool.close()
        pool.join()

Starting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/ref_folder/ref_4.confStarting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/ref_folder/ref_3.confStarting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/ref_folder/ref_2.confStarting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/ref_folder/ref_5.conf



Starting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/ref_folder/ref_1.conf
Setting up GOLD environment...
Setting up GOLD environment...
Setting up GOLD environment...
Setting up GOLD environment...
GOLD Version 5.8.1
GOLD Version 5.8.1
GOLD Version 5.8.1GOLD Version 5.8.1

Running:
 
     "/appl/ccdc/CSDS2020/Discovery_2020/GOLD/gold/d_linux_64/bin/gold_linux_64" "/nfs/home/dvictori/Documents/Dockings_results/val/ada/ref_folder/ref_5.conf"

Running:
 
     "/appl/ccdc/CSDS2020/Discovery_2020/GOLD/gold/d_linux_64/bin/gold_linux_64" "/nfs/home/d

In [24]:
### Dock the actives
if __name__ == "__main__":
        from multiprocessing import Pool
        pool = Pool(processes=25)
        for row in actives.iloc:
            pool.apply_async(dock_mol, ("gold.conf", row["ID"], act_dir))
        pool.close()
        pool.join()

Starting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL295003.conf
Setting up GOLD environment...
GOLD Version 5.8.1
Running:
 
     "/appl/ccdc/CSDS2020/Discovery_2020/GOLD/gold/d_linux_64/bin/gold_linux_64" "/nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL295003.conf"

Starting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL123620.conf
Starting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL191267.conf
Starting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL189168.conf
Setting up GOLD environment...
GOLD Version 5.8.1
Running:
 
     "/appl/ccdc/CSDS2020/Discovery_2020/GOLD/gold/d_linux_64/bin/gold_linux_64" "/nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL123620.conf"

Setting up GOLD environment...
GOLD Version 5.8.1
Running:
 
     "/appl/ccdc/CSDS2020/Discovery_2020/GOLD/gold/d_



********************************************************************************
Ligand in file /nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL306270.mol2,
No torsional FF parameters for C.ar N.pl3 O.3 H torsion, bond type  1

No torsional FF parameters for H N.pl3 O.3 H torsion, bond type  1



Starting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL606666.conf
Setting up GOLD environment...
GOLD Version 5.8.1
Running:
 
     "/appl/ccdc/CSDS2020/Discovery_2020/GOLD/gold/d_linux_64/bin/gold_linux_64" "/nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL606666.conf"

Starting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL43261.conf
Setting up GOLD environment...
GOLD Version 5.8.1
Running:
 
     "/appl/ccdc/CSDS2020/Discovery_2020/GOLD/gold/d_linux_64/bin/gold_linux_64" "/nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL43261.conf"

Starting GOLD with conf file /nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL35724.conf
Setting up GOLD environment...
GOLD Version 5.8.1
Running:
 
     "/appl/ccdc/CSDS2020/Discovery_2020/GOLD/gold/d_linux_64/bin/gold_linux_64" "/nfs/home/dvictori/Documents/Dockings_results/val/ada/actives/CHEMBL35724.conf"

St

In [None]:
### Dock the decoys
if __name__ == "__main__":
        from multiprocessing import Pool
        pool = Pool(processes=25)
        for row in decoys.iloc:
            pool.apply_async(dock_mol, ("gold.conf", row["ID"], dec_dir))
        pool.close()
        pool.join()

### ChemPLP extraction and PADIF creation

In [3]:
### Select the folder to work
import os
import pandas as pd
import glob
import random

In [4]:
### do the function to extract chemplp
def chemplp_ext(file):    
    lista = []
    with open(file) as inFile:   
        for num, line in enumerate(inFile):
            if "> <Gold.PLP.Protein.Score.Contributions>\n" in line:
                for num2, line in enumerate(inFile):
                    if num2 < num:
                        lista.append(line)
    
    return lista

### Create a function to convert positives in neagtives
def sign_change(list):
    new_list = []
    for value in list:
        if value > 0:
            x = value*-1
        else:
            x = value
        new_list.append(x)
    return new_list

### Def function to create chemplp dataframes

def chemplp_dataframe(soln_files_names, etiquete):  
    chempl_list = []
    ids =  []
    for name in glob.glob(soln_files_names):
        chempl_list.append(chemplp_ext(name))
        val = name.split("gold_soln_")[1]
        ids.append(val.split("_")[0])

    ### Organize this list and create list of list
    list_0 = []
    for list in chempl_list: 
        list_1 = []
        for row in list:
            list_2 = []
            for value in row.split():
                list_2.append(value)
            list_1.append(list_2)
        list_0.append(list_1)

    ### Pass to Dataframe
    df = pd.DataFrame()
    for list in list_0:
        columns = list[0]
        df_n = pd.DataFrame(list, columns=columns)
        df =  df.append(df_n)

    ### Create a identifier of each molecule  
    counter = 0
    list_of_mols = [None]*len(df)
    for idx, row in enumerate(df["AtomID"]):
        if row == "AtomID":
            counter +=1
        list_of_mols[idx] = etiquete + "-" + str(counter)
    df["RunID"] = list_of_mols
    
    ### Delete the unuseful rows
    df = df.dropna()
    df = df[df["AtomID"] != "AtomID"]
    df = df.drop(columns=["PLP.total"])

    ### Change the type of data of values and sort by molecules
    df = df.astype({"ChemScore_PLP.Hbond": float, "ChemScore_PLP.CHO": float,
                    "ChemScore_PLP.Metal": float, "PLP.S(hbond)": float,
                    "PLP.S(metal)": float, "PLP.S(buried)": float,
                    "PLP.S(nonpolar)": float, "PLP.S(repulsive)": float})
    df = df.sort_values(by = ["RunID"], ignore_index= True)

    ### change the value for the fisrt 3 columns
    df["ChemScore_PLP.Hbond"] = sign_change(df["ChemScore_PLP.Hbond"])
    df["ChemScore_PLP.CHO"] = sign_change(df["ChemScore_PLP.Hbond"])
    df["ChemScore_PLP.Metal"] = sign_change(df["ChemScore_PLP.Hbond"])

    return df 

In [7]:
os.getcwd()
ref_dir = os.path.join(path, "ref_folder")
act_dir = os.path.join(path, "actives")
dec_dir = os.path.join(path, "decoys")

In [8]:
ref_df = chemplp_dataframe(f"{ref_dir}/*/*_1.mol2", "ref")
ref_df.head()

Unnamed: 0,AtomID,ChemScore_PLP.Hbond,ChemScore_PLP.CHO,ChemScore_PLP.Metal,PLP.S(hbond),PLP.S(metal),PLP.S(buried),PLP.S(nonpolar),PLP.S(repulsive),RunID
0,95,0.0,0.0,0.0,0.0,0.0,-0.03,0.0,0.0,ref-1
1,1682,0.0,0.0,0.0,0.0,0.0,0.0,-0.17,0.0,ref-1
2,1677,0.0,0.0,0.0,0.0,0.0,-0.46,0.0,0.0,ref-1
3,1676,0.0,0.0,0.0,0.0,0.0,-0.45,0.0,0.0,ref-1
4,1675,0.0,0.0,0.0,0.0,0.0,-0.04,-0.85,0.0,ref-1


In [11]:
act_df = chemplp_dataframe(f"{act_dir}/*/*_1.mol2", "act")
act_df.head()

Unnamed: 0,AtomID,ChemScore_PLP.Hbond,ChemScore_PLP.CHO,ChemScore_PLP.Metal,PLP.S(hbond),PLP.S(metal),PLP.S(buried),PLP.S(nonpolar),PLP.S(repulsive),RunID
0,97,0.0,0.0,0.0,0.0,0.0,0.0,-0.11,0.0,act-1
1,1656,0.0,0.0,0.0,0.0,0.0,-0.02,0.0,0.0,act-1
2,1654,0.0,0.0,0.0,0.0,0.0,-0.02,-0.07,0.0,act-1
3,1440,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,0.0,act-1
4,1438,0.0,0.0,0.0,0.0,0.0,0.0,-0.05,0.0,act-1


In [13]:
dec_df = chemplp_dataframe(f"{dec_dir}/*/*_1.mol2", "dec")
dec_df.head()

Unnamed: 0,AtomID,ChemScore_PLP.Hbond,ChemScore_PLP.CHO,ChemScore_PLP.Metal,PLP.S(hbond),PLP.S(metal),PLP.S(buried),PLP.S(nonpolar),PLP.S(repulsive),RunID
0,110,0.0,0.0,0.0,0.0,0.0,0.0,-0.47,0.0,dec-1
1,1673,0.0,0.0,0.0,0.0,0.0,-0.06,-0.13,0.0,dec-1
2,1659,0.0,0.0,0.0,0.0,0.0,0.0,-0.21,0.0,dec-1
3,1657,0.0,0.0,0.0,0.0,0.0,-0.01,-0.12,0.0,dec-1
4,1654,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,0.0,dec-1


In [14]:
print(ref_df.shape)
print(act_df.shape)
print(dec_df.shape)

(419, 10)
(7787, 10)
(304372, 10)


In [15]:
def padif_gen(reference_df, active_df, inactive_df):    

    ### Join the dataframes and do the chemplp
    df_tot = pd.concat([reference_df, active_df, inactive_df])

    ### Do the PADIF for each molecule and organize in a dataframe
    padif = df_tot.pivot(index="RunID", columns="AtomID")
    padif.columns = padif.columns.map('{0[0]}_{0[1]}'.format)
    padif.index = padif.index.tolist()
    padif.fillna(0.0, inplace= True)
    padif = padif.loc[:,(padif.sum(axis=0) != 0)]

    ### Add column to active or inactive classification
    Act_type = []
    for value in padif.index.tolist():
        if value[:3] == "act":
            Act_type.append("Active")
        elif value[:3] == "ref":
            Act_type.append("Reference")
        else:
            Act_type.append("Decoy")
    padif["Activity"] = Act_type

    return padif

In [16]:
padif = padif_gen(ref_df, act_df, dec_df)
padif.head()

Unnamed: 0,ChemScore_PLP.Hbond_111,ChemScore_PLP.Hbond_114,ChemScore_PLP.Hbond_1217,ChemScore_PLP.Hbond_1220,ChemScore_PLP.Hbond_1229,ChemScore_PLP.Hbond_129,ChemScore_PLP.Hbond_130,ChemScore_PLP.Hbond_1429,ChemScore_PLP.Hbond_1432,ChemScore_PLP.Hbond_1433,...,PLP.S(repulsive)_797,PLP.S(repulsive)_880,PLP.S(repulsive)_884,PLP.S(repulsive)_887,PLP.S(repulsive)_895,PLP.S(repulsive)_899,PLP.S(repulsive)_913,PLP.S(repulsive)_919,PLP.S(repulsive)_920,Activity
act-1,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Active
act-10,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Active
act-11,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,-0.51,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Active
act-12,0.0,0.0,0.0,0.0,0.0,-0.85,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Active
act-13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,Active


In [19]:
padif["Activity"].value_counts()

Decoy        3773
Active         93
Reference       5
Name: Activity, dtype: int64

In [20]:
ref_padif = padif[padif["Activity"] == "Reference"]
ref_padif

Unnamed: 0,ChemScore_PLP.Hbond_111,ChemScore_PLP.Hbond_114,ChemScore_PLP.Hbond_1217,ChemScore_PLP.Hbond_1220,ChemScore_PLP.Hbond_1229,ChemScore_PLP.Hbond_129,ChemScore_PLP.Hbond_130,ChemScore_PLP.Hbond_1429,ChemScore_PLP.Hbond_1432,ChemScore_PLP.Hbond_1433,...,PLP.S(repulsive)_797,PLP.S(repulsive)_880,PLP.S(repulsive)_884,PLP.S(repulsive)_887,PLP.S(repulsive)_895,PLP.S(repulsive)_899,PLP.S(repulsive)_913,PLP.S(repulsive)_919,PLP.S(repulsive)_920,Activity
ref-1,-0.16,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Reference
ref-2,0.0,-1.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Reference
ref-3,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Reference
ref-4,0.0,0.0,0.0,0.0,0.0,-0.81,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Reference
ref-5,-0.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Reference
