In [1]:
import pandas as pd
from tqdm import tqdm
import os
import requests
import zipfile
from DeepPurpose import oneliner
from DeepPurpose.dataset import *

## Ligand conversion

In [2]:
DATA_FOLDER = "./molecules/"
url = 'https://storage.googleapis.com/indaba-challenge/molecules.zip'

# Download the ligand archive. Fail loudly on an HTTP error instead of
# silently saving an error page as molecules.zip (which would only surface
# later as a confusing BadZipFile error).
r = requests.get(url, allow_redirects=True)
r.raise_for_status()

# A context manager guarantees the archive is flushed and closed before it is
# opened again for extraction.
with open('molecules.zip', 'wb') as archive:
    archive.write(r.content)

# Extract all the files of the archive into DATA_FOLDER.
with zipfile.ZipFile('molecules.zip', 'r') as zip_ref:
    zip_ref.extractall(DATA_FOLDER)

In [3]:
# Suffix used to select entries of DATA_FOLDER; every match is mapped to the
# .sdf file with the same stem when the ligands are parsed.
extension = ".params"

In [4]:
import os
lines = None
smiles = 'SMILES'          # tag searched for; the value is on the following line
DATA_FOLDER = "molecules/"
data = []                  # (name, smiles, sdf filename) for every parsed ligand
list_ligand_issues = []    # paths of .sdf files in which no SMILES tag was found
start = -20                # only the last 20 lines of each .sdf file are scanned
name = None
temp_smile = None
for filename in tqdm(os.listdir(DATA_FOLDER)):
    if not filename.endswith(extension):
        continue

    # Map the matched entry to the .sdf file with the same stem; that is the
    # file that is actually read.
    stem = ".".join(filename.split('.')[:-1])
    filename = stem + '.sdf'

    # Reset per-file state up front so a file without a NAME/SMILES tag can
    # never see a value left over from a previous file (or an undefined name
    # on the very first file).
    name = None
    temp_smile = None

    try:
        with open('{}{}'.format(DATA_FOLDER, filename)) as sdf_file:
            lines = sdf_file.read().split('\n')
    except (OSError, UnicodeDecodeError):
        # Best effort: skip ligands whose .sdf file is missing or unreadable.
        continue

    # Absolute index of the first scanned line. Clamped at 0 so that files
    # shorter than abs(start) lines do not yield negative (wrap-around) indices.
    first_scanned = max(len(lines) + start, 0)
    for num, line in enumerate(lines[start:]):
        value_index = first_scanned + num + 1
        if value_index >= len(lines):
            # A tag on the very last line has no value line after it.
            break
        if 'NAME' in line:
            name = lines[value_index]
            continue
        if smiles in line:
            temp_smile = lines[value_index]
            break

    if name is None:
        name = stem

    # Final name: whitespace in the parsed name collapsed to "_", then the stem.
    name = "_".join(name.split()) + "_" + stem
    if temp_smile is not None:
        data.append((name, temp_smile, filename))
    else:
        list_ligand_issues.append(DATA_FOLDER + filename)

100%|██████████| 12095/12095 [00:00<00:00, 34953.04it/s]


In [5]:
# Turn the collected (name, smiles, filename) tuples into a table.
# Note: this rebinds `data` from a list of tuples to a DataFrame.
data = pd.DataFrame(data=data, columns=['Name', 'Smiles', 'Filename'])

In [6]:
# Preview the first rows of the ligand table.
data.head()

Unnamed: 0,Name,Smiles,Filename
0,medroxalol_MPQWSYJGFLADEW-PXAZEXFGSA-N,CC(CCc1ccc2c(c1)OCO2)NCC(O)c1ccc(O)c(C(N)=O)c1,MPQWSYJGFLADEW-PXAZEXFGSA-N.sdf
1,leucine_ROHFNLRQFUQHCH-YFKPBYRVSA-N,CC(C)C[C@H](N)C(=O)O,ROHFNLRQFUQHCH-YFKPBYRVSA-N.sdf
2,nortriptyline_PHVGLTMQBUFIQQ-UHFFFAOYSA-N,CNCCC=C1c2ccccc2CCc2ccccc21,PHVGLTMQBUFIQQ-UHFFFAOYSA-N.sdf
3,thiamazole_PMRYVIKBURPHAH-UHFFFAOYSA-N,Cn1cc[nH]c1=S,PMRYVIKBURPHAH-UHFFFAOYSA-N.sdf
4,salicylamide_SKZKKFZAGNVIMN-UHFFFAOYSA-N,NC(=O)c1ccccc1O,SKZKKFZAGNVIMN-UHFFFAOYSA-N.sdf


In [7]:
# Persist the ligand table (without the index column); it is read back from
# 'Ligand_smiles.csv' in the sampling section.
data.to_csv('Ligand_smiles.csv', index=False)

In [8]:
# Number of ligands for which a SMILES line was collected.
len(data)

4020

# Sample Drugs and Targets

In [10]:
# Ligand table written by the conversion section above.
ligands = pd.read_csv('Ligand_smiles.csv')
# Only the first 10 rows of the target table are kept for this run.
targets = pd.read_csv('Leishmania_Target_Reduce_1.csv').iloc[:10]

In [11]:
# Preview the ligand table as read back from disk.
ligands.head()

Unnamed: 0,Name,Smiles,Filename
0,medroxalol_MPQWSYJGFLADEW-PXAZEXFGSA-N,CC(CCc1ccc2c(c1)OCO2)NCC(O)c1ccc(O)c(C(N)=O)c1,MPQWSYJGFLADEW-PXAZEXFGSA-N.sdf
1,leucine_ROHFNLRQFUQHCH-YFKPBYRVSA-N,CC(C)C[C@H](N)C(=O)O,ROHFNLRQFUQHCH-YFKPBYRVSA-N.sdf
2,nortriptyline_PHVGLTMQBUFIQQ-UHFFFAOYSA-N,CNCCC=C1c2ccccc2CCc2ccccc21,PHVGLTMQBUFIQQ-UHFFFAOYSA-N.sdf
3,thiamazole_PMRYVIKBURPHAH-UHFFFAOYSA-N,Cn1cc[nH]c1=S,PMRYVIKBURPHAH-UHFFFAOYSA-N.sdf
4,salicylamide_SKZKKFZAGNVIMN-UHFFFAOYSA-N,NC(=O)c1ccccc1O,SKZKKFZAGNVIMN-UHFFFAOYSA-N.sdf


In [12]:
# Preview the selected targets.
targets.head()

Unnamed: 0,Target_Name,Sequence
0,A0A640KCC9.0.apo,YPIINFTTAGATVQSYTNFIRAVRGRLTTGADVRHEIPVLPNRVGL...
1,E9BAY0.0.apo,MSRLMPHYSKGKTAFLCVDLQEAFSKRIENFANCVFVANRLARLHE...
2,O44010.0.apo,QYPIINFTTAGATVQSYTNFIRAVRGRLTTGADVRHEIPVLPNRVG...
3,Q4Q4I0.0.apo,SGRENLYFQGMTETFAFQAEINQLMSLIINTFYSNKEIFLRELISN...
4,Q25318.0.apo,VPDAVDWREKGAVTPVKDQGACGSCWAFSAVGNIEGQWYLAGHELV...


# Drug Selection

In [15]:
def formatLigand(ligands, filename):
    """Write one "<Name> <Smiles>" line per row of ``ligands`` to ``filename``.

    ligands: table exposing ``iterrows()`` with 'Name' and 'Smiles' columns.
    filename: path of the text file to create (an existing file is overwritten).
    """
    with open(filename, 'w') as out:
        out.writelines(
            '{} {}\n'.format(row['Name'], row['Smiles'])
            for _, row in ligands.iterrows()
        )

def formatTarget(target, filename):
    """Write one "<target name> <Sequence>" line per row of ``target`` to ``filename``.

    Only the first whitespace-separated token of 'Target_Name' is written.

    target: table exposing ``iterrows()`` with 'Target_Name' and 'Sequence' columns.
    filename: path of the text file to create (an existing file is overwritten).
    """
    with open(filename, 'w') as out:
        for _, row in target.iterrows():
            shortName = row['Target_Name'].split()[0]
            out.write('{} {}\n'.format(shortName, row['Sequence']))

def BatchDrugRepurposing(ligands, target, step, filenameLigands='repurpose.txt', filenameTarget='target.txt'):
    """Run one repurposing batch and store its result under a per-target, per-step name.

    ligands: ligand table for this batch ('Name' and 'Smiles' columns are written out).
    target: target table; its first row supplies 'Target_Name' for the result file name.
    step: batch index, appended to the target name in the result file name.
    filenameLigands: scratch text file the ligands are written to before being read back.
    filenameTarget: scratch text file the target is written to before being read back.
    """
    formatLigand(ligands, filenameLigands)
    formatTarget(target, filenameTarget)
    targetName = target.iloc[0]['Target_Name']
    # Both reader results are unpacked positionally into oneliner.repurpose.
    oneliner.repurpose(*read_file_target_sequence(filenameTarget), *read_file_repurposing_library(filenameLigands))
    resultsDir = r'./save_folder/results_aggregation/'
    # The destination keeps the "<target><step>.txt" pattern so result files from
    # earlier runs stay consistent. os.replace overwrites an existing destination
    # on every platform, so re-running a batch does not fail where os.rename
    # would (e.g. on Windows).
    os.replace(resultsDir + 'repurposing.txt', resultsDir + targetName + str(step) + '.txt')
    
def DrurgRepurposing(ligands, target, batchSize = 100):
    """Run BatchDrugRepurposing over ``ligands`` in consecutive slices of ``batchSize``.

    Full batches are processed inside a tqdm progress bar; a final shorter
    batch, if any, is processed afterwards with the next batch index.

    ligands: sliceable collection of ligands (sliced as ``ligands[a:b]``).
    target: passed through unchanged to BatchDrugRepurposing.
    batchSize: number of ligands per batch.
    """
    total = len(ligands)
    fullBatches, leftover = divmod(total, batchSize)

    for batchIndex in tqdm(range(fullBatches), position=0, leave=True):
        lo = batchIndex * batchSize
        hi = lo + batchSize
        BatchDrugRepurposing(ligands[lo:hi], target, batchIndex)

    if leftover != 0:
        # Trailing partial batch: everything after the last full batch.
        BatchDrugRepurposing(ligands[fullBatches * batchSize:total], target, fullBatches)

In [16]:
# Plain list of target names, used to drive the per-target loops below.
listTargets = targets['Target_Name'].tolist()

In [None]:
%%time
# Screen the full ligand table against each target, one target at a time.
for targetName in tqdm(listTargets, position=0, leave=True):
    # Table restricted to the rows of the current target.
    target = targets.loc[targets['Target_Name'] == targetName]
    DrurgRepurposing(ligands, target)

  0%|          | 0/40 [00:00<?, ?it/s]

Save path not found or given and set to default: './save_folder/'. 
Loading customized repurposing dataset...
Beginning Downloading Pretrained Model...
Note: if you have already download the pretrained model before, please stop the program and set the input parameter 'pretrained_dir' to the path
Downloading finished... Beginning to extract zip file...
Pretrained Models Successfully Downloaded...
Using pretrained model and making predictions...
repurposing...
in total: 100 drug-target pairs
encoding drug...
unique drugs: 100
drug encoding finished...
encoding protein...
unique target sequence: 1
protein encoding finished...
Done.
predicting...
---------------
Predictions from model 1 with drug encoding MPNN and target encoding CNN are done...
-------------
repurposing...
in total: 100 drug-target pairs
encoding drug...
unique drugs: 100
drug encoding finished...
encoding protein...
unique target sequence: 1
protein encoding finished...
Done.
predicting...
---------------
Predictions fro

  2%|▎         | 1/40 [00:29<19:01, 29.28s/it]

models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+--------------------------------------------------------+------------------+---------------+
| Rank |                       Drug Name                        |   Target Name    | Binding Score |
+------+--------------------------------------------------------+------------------+---------------+
|  1   | beclometasone_dipropionate_KUVIULQEHSCUHY-XYWKZLDCSA-N | A0A640KCC9.0.apo |     25.87     |
|  2   |         naltrexone_DQCKKXVULJGBQN-XFWGSAIBSA-N         | A0A640KCC9.0.apo |     167.18    |
|  3   |         hycanthone_MFZWMTSUNYWVBU-UHFFFAOYSA-N         | A0A640KCC9.0.apo |     193.66    |
|  4   |       benzomethamine_XCEPXSCPQIRLCL-UHFFFAOYSA-N       | A0A640KCC9.0.apo |     199.70    |
|  5   |         pinaverium_DDHUTBKXLWCZCO-CEMLEFRQSA-N         | A0A640KCC9.0.apo |     212.54    |
|  6   |         cinalukast_BZMKNPGKXJAIDV-VAWYXSNFSA-N         | A0A640KCC9.0

  5%|▌         | 2/40 [00:58<18:33, 29.30s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+-------------------------------------------------------+------------------+---------------+
| Rank |                       Drug Name                       |   Target Name    | Binding Score |
+------+-------------------------------------------------------+------------------+---------------+
|  1   |  dexamethasone_acefurate_DDIWRLSEGOVQQD-BJRLRHTOSA-N  | A0A640KCC9.0.apo |     13.54     |
|  2   |        deflazacort_FBHSPRKOSMHSIF-GRMWVWQJSA-N        | A0A640KCC9.0.apo |     16.53     |
|  3   |        mazipredone_CZBOZZDZNVIXFC-VRRJBYJJSA-N        | A0A640KCC9.0.apo |     23.43     |
|  4   |      ingenol_mebutate_VDJHFHXMUKFKET-WDUFCVPESA-N     | A0A640KCC9.0.apo |     29.08     |
|  5   |        lynestrenol_YNVGQYHLRCDXFQ-XGXHKTLJSA-N        | A0

  8%|▊         | 3/40 [01:27<17:56, 29.10s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+-----------------------------------------------------------+------------------+---------------+
| Rank |                         Drug Name                         |   Target Name    | Binding Score |
+------+-----------------------------------------------------------+------------------+---------------+
|  1   |     betamethasone_benzoate_SOQJPQZCPBDOMF-YCUXZELOSA-N    | A0A640KCC9.0.apo |     12.40     |
|  2   |    triamcinolone_acetonide_YNDXUCZADRHECN-JNQJZLCISA-N    | A0A640KCC9.0.apo |     13.36     |
|  3   |     loteprednol_etabonate_DMKSVUSAATWOCU-HROMYWEYSA-N     | A0A640KCC9.0.apo |     48.15     |
|  4   |          drospirenone_METQSPRSQINEEU-HXCATZOESA-N         | A0A640KCC9.0.apo |     96.09     |
|  5   |          pipenzolate_WPUKUEMZZ

 10%|█         | 4/40 [01:55<17:14, 28.73s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+---------------------------------------------------+------------------+---------------+
| Rank |                     Drug Name                     |   Target Name    | Binding Score |
+------+---------------------------------------------------+------------------+---------------+
|  1   |     dexamethasone_UREBDLICKHMUKA-CXSFZGCWSA-N     | A0A640KCC9.0.apo |      6.14     |
|  2   |       fytic_acid_IMQLKJBTEOYOSI-GPIVLXJGSA-N      | A0A640KCC9.0.apo |     77.68     |
|  3   |      testosterone_MUMGGOZAMZWBJJ-DYKIIFRCSA-N     | A0A640KCC9.0.apo |     84.88     |
|  4   |       oxitropium_NVOYVOBDTVTBDX-ZVDAJOARSA-N      | A0A640KCC9.0.apo |     94.81     |
|  5   |     fluocortolone_GAKMQHDJQHZUTJ-ULHLPKEOSA-N     | A0A640KCC9.0.apo |     119.96    |

 12%|█▎        | 5/40 [02:43<20:15, 34.73s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+----------------------------------------------------------+------------------+---------------+
| Rank |                        Drug Name                         |   Target Name    | Binding Score |
+------+----------------------------------------------------------+------------------+---------------+
|  1   |         halometasone_GGXMRPUKBWXVHE-MIHLVHIWSA-N         | A0A640KCC9.0.apo |      9.42     |
|  2   |        norvinisterone_VOJYZDFYEHKHAP-XGXHKTLJSA-N        | A0A640KCC9.0.apo |     26.56     |
|  3   |         norgesterone_YPVUHOBTCWJYNQ-SLHNCBLASA-N         | A0A640KCC9.0.apo |     31.42     |
|  4   |          prednival_BOFKYYWJAOZDPB-FZNHGJLXSA-N           | A0A640KCC9.0.apo |     84.03     |
|  5   | methylprednisolone_aceponate_DALKLAYL

 15%|█▌        | 6/40 [03:20<20:04, 35.44s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+-------------------------------------------------------+------------------+---------------+
| Rank |                       Drug Name                       |   Target Name    | Binding Score |
+------+-------------------------------------------------------+------------------+---------------+
|  1   |     mometasone_furoate_WOFMFGQZHJDGCX-ZULDAHANSA-N    | A0A640KCC9.0.apo |      3.88     |
|  2   |         nalbuphine_NETZHAKZCGBWSS-CEDHKZHLSA-N        | A0A640KCC9.0.apo |     33.93     |
|  3   |        formebolone_AMVODTGMYSRMNP-GNIMZFFESA-N        | A0A640KCC9.0.apo |     45.57     |
|  4   |      ethinylestradiol_BFPYWIDHMRZLRN-SLHNCBLASA-N     | A0A640KCC9.0.apo |     72.39     |
|  5   |       androstenediol_QADHLRWLCPCEKT-LOVVWNRFSA-N      | A0

 18%|█▊        | 7/40 [03:55<19:16, 35.04s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+--------------------------------------------------------+------------------+---------------+
| Rank |                       Drug Name                        |   Target Name    | Binding Score |
+------+--------------------------------------------------------+------------------+---------------+
|  1   |          metopon_NPZXCTIHHUUEEJ-CMKMFDCUSA-N           | A0A640KCC9.0.apo |     63.85     |
|  2   |        aclarubicin_USZYSDMBJDPRIF-SVEJIMAYSA-N         | A0A640KCC9.0.apo |     91.42     |
|  3   | methylprednisolone_acetate_PLBHSZGDDKCEHR-LFYFAGGJSA-N | A0A640KCC9.0.apo |     113.53    |
|  4   |        azithromycin_MQTOSJVFKKJCRP-BICOPXKESA-N        | A0A640KCC9.0.apo |     148.27    |
|  5   |        gitoformate_DOMHWKQEPDYUQX-QDMRMOHQSA-N     

 20%|██        | 8/40 [04:26<18:02, 33.83s/it]

Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+-----------------------------------------------------+------------------+---------------+
| Rank |                      Drug Name                      |   Target Name    | Binding Score |
+------+-----------------------------------------------------+------------------+---------------+
|  1   |       mestanolone_WYZDXEKUWRCKOB-YDSAWKJFSA-N       | A0A640KCC9.0.apo |     40.52     |
|  2   |        sirolimus_QFJCIRLUMZQUOT-BLBZBZKNSA-N        | A0A640KCC9.0.apo |     51.60     |
|  3   |       testolactone_BPEWUONYVDABNZ-DZBHQSCQSA-N      | A0A640KCC9.0.apo |     73.62     |
|  4   |        enzacamene_HEOCBCNFKCOKBX-RELGSGGGSA-N       | A0A640KCC9.0.apo |     97.10     |
|  5   |    dihydroergocornine_SEALOBQTUQIVGU-QNIJNHAOSA-N   | A0A640KCC9.0.apo |     108.69    |

 22%|██▎       | 9/40 [05:02<17:53, 34.63s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+---------------------------------------------------------+------------------+---------------+
| Rank |                        Drug Name                        |   Target Name    | Binding Score |
+------+---------------------------------------------------------+------------------+---------------+
|  1   |      dihydrotachysterol_ILYCWAKSDCYMBB-OPCMSESCSA-N     | A0A640KCC9.0.apo |     74.25     |
|  2   |        hydromorphone_WVLOADHCBXTIJK-YNHQPCIGSA-N        | A0A640KCC9.0.apo |     152.80    |
|  3   |         calcifediol_JWUBBDSIWDLEOM-DTOXIADCSA-N         | A0A640KCC9.0.apo |     271.99    |
|  4   |   hydroxyestrone_diacetate_QZQSENRWYLQIPC-JPVHLGFFSA-N  | A0A640KCC9.0.apo |     279.69    |
|  5   |          romidepsin_OHRURASPPZQGQM-HMLXJHLFS

 25%|██▌       | 10/40 [05:33<16:47, 33.59s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+-----------------------------------------------------------------------+------------------+---------------+
| Rank |                               Drug Name                               |   Target Name    | Binding Score |
+------+-----------------------------------------------------------------------+------------------+---------------+
|  1   |               norethisterone_VIKNJXKGJWUCNN-XGXHKTLJSA-N              | A0A640KCC9.0.apo |     27.41     |
|  2   |                artemisinin_BLUAFEHZUWYNDE-NNWCWBAJSA-N                | A0A640KCC9.0.apo |     27.54     |
|  3   |               androisoxazole_NSYTUNFHWYMMHU-IYRCEVNGSA-N              | A0A640KCC9.0.apo |     34.71     |
|  4   |                elcometrine_CKFBRGLGTWAVLG-GOMYTPFNSA-N        

 28%|██▊       | 11/40 [06:08<16:25, 33.98s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+-----------------------------------------------------+------------------+---------------+
| Rank |                      Drug Name                      |   Target Name    | Binding Score |
+------+-----------------------------------------------------+------------------+---------------+
|  1   |       calusterone_IVFYLRMMHVYGJH-PVPPCFLZSA-N       | A0A640KCC9.0.apo |     17.93     |
|  2   | dexamethasone_phosphate_VQODGRNSFPNSQE-CXSFZGCWSA-N | A0A640KCC9.0.apo |     32.42     |
|  3   |       halcinonide_MUQNGPZZQDCDFT-JNQJZLCISA-N       | A0A640KCC9.0.apo |     43.59     |
|  4   |        lorajmine_LAHDERDHXJFFJU-ZWNKPRIXSA-N        | A0A640KCC9.0.apo |     205.52    |
|  5   | erythromycin_propionate_TYQXKHPOXXXCTP-CSLYCKPJSA-N | A0A640KCC9.0.apo |

 30%|███       | 12/40 [06:42<15:46, 33.80s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+----------------------------------------------------------+------------------+---------------+
| Rank |                        Drug Name                         |   Target Name    | Binding Score |
+------+----------------------------------------------------------+------------------+---------------+
|  1   |        betamethasone_UREBDLICKHMUKA-DVTGEIKXSA-N         | A0A640KCC9.0.apo |      5.93     |
|  2   |       fluprednisolone_MYYIMZRZXIQBGI-HVIRSNARSA-N        | A0A640KCC9.0.apo |     28.55     |
|  3   |   etiprednol_dicloacetate_QAIOVDNCIZSSSF-RFAJLIJZSA-N    | A0A640KCC9.0.apo |     29.54     |
|  4   |        ethylestrenol_AOXRBFRFYPMWLR-XGXHKTLJSA-N         | A0A640KCC9.0.apo |     36.32     |
|  5   |    norethindrone_acetate_IMONTRJLAWHY

 32%|███▎      | 13/40 [07:17<15:23, 34.21s/it]

Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+-------------------------------------------------------+------------------+---------------+
| Rank |                       Drug Name                       |   Target Name    | Binding Score |
+------+-------------------------------------------------------+------------------+---------------+
|  1   |   betamethasone_valerate_SNHRLVCMMWUAJD-SUYDQAKGSA-N  | A0A640KCC9.0.apo |     15.02     |
|  2   |        griseofulvin_DDUHZTYCFQRHIY-RBHXEPJQSA-N       | A0A640KCC9.0.apo |     51.85     |
|  3   |  fluorometholone_acetate_YRFXGQHBPBMFHW-SBTZIJSASA-N  | A0A640KCC9.0.apo |     80.98     |
|  4   |         rubitecan_VHXNKPBCCMUMSW-FQEVSTJZSA-N         | A0A640KCC9.0.apo |     190.30    |
|  5   |          estriol_PROQIPRRNZUXQM-ZXXIGWHRSA-N          | A0A640KCC9.0.apo |

 35%|███▌      | 14/40 [07:48<14:30, 33.48s/it]

---------------
Predictions from model 5 with drug encoding Daylight and target encoding AAC are done...
-------------
models prediction finished...
aggregating results...
---------------
Drug Repurposing Result for A0A640KCC9.0.apo
+------+----------------------------------------------------------+------------------+---------------+
| Rank |                        Drug Name                         |   Target Name    | Binding Score |
+------+----------------------------------------------------------+------------------+---------------+
|  1   |        triamcinolone_GFNANZIMVAIWHM-OBYCQNJPSA-N         | A0A640KCC9.0.apo |      8.07     |
|  2   |    fluocinolone_acetonide_FEBLZLNTKCEFIT-VSXGLTOVSA-N    | A0A640KCC9.0.apo |      9.88     |
|  3   |          moxestrol_MTMZZIPTQITGCY-OLGWUGKESA-N           | A0A640KCC9.0.apo |     20.61     |
|  4   |    isoflupredone_acetate_ZOCUOMKMBMEYQV-GSLJADNHSA-N     | A0A640KCC9.0.apo |     37.88     |
|  5   |         oxymesterone_RXXBBHGCAXVBES-X

## Data Merging

In [31]:
# Directory into which BatchDrugRepurposing moves the per-batch result files.
rootPath = './save_folder/results_aggregation/'

In [33]:
def readFile(rootPath, filename):
    """Parse the '|'-separated result table stored at ``rootPath + filename``.

    The first three lines and the last line of the file are skipped
    (presumably the table header and closing border - confirm against the
    files actually produced). All spaces are removed from each remaining line
    before it is split on '|'.

    Returns a list of ``[field 2, field 3, field 4, None]`` lists, one per
    parsed line; all fields are returned as strings.
    """
    with open(rootPath+filename) as handle:
        tableRows = handle.readlines()[3:-1]

    records = []
    for row in tableRows:
        cells = row.replace(' ', '').split('|')
        records.append([cells[2], cells[3], cells[4], None])
    return records

def mergeFile(targetName, rootPath, original=True):
    """Merge every result file of one target into a single table sorted by score.

    Every file in ``rootPath`` whose name contains ``targetName`` is parsed with
    readFile. The merged table is written to "Ligand_<targetName>.csv" in the
    current working directory and also returned.

    targetName: substring identifying the result files of the target.
    rootPath: directory holding the result files (must end with a path separator).
    original: unused; kept for backward compatibility with existing callers.

    Returns a DataFrame with columns LigandName, TargetName, BindingScore and
    InterfE, sorted ascending by BindingScore.
    """
    rows = []
    for filename in tqdm(os.listdir(rootPath), position=0, leave=True):
        if targetName in filename:
            rows.extend(readFile(rootPath, filename))
    merged = pd.DataFrame(data=rows, columns=['LigandName', 'TargetName', 'BindingScore', 'InterfE'])
    # readFile yields the scores as strings; sorting those is lexicographic
    # ('100.5' < '25.75'). Convert to numbers first so the order is numeric.
    # Values that cannot be parsed become NaN and therefore sort last.
    merged['BindingScore'] = pd.to_numeric(merged['BindingScore'], errors='coerce')
    merged = merged.sort_values(by = ['BindingScore'])
    merged.to_csv("Ligand_{}.csv".format(targetName), index=False)
    return merged

In [34]:
# Build one merged "Ligand_<target>.csv" per target name.
for target in tqdm(listTargets, position=0, leave=True):
    mergeFile(target, rootPath)

100%|██████████| 411/411 [00:00<00:00, 9043.29it/s]
100%|██████████| 411/411 [00:00<00:00, 9449.69it/s]
100%|██████████| 411/411 [00:00<00:00, 10538.71it/s]
100%|██████████| 411/411 [00:00<00:00, 9281.23it/s]
100%|██████████| 411/411 [00:00<00:00, 8195.08it/s]
100%|██████████| 411/411 [00:00<00:00, 9327.74it/s]
100%|██████████| 411/411 [00:00<00:00, 8467.26it/s]
100%|██████████| 411/411 [00:00<00:00, 8388.20it/s]
100%|██████████| 411/411 [00:00<00:00, 10015.97it/s]
100%|██████████| 411/411 [00:00<00:00, 11126.98it/s]
100%|██████████| 411/411 [00:00<00:00, 1805087.90it/s]
100%|██████████| 411/411 [00:00<00:00, 1773517.43it/s]
100%|██████████| 411/411 [00:00<00:00, 1396968.35it/s]
100%|██████████| 13/13 [00:00<00:00, 18.43it/s]


In [48]:
# Re-sort the merged CSV files of the working directory. After read_csv the
# BindingScore column is parsed by pandas, so this sort is on the parsed values.
for file in os.listdir('.'):
    if not file.endswith(('lig.csv', 'apo.csv')):
        continue
    data = pd.read_csv(file).sort_values(by=['BindingScore'])
    data.to_csv(file, index=False)

In [49]:
# Load the merged result table of one target for inspection.
result = pd.read_csv('Ligand_D0VWU6.0.apo.csv')

In [50]:
# First rows of the table (lowest BindingScore first if the file was re-sorted above).
result.head()

Unnamed: 0,LigandName,TargetName,BindingScore,InterfE
0,mometasone_furoate_WOFMFGQZHJDGCX-ZULDAHANSA-N,D0VWU6.0.apo,4.29,
1,ulobetasol_propionate_BDSYKGHYMJNPAB-LICBFIPMSA-N,D0VWU6.0.apo,7.76,
2,clobetasone_butyrate_FBRAWBYQGRLCEK-AVVSTMBFSA-N,D0VWU6.0.apo,8.15,
3,clobetasol_propionate_CBGUOGMQLZIXBE-XGQKBEPLSA-N,D0VWU6.0.apo,8.16,
4,flumetasone_WXURHACBFYSXBI-GQKYHHCASA-N,D0VWU6.0.apo,10.53,


In [39]:
# Last row of the table (highest BindingScore if the file was re-sorted above).
result[-1:]

Unnamed: 0,LigandName,TargetName,BindingScore,InterfE
1135,glucose_WQZGKKKJIJFFOK-VFUOTHLCSA-N,D0VWU6.0.apo,1829498.66,
