In [15]:
import pandas as pd
import collections
from collections import Counter

### Input DrugBank files

In [16]:

# Drug -> target links; both columns are counted below as a quick completeness check.
data = pd.read_csv(
    "process_drug_uniprot_link_update.txt",
    sep="\t",
    index_col=False,
)
print(data.count())

# Drug name / type table, reduced to the first row seen for each DrugBank id.
data1 = pd.read_csv("processed_drugs_name_uniq_March2020.txt", sep="\t")
data2 = data1.drop_duplicates(subset="Drugbank_ids", keep="first")
data2.head()

Drugbank_ids    20744
Uniprot_ids     20744
dtype: int64


Unnamed: 0,Drugbank_ids,Drugs_name,Drug_type
0,DB00001,Lepirudin,BiotechDrug
1,DB00002,Cetuximab,BiotechDrug
2,DB00004,Denileukin diftitox,BiotechDrug
3,DB00005,Etanercept,BiotechDrug
4,DB00006,Bivalirudin,SmallMoleculeDrug


### Group the drug targets corresponding to each individual drug

In [17]:

# For every drug, concatenate its target accessions into one '#'-separated string.
grouped = (
    data.groupby("Drugbank_ids")["Uniprot_ids"]
    .apply("#".join)
    .to_frame("Uniprot_ids")
    .reset_index()
)
grouped.head()

Unnamed: 0,Drugbank_ids,Uniprot_ids
0,DB00001,P00734
1,DB00002,P00533#O75015#P00736#P02745#P02746#P02747#P086...
2,DB00004,P01589#P14784#P31785
3,DB00005,P01375#P20333#P12314#P08637#P12318#P31994#P319...
4,DB00006,P00734


### Group the drugs corresponding to each individual drug target

In [19]:
# For every target, concatenate the ids of the drugs linked to it into one '#'-separated string.
grouped1 = (
    data.groupby("Uniprot_ids")["Drugbank_ids"]
    .apply("#".join)
    .to_frame("Drugbank_ids")
    .reset_index()
)
grouped1.tail()

Unnamed: 0,Uniprot_ids,Drugbank_ids
4838,Q9ZIV5,DB02153#DB02175#DB02649
4839,Q9ZK53,DB03247
4840,Q9ZLT0,DB08698
4841,Q9ZNA2,DB03366
4842,T1RTG8,DB11638


### Map each drug to the remaining drugs on the basis of common drug targets

In [21]:
# Attach to every drug-target row the '#'-joined list of all drugs linked to that target.
# Both inputs carry a 'Drugbank_ids' column, so pandas suffixes them
# as 'Drugbank_ids_x' (left, the row's own drug) and 'Drugbank_ids_y' (right, the joined list).
grouped_updated = data.merge(grouped1, on="Uniprot_ids", how="inner")
grouped_updated.tail()


Unnamed: 0,Drugbank_ids_x,Uniprot_ids,Drugbank_ids_y
20739,DB15493,Q8IXJ6,DB15493
20740,DB15495,P35232,DB15495#DB15496
20741,DB15496,P35232,DB15495#DB15496
20742,DB15570,P51679,DB15570
20743,DB15593,P11532,DB15593


### Refinement of each drug with repurposed drugs from the whole DrugBank

In [23]:
# Per drug, chain together the '#'-joined drug lists of all of its targets.
# A drug id therefore appears once for every target row it was joined through.
grouped_update = (
    grouped_updated.groupby("Drugbank_ids_x")["Drugbank_ids_y"]
    .apply("#".join)
    .to_frame("Repurposed_drugs_ids")
    .reset_index()
)

print(type(grouped_update))

print(grouped_update.head())

<class 'pandas.core.frame.DataFrame'>
  Drugbank_ids_x                               Repurposed_drugs_ids
0        DB00001  DB00001#DB00006#DB00055#DB00100#DB00170#DB0027...
1        DB00002  DB00002#DB00072#DB00281#DB00317#DB00530#DB0125...
2        DB00004  DB00004#DB00041#DB00074#DB00111#DB00004#DB0004...
3        DB00005  DB00002#DB00005#DB00028#DB00054#DB00056#DB0007...
4        DB00006  DB00001#DB00006#DB00055#DB00100#DB00170#DB0027...


### Determine the frequency of each repurposed drug (i.e. checking the number of targets)

In [24]:

rep_drug = grouped_update["Repurposed_drugs_ids"]
print(type(rep_drug))

# Break each '#'-joined string back into a list of DrugBank ids.
rep_split = rep_drug.str.split("#")
print(rep_split.head())
print(type(rep_split))
rep_split1 = rep_split.tolist()

# One entry per drug: a list of (drug id, occurrence count) pairs, most frequent first.
rep_a = [Counter(ids).most_common() for ids in rep_split]
print(type(rep_a))

    

<class 'pandas.core.series.Series'>
0    [DB00001, DB00006, DB00055, DB00100, DB00170, ...
1    [DB00002, DB00072, DB00281, DB00317, DB00530, ...
2    [DB00004, DB00041, DB00074, DB00111, DB00004, ...
3    [DB00002, DB00005, DB00028, DB00054, DB00056, ...
4    [DB00001, DB00006, DB00055, DB00100, DB00170, ...
Name: Repurposed_drugs_ids, dtype: object
<class 'pandas.core.series.Series'>
<class 'list'>


### Refinement of all DrugBank files from our pipeline

In [25]:
# Spread each drug's (id, count) pairs across numbered columns;
# drugs with fewer pairs leave the trailing columns empty.
FT = pd.DataFrame(rep_a)

# Put the count columns next to the drug id / joined-id columns (rows aligned on index).
result = pd.concat([grouped_update, FT], axis=1, sort=False)
result.head()


Unnamed: 0,Drugbank_ids_x,Repurposed_drugs_ids,0,1,2,3,4,5,6,7,...,617,618,619,620,621,622,623,624,625,626
0,DB00001,DB00001#DB00006#DB00055#DB00100#DB00170#DB0027...,"(DB00001, 1)","(DB00006, 1)","(DB00055, 1)","(DB00100, 1)","(DB00170, 1)","(DB00278, 1)","(DB01123, 1)","(DB01593, 1)",...,,,,,,,,,,
1,DB00002,DB00002#DB00072#DB00281#DB00317#DB00530#DB0125...,"(DB00002, 12)","(DB00072, 12)","(DB00005, 11)","(DB00054, 11)","(DB00056, 11)","(DB00074, 11)","(DB00075, 11)","(DB00078, 11)",...,,,,,,,,,,
2,DB00004,DB00004#DB00041#DB00074#DB00111#DB00004#DB0004...,"(DB00004, 3)","(DB00041, 3)","(DB00074, 2)","(DB00111, 2)","(DB05943, 1)",,,,...,,,,,,,,,,
3,DB00005,DB00002#DB00005#DB00028#DB00054#DB00056#DB0007...,"(DB00005, 14)","(DB00002, 11)","(DB00054, 11)","(DB00056, 11)","(DB00072, 11)","(DB00074, 11)","(DB00075, 11)","(DB00078, 11)",...,,,,,,,,,,
4,DB00006,DB00001#DB00006#DB00055#DB00100#DB00170#DB0027...,"(DB00001, 1)","(DB00006, 1)","(DB00055, 1)","(DB00100, 1)","(DB00170, 1)","(DB00278, 1)","(DB01123, 1)","(DB01593, 1)",...,,,,,,,,,,


### Check the repurposed drugs with experimentally validated individual viruses

In [26]:


# Load pos.txt, keep the first row per DrugBank id,
# and discard rows that have no UniProt id.
input_expt = (
    pd.read_csv("Done_6639/Lassa/updated/pos.txt", sep="\t", index_col=False)
    .drop_duplicates(subset="DrugBank_ids", keep="first")
    .dropna(subset=["Uniprot_id"])
)
input_expt1 = input_expt[["DrugBank_ids", "Uniprot_id"]]
print(input_expt1.count())

# Each Uniprot_id cell holds a '; '-separated string; split it into a list.
rep_split = input_expt1["Uniprot_id"].str.split("; ")
print(rep_split.count())
rep_split.head()


DrugBank_ids    6
Uniprot_id      6
dtype: int64
6


0                                     [P12758, P00441]
1                                     [P12268, P20839]
2     [P36888, P10721, P16234, P08581, P07949, Q06609]
3    [P14416, P21728, P21918, P28223, P35462, P2191...
5    [Q13936, Q01668, O60840, Q13698, Q02641, Q0828...
Name: Uniprot_id, dtype: object

### Finalizing the results

In [27]:
# Add drug name / type columns to the drugs loaded from pos.txt.
data_drugs_name = input_expt1.merge(
    data2,
    left_on="DrugBank_ids",
    right_on="Drugbank_ids",
    how="inner",
)

# Add each drug's joined repurposed-drug ids and the per-drug frequency columns.
data_merge = data_drugs_name.merge(
    result,
    left_on="DrugBank_ids",
    right_on="Drugbank_ids_x",
    how="inner",
)
data_merge




Unnamed: 0,DrugBank_ids,Uniprot_id,Drugbank_ids,Drugs_name,Drug_type,Drugbank_ids_x,Repurposed_drugs_ids,0,1,2,...,617,618,619,620,621,622,623,624,625,626
0,DB01629,P12758; P00441,DB01629,5-fluorouridine,SmallMoleculeDrug,DB01629,DB00988#DB01064#DB01593#DB01629#DB03382#DB0502...,"(DB01629, 2)","(DB00988, 1)","(DB01064, 1)",...,,,,,,,,,,
1,DB01024,P12268; P20839,DB01024,Mycophenolic acid,SmallMoleculeDrug,DB01024,DB00157#DB00688#DB00811#DB01024#DB01033#DB0307...,"(DB00157, 2)","(DB00688, 2)","(DB00811, 2)",...,,,,,,,,,,
2,DB12742,P36888; P10721; P16234; P08581; P07949; Q06609,DB12742,Amuvatinib,SmallMoleculeDrug,DB12742,DB00102#DB00619#DB01268#DB05146#DB06043#DB0658...,"(DB12742, 6)","(DB12010, 5)","(DB08901, 4)",...,,,,,,,,,,
3,DB00246,P14416; P21728; P21918; P28223; P35462; P21917...,DB00246,Ziprasidone,SmallMoleculeDrug,DB00246,DB00182#DB00211#DB00243#DB00246#DB00248#DB0026...,"(DB00408, 27)","(DB00246, 25)","(DB00543, 25)",...,,,,,,,,,,
4,DB09236,Q13936; Q01668; O60840; Q13698; Q02641; Q08289...,DB09236,Lacidipine,SmallMoleculeDrug,DB09236,DB00153#DB00243#DB00252#DB00270#DB00273#DB0030...,"(DB13746, 19)","(DB00153, 9)","(DB00252, 9)",...,,,,,,,,,,
5,DB00539,P03372; P04278,DB00539,Toremifene,SmallMoleculeDrug,DB00539,DB00179#DB00255#DB00367#DB00396#DB00421#DB0053...,"(DB00255, 2)","(DB00367, 2)","(DB00396, 2)",...,,,,,,,,,,


In [28]:
# Persist the merged table. `index` is documented as a bool, so pass False explicitly
# (the previous `index=None` only omitted the row index because None is falsy).
data_merge.to_csv("Done_6639/Lassa/updated/result_all.txt", index=False)