In [1]:
import os
from os import path

import pygtop

from biopandas.pdb import PandasPdb

import pubchempy as pcp
import time

# used to sanitize file/folder names
import re
from unidecode import unidecode

### FUNCTIONS

In [87]:
def get_pubchem_id(ligand):
    """Return the PubChem CID accession of a ligand, or None if it has none.

    Parameters
    ----------
    ligand : object or None
        Object exposing ``database_links()``; every returned link must expose
        ``database()`` and ``accession()``. ``None`` is accepted because
        callers obtain the ligand from ``interaction.ligand()``, which other
        cells of this notebook already treat as possibly ``None``.

    Returns
    -------
    The ``accession()`` value of the first link whose ``database()`` equals
    ``"PubChem CID"``, or ``None`` when the ligand is ``None`` or has no such
    link.
    """
    # Guard against a missing ligand instead of failing with AttributeError.
    if ligand is None:
        return None
    for db in ligand.database_links():
        if db.database() == "PubChem CID":
            return db.accession()
    # Explicit rather than implicit: no PubChem link was found.
    return None
        
def update_pdb_obsolete(id_pdb):
    """Fetch a PDB entry, following an OBSLTE record to its replacement.

    Parameters
    ----------
    id_pdb : str
        PDB identifier to fetch.

    Returns
    -------
    list
        ``[id_pdb, ppdb]``: the identifier that was finally fetched (the
        replacement ID when the requested entry carries an OBSLTE record)
        and the corresponding ``PandasPdb`` object.
    """
    ppdb = PandasPdb().fetch_pdb(id_pdb)
    others = ppdb.df["OTHERS"]
    idx = others["record_name"] == "OBSLTE"
    if idx.any():  # entry is obsolete: switch to the current ID
        # The replacement ID is the last whitespace-separated token of the
        # first OBSLTE record. split() without an argument ignores runs of
        # blanks and trailing padding, whereas split(" ") would return ""
        # if the record text ended with a space.
        id_pdb = others.loc[idx, "entry"].iloc[0].split()[-1]
        ppdb = PandasPdb().fetch_pdb(id_pdb)
    return [id_pdb, ppdb]

def ismutated(pdb):
    """Tell whether any COMPND record of the structure mentions "MUTATION".

    ``pdb`` must expose ``pdb.df["OTHERS"]`` with ``record_name`` and
    ``entry`` columns. Returns ``True`` as soon as one COMPND entry contains
    the substring ``"MUTATION"``, otherwise ``False``.
    """
    others = pdb.df["OTHERS"]
    compnd_entries = others.loc[others["record_name"] == "COMPND", "entry"]
    for text in compnd_entries:
        if "MUTATION" in text:
            return True
    return False

### Get all receptors of the GPCR family

Class A: https://www.guidetopharmacology.org/GRAC/GPCRListForward?class=A

In [2]:
# Ask pygtop for every target whose type is "GPCR".
targets = pygtop.get_targets_by({"type": "GPCR"})

In [3]:
# Display the number of targets returned together with the full list.
len(targets), targets

(410,
 [<Target 1 (5-HT<sub>1A</sub> receptor)>,
  <Target 2 (5-HT<sub>1B</sub> receptor)>,
  <Target 3 (5-HT<sub>1D</sub> receptor)>,
  <Target 4 (5-ht<sub>1e</sub> receptor)>,
  <Target 5 (5-HT<sub>1F</sub> receptor)>,
  <Target 6 (5-HT<sub>2A</sub> receptor)>,
  <Target 7 (5-HT<sub>2B</sub> receptor)>,
  <Target 8 (5-HT<sub>2C</sub> receptor)>,
  <Target 9 (5-HT<sub>4</sub> receptor)>,
  <Target 10 (5-HT<sub>5A</sub> receptor)>,
  <Target 648 (5-ht<sub>5b</sub> receptor)>,
  <Target 11 (5-HT<sub>6</sub> receptor)>,
  <Target 12 (5-HT<sub>7</sub> receptor)>,
  <Target 316 (ACKR1)>,
  <Target 314 (ACKR2)>,
  <Target 80 (ACKR3)>,
  <Target 315 (ACKR4)>,
  <Target 197 (ADGRA1)>,
  <Target 198 (ADGRA2)>,
  <Target 199 (ADGRA3)>,
  <Target 174 (ADGRB1)>,
  <Target 175 (ADGRB2)>,
  <Target 176 (ADGRB3)>,
  <Target 202 (ADGRD1)>,
  <Target 204 (ADGRD2)>,
  <Target 182 (ADGRE1)>,
  <Target 183 (ADGRE2)>,
  <Target 184 (ADGRE3)>,
  <Target 185 (ADGRE4P)>,
  <Target 177 (ADGRE5)>,
  <Target 19

In [5]:
# Inspect the external database links of the first target in the list.
targets[0].database_links()

[<ChEMBL Target link (CHEMBL214) for Human>,
 <ChEMBL Target link (CHEMBL3737) for Mouse>,
 <ChEMBL Target link (CHEMBL273) for Rat>,
 <DrugBank Target link (P08908) for Human>,
 <Ensembl Gene link (ENSG00000178394) for Human>,
 <Ensembl Gene link (ENSMUSG00000021721) for Mouse>,
 <Ensembl Gene link (ENSRNOG00000010254) for Rat>,
 <Entrez Gene link (3350) for Human>,
 <Entrez Gene link (15550) for Mouse>,
 <Entrez Gene link (24473) for Rat>,
 <GPCRDB link (5ht1a_human) for Human>,
 <GPCRDB link (5ht1a_mouse) for Mouse>,
 <GPCRDB link (5ht1a_rat) for Rat>,
 <OMIM link (109760) for Human>,
 <Protein GI link (55956923) for Human>,
 <Protein GI link (31542972) for Mouse>,
 <Protein GI link (6981054) for Rat>,
 <RefSeq Nucleotide link (NM_000524) for Human>,
 <RefSeq Nucleotide link (NM_008308) for Mouse>,
 <RefSeq Nucleotide link (NM_012585) for Rat>,
 <RefSeq Protein link (NP_000515) for Human>,
 <RefSeq Protein link (NP_032334) for Mouse>,
 <RefSeq Protein link (NP_036717) for Rat>,
 <Un

In [21]:
# Map target id -> {"target", "interactions"} for every target that has more
# than two human interactions; targets with two or fewer are left out.
target_interactions = {}
for target in targets:
    interactions = target.interactions(species="human")
    if len(interactions) <= 2:
        continue
    target_interactions[target.target_id()] = dict(target=target, interactions=interactions)

In [22]:
# Display the whole target-id -> {"target", "interactions"} mapping.
target_interactions

{1: {'target': <Target 1 (5-HT<sub>1A</sub> receptor)>,
  'interactions': [<Interaction (3252 --> Human 1)>,
   <Interaction (34 --> Human 1)>,
   <Interaction (104 --> Human 1)>,
   <Interaction (1 --> Human 1)>,
   <Interaction (77 --> Human 1)>,
   <Interaction (2 --> Human 1)>,
   <Interaction (3227 --> Human 1)>,
   <Interaction (78 --> Human 1)>,
   <Interaction (79 --> Human 1)>,
   <Interaction (67 --> Human 1)>,
   <Interaction (3218 --> Human 1)>,
   <Interaction (3251 --> Human 1)>,
   <Interaction (31 --> Human 1)>,
   <Interaction (3925 --> Human 1)>,
   <Interaction (61 --> Human 1)>,
   <Interaction (84 --> Human 1)>,
   <Interaction (7191 --> Human 1)>,
   <Interaction (36 --> Human 1)>,
   <Interaction (7427 --> Human 1)>,
   <Interaction (7427 --> Human 1)>,
   <Interaction (52 --> Human 1)>,
   <Interaction (53 --> Human 1)>,
   <Interaction (54 --> Human 1)>,
   <Interaction (55 --> Human 1)>,
   <Interaction (56 --> Human 1)>,
   <Interaction (57 --> Human 1)>,
   

In [111]:
# For each kept receptor, print its name (blanks and hyphens replaced by "_")
# and, on the next line, how many interactions it has.
for key, value in target_interactions.items():
    target = value["target"]
    folder_name = re.sub("[ -]","_", target.name(strip_html=True))
    print(folder_name, len(value["interactions"]), sep="\n")

In [110]:
data_path = "DATA_old/"

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair and also creates
# any missing parent directory.
os.makedirs(data_path, exist_ok=True)

# Progress is tracked in a private working copy: finished interactions and
# receptors are removed from `pending`, so a retry resumes where the previous
# attempt failed, while the shared `target_interactions` dict stays intact
# for the cells that use it afterwards.
pending = {
    key: {"target": value["target"], "interactions": list(value["interactions"])}
    for key, value in target_interactions.items()
}

# Up to 59 attempts; every failure is printed and followed by a 60 s pause.
for i in range(1, 60):
    try:

        for key, value in dict(pending).items():
            target = value["target"]

            # NOTE: "[ -/]" is a character *range* (space through "/"), so in
            # addition to blanks, "-" and "/" it also replaces the other
            # punctuation characters in that ASCII range with "_".
            receptor_name = unidecode(re.sub("[ -/]","_", target.name(strip_html=True)))
            receptor_path = data_path+receptor_name+"/"
            ligands_path = receptor_path+"Ligands/"

            # Creates the receptor folder and its "Ligands" sub-folder at once.
            os.makedirs(ligands_path, exist_ok=True)

            interactions = value["interactions"]
            # Iterate over a snapshot because finished items are removed below.
            for interaction in list(interactions):
                ligand = interaction.ligand()

                pubchem_id = None
                if ligand is not None:
                    pubchem_id = get_pubchem_id(ligand)

                ligand_path = None
                # Only ligands with a PubChem CID and a non-zero 3D conformer
                # count are downloaded.
                if pubchem_id and pcp.get_properties(identifier=pubchem_id, properties="ConformerCount3D")[0]["ConformerCount3D"] != 0:
                    ligand_path = ligands_path + str(ligand.ligand_id()) + "_" + re.sub(" ", "_", interaction.action().lower()) + ".sdf"

                    # Files already on disk are kept and not downloaded again.
                    if not os.path.isfile(ligand_path):
                        pcp.download("sdf", ligand_path, pubchem_id, record_type="3d")
                        print(ligand_path)
                # Mark this interaction as done so a retry does not repeat it.
                interactions.remove(interaction)
            # Every interaction of this receptor has been handled.
            del pending[key]

    except Exception as e:
        # Best-effort retry: report the failure, wait, then resume with
        # whatever is still pending.
        print(e)
        print('Restarting!')
        time.sleep(60)
        continue
    else:
        # A full pass finished without error: nothing is left to retry.
        break

('Connection aborted.', BadStatusLine('<html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head><title>Page not available</title><style type="text/css"><!-- *{font:14px Arial,Helvetica,sans-serif;margin:0;padding:0;}body{background-color:#e6e6e6;}h1{font-size:30px;}h2{font-size:21px;font-weight:bold;font-family:\'lucida grande\', \'tahoma\', arial, helvetica, sans-serif;}h2,h3{color:#D03;}h3{font-size:18px;font-weight:bold;font-family:\'lucida grande\', \'tahoma\', arial, helvetica, sans-serif;}#c{background-color:#fff;padding:40px;}#f{color:#fff;background-color:#666;padding:14px;}#g{font-size:12px;}#g,#c{margin:0 auto;width:800px;}h1,h2,h3,#h,p,li{padding:0 0 14px 0}li{margin:0 0 0 20px;list-style-type:square;color:red;}li span{color:#000;} --> </style></head><body><div id="c"><div id="h"><h1 id="h1o">The University of Edinburgh</h1></div><div id="e"><h2>Page unavailable</h2><p>We\'re sorry, the page you requested is currently unavailable. We apologise for this and hope to resolve 

DATA_old/M5_receptor/Ligands/309_antagonist.sdf
DATA_old/M5_receptor/Ligands/3264_antagonist.sdf
DATA_old/M5_receptor/Ligands/318_antagonist.sdf
DATA_old/M5_receptor/Ligands/3257_positive.sdf
DATA_old/M5_receptor/Ligands/7459_antagonist.sdf
DATA_old/M5_receptor/Ligands/7549_antagonist.sdf
('Connection aborted.', BadStatusLine('<html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head><title>Page not available</title><style type="text/css"><!-- *{font:14px Arial,Helvetica,sans-serif;margin:0;padding:0;}body{background-color:#e6e6e6;}h1{font-size:30px;}h2{font-size:21px;font-weight:bold;font-family:\'lucida grande\', \'tahoma\', arial, helvetica, sans-serif;}h2,h3{color:#D03;}h3{font-size:18px;font-weight:bold;font-family:\'lucida grande\', \'tahoma\', arial, helvetica, sans-serif;}#c{background-color:#fff;padding:40px;}#f{color:#fff;background-color:#666;padding:14px;}#g{font-size:12px;}#g,#c{margin:0 auto;width:800px;}h1,h2,h3,#h,p,li{padding:0 0 14px 0}li{margin:0 0 0 20px;list-style

DATA_old/m_receptor/Ligands/1638_antagonist.sdf
DATA_old/m_receptor/Ligands/1626_full_agonist.sdf
DATA_old/m_receptor/Ligands/1670_partial_agonist.sdf
DATA_old/m_receptor/Ligands/1673_full_agonist.sdf
DATA_old/m_receptor/Ligands/1629_antagonist.sdf
DATA_old/m_receptor/Ligands/1628_antagonist.sdf
DATA_old/m_receptor/Ligands/1639_antagonist.sdf
DATA_old/m_receptor/Ligands/7081_agonist.sdf
DATA_old/m_receptor/Ligands/7082_agonist.sdf
DATA_old/m_receptor/Ligands/7221_agonist.sdf
DATA_old/m_receptor/Ligands/1663_agonist.sdf
DATA_old/m_receptor/Ligands/5458_agonist.sdf
DATA_old/m_receptor/Ligands/7477_agonist.sdf
DATA_old/m_receptor/Ligands/7563_antagonist.sdf
DATA_old/m_receptor/Ligands/7591_partial_agonist.sdf
DATA_old/m_receptor/Ligands/7691_agonist.sdf
DATA_old/m_receptor/Ligands/1630_full_agonist.sdf
DATA_old/m_receptor/Ligands/7595_agonist.sdf
DATA_old/m_receptor/Ligands/8868_agonist.sdf
DATA_old/m_receptor/Ligands/8866_agonist.sdf
DATA_old/m_receptor/Ligands/8867_agonist.sdf
DATA_old/

DATA_old/OT_receptor/Ligands/2253_antagonist.sdf
DATA_old/OT_receptor/Ligands/2201_antagonist.sdf
DATA_old/OT_receptor/Ligands/8403_antagonist.sdf
DATA_old/OXE_receptor/Ligands/3416_agonist.sdf
DATA_old/OXE_receptor/Ligands/3392_full_agonist.sdf
DATA_old/OXE_receptor/Ligands/6164_full_agonist.sdf
DATA_old/OXE_receptor/Ligands/6167_full_agonist.sdf
DATA_old/OXE_receptor/Ligands/2483_full_agonist.sdf
DATA_old/OXE_receptor/Ligands/6169_full_agonist.sdf
DATA_old/OXE_receptor/Ligands/6174_antagonist.sdf
DATA_old/OXE_receptor/Ligands/3390_full_agonist.sdf
DATA_old/OXE_receptor/Ligands/6173_negative.sdf
DATA_old/OXE_receptor/Ligands/11073_antagonist.sdf
DATA_old/OXE_receptor/Ligands/6171_antagonist.sdf
DATA_old/OXE_receptor/Ligands/3391_full_agonist.sdf
DATA_old/OX1_receptor/Ligands/2886_antagonist.sdf
DATA_old/OX1_receptor/Ligands/4461_antagonist.sdf
DATA_old/OX1_receptor/Ligands/10280_antagonist.sdf
DATA_old/OX1_receptor/Ligands/9465_antagonist.sdf
DATA_old/OX1_receptor/Ligands/1706_antagon

DATA_old/P2Y6_receptor/Ligands/1712_partial_agonist.sdf
DATA_old/P2Y6_receptor/Ligands/1748_full_agonist.sdf
DATA_old/P2Y6_receptor/Ligands/1750_full_agonist.sdf
DATA_old/P2Y6_receptor/Ligands/1734_partial_agonist.sdf
DATA_old/P2Y6_receptor/Ligands/1753_antagonist.sdf
DATA_old/P2Y6_receptor/Ligands/1752_antagonist.sdf
DATA_old/P2Y6_receptor/Ligands/1749_full_agonist.sdf
DATA_old/P2Y6_receptor/Ligands/1745_full_agonist.sdf
DATA_old/P2Y6_receptor/Ligands/1747_full_agonist.sdf
DATA_old/P2Y6_receptor/Ligands/1725_antagonist.sdf
DATA_old/PAC1_receptor/Ligands/9135_antagonist.sdf
DATA_old/PAF_receptor/Ligands/10103_antagonist.sdf
DATA_old/PAF_receptor/Ligands/6078_antagonist.sdf
DATA_old/PAF_receptor/Ligands/6080_antagonist.sdf
DATA_old/PAF_receptor/Ligands/1856_antagonist.sdf
DATA_old/PAF_receptor/Ligands/1839_antagonist.sdf
DATA_old/PAF_receptor/Ligands/1851_antagonist.sdf
DATA_old/PAF_receptor/Ligands/1853_antagonist.sdf
DATA_old/PAF_receptor/Ligands/1854_antagonist.sdf
DATA_old/PAF_recep

DATA_old/TP_receptor/Ligands/6073_antagonist.sdf
DATA_old/TP_receptor/Ligands/6071_full_agonist.sdf
DATA_old/TP_receptor/Ligands/6072_antagonist.sdf
DATA_old/TP_receptor/Ligands/3332_antagonist.sdf
DATA_old/TP_receptor/Ligands/1973_full_agonist.sdf
DATA_old/TP_receptor/Ligands/1982_antagonist.sdf
DATA_old/TP_receptor/Ligands/1985_antagonist.sdf
DATA_old/TP_receptor/Ligands/1911_antagonist.sdf
DATA_old/TP_receptor/Ligands/1981_antagonist.sdf
DATA_old/TP_receptor/Ligands/1978_antagonist.sdf
DATA_old/TP_receptor/Ligands/1939_agonist.sdf
DATA_old/TP_receptor/Ligands/6069_full_agonist.sdf
DATA_old/TP_receptor/Ligands/1970_full_agonist.sdf
DATA_old/TP_receptor/Ligands/1971_full_agonist.sdf
DATA_old/TP_receptor/Ligands/1913_full_agonist.sdf
DATA_old/TP_receptor/Ligands/1883_full_agonist.sdf
DATA_old/TP_receptor/Ligands/1884_full_agonist.sdf
DATA_old/TP_receptor/Ligands/1972_full_agonist.sdf
DATA_old/TP_receptor/Ligands/1974_full_agonist.sdf
DATA_old/TP_receptor/Ligands/1892_full_agonist.sdf
D

### Delete entries from the dict that don't have a PDB

In [75]:
# Attach the PDB ids to each receptor entry; receptors for which gtop_pdbs()
# returns nothing are printed and removed from `target_interactions`.
print("Receptors without pdb, or to be checked manually")
for key in list(target_interactions):
    value = target_interactions[key]
    target = value["target"]
    pdbs = target.gtop_pdbs()
    if not pdbs:
        print(re.sub("[ -]","_", target.name(strip_html=True)))
        del target_interactions[key]
        continue
    value["pdbs"] = pdbs

5_HT1B_receptor
5_HT1D_receptor
5_HT2A_receptor
5_HT2B_receptor
5_HT2C_receptor
5_HT6_receptor
5_HT7_receptor
A1_receptor
A2A_receptor
A2B_receptor
A3_receptor
δ_receptor
D2_receptor
D3_receptor
EP4_receptor
GAL1_receptor
GAL2_receptor
GnRH1_receptor
H1_receptor
H3_receptor
κ_receptor
mGlu5_receptor
M1_receptor
M2_receptor
M3_receptor
M4_receptor
M5_receptor
μ_receptor
SST2_receptor
SST3_receptor
VPAC1_receptor
VPAC2_receptor
V1A_receptor
V2_receptor


### Download all PDB files into each receptor's folder inside the data folder

In [91]:
data_path = "DATA/"

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair.
os.makedirs(data_path, exist_ok=True)

# Requires the "pdbs" key that the previous cell adds to every kept receptor.
for key, value in target_interactions.items():
    target = value["target"]

    receptor_name = unidecode(re.sub("[ -]","_", target.name(strip_html=True)))
    receptor_path = data_path+receptor_name+"/"

    os.makedirs(receptor_path, exist_ok=True)

    for pdb_id in value["pdbs"]:
        # A file saved under this id on an earlier run means the structure is
        # already on disk, so skip the network fetch that update_pdb_obsolete
        # would otherwise perform on every rerun.
        if os.path.isfile(receptor_path+pdb_id+".pdb"):
            continue

        # May return a different id when the requested entry is obsolete.
        pdb_id, ppdb = update_pdb_obsolete(pdb_id)

        pdb_path = receptor_path+pdb_id+".pdb"
        # Checked again because the id may have changed just above.
        if not os.path.isfile(pdb_path):
            ppdb.to_pdb(path=pdb_path,
                records=None,
                gz=False,
                append_newline=True)
            print(pdb_path)

### Download all ligands into the Ligands folder of each receptor folder

In [None]:
data_path = "DATA/"

# makedirs(exist_ok=True) replaces the exists()/mkdir() pairs.
os.makedirs(data_path, exist_ok=True)

for key, value in target_interactions.items():
    target = value["target"]

    receptor_name = unidecode(re.sub("[ -]","_", target.name(strip_html=True)))
    receptor_path = data_path+receptor_name+"/"
    ligands_path = receptor_path+"Ligands/"

    # Creates the receptor folder and its "Ligands" sub-folder at once.
    os.makedirs(ligands_path, exist_ok=True)

    for interaction in value["interactions"]:
        ligand = interaction.ligand()

        # An interaction may come without a ligand object; treat it like a
        # ligand without PubChem id instead of failing inside get_pubchem_id.
        pubchem_id = None
        if ligand is not None:
            pubchem_id = get_pubchem_id(ligand)

        # Only ligands with a PubChem CID and a non-zero 3D conformer count
        # are downloaded.
        if pubchem_id and pcp.get_properties(identifier=pubchem_id, properties="ConformerCount3D")[0]["ConformerCount3D"] != 0:
            ligand_path = ligands_path + str(ligand.ligand_id()) + "_" + re.sub(" ", "_", interaction.action().lower()) + ".sdf"

            # Files already on disk are kept and not downloaded again.
            if not os.path.isfile(ligand_path):
                pcp.download("sdf", ligand_path, pubchem_id, record_type="3d")
                print(ligand_path)

In [122]:
# Inspect the interaction list stored for target id 372.
target_interactions[372]["interactions"]

[<Interaction (2264 --> Human 372)>,
 <Interaction (2261 --> Human 372)>,
 <Interaction (2277 --> Human 372)>,
 <Interaction (1155 --> Human 372)>,
 <Interaction (1152 --> Human 372)>,
 <Interaction (2278 --> Human 372)>,
 <Interaction (2272 --> Human 372)>,
 <Interaction (2278 --> Human 372)>,
 <Interaction (2268 --> Human 372)>,
 <Interaction (2266 --> Human 372)>,
 <Interaction (2278 --> Human 372)>,
 <Interaction (2278 --> Human 372)>,
 <Interaction (2273 --> Human 372)>,
 <Interaction (2275 --> Human 372)>,
 <Interaction (1155 --> Human 372)>,
 <Interaction (2280 --> Human 372)>,
 <Interaction (2257 --> Human 372)>,
 <Interaction (2258 --> Human 372)>,
 <Interaction (1152 --> Human 372)>,
 <Interaction (2257 --> Human 372)>,
 <Interaction (2258 --> Human 372)>,
 <Interaction (2257 --> Human 372)>,
 <Interaction (1152 --> Human 372)>,
 <Interaction (1152 --> Human 372)>,
 <Interaction (1152 --> Human 372)>,
 <Interaction (1152 --> Human 372)>,
 <Interaction (1152 --> Human 372)>,
 

### Download all ligands into the ligands folder of a single PDB entry

In [None]:
# NOTE(review): `id_pdb` and `list_dict` are not defined above this cell in
# the visible notebook (`list_dict` is built in a later cell), so they must
# already exist in the kernel when this cell runs.
path_ligands = data_path+"ligands/"+id_pdb+"/"

# The target is a nested path ("<data_path>ligands/<id_pdb>/"); os.mkdir fails
# when the intermediate "ligands" folder is missing, makedirs creates it too.
os.makedirs(path_ligands, exist_ok=True)

for i, entry in enumerate(list_dict):
    # Only compounds with a non-zero 3D conformer count are downloaded.
    if pcp.get_properties(identifier=entry["pubchem_id"], properties="ConformerCount3D")[0]["ConformerCount3D"] != 0:

        print(str(i)+" downloading...")

        # overwrite=True: an existing file with the same name is replaced.
        pcp.download("sdf",
                     path_ligands + str(entry["id"]) + "_" + entry["action"].lower() + ".sdf",
                     entry["pubchem_id"], record_type="3d", overwrite=True)

### List of ligands with PubChem IDs

In [8]:
# One record per interaction whose ligand has a PubChem CID; interactions
# without one are skipped. A one-second pause precedes every lookup.
list_dict = []
for interaction in interactions:
    time.sleep(1)
    pubchem_id = get_pubchem_id(interaction.ligand())
    if not pubchem_id:
        continue
    list_dict.append(dict(
        id=interaction.ligand_id(),
        pubchem_id=pubchem_id,
        type=interaction.interaction_type(),
        action=interaction.action(),
    ))

In [32]:
# Display the collected ligand records (id, pubchem_id, type, action).
list_dict

[{'id': 86,
  'pubchem_id': '3559',
  'type': 'Antagonist',
  'action': 'Antagonist'},
 {'id': 948,
  'pubchem_id': '5281881',
  'type': 'Antagonist',
  'action': 'Antagonist'},
 {'id': 963,
  'pubchem_id': '2159',
  'type': 'Antagonist',
  'action': 'Antagonist'},
 {'id': 960,
  'pubchem_id': '643497',
  'type': 'Antagonist',
  'action': 'Antagonist'},
 {'id': 37,
  'pubchem_id': '54746',
  'type': 'Agonist',
  'action': 'Partial agonist'},
 {'id': 35,
  'pubchem_id': '31101',
  'type': 'Agonist',
  'action': 'Full agonist'},
 {'id': 953,
  'pubchem_id': '119570',
  'type': 'Agonist',
  'action': 'Full agonist'},
 {'id': 968,
  'pubchem_id': '5281878',
  'type': 'Antagonist',
  'action': 'Antagonist'},
 {'id': 38,
  'pubchem_id': '2818',
  'type': 'Antagonist',
  'action': 'Antagonist'},
 {'id': 940, 'pubchem_id': '681', 'type': 'Agonist', 'action': 'Full agonist'},
 {'id': 34,
  'pubchem_id': '60795',
  'type': 'Agonist',
  'action': 'Partial agonist'},
 {'id': 50,
  'pubchem_id': '5

### Download SDF files of the ligands

In [33]:
# Output folder for the ligand SDF files of one PDB entry.
# NOTE(review): `id_pdb` is not assigned in any visible cell; it must already
# exist in the kernel.
path_ligands = "ligands/"+id_pdb+"/"
path_ligands

'ligands/6CM4/'

In [34]:
# `path_ligands` is a nested path ("ligands/<id_pdb>/"); os.mkdir fails when
# the parent "ligands" folder is missing, makedirs creates it as well.
os.makedirs(path_ligands, exist_ok=True)

for i, entry in enumerate(list_dict):
    # Only compounds with a non-zero 3D conformer count are downloaded.
    if pcp.get_properties(identifier=entry["pubchem_id"], properties="ConformerCount3D")[0]["ConformerCount3D"] != 0:

        print(str(i)+" downloading...")

        # overwrite=True: an existing file with the same name is replaced.
        pcp.download("sdf",
                     path_ligands + str(entry["id"]) + "_" + entry["action"].lower() + ".sdf",
                     entry["pubchem_id"], record_type="3d", overwrite=True)

0 downloading...
1 downloading...
2 downloading...
3 downloading...
4 downloading...
5 downloading...
6 downloading...
7 downloading...
8 downloading...
9 downloading...
10 downloading...
11 downloading...
12 downloading...
13 downloading...
14 downloading...
15 downloading...
16 downloading...
17 downloading...
18 downloading...
19 downloading...
20 downloading...
21 downloading...
22 downloading...
23 downloading...
24 downloading...
25 downloading...
26 downloading...
27 downloading...
28 downloading...
29 downloading...
30 downloading...
31 downloading...
32 downloading...
33 downloading...
34 downloading...
35 downloading...
36 downloading...
37 downloading...
38 downloading...
39 downloading...
40 downloading...
41 downloading...
42 downloading...
43 downloading...
44 downloading...
45 downloading...
46 downloading...
47 downloading...
48 downloading...
49 downloading...
50 downloading...
51 downloading...
52 downloading...
53 downloading...
54 downloading...
55 downloading...
56

In [None]:
# Leftover exploration: display the namespace dictionary of the pubchempy module.
pcp.__dict__