# Environment Setup

In [None]:
!pip install -q pubchempy

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for pubchempy (setup.py) ... [?25l[?25hdone


In [None]:
import sys
# Add an extra directory to the module search path.
# NOTE(review): this is a Python 3.12 site-packages directory, while the version
# check in the next cell reports Python 3.10.12 — confirm it is still needed.
sys.path.append("/usr/local/lib/python3.12/site-packages")

In [None]:
# Show which `python` executable shell commands resolve to, and its version.
!which python
!python --version

/usr/local/bin/python
Python 3.10.12


In [None]:
# List the kernel's module search path, one entry per line.
for search_dir in sys.path:
    print(search_dir)

/content
/env/python
/usr/lib/python310.zip
/usr/lib/python3.10
/usr/lib/python3.10/lib-dynload

/usr/local/lib/python3.10/dist-packages
/usr/lib/python3/dist-packages
/usr/local/lib/python3.10/dist-packages/IPython/extensions
/root/.ipython
/usr/local/lib/python3.12/site-packages


In [None]:
import requests
from pathlib import Path
import subprocess
import pubchempy as pcp
from typing import NamedTuple
import time
from pprint import pprint



In [None]:
# @title Make Directories
!pwd
!mkdir SDF
!mkdir mol2

/content


In [None]:
# @title Installing OBABEL
# Install the openbabel package; the conversion functions defined below shell
# out to its `obabel` command.
!apt install -y openbabel

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libinchi1 libmaeparser1 libopenbabel7
The following NEW packages will be installed:
  libinchi1 libmaeparser1 libopenbabel7 openbabel
0 upgraded, 4 newly installed, 0 to remove and 38 not upgraded.
Need to get 3,903 kB of archives.
After this operation, 16.9 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libinchi1 amd64 1.03+dfsg-4 [455 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libmaeparser1 amd64 1.2.4-1build1 [88.2 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopenbabel7 amd64 3.1.1+dfsg-6ubuntu5 [3,231 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy/universe amd64 openbabel amd64 3.1.1+dfsg-6ubuntu5 [128 kB]
Fetched 3,903 kB in 2s (2,155 kB/s)
Selecting previously unselected package libinchi1.
(Reading database ... 121752 files and dire

In [None]:
# @title Defining Function
def convert_all_to_mol2():
    """Convert every ``*.sdf`` file in ``./SDF`` to mol2 format in ``./mol2``.

    Each file is converted by running the ``obabel`` command-line tool. The
    output file keeps the input's stem and gets a ``.mol2`` extension.
    A conversion that exits with a non-zero status is reported and the batch
    continues. If ``obabel`` is not installed, a message is printed and the
    function returns early.

    Returns:
        None
    """
    sdf_paths = Path.cwd() / "SDF"
    mol2_path = Path.cwd() / "mol2"
    # Create the output folder up front so the function also works when the
    # "Make Directories" cell was skipped (mirrors convert_all_to_pdbqt).
    mol2_path.mkdir(parents=True, exist_ok=True)

    for sdf_file in sdf_paths.glob("*.sdf"):
        # Append the extension textually: stems may contain dots, so
        # Path.with_suffix() would cut part of the name off.
        out_file = f"{mol2_path / sdf_file.stem}.mol2"
        try:
            # Argument list instead of shell=True: file names come from
            # user-supplied compound names and must not be parsed by a shell.
            r = subprocess.run(
                ["obabel", "-isdf", str(sdf_file), "-omol2", "-O", out_file],
                capture_output=True,
                text=True,
            )
        except FileNotFoundError:
            print("obabel executable not found; install Open Babel first.")
            return
        if r.returncode != 0:
            print(f"obabel failed for {sdf_file.name}: {r.stderr.strip()}")

def convert_all_to_pdbqt():
    """Convert every ``*.mol2`` file in ``./mol2`` to pdbqt in ``./pdbqt_files``.

    The output folder is created if needed. Each file is converted by running
    the ``obabel`` command-line tool; the output keeps the input's stem and
    gets a ``.pdbqt`` extension. A conversion that exits with a non-zero
    status is reported and the batch continues. If ``obabel`` is not
    installed, a message is printed and the function returns early.

    Returns:
        None
    """
    mol2_path = Path.cwd() / "mol2"
    pdbqt_path = Path.cwd() / "pdbqt_files"
    pdbqt_path.mkdir(parents=True, exist_ok=True)

    for mol2_file in mol2_path.glob("*.mol2"):
        # Append the extension textually: stems may contain dots, so
        # Path.with_suffix() would cut part of the name off.
        out_file = f"{pdbqt_path / mol2_file.stem}.pdbqt"
        try:
            # Argument list instead of shell=True: file names come from
            # user-supplied compound names and must not be parsed by a shell.
            r = subprocess.run(
                ["obabel", "-imol2", str(mol2_file), "-opdbqt", "-O", out_file],
                capture_output=True,
                text=True,
            )
        except FileNotFoundError:
            print("obabel executable not found; install Open Babel first.")
            return
        if r.returncode != 0:
            print(f"obabel failed for {mol2_file.name}: {r.stderr.strip()}")


def compound_name_to_sdf_url(compound_name):
    """Search PubChem by name and build download links for the first hit.

    Args:
        compound_name: Name passed to ``pcp.get_compounds(..., 'name')``.

    Returns:
        A ``CompoundLinks`` named tuple with the fields ``conformer_3d_link``,
        ``regular_2d_link`` and ``json_link`` (all URLs built from the hit's
        CID), or ``False`` when the search result has no first element.
    """
    CompoundLinks = NamedTuple(
        "CompoundLinks",
        [
            ("conformer_3d_link", str),
            ("regular_2d_link", str),
            ("json_link", str),
        ],
    )

    try:
        first_hit = pcp.get_compounds(compound_name, 'name')[0]
    except IndexError:
        # An empty result list means the name is unknown to the search.
        print(f"!!!!COULD NOT FIND {compound_name} IN PUBCHEM SEARCH, FIND MANUALLY!!!!!!")
        return False

    cid = first_hit.cid
    rest_root = 'https://pubchem.ncbi.nlm.nih.gov/rest'
    sdf_endpoint = f'{rest_root}/pug/compound/cid/{cid}/SDF'

    return CompoundLinks(
        conformer_3d_link=f'{sdf_endpoint}?record_type=3d',
        regular_2d_link=sdf_endpoint,
        json_link=f'{rest_root}/pug_view/data/compound/{cid}/JSON/',
    )


def get_parent_url(json_link, timeout=30):
    """Fetch a compound's JSON record and build the 3D SDF URL of its parent CID.

    Args:
        json_link: URL of the compound's JSON record (the ``json_link`` built
            by ``compound_name_to_sdf_url``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        A ``ParentResponse`` named tuple with ``parent_cid`` (the value read
        from the record), ``url`` (the 3D SDF link for that CID) and
        ``json_data`` (the decoded JSON document).

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError, IndexError, TypeError: If the record does not have the
            expected section layout.
    """
    class ParentResponse(NamedTuple):
        parent_cid: int
        url: str
        json_data: dict

    r = requests.get(json_link, timeout=timeout)
    # Fail with a clear HTTP error instead of an obscure lookup error on an
    # error page's body.
    r.raise_for_status()
    dic = r.json()

    # NOTE(review): this fixed path assumes the parent CID always sits at this
    # exact position in the record — confirm against the PubChem record schema.
    parent_cid = dic["Record"]["Section"][0]["Section"][1]["Information"][0]["Value"]["Number"][0]
    url = f'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{parent_cid}/SDF?record_type=3d'
    return ParentResponse(url=url, parent_cid=parent_cid, json_data=dic)

# DOWNLOAD AND CONVERT

In [None]:
# @title Compound Input
# One compound name per line. Blank lines and leading/trailing whitespace
# (including a stray "\r" from pasted text) are ignored, so they cannot end up
# inside a compound name or a file name.
compound_name_list = [
    line.strip()
    for line in """
RP 67580
IEM 1460
SR 142948
""".splitlines()
    if line.strip()
]

# Echo the parsed list so typos are visible before any download starts.
for i,compound in enumerate(compound_name_list):
  print(F"{i}.)     {compound}")

0.)     RP 67580
1.)     IEM 1460
2.)     SR 142948


In [None]:
# @title Clear SDFs and Mol2s and pdbqt_files

# Remove the contents of the three output folders and the zip archives from
# earlier runs. -f keeps rm from reporting an error when a target is missing.
!rm -f SDF/*
!rm -f mol2/*
!rm -f SDF.zip
!rm -f mol2.zip
!rm -f pdbqt_files/*
!rm -f pdbqt.zip

In [None]:
# @title Run to get SDFS and MOL2s

class Request404Error(Exception):
    """Signals that a requested resource was not found (HTTP 404).

    The message is available both as ``str(error)`` and as ``error.message``.
    """

    def __init__(self, message="Resource not found (404)"):
        super().__init__(message)
        # Kept as an attribute so handlers can read it without parsing args.
        self.message = message

class Request500Error(Exception):
    """Exception type for a server-side failure (default message "500 ERROR").

    The message is available both as ``str(error)`` and as ``error.message``.
    """

    def __init__(self, message="500 ERROR"):
        super().__init__(message)
        # Kept as an attribute so handlers can read it without parsing args.
        self.message = message

def save_sdf(compound_name,res):
    """Write a downloaded SDF payload to ``./SDF/<sanitised name>.sdf``.

    Spaces in the compound name become underscores, and characters that are
    awkward in file names are replaced by short text tokens (for example
    ``/`` becomes ``or`` and ``:`` becomes ``sc``). The ``SDF`` folder is
    created when it does not exist yet.

    Args:
        compound_name: Name used to derive the file name.
        res: Response-like object whose ``content`` attribute holds the raw
            bytes to write.

    Returns:
        None
    """
    replace_chars_map = {"/" : "or","," : ".",":" : "sc","*" : "ast","?" : "que","<" : "lt",">" : "gt","|" : "bar","~" : "tilde"}

    # A single translate() pass is equivalent to chained replace() calls here
    # because no replacement text contains a character that is itself mapped.
    file_name = F"{compound_name.replace(' ','_')}.sdf".translate(str.maketrans(replace_chars_map))

    sdf_dir = Path.cwd() / "SDF"
    # Do not depend on the "Make Directories" cell having been run first.
    sdf_dir.mkdir(parents=True, exist_ok=True)
    sdf_path = sdf_dir / file_name

    with open(sdf_path,'wb') as f:
        f.write(res.content)

    print(F"\t\tSaved as {sdf_path}")
    print("=================")

# Tunables for the download loop.
MAX_CONSECUTIVE_FAILS = 5      # retries allowed per compound before it is skipped
REQUEST_TIMEOUT_SECONDS = 30   # per-request timeout so a stalled server cannot hang the cell
RETRY_DELAY_SECONDS = 1        # pause before retrying after an unexpected response


def _get_or_none(url):
    """GET ``url``; return the response, or None when the request itself failed."""
    try:
        return requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as err:
        print(F"\tRequest failed: {err}")
        return None


def _fetch_sdf(compound_name, sdf_urls, parent):
    """Try the SDF sources in order: 3D, then parent-compound 3D, then 2D.

    A 404 moves on to the next source. Any other outcome (success, unexpected
    status, or a failed request) ends the search and is returned to the caller.

    Returns:
        (response_or_None, message_to_print_on_success)
    """
    print("Trying normal 3d.")
    res = _get_or_none(sdf_urls.conformer_3d_link)
    if res is None or res.status_code != 404:
        return res, F"\tFound 3D of {compound_name}"

    print("Trying conformer parent 3d;   ",end="")
    if parent is not None:
        print(F" Parent CID: {parent.parent_cid}")
        res = _get_or_none(parent.url)
        # A failed request or a 404 for the parent falls through to 2D.
        if res is not None and res.status_code != 404:
            return res, F"\tFound 3D Conformer Parent of {compound_name}; Parent CID: {parent.parent_cid}"

    print("\nTrying 2d.")
    return _get_or_none(sdf_urls.regular_2d_link), F"\tFound 2D of {compound_name}"


i = 0
consecutive_fail_counter = 0
while i < len(compound_name_list):
    compound_name = compound_name_list[i]

    sdf_urls = compound_name_to_sdf_url(compound_name)
    if sdf_urls is False:
        # Name lookup found nothing (already reported); skip compound.
        i += 1
        continue

    print(F"Getting {compound_name}")

    # The parent record is optional: without it only the parent-3D source is lost.
    json_tuple_response = None
    try:
        json_tuple_response = get_parent_url(sdf_urls.json_link)
    except Exception:
        print(F"Failed to fetch JSON data for {compound_name}")

    res, found_message = _fetch_sdf(compound_name, sdf_urls, json_tuple_response)
    status = res.status_code if res is not None else None

    if status == 200:
        print(found_message)
        save_sdf(compound_name, res)
    elif status == 404:
        # Only reachable when even the 2D record does not exist.
        print("skipping")
    else:
        # Unexpected status or failed request: retry the same compound a
        # bounded number of times, then give up and move on. The index is
        # always advanced after the last retry so the loop cannot spin forever.
        consecutive_fail_counter += 1
        if consecutive_fail_counter <= MAX_CONSECUTIVE_FAILS:
            print(F"Trying again {consecutive_fail_counter}")
            time.sleep(RETRY_DELAY_SECONDS)
            continue
        print(F"\nFailed to find {compound_name} in {consecutive_fail_counter} tries... skipping.\n")

    # Done with this compound (saved, missing, or given up on).
    consecutive_fail_counter = 0
    i += 1



Getting RP 67580
Trying normal 3d.
	Found 3D of RP 67580
		Saved as /content/SDF/RP_67580.sdf
Getting IEM 1460
Trying normal 3d.
Trying conformer parent 3d;    Parent CID: 3248290
	Found 3D Conformer Parent of IEM 1460; Parent CID: 3248290
		Saved as /content/SDF/IEM_1460.sdf
Getting SR 142948
Trying normal 3d.
	Found 3D of SR 142948
		Saved as /content/SDF/SR_142948.sdf


In [None]:
# @title Convert all SDF files in ./SDF to mol2 (results in ./mol2), then all mol2 files to .pdbqt (results in ./pdbqt_files)
# Order matters: the pdbqt step reads the mol2 files written by the first step.
convert_all_to_mol2()
convert_all_to_pdbqt()
print("ALL FILES CONVERTED")

In [None]:
# @title Print file names with .pdbqt extension
# List the .pdbqt file name that corresponds to every SDF in ./SDF.
# Dedicated names are used so this cell does not rebind `files` (the
# google.colab module imported in the download cell) or `i` (the download
# loop's index). Sorting gives the same order on every run.
pdbqt_names = sorted(
    sdf_file.stem + ".pdbqt" for sdf_file in (Path.cwd() / "SDF").glob("*.sdf")
)
for pdbqt_name in pdbqt_names:
    print(pdbqt_name)

In [None]:
# @title Download MOL2s and pdbqts
# Colab-specific helper module, imported here next to its only use.
from google.colab import files
from pathlib import Path

# Bundle each output folder into a zip archive in the working directory.
!zip -r mol2.zip mol2
!zip -r pdbqt.zip pdbqt_files

# Pass both archives to Colab's download helper
# (presumably starts a browser download — confirm in the Colab docs).
files.download("mol2.zip")
files.download("pdbqt.zip")

# made by https://github.com/seambr