## Usage:
1. Find the ligands of a complex given its entry ID (PDB ID).
2. Get the SMILES strings and SDF files of those ligands.

#### Based on the Data API of the RCSB PDB web services (https://www.rcsb.org/docs/programmatic-access/web-services-overview)

#### reference: williamgilpin/pypdb

In [None]:
import json
import requests
import warnings
from typing import Optional
import os
import time

In [17]:
def request_limited(url: str,
                    rtype: str = "GET",
                    num_attempts: int = 3,
                    sleep_time=0.5,
                    **kwargs) -> Optional[requests.models.Response]:
    """
    HTTP request with retries and back-off based on the response code.

    Parameters
    ----------
    url : str
        The url for the request
    rtype : str
        The request type (one of ["GET", "POST"])
    num_attempts : int
        In case of a failed retrieval, the number of times to try again,
        so at most ``num_attempts + 1`` requests are sent
    sleep_time : float
        Base waiting time in seconds between requests; the actual wait
        grows linearly with the number of attempts already made
    **kwargs : dict
        The keyword arguments to pass to the request

    Returns
    -------
    response : requests.models.Response
        The server response object. Only returned if the request was
        successful (status code 200), otherwise returns None.

    """

    if rtype not in ["GET", "POST"]:
        warnings.warn("Request type not recognized")
        return None

    send = requests.get if rtype == "GET" else requests.post

    for attempt in range(num_attempts + 1):
        curr_sleep = (1 + attempt) * sleep_time

        try:
            response = send(url, **kwargs)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as err:
            # Transient network problems are treated like a failed attempt
            # instead of escaping to the caller, which expects None.
            warnings.warn("Request failed (" + str(err) + "). Retrying")
            time.sleep(curr_sleep)
            continue

        status = response.status_code
        if status == 200:
            return response

        if status == 429:
            warnings.warn("Too many requests, waiting " + str(curr_sleep) +
                          " s")
            time.sleep(curr_sleep)
        elif 500 <= status < 600:
            warnings.warn("Server error encountered. Retrying")
            # Back off before retrying rather than hitting a struggling
            # server again immediately.
            time.sleep(curr_sleep)
        elif 400 <= status < 500:
            # Other client errors (e.g. 404 for an unknown id) will not
            # change on retry, so stop instead of re-sending the request.
            warnings.warn("Client error " + str(status) +
                          " encountered. Not retrying")
            return None

    warnings.warn("Too many failures on requests. Exiting...")
    return None



In [None]:
def get_nonpolymer_bound_components(entry_id='7rfs',
                                    url_root='https://data.rcsb.org/rest/v1/core/entry/'):
    """
    Look up the non-polymer bound components (ligands) of an entry.

    Parameters
    ----------
    entry_id : str
        The entry id (PDB id) that is appended to ``url_root``
    url_root : str
        The base url of the entry endpoint

    Returns
    -------
    ligands
        The value stored under
        ``['rcsb_entry_info']['nonpolymer_bound_components']`` in the JSON
        response (presumably a list of component ids - verify against the
        API schema). None if the request failed, the response is not valid
        JSON, or the entry has no such field.

    """
    response = request_limited(url_root + entry_id)
    if response is None or response.status_code != 200:
        warnings.warn("Retrieval failed, returning None")
        return None

    try:
        all_info = json.loads(response.text)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        warnings.warn("Response is not valid JSON, returning None")
        return None

    try:
        return all_info['rcsb_entry_info']['nonpolymer_bound_components']
    except (KeyError, TypeError):
        # Only a missing key or an unexpected JSON shape means "no ligands";
        # anything else (e.g. KeyboardInterrupt) must not be swallowed.
        warnings.warn("No ligands found, returning None")
        return None



def get_comp_smiles(comp_id='4WI',
                    url_root='https://data.rcsb.org/rest/v1/core/chemcomp/',
                    stero=True):
    """
    Fetch the SMILES string of a chemical component.

    Parameters
    ----------
    comp_id : str
        The chemical component id that is appended to ``url_root``
    url_root : str
        The base url of the chemical component endpoint
    stero : bool
        If True, read the ``smilesstereo`` descriptor, otherwise the
        ``smiles`` descriptor

    Returns
    -------
    smiles
        The selected entry of ``rcsb_chem_comp_descriptor`` in the JSON
        response. None if the request failed, the response is not valid
        JSON, or the descriptor is missing.

    """
    response = request_limited(url_root + comp_id)
    if response is None or response.status_code != 200:
        warnings.warn("Retrieval failed, returning None")
        return None

    try:
        comp_info = json.loads(response.text)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        warnings.warn("Response is not valid JSON, returning None")
        return None

    descriptor_key = 'smilesstereo' if stero else 'smiles'
    try:
        return comp_info['rcsb_chem_comp_descriptor'][descriptor_key]
    except (KeyError, TypeError):
        # Only a missing key or an unexpected JSON shape means "no smiles";
        # anything else (e.g. KeyboardInterrupt) must not be swallowed.
        warnings.warn("No smiles found, returning None")
        return None


def get_ligands_smiles(entry_id='7rfs',
                       stero=True):
    """
    Map every ligand of an entry to its SMILES string.

    Parameters
    ----------
    entry_id : str
        The entry id (PDB id) whose ligands are looked up
    stero : bool
        Passed through to ``get_comp_smiles`` to choose the descriptor

    Returns
    -------
    ligands2smiles : dict
        Maps each ligand id to the result of ``get_comp_smiles`` (which may
        be None for a ligand whose lookup failed). Empty if the entry
        lookup failed or reported no ligands.

    """
    ligands = get_nonpolymer_bound_components(entry_id=entry_id)
    # The lookup returns None on failure or when the entry has no ligands;
    # iterating over None would raise TypeError.
    if not ligands:
        return {}

    ligands2smiles = {}
    for ligand in ligands:
        # Query each distinct ligand only once.
        if ligand not in ligands2smiles:
            ligands2smiles[ligand] = get_comp_smiles(comp_id=ligand,
                                                     stero=stero)
    return ligands2smiles

In [12]:
# Example: map every ligand of entry 7rfs to its stereo SMILES and
# pretty-print the resulting dict as JSON.
data = get_ligands_smiles(entry_id='7rfs', stero=True)
print(json.dumps(data, indent=2))

{
  "4WI": "[H]/N=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@@H]2[C@@H]3[C@@H](C3(C)C)CN2C(=O)[C@H](C(C)(C)C)NC(=O)C(F)(F)F"
}


In [15]:
def download_comp_sdf_file(comp_id='4WI',
                           ideal=True,
                           url_root='https://files.rcsb.org/ligands/download/',
                           save_root='./'):
    """
    Download the SDF file of a chemical component and save it to disk.

    Parameters
    ----------
    comp_id : str
        The chemical component id
    ideal : bool
        If True, fetch ``<comp_id>_ideal.sdf``, otherwise
        ``<comp_id>_model.sdf``
    url_root : str
        The base url the file name is appended to
    save_root : str
        The directory the file is written to; it is created if missing

    Returns
    -------
    save_path : str
        The path of the written file, or None if the download failed.

    """
    suffix = '_ideal.sdf' if ideal else '_model.sdf'
    file_name = comp_id + suffix

    response = request_limited(url_root + file_name)
    if response is None or response.status_code != 200:
        warnings.warn("Retrieval failed, returning None")
        return None

    # An empty save_root means the current directory; os.makedirs('') fails.
    if save_root:
        os.makedirs(save_root, exist_ok=True)

    save_path = os.path.join(save_root, file_name)
    # Write the raw bytes so the saved file is identical to the served one,
    # independent of guessed response encoding, the platform default
    # encoding, and newline translation.
    with open(save_path, 'wb') as f:
        f.write(response.content)
    return save_path

In [18]:
# Example: download 4WI_ideal.sdf into the current directory; the call
# evaluates to the path of the saved file.
download_comp_sdf_file(comp_id='4WI', ideal=True, save_root='./')

'./4WI_ideal.sdf'