# Dubai
Quantum-accurate bond inference and partial charge calculations.

## 0) Setup
This is where we prepare the tengu client, directories, and input data we'll be working with.

## 0.0) Imports

In [133]:
import os
import json
import requests
import tengu
from pathlib import Path

## 0.1) Credentials
Add an API URL and an access token to authenticate with the Tengu API.

In [134]:
# Access token: export TENGU_TOKEN in your shell, or swap the environment lookup
# below for your token. TOKEN is None when the variable is not set.
TOKEN = os.environ.get("TENGU_TOKEN")

# Deployment URL: export TENGU_URL to target a custom deployment; when it is unset
# or empty we fall back to the default, https://tengu.qdx.ai
TENGU_URL = os.environ.get("TENGU_URL") or "https://tengu.qdx.ai"

# When these environment variables are set, the Tengu client uses them automatically,
# so this step can be skipped in your future work with the Tengu client.

## 0.2) Configuration
Let's set some global variables that define our project.

In [135]:
# Project metadata for this demo: a free-text description, plus the tags that are
# handed to the client as `batch_tags` when it is built.
DESCRIPTION = "tengu-py demo notebook"
TAGS = [
    "tengu-py",
    "demo",
    "dubai",
    "convert",
]

## 0.3) Build your tengu client
In this step, we create a Tengu client that serves as the Python interface for the individual cheminformatics modules within the Tengu API.

In [136]:
# Get our client, for calling modules and using the tengu API.
# Note, access_token and url are optional if you have exported the env variables TENGU_TOKEN and TENGU_URL.
# Workspace sets the location where we will store our session history file and module lock file.
# By using the `build_provider_with_functions` method, we will also build helper functions calling each module.

# Delete and recreate the working directory if it exists
WORK_DIR = Path.home() / "qdx" / "cairo-tengu-py-demo"

if not WORK_DIR.exists():
    Path(WORK_DIR).mkdir(parents=True)  
    client = await tengu.build_provider_with_functions(
        access_token=TOKEN, url=TENGU_URL, workspace=WORK_DIR, batch_tags=TAGS
    )
else:
    client = await tengu.build_provider_with_functions(
        access_token=TOKEN, url=TENGU_URL, workspace=WORK_DIR, batch_tags=TAGS
    )
    await client.nuke()
    Path(WORK_DIR).mkdir(parents=True)
    client = await tengu.build_provider_with_functions(
        access_token=TOKEN, url=TENGU_URL, workspace=WORK_DIR, batch_tags=TAGS
    )



## 0.4) Download Aspirin from PubChem and convert to QDXF format
QDXF is the central molecule format of the Tengu API, so before we use the Dubai module to infer connectivity (bonds) for our molecule, we must convert our SDF file to QDXF.

In [137]:
# Convert aspirin to a QDXF file so we can use it for this demo
SMILES_STRING = "CC(=O)OC1=CC=CC=C1C(=O)O"
SDF_LINK = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/{SMILES_STRING}/record/SDF?record_type=3d"

file_path = f'{WORK_DIR}/aspirin.sdf'
with open(file_path, 'wb') as f:
    f.write(requests.get(SDF_LINK).content)

# We need to specify storage > file size to ensure that we have allocated enough resources for the convert module
(ligand,) = await client.convert("SDF", Path(file_path), resources={"storage": 5000})
await ligand.get()

ligand = await ligand.get()

In [138]:
# Dubai will perceive the bonds (quantum-accurately) in the next step, so we strip
# the connectivity that came from the SDF -- but keep a reference to it first so the
# inferred bonds can be compared against it later.
first_entry = ligand[0]
EXPECTED_CONNECTIVITY = first_entry['topology']['connectivity']

# From here on `ligand` is the first converted entry itself, not the enclosing list,
# so this cell cannot be re-run without re-running the conversion cell first.
ligand = first_entry
topology = ligand['topology']

# Drop the bonds from the molecule; EXPECTED_CONNECTIVITY still references them.
del topology['connectivity']

# Use a fragment multiplicity of 1.
topology['fragment_multiplicities'] = [1]



## 1.0) Set Dubai module specific configuration
In this stage, we set the configuration for the Dubai module and create the temporary directory where our QDXF aspirin will be saved in the next step, as the Dubai module takes the file itself as input.


In [139]:
# Resources requested for the Dubai run; passed as `resources=` when the module is called.
DUBAI_RESOURCES = {
    "gpus": 1,
    "storage": 1024_000,
    "walltime": 60,
}

# Location of the temporary QDXF file that is handed to Dubai.
TEMP_FILEPATH = Path(f'{WORK_DIR}/temp/aspirin.qdxf.json')

# Create the temp directory, so we can save our temporary QDXF file later.
# exist_ok=True makes this cell safe to re-run without a FileExistsError.
TEMP_FILEPATH.parent.mkdir(parents=True, exist_ok=True)



## 1.1) Run Dubai
Finally, we run Dubai to perform quantum-accurate bond inference, as well as the calculation of partial charges.

In [140]:
json.dump(ligand, open(TEMP_FILEPATH, 'w'))

(ligand_with_bonds,) = await client.dubai(TEMP_FILEPATH, resources=DUBAI_RESOURCES)

In [141]:
from pprint import pprint
output_ligand = (await ligand_with_bonds.get())

for expected_bond, outputed_bond in zip(EXPECTED_CONNECTIVITY, output_ligand['topology']['connectivity']):
    # Check start atoms are the same
    assert expected_bond[0] == outputed_bond[0]
    # Check ending atoms are the same
    assert expected_bond[1] == outputed_bond[1]
    # NB: we don't check the third item of the bond -- the bond type, as Dubai accurately outputs ring bonds as 
    # a specific 'RINGBOND' type, whereas SDF aspirin was interleaving single and double bonds.