# DiffDock: Diffusion Steps, Twists, and Turns for Molecular Docking

DiffDock is a state-of-the-art diffusion generative model designed for molecular docking, which predicts the binding structure of small molecule ligands to target proteins. This process is essential in drug design. Traditional docking methods are slow and often inaccurate due to the complexity of the search space, while deep learning-based methods have yet to achieve significant improvements in accuracy.

Unlike previous methods, DiffDock frames the task as learning a distribution over ligand poses, efficiently refining random poses by modeling translations, rotations, and torsions. This approach significantly improves prediction accuracy compared to both search-based and deep learning-based docking methods.

DiffDock achieves a 38% top-1 success rate (ligand RMSD below 2 Å) on the PDBBind benchmark, surpassing state-of-the-art traditional and deep learning methods, and it also performs well on computationally generated protein structures. It provides multiple predictions with confidence estimates, enabling more robust analysis of protein-ligand interactions.


## Sample Application

The application below provides a simple web interface for interacting with the self-hosted model.

1. Start by installing the dependencies:


In [None]:
!pip3 install gradio

2. Run the application:

In [None]:
import hashlib
import html
import json
import traceback
from pathlib import Path

import gradio as gr
import requests

# URL of the self-hosted DiffDock "generate" endpoint; process_request POSTs
# the docking payload (protein, ligand and sampling settings) to this address.
invoke_url = "http://diffdock:8000/molecular-docking/diffdock/generate"

def get_confidence(score):
    """Translate a numeric pose confidence score into a coarse label.

    Scores strictly above 0 are 'High', scores strictly below -1.5 are
    'Low', and everything in between (both boundaries included) is
    'Moderate'.
    """
    if score > 0:
        label = 'High'
    elif score < -1.5:
        label = 'Low'
    else:
        label = 'Moderate'
    return label

def process_request(
    protein_file,
    ligand_file,
    samples_nr,
    inference_steps,
    actual_inference_steps):
    """Send a docking request to the DiffDock endpoint and unpack the reply.

    Args:
        protein_file: Mapping with a 'path' entry pointing at the uploaded
            protein file.
        ligand_file: Mapping with a 'path' entry pointing at the uploaded
            ligand file. A '.sdf' suffix (case-insensitive) selects the
            'sdf' type; anything else is sent as 'mol2'.
        samples_nr: Sent to the service as "num_poses".
        inference_steps: Sent to the service as "time_divisions".
        actual_inference_steps: Sent to the service as "steps".

    Returns:
        A 4-tuple ``(output_path, full_output, poses_by_rank, dropdown)``:
        the path of the saved JSON response, the response dict extended with
        'ligand_file_type', a dict mapping each rank label to its ligand
        pose, and a ``gr.Dropdown`` pre-selected on the first rank label.
        On any failure (non-200 status, no poses returned, or an exception)
        a message is printed and ``(None, None, None, None)`` is returned.

    Side effects:
        Writes ``<md5-of-response>.out`` next to the ligand file and
        ``result.json`` in the current working directory.
    """
    try:
        ligand_file_path = ligand_file.get('path')
        ligand_file_type = 'sdf' if ligand_file_path.lower().endswith('.sdf') else 'mol2'

        with open(ligand_file_path) as ligand_handle:
            ligand_content = ligand_handle.read()

        with open(protein_file.get('path')) as protein_handle:
            protein_content = protein_handle.read()

        payload = {
            "ligand": ligand_content,
            "ligand_file_type": ligand_file_type,
            "protein": protein_content,
            "num_poses": samples_nr,
            "time_divisions": inference_steps,
            "steps": actual_inference_steps,
            "save_trajectory": False,
            "is_staged": False,
        }
        headers = {"Content-Type": "application/json"}

        # NOTE: no timeout is set, so this call blocks until the service answers.
        response = requests.post(invoke_url, headers=headers, json=payload)

        if response.status_code != 200:
            print(f'Unexpected response code received from the NIM API: {response.status_code}. Error: {response.text}')
            return None, None, None, None

        # Parse and serialise the body once; every later step reuses these.
        inference_output = response.json()
        serialized_output = json.dumps(inference_output).encode('utf-8')

        # The digest only provides a content-derived file name; it is not
        # used for anything security related.
        file_hash = hashlib.md5(serialized_output).hexdigest()
        temp_output_file_path = Path(ligand_file_path).parent.joinpath(f'{file_hash}.out').as_posix()
        with open(temp_output_file_path, 'wb') as f:
            f.write(serialized_output)

        # Treat a missing or null field as "no poses" instead of failing in
        # enumerate()/zip().
        scores = inference_output.get('position_confidence') or []
        positions = inference_output.get('ligand_positions') or []

        ranks = [
            f'Rank {index + 1}. Score: {float(score):.3f}. Confidence: {get_confidence(float(score))}'
            for index, score in enumerate(scores)
        ]

        with open("result.json", "w") as f:
            json.dump(inference_output, f)

        if not ranks:
            # Without at least one rank there is nothing to pre-select in
            # the dropdown, so report it explicitly.
            print('The NIM API response did not contain any ranked poses.')
            return None, None, None, None

        return (
            temp_output_file_path,
            {**inference_output, 'ligand_file_type': ligand_file_type},
            dict(zip(ranks, positions)),
            gr.Dropdown(choices=ranks, value=ranks[0]),
        )
    except Exception:
        # UI callback boundary: log the failure and fall through to the
        # "no result" return so the interface keeps running.
        print(traceback.format_exc())

    return None, None, None, None


def _escape_js_template(value):
    """Return ``value`` as text that is safe inside a JS template literal."""
    text = str(value)
    # Backslashes go first so the escapes added afterwards are not doubled.
    for raw, escaped in (
        ('\\', '\\\\'),
        ('`', '\\`'),
        ('${', '\\${'),
        ('</', '<\\/'),  # keeps "</script>" in the data from closing the script tag
    ):
        text = text.replace(raw, escaped)
    return text


def update_view(output_selector_content, output_selector, full_output, default_content="Output visualisation unavailable"):
    """Build the iframe HTML that renders the selected pose with 3Dmol.js.

    Args:
        output_selector_content: Dict mapping rank labels to ligand pose
            text, or a falsy value when no result is available.
        output_selector: Currently selected rank label, or a falsy value.
        full_output: Dict read for the 'protein', 'ligand' and
            'ligand_file_type' keys; ``None`` is treated as an empty dict.
        default_content: Text used in place of the pose when nothing is
            selected or the selected label is unknown.

    Returns:
        An ``<iframe>`` string whose ``srcdoc`` holds a self-contained viewer
        page showing the protein, the selected pose and, when the uploaded
        ligand text is available, the uploaded ligand as well.
    """
    # Always bind ``mol``: the selector is empty right after a failed run.
    if output_selector_content and output_selector:
        mol = output_selector_content.get(output_selector, default_content)
    else:
        mol = default_content

    # A failed request stores None in the state; read it as "no data".
    full_output = full_output or {}
    structure = full_output.get('protein')
    ligand = full_output.get('ligand')
    ext = full_output.get('ligand_file_type')

    if ligand is None:
        # No uploaded ligand text to overlay: leave both snippets out.
        lig_str_1 = ""
        lig_str_2 = ""
    else:
        lig_str_1 = "let original_ligand = `" + _escape_js_template(ligand) + "`;"
        lig_str_2 = f"""
        viewer.addModel( original_ligand, "{ext}" );
        viewer.getModel(2).setStyle({{stick:{{colorscheme:"greenCarbon"}}}});"""

    mol_js = _escape_js_template(mol)
    structure_js = _escape_js_template(structure)

    js = f"""<!DOCTYPE html>
        <html>
        <head>
    <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
    <style>
    body{{
        font-family:sans-serif;
        text-align: center;
    }}
    .mol-container {{
    width: 100%;
    height: 600px;
    position: relative;
    margin: 0 auto;
    }}
    .mol-container select{{
        background-image:None;
    }}
    .green{{
        width:20px;
        height:20px;
        background-color:#33ff45;
        display:inline-block;
    }}
    .magenta{{
        width:20px;
        height:20px;
        background-color:magenta;
        display:inline-block;
    }}
    </style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js" integrity="sha512-STof4xm1wgkfm7heWqFJVn58Hm3EtS31XFaagaa8VMReCXAkQnJZ+jEy8PCC/iT18dFy95WcExNHFTqLyp72eQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
    <script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
    </head>
    <body>
     <button id="startanimation">Replay diffusion process</button>
     <button id="togglesurface">Toggle surface representation</button>
     <div>
     <span class="green"></span> Uploaded ligand position
     <span class="magenta"></span> Predicted ligand position
     </div>
    <div id="container" class="mol-container"></div>

            <script>
              let ligand = `{mol_js}`
              let structure = `{structure_js}`
              {lig_str_1}

              let viewer = null;
              let surface = false;
              let surf = null;
              $(document).ready(function () {{
                let element = $("#container");
                let config = {{ backgroundColor: "white" }};
                viewer = $3Dmol.createViewer(element, config);
                viewer.addModel( structure, "pdb" );
                viewer.setStyle({{}}, {{cartoon: {{color: "gray"}}}});
                viewer.zoomTo();
                viewer.zoom(0.7);
                viewer.addModelsAsFrames(ligand, "{ext}");
                viewer.animate({{loop: "forward",reps: 1}});

                viewer.getModel(1).setStyle({{stick:{{colorscheme:"magentaCarbon"}}}});

             {lig_str_2}

                viewer.render();

              }})
              $("#startanimation").click(function() {{
                viewer.animate({{loop: "forward",reps: 1}});
              }});
              $("#togglesurface").click(function() {{
               if (surface != true) {{
                    surf = viewer.addSurface($3Dmol.SurfaceType.VDW, {{ "opacity": 0.9, "color": "white" }}, {{ model: 0 }});
                    surface = true;
                }} else {{
                    viewer.removeAllSurfaces()
                    surface = false;
                }}
              }});
        </script>
        </body></html>"""

    # The page is embedded in a single-quoted attribute. Entity-escaping it
    # stops quotes in the molecule data (e.g. the atom name O5') from ending
    # the attribute early; the browser decodes the entities before parsing
    # the srcdoc document.
    srcdoc = html.escape(js, quote=True)

    return f"""<iframe style="border:1;display:block;margin:auto;width: 100%; height: 700px" name="result" allow="midi; geolocation; microphone; camera; 
    display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
    allow-scripts allow-same-origin allow-popups 
    allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
    allowpaymentrequest="" frameborder="0" srcdoc='{srcdoc}'></iframe>"""

    
# Gradio UI: an input section (files + sampling sliders), an output section
# (ranked-pose dropdown, 3D viewer, downloadable result), a run button and a
# set of bundled examples.
with gr.Blocks(title="DiffDock Sample App") as demo:
    gr.Markdown("# DiffDock Sample App")

    # The accordion handle is not needed later, so it is left unnamed
    # (binding it to ``input`` would replace the builtin at module level).
    with gr.Accordion(label="Input", open=True):
        with gr.Row(equal_height=False):
            with gr.Column():
                gr.Markdown("## Protein")
                gr.Markdown("Upload PDB file below")
                protein_file = gr.File(label="Input Protein File", file_types=[".pdb"], file_count="single")

            with gr.Column():
                gr.Markdown("## Ligand")
                gr.Markdown("Upload mol2/sdf file below")
                ligand_file = gr.File(label="Input Ligand File", file_types=[".mol2", ".sdf"], file_count="single")

        with gr.Row(equal_height=False):
            samples_nr = gr.Slider(label="Number of samples to generate", value=20, minimum=1, maximum=100, step=1, interactive=True)
            inference_steps = gr.Slider(label="Number of denoising steps", value=20, minimum=4, maximum=20, step=1, interactive=True)
            actual_inference_steps = gr.Slider(label="Actual number of denoising steps", value=18, minimum=2, maximum=18, step=1, interactive=True)

    # Collapsed until the run button is pressed (see the click handler below).
    with gr.Accordion(label="Output", open=False) as output:
        output_selector = gr.Dropdown(label="Ranked samples", interactive=True)
        output_viewer = gr.HTML(label="Protein Viewer", show_label=True)
        output_file = gr.File(label="Output Files")

    with gr.Row():
        run_btn = gr.Button("Run DiffDock")

    with gr.Row():
        with gr.Column():
            gr.Markdown("## Examples")
            # Every example uses the same sampling settings (20 samples,
            # 20 denoising steps, 18 actual steps); only the files differ.
            gr.Examples(
                examples=[
                    [protein_path, ligand_path, 20, 20, 18]
                    for protein_path, ligand_path in (
                        ("samples/mpro_sarscov2.pdb", "samples/compound_1.sdf"),
                        ("samples/mpro_sarscov2.pdb", "samples/compound_2.sdf"),
                        ("samples/mpro_sarscov2.pdb", "samples/compound_3.sdf"),
                        ("samples/mpro_sarscov2.pdb", "samples/compound_4.sdf"),
                        ("samples/protein.pdb", "samples/ligand.sdf"),
                    )
                ],
                inputs=[protein_file, ligand_file, samples_nr, inference_steps, actual_inference_steps],
            )

    with gr.Row():
        gr.Markdown("Many thanks to [Simon Duerr](https://huggingface.co/simonduerr), who created the "
                    "[original DiffDock web interface](https://huggingface.co/spaces/simonduerr/diffdock), "
                    "on which this interface is based.")

    # Per-session state shared between the two callbacks: the full service
    # response and the rank-label -> pose mapping.
    full_output = gr.State({})
    output_selector_content = gr.State({})

    # First open the output section, then run the docking request.
    # preprocess=False hands process_request the raw component payloads;
    # it reads the 'path' key of each file payload.
    run_btn.click(lambda: gr.Accordion(open=True), None, output).then(
        fn=process_request,
        inputs=[protein_file, ligand_file, samples_nr, inference_steps, actual_inference_steps],
        outputs=[output_file, full_output, output_selector_content, output_selector],
        preprocess=False,
    )

    # Re-render the viewer whenever the selected rank changes.
    output_selector.change(
        fn=update_view,
        inputs=[output_selector_content, output_selector, full_output],
        outputs=output_viewer,
    )


# Start the web UI only when this file is executed as the entry point.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link in addition
    # to the local server (per the Gradio launch() docs) — confirm this
    # exposure is intended for a self-hosted model.
    demo.launch(share=True)