# AlphaFold2

## Introduction

[AlphaFold](https://github.com/google-deepmind/alphafold) is a deep learning model developed by Google's AI company, DeepMind, to predict protein structures. It uses neural networks trained on known protein structures to estimate the 3D shape of proteins based on their amino acid sequences. AlphaFold made a significant breakthrough in 2020 by achieving unprecedented accuracy in the Critical Assessment of protein Structure Prediction (CASP) competition, effectively solving the long-standing protein folding problem. The model's ability to rapidly and accurately predict protein structures has wide-ranging implications for biological research, drug discovery, and understanding diseases at the molecular level.

AlphaFold2's initial training focused on individual protein chains, making it highly proficient in predicting their structures. Subsequently, a variant called [AlphaFold-Multimer](https://doi.org/10.1101/2021.10.04.463034) was developed to address protein-protein complexes. This version can predict structures of both homo-multimers (complexes of identical protein chains) and hetero-multimers (complexes of different protein chains).

Importantly, AlphaFold2 doesn't merely reproduce known protein structures. Independent studies have demonstrated its ability to predict previously unseen protein folds, expanding our understanding of protein structural diversity. This capability has been verified by multiple research groups, highlighting AlphaFold2's potential to uncover novel protein architectures not yet observed in the Protein Data Bank (PDB).

## Demo

Below is a sample application, developed using [Gradio](https://www.gradio.app/), to facilitate interaction with the [NVIDIA AlphaFold2 NIM](https://catalog.ngc.nvidia.com/orgs/nim/teams/deepmind/containers/alphafold2) container deployed to [OKE](https://www.oracle.com/cloud/cloud-native/kubernetes-engine/).

### Prerequisites

We need to install the `gradio` Python package:


In [None]:
!pip install gradio

In [None]:
import hashlib
import html
import json
import tempfile
import traceback
from pathlib import Path

import gradio as gr
import requests

# Base URL of the AlphaFold2 inference service that every request is sent to.
# NOTE(review): "alphafold2" looks like an in-cluster service name -- confirm
# it resolves from wherever this notebook runs.
invoke_host = "http://alphafold2:8000"

# Endpoint paths keyed by a human-readable label. The predict_* functions look
# their path up by these exact keys, and the same strings are used as the tab
# titles in the UI below.
invoke_paths = {
    "Predict MSA from sequence": "/protein-structure/alphafold2/predict-msa-from-sequence",
    "Predict structure from MSA": "/protein-structure/alphafold2/predict-structure-from-msa",
    "Predict structure from sequence": "/protein-structure/alphafold2/predict-structure-from-sequence",
    # Multimer endpoints are listed for reference but are not wired into the UI.
    # "multimer_predict_msa_from_sequence": "/protein-structure/alphafold2/multimer/predict-msa-from-sequence",
    # "multimer_predict_structure_from_msa": "/protein-structure/alphafold2/multimer/predict-structure-from-msa",
    # "multimer_predict_structure_from_sequences": "/protein-structure/alphafold2/multimer/predict-structure-from-sequences"
}

# Mol* viewer assets (pinned to version 4.7.1 on the jsDelivr CDN); they are
# injected into the iframe document built by update_view.
molstar_css = "https://cdn.jsdelivr.net/npm/molstar@4.7.1/build/viewer/molstar.min.css"
molstar_js = "https://cdn.jsdelivr.net/npm/molstar@4.7.1/build/viewer/molstar.min.js"

def update_view(output_selector, full_output):
    """Render the selected predicted structure in an embedded Mol* viewer.

    Args:
        output_selector: Dropdown value such as ``"Structure 2"``; the trailing
            number is the 1-based position of the structure in ``full_output``.
        full_output: Indexable collection of structures returned by the
            inference endpoint. Each entry is handed to the viewer as
            PDB-formatted text.

    Returns:
        An ``<iframe>`` HTML snippet hosting the viewer, or ``None`` when
        nothing that looks like a structure is selected.

    Raises:
        gr.Error: If the selection cannot be parsed or the structure cannot
            be looked up.
    """
    try:
        if not (output_selector and "Structure" in output_selector):
            return None

        index = int(output_selector.split()[-1])
        if index < 1:
            # Without this check "Structure 0" would silently wrap around to
            # the last element via negative indexing.
            raise IndexError(f"structure index out of range: {index}")

        structure = full_output[index - 1]

        # Emit the structure as a JSON string literal (valid JavaScript) so
        # quotes, backticks, "${" or newlines in the data cannot terminate the
        # string; "</" is escaped so the data cannot close the <script> tag.
        pdb_literal = json.dumps(str(structure)).replace("</", "<\\/")

        viewer_document = f"""
<head>
    <link rel="stylesheet" type="text/css" href="{molstar_css}" />
</head>
<body>
    <div id="app" style="position: relative;margin: 0 auto; height: 650px; width: 100%"></div>
    <script type="text/javascript" src="{molstar_js}"></script>
    <script type="text/javascript">
    const pdbString = {pdb_literal};
        molstar.Viewer.create("app", {{
            layoutIsExpanded: false,
            layoutShowControls: false,
            layoutShowRemoteState: false,
            layoutShowSequence: true,
            layoutShowLog: false,
            layoutShowLeftPanel: false,
            viewportShowExpand: false,
            viewportShowSelectionMode: false,
            viewportShowAnimation: false,
        }}).then(viewer => {{
            viewer.loadStructureFromData(pdbString, "pdb", false);
        }});
    </script>
</body>
"""

        # The whole document lives inside an HTML attribute, so it must be
        # entity-escaped; the browser decodes it again when it loads srcdoc.
        srcdoc = html.escape(viewer_document, quote=True)

        return f"""<iframe style="border:1;display:block;margin:auto;width: 100%; height: 700px" srcdoc="{srcdoc}"></iframe>"""
    except Exception as exc:
        # Log before raising: anything placed after the raise never runs.
        print(traceback.format_exc())
        raise gr.Error("Can't render the structure!") from exc

def predict_msa_from_sequence(
    sequence,
    algorithm,
    bit_score,
    databases,
    e_value,
    iterations):
    """Call the MSA endpoint and save its alignments and templates to files.

    The arguments are forwarded unchanged as the JSON request body under keys
    of the same name.

    Returns:
        ``(alignments_file, templates_file)``: POSIX paths of two JSON files
        written to the system temp directory, or ``(None, None)`` when the
        service rejects the input with HTTP 422 (a warning is shown instead).

    Raises:
        gr.Error: If the service answers with any status other than 200/422.
        Exception: Any other failure (network, JSON decoding, file I/O) is
            logged and re-raised unchanged.
    """
    try:
        payload = {
            "sequence": sequence,
            "algorithm": algorithm,
            "bit_score": bit_score,
            "databases": databases,
            "e_value": e_value,
            "iterations": iterations
        }

        headers = {"Content-Type": "application/json"}

        # No timeout is passed, so this call blocks until the service answers.
        response = requests.post(f"{invoke_host}{invoke_paths['Predict MSA from sequence']}", headers=headers, json=payload)

        if response.status_code == 422:
            # gr.Warning is a function that displays a toast, not an exception
            # class, so it must be called rather than raised.
            gr.Warning("Invalid input!")
            return None, None

        if response.status_code != 200:
            raise gr.Error("Could not process the request!")

        # Decode the body once and reuse it for hashing and for both files.
        result = response.json()

        # MD5 only derives a content-based file name here; it is not used for
        # anything security-related.
        file_hash = hashlib.md5(json.dumps(result).encode('utf-8')).hexdigest()

        output_dir = Path(tempfile.gettempdir())
        alignments_file = output_dir.joinpath(f'{file_hash}-alignments.out').as_posix()
        templates_file = output_dir.joinpath(f'{file_hash}-templates.out').as_posix()

        with open(alignments_file, 'wb') as f:
            f.write(json.dumps(result.get('alignments')).encode('utf-8'))

        with open(templates_file, 'wb') as f:
            f.write(json.dumps(result.get('templates')).encode('utf-8'))

        return alignments_file, templates_file

    except Exception:
        # Keep the full traceback in the server log; the UI only shows the
        # error message.
        print(traceback.format_exc())
        raise

def predict_structure_from_msa(
    sequence,
    alignments,
    templates,
    relax_prediction):
    """Call the structure-from-MSA endpoint using uploaded MSA/template files.

    Args:
        sequence: Amino acid sequence, forwarded as ``"sequence"``.
        alignments: File payload whose ``'path'`` entry locates a JSON file
            with the alignments.
        templates: File payload whose ``'path'`` entry locates a JSON file
            with the templates.
        relax_prediction: Forwarded as ``"relax_prediction"``.

    Returns:
        ``(dropdown, inference_output, structure_file)``: a ``gr.Dropdown``
        listing ``"Structure 1"`` .. ``"Structure N"``, the decoded response
        body, and the POSIX path of a temp file holding that body as JSON.
        Returns ``(None, None, None)`` when the service rejects the input with
        HTTP 422 (a warning is shown instead).

    Raises:
        gr.Error: If a file is missing or the service answers with any status
            other than 200/422.
        Exception: Any other failure (network, JSON decoding, file I/O) is
            logged and re-raised unchanged.
    """
    try:
        if not alignments or not templates:
            # Fail with a readable message instead of an AttributeError on None.
            raise gr.Error("Alignments and templates files are required!")

        with open(alignments.get('path'), encoding='utf-8') as alignments_file:
            alignments_content = json.load(alignments_file)

        with open(templates.get('path'), encoding='utf-8') as templates_file:
            templates_content = json.load(templates_file)

        payload = {
            "sequence": sequence,
            "alignments": alignments_content,
            "templates": templates_content,
            "relax_prediction": relax_prediction
        }

        headers = {"Content-Type": "application/json"}

        # No timeout is passed, so this call blocks until the service answers.
        response = requests.post(f"{invoke_host}{invoke_paths['Predict structure from MSA']}", headers=headers, json=payload)

        if response.status_code == 422:
            # gr.Warning is a function that displays a toast, not an exception
            # class, so it must be called rather than raised.
            gr.Warning("Invalid input!")
            return None, None, None

        if response.status_code != 200:
            raise gr.Error("Could not process the request!")

        # Decode the body once and reuse it everywhere below.
        inference_output = response.json()
        serialized_output = json.dumps(inference_output).encode('utf-8')

        # MD5 only derives a content-based file name here; it is not used for
        # anything security-related.
        file_hash = hashlib.md5(serialized_output).hexdigest()
        structure_file = Path(tempfile.gettempdir()).joinpath(f'{file_hash}-structure.out').as_posix()

        with open(structure_file, 'wb') as f:
            f.write(serialized_output)

        returned_structures = [f'Structure {position}' for position in range(1, len(inference_output) + 1)]
        # Guard against an empty result instead of failing on [0].
        selected = returned_structures[0] if returned_structures else None

        return gr.Dropdown(choices=returned_structures, value=selected), inference_output, structure_file

    except Exception:
        # Keep the full traceback in the server log; the UI only shows the
        # error message.
        print(traceback.format_exc())
        raise


def predict_structure_from_sequence(
    sequence,
    algorithm,
    bit_score,
    databases,
    e_value,
    iterations,
    relax_prediction):
    """Call the structure-from-sequence endpoint and package its result.

    The arguments are forwarded unchanged as the JSON request body under keys
    of the same name.

    Returns:
        ``(dropdown, inference_output, structure_file)``: a ``gr.Dropdown``
        listing ``"Structure 1"`` .. ``"Structure N"``, the decoded response
        body, and the POSIX path of a temp file holding that body as JSON.
        Returns ``(None, None, None)`` when the service rejects the input with
        HTTP 422 (a warning is shown instead).

    Raises:
        gr.Error: If the service answers with any status other than 200/422.
        Exception: Any other failure (network, JSON decoding, file I/O) is
            logged and re-raised unchanged.
    """
    try:
        payload = {
            "sequence": sequence,
            "algorithm": algorithm,
            "bit_score": bit_score,
            "databases": databases,
            "e_value": e_value,
            "iterations": iterations,
            "relax_prediction": relax_prediction
        }

        headers = {"Content-Type": "application/json"}

        # No timeout is passed, so this call blocks until the service answers.
        response = requests.post(f"{invoke_host}{invoke_paths['Predict structure from sequence']}", headers=headers, json=payload)

        if response.status_code == 422:
            # gr.Warning is a function that displays a toast, not an exception
            # class, so it must be called rather than raised.
            gr.Warning("Invalid input!")
            return None, None, None

        if response.status_code != 200:
            raise gr.Error("Could not process the request!")

        # Decode the body once and reuse it everywhere below.
        inference_output = response.json()
        serialized_output = json.dumps(inference_output).encode('utf-8')

        # MD5 only derives a content-based file name here; it is not used for
        # anything security-related.
        file_hash = hashlib.md5(serialized_output).hexdigest()
        structure_file = Path(tempfile.gettempdir()).joinpath(f'{file_hash}-structure.out').as_posix()

        with open(structure_file, 'wb') as f:
            f.write(serialized_output)

        returned_structures = [f'Structure {position}' for position in range(1, len(inference_output) + 1)]
        # Guard against an empty result instead of failing on [0].
        selected = returned_structures[0] if returned_structures else None

        return gr.Dropdown(choices=returned_structures, value=selected), inference_output, structure_file

    except Exception:
        # Keep the full traceback in the server log; the UI only shows the
        # error message.
        print(traceback.format_exc())
        raise

# Top-level Gradio app; each endpoint gets its own tab below.
# delete_cache is (frequency, age) in seconds: 172800 s = 48 h for both.
with gr.Blocks(title="AlphaFold Sample App", delete_cache=(172800, 172800)) as demo:
    gr.Markdown("# AlphaFold Sample App")

    # Tab 1: sequence -> MSA. Produces the alignments/templates files that the
    # "Predict structure from MSA" tab accepts as uploads.
    with gr.Tab("Predict MSA from sequence", id="a"):
        gr.Markdown("## Perform a Multiple Sequence Alignment (MSA) and return the MSA and templates for alphafold inference.")

        t1_sequence = gr.Textbox(
            label="Input amino acid sequence."
        )

        t1_databases = gr.Dropdown(
            choices=["uniref90", "mgnify", "small_bfd"],
            value=["uniref90", "mgnify", "small_bfd"],
            multiselect=True,
            interactive=True,
            label="Select the databases to be used for Multiple Sequence Alignment prediction",
            info="Passing all three will provide the most accurate structural prediction at the cost of requiring the longest runtime."
        )

        # Only one algorithm is offered, so the control is shown read-only.
        t1_algorithm = gr.Dropdown(
            choices=["jackhmmer"],
            value="jackhmmer",
            interactive=False,
            label="The algorithm used for Multiple Sequence Alignment"
        )

        t1_e_value = gr.Number(
            value=0.0001,
            label="The sequence e-value for filtering sequences in the MSA",
            info="Value should be between 0 and 1."
        )

        t1_bit_score = gr.Number(
            value=0,
            label="The sequence bit-score to use for filtering before MSA",
            info="If passed, this is used in place of e-value for filtering"
        )

        t1_iterations = gr.Number(
            value=1,
            label="The number of MSA iterations to perform"
        )

        # Example values are listed in the same order as the components below.
        gr.Examples(
            [
                [
                    "MAKTIKITQTRSAIGRLPKHKATLLGLGLRRIGHTVEREDTPAIRGMINAVSFMVKVEE",
                    ["uniref90", "mgnify", "small_bfd"],
                    "jackhmmer",
                    0.0001,
                    0,
                    1
                ],
            ],
            [t1_sequence, t1_databases, t1_algorithm, t1_e_value, t1_bit_score, t1_iterations],
        )

        t1_run_button = gr.Button(
            value="Run inference")

        # Output area stays hidden until the first run is started.
        with gr.Accordion(visible=False) as t1_out_block:
            t1_alignments_file = gr.File(label="Alignments Output File.")
            t1_templates_file = gr.File(label="Templates Output File.")

        # Reveal the output area first, then run the request. The inputs are
        # ordered to match predict_msa_from_sequence's parameters.
        t1_run_button.click(lambda : gr.Accordion(visible=True), None, t1_out_block).then(
            fn=predict_msa_from_sequence,
            inputs=[t1_sequence, t1_algorithm, t1_bit_score, t1_databases, t1_e_value, t1_iterations],
            outputs=[t1_alignments_file, t1_templates_file],
            preprocess=False
        )
                    
    # Tab 2: sequence + uploaded MSA/templates -> predicted structures.
    with gr.Tab("Predict structure from MSA", id="b"):
        gr.Markdown("## Perform structural prediction from an input MSA and templates.")

        t2_sequence = gr.Textbox(
            label="Input amino acid sequence."
        )

        t2_alignments = gr.File(
            label="The MSA results from predict-msa-from-sequence",
        )

        t2_templates = gr.File(
            label="Templates from the structural database search",
        )

        # Same label/info wording as the "Predict structure from sequence" tab.
        t2_relax_prediction = gr.Checkbox(
            label="Relax prediction.",
            info="Set to True to run structural relaxation after prediction",
            value=True
        )

        # NOTE(review): the example file paths are relative, so they presumably
        # must exist under the working directory -- confirm they are shipped.
        gr.Examples(
            [
                [
                    "MAKTIKITQTRSAIGRLPKHKATLLGLGLRRIGHTVEREDTPAIRGMINAVSFMVKVEE",
                    "examples/alignments.json",
                    "examples/templates.json",
                    True
                ],
            ],
            [t2_sequence, t2_alignments, t2_templates, t2_relax_prediction],
        )

        t2_run_button = gr.Button(
            value="Run inference")

        # Holds the complete inference response so update_view can pick the
        # selected structure without calling the service again.
        t2_full_output = gr.State({})

        # Output area stays hidden until the first run is started.
        with gr.Accordion(visible=False) as t2_out_block:
            t2_output_selector = gr.Dropdown(label="Returned Structures", interactive=True)
            t2_output_viewer = gr.HTML(label="Protein Structure Viewer", show_label=True)
            t2_output_file = gr.File(label="Inference result.")

        # Reveal the output area first, then run the request. preprocess=False
        # hands the handler the raw file payloads, which is why
        # predict_structure_from_msa reads each file's location via .get('path').
        t2_run_button.click(lambda : gr.Accordion(visible=True), None, t2_out_block).then(
            fn=predict_structure_from_msa,
            inputs=[t2_sequence, t2_alignments, t2_templates, t2_relax_prediction],
            outputs=[t2_output_selector, t2_full_output, t2_output_file],
            preprocess=False
        )

        # Re-render the viewer whenever a different structure is selected.
        t2_output_selector.change(fn=update_view,
                inputs=[t2_output_selector, t2_full_output],
                outputs=t2_output_viewer)
         
    # Tab 3: end-to-end prediction, sequence -> MSA -> structures, in one call.
    with gr.Tab(label="Predict structure from sequence", id="c"):
        gr.Markdown(value="## Predict a protein structure given an input amino acid sequence.")

        # --- Inputs -----------------------------------------------------
        t3_sequence = gr.Textbox(label="Input amino acid sequence.")

        t3_databases = gr.Dropdown(
            label="Select the databases to be used for Multiple Sequence Alignment prediction",
            info="Passing all three will provide the most accurate structural prediction at the cost of requiring the longest runtime.",
            choices=["uniref90", "mgnify", "small_bfd"],
            value=["uniref90", "mgnify", "small_bfd"],
            multiselect=True,
            interactive=True,
        )

        # Single available algorithm, displayed read-only.
        t3_algorithm = gr.Dropdown(
            label="The algorithm used for Multiple Sequence Alignment",
            choices=["jackhmmer"],
            value="jackhmmer",
            interactive=False,
        )

        t3_e_value = gr.Number(
            label="The sequence e-value for filtering sequences in the MSA",
            info="Value should be between 0 and 1.",
            value=0.0001,
        )

        t3_bit_score = gr.Number(
            label="The sequence bit-score to use for filtering before MSA",
            info="If passed, this is used in place of e-value for filtering",
            value=0,
        )

        t3_iterations = gr.Number(
            label="The number of MSA iterations to perform",
            value=1,
        )

        t3_relax_prediction = gr.Checkbox(
            label="Relax prediction.",
            info="Set to True to run structural relaxation after prediction",
            value=True,
        )

        # One ready-made example; values follow the order of `inputs`.
        gr.Examples(
            examples=[
                [
                    "MAKTIKITQTRSAIGRLPKHKATLLGLGLRRIGHTVEREDTPAIRGMINAVSFMVKVEE",
                    ["uniref90", "mgnify", "small_bfd"],
                    "jackhmmer",
                    0.0001,
                    0,
                    1,
                    True,
                ],
            ],
            inputs=[
                t3_sequence,
                t3_databases,
                t3_algorithm,
                t3_e_value,
                t3_bit_score,
                t3_iterations,
                t3_relax_prediction,
            ],
        )

        t3_run_button = gr.Button("Run inference")

        # --- Outputs ----------------------------------------------------
        # Keeps the complete inference response for update_view to index into.
        t3_full_output = gr.State(value={})

        # Hidden until a run is started.
        with gr.Accordion(visible=False) as t3_out_block:
            t3_output_selector = gr.Dropdown(label="Returned Structures", interactive=True)
            t3_output_viewer = gr.HTML(label="Protein Structure Viewer", show_label=True)
            t3_output_file = gr.File(label="Inference result.")

        # --- Wiring -----------------------------------------------------
        # Step 1 reveals the output area; step 2 runs the request. The inputs
        # are ordered to match predict_structure_from_sequence's parameters.
        t3_run_button.click(
            fn=lambda: gr.Accordion(visible=True),
            inputs=None,
            outputs=t3_out_block,
        ).then(
            fn=predict_structure_from_sequence,
            inputs=[
                t3_sequence,
                t3_algorithm,
                t3_bit_score,
                t3_databases,
                t3_e_value,
                t3_iterations,
                t3_relax_prediction,
            ],
            outputs=[t3_output_selector, t3_full_output, t3_output_file],
            preprocess=False,
        )

        # Re-render the viewer whenever a different structure is selected.
        t3_output_selector.change(
            fn=update_view,
            inputs=[t3_output_selector, t3_full_output],
            outputs=t3_output_viewer,
        )

# Start the web app only when this file is run as a script.
# NOTE(review): share=True asks Gradio for a public share link and no auth is
# passed to launch() here -- confirm exposing the inference service is intended.
if __name__ == "__main__":
    demo.launch(share=True)