<a href="https://colab.research.google.com/github/justinpezulu3/Interactive-PiFold-Inverse-Folding-Pipeline/blob/main/PiFoldTest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Clone the PiFold repository
!git clone https://github.com/A4Bio/PiFold.git
%cd PiFold

# Install core dependencies (ensure PyTorch matches Colab's current CUDA version)
!pip install torch torchvision torchaudio
!pip install torch-geometric torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.1.0+cu121.html
!pip install biopython gradio

Cloning into 'PiFold'...
remote: Enumerating objects: 178, done.[K
remote: Counting objects: 100% (2/2), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 178 (delta 1), reused 0 (delta 0), pack-reused 176 (from 1)[K
Receiving objects: 100% (178/178), 2.80 MiB | 20.61 MiB/s, done.
Resolving deltas: 100% (91/91), done.
/content/PiFold
Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu121.html
Collecting torch-geometric
  Downloading torch_geometric-2.7.0-py3-none-any.whl.metadata (63 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m63.7/63.7 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-scatter
  Downloading torch_scatter-2.1.2.tar.gz (108 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m108.0/108.0 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00

In [None]:
import gradio as gr
import subprocess
import os
from Bio.PDB import PDBParser
from Bio.SeqUtils import seq1

# --- Helper Functions ---

def extract_native_sequence(pdb_path):
    """Extract the native one-letter sequence from a PDB file for metric comparison.

    Only the first model of the structure is read. Multi-model files (for
    example NMR ensembles) repeat the same chains in every model, so walking
    all models would return the sequence several times over and make it
    impossible to compare against a designed sequence of the true length.

    Args:
        pdb_path: Path to the PDB file to parse.

    Returns:
        The concatenated one-letter codes of all non-hetero residues across
        all chains of the first model, in file order. Returns an empty string
        when the structure contains no models.
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_path)

    first_model = next(iter(structure), None)
    if first_model is None:
        return ""

    letters = []
    for chain in first_model:
        for residue in chain:
            # A blank hetero flag marks a regular polymer residue; waters and
            # other HETATM records carry a non-blank flag and are skipped.
            if residue.id[0] != " ":
                continue
            try:
                letters.append(seq1(residue.resname))
            except Exception:
                # Best effort: keep the position, mark the residue as unknown.
                letters.append("X")
    return "".join(letters)

def calculate_recovery(native_seq, designed_seq):
    """Calculate Native Sequence Recovery (NSR) as a formatted percentage.

    NSR is the fraction of positions at which the designed sequence carries
    the same residue as the native sequence.

    Args:
        native_seq: One-letter native sequence.
        designed_seq: One-letter designed sequence.

    Returns:
        A string such as "66.67%" when both sequences are non-empty and of
        equal length; "N/A (Empty sequence)" when both are empty; and
        "N/A (Length mismatch)" when their lengths differ.
    """
    # Two empty sequences have matching lengths, so report them separately
    # instead of mislabelling the case as a length mismatch.
    if not native_seq and not designed_seq:
        return "N/A (Empty sequence)"
    if len(native_seq) != len(designed_seq):
        return "N/A (Length mismatch)"
    matches = sum(n == d for n, d in zip(native_seq, designed_seq))
    return f"{(matches / len(native_seq)) * 100:.2f}%"

# --- Main Pipeline ---

def run_pifold_pipeline(pdb_file):
    """Run the design pipeline for one uploaded PDB file and report metrics.

    Args:
        pdb_file: The uploaded file. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
            ``None`` means nothing was uploaded.

    Returns:
        A ``(designed_sequence, metrics_text)`` tuple. When no file was
        uploaded, returns a prompt message and "N/A" instead.
    """
    if pdb_file is None:
        return "Please upload a valid PDB file.", "N/A"

    # Accept both a plain path and a file wrapper carrying the path in .name,
    # so the handler does not raise when the upload is delivered as a string.
    if isinstance(pdb_file, (str, os.PathLike)):
        pdb_path = os.fspath(pdb_file)
    else:
        pdb_path = pdb_file.name
    native_seq = extract_native_sequence(pdb_path)

    # ---------------------------------------------------------
    # ⚙️ PIFOLD INFERENCE INTEGRATION POINT
    # Once you download the PiFold checkpoint (e.g., cath_4.2.pt),
    # uncomment the subprocess call below to run the actual model.
    # The command is passed as an argument list (not a split string) so
    # that upload paths containing spaces stay intact.
    # NOTE(review): the CLI flags below are a template - confirm them
    # against PiFold's main.py before enabling.
    #
    # command = [
    #     "python", "main.py", "--mode", "predict",
    #     "--pdb", pdb_path,
    #     "--checkpoint", "./checkpoints/cath_4.2.pt",
    #     "--out", "output.fasta",
    # ]
    # subprocess.run(command, check=True)
    #
    # with open("output.fasta", "r") as f:
    #     designed_seq = f.readlines()[1].strip() # Assuming standard FASTA output
    # ---------------------------------------------------------

    # Placeholder for demonstration until the model checkpoint is linked:
    # the native sequence is echoed back as the "design".
    designed_seq = native_seq

    # Calculate metrics
    recovery = calculate_recovery(native_seq, designed_seq)
    metrics = (
        f"Sequence Length: {len(designed_seq)} AA\n"
        f"Native Sequence Recovery: {recovery}\n"
        f"Model: PiFold (One-shot PiGNN)"
    )

    return designed_seq, metrics

# --- Minimalist Gradio UI ---

# Build the UI: an upload column on the left and the two result boxes on the
# right (the right column is given twice the relative width via scale=2).
# NOTE(review): the captured cell output echoes this `with gr.Blocks(theme=...)`
# line as the source of a warning; the warning text itself is not shown, so
# confirm whether the installed Gradio version still expects `theme` here.
with gr.Blocks(theme=gr.themes.Base()) as pifold_app:
    gr.Markdown("# üß¨ PiFold Inverse Folding Pipeline")
    gr.Markdown("Upload a protein backbone (`.pdb`). PiFold will generate an amino acid sequence designed to fold into this exact structure.")

    with gr.Row():
        with gr.Column(scale=1):
            # Input side: the upload widget is restricted to .pdb files.
            pdb_input = gr.File(label="Upload Target PDB File", file_types=[".pdb"])
            design_btn = gr.Button("Generate Sequence", variant="primary")

        with gr.Column(scale=2):
            # Output side: filled from the two values run_pifold_pipeline returns.
            seq_out = gr.Textbox(label="Designed Sequence", lines=4, show_copy_button=True)
            metrics_out = gr.Textbox(label="Design Metrics", lines=3)

    # Clicking the button passes the uploaded file to run_pifold_pipeline and
    # routes its (sequence, metrics) result to the two text boxes, in order.
    design_btn.click(
        fn=run_pifold_pipeline,
        inputs=pdb_input,
        outputs=[seq_out, metrics_out]
    )

# Launching with share=True creates a public URL so you can interact with it outside the Colab cell
# debug=True keeps this cell running so errors and logs stay visible; interrupt
# the cell to stop the server, or set debug=False to return immediately.
pifold_app.launch(debug=True, share=True)

  with gr.Blocks(theme=gr.themes.Base()) as pifold_app:


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://cd93d7bc0ecdfe319c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://cd93d7bc0ecdfe319c.gradio.live


