<a href="https://colab.research.google.com/github/pcmay/ALyzer3D.AI/blob/main/ALyzer3D_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



<div style="display: flex; justify-content: space-between; align-items: center;">
<img src="https://raw.githubusercontent.com/petercmay89/ALyzer3D.AI/main/white.png" width="10%">
<img src="https://raw.githubusercontent.com/petercmay89/ALyzer3D.AI/main/ALyzer3D.AI_logo.png" width="25%">
<img src="https://raw.githubusercontent.com/petercmay89/ALyzer3D.AI/main/white.png" width="25%">
<img src="https://raw.githubusercontent.com/petercmay89/ALyzer3D.AI/main/ColabFold_logo.png" width="25%">
<img src="https://raw.githubusercontent.com/petercmay89/ALyzer3D.AI/main/white.png" width="10%">
</div>



Welcome to **ALyzer3D.AI**. This notebook allows you to predict the amyloidogenicity of a VJ region of a light chain by first generating its 3D structure with [ColabFold](https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb) and then automatically analyzing it with the ALyzer3D.AI model.

**Instructions:**

1. **Enter Your Sequence**: In the first cell (Input protein sequence), paste the amino acid sequence of your light chain's VJ region.
2. **Run Everything**: In the menu bar, click `Runtime` -> `Run all`.

The notebook will then execute all the steps for you: it installs the dependencies, runs the ColabFold structure prediction, and finally performs the ALyzer3D.AI analysis on the resulting top-ranked structure. The final prediction is displayed at the bottom of the page.





In [None]:
#@title Input protein sequence(s), then hit `Runtime` -> `Run all`
from google.colab import files
import os
import re
import hashlib

# --- User Input ---
query_sequence = 'QSVLTQPISASGTPGQRVSISCSGGYYNIRTNTVHWYQQLPGTAPKLLIYNNDQRPSGVPDRFSGSKSGPSASLAIGGLLSEDEADYYCATWDDSLNGPVFGGGTKL' #@param {type:"string"}

# --- Hardcoded Parameters ---
jobname = "prediction"
# Remove all whitespace (spaces, tabs, line breaks) that may come along with a pasted sequence
query_sequence = "".join(query_sequence.split())

# Fail fast on unusable input: the sequence is written verbatim into a CSV file below,
# so an empty value or stray characters (commas, digits, FASTA headers, ...) would
# otherwise only surface later as an obscure error in the prediction cell.
# ':' is allowed because it is treated as a chain separator when the length is reported.
if not query_sequence.replace(":", ""):
    raise ValueError(
        "query_sequence is empty. Please paste the amino acid sequence of your "
        "light chain's VJ region into the first cell."
    )
invalid_chars = sorted(set(re.findall(r"[^A-Za-z:]", query_sequence)))
if invalid_chars:
    raise ValueError(
        "query_sequence contains invalid character(s): "
        + " ".join(repr(ch) for ch in invalid_chars)
        + ". Only letters (amino acid codes) and ':' are allowed."
    )

# Sanitize the base job name so it is safe to use as a folder/file name
basejobname = re.sub(r'\W+', '', jobname)
# Create a unique jobname by appending the first 5 hex digits of the sequence's SHA-1 hash
jobname = f"{basejobname}_{hashlib.sha1(query_sequence.encode()).hexdigest()[:5]}"

# Create a directory for the job (re-running the cell with the same sequence reuses it)
os.makedirs(jobname, exist_ok=True)

# Save the sequence to a query file with an "id,sequence" header row
queries_path = os.path.join(jobname, f"{jobname}.csv")
with open(queries_path, "w") as text_file:
    text_file.write(f"id,sequence\n{jobname},{query_sequence}")

print(f"Job Name: {jobname}")
# Chain separators are not residues, so they are excluded from the reported length
print(f"Sequence Length: {len(query_sequence.replace(':', ''))}")

In [None]:
#@title Install Dependencies
%%time
import os

# "COLABFOLD_READY" is a marker file: once it exists, the installation is skipped
# on subsequent runs of this cell.
if not os.path.isfile("COLABFOLD_READY"):
    print("Installing ColabFold...")
    install_status = os.system("pip install -q --no-warn-conflicts 'colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold'")
    # Do not create the marker (or claim success) when pip failed; otherwise every
    # later run would skip the installation and fail on the first colabfold import.
    if install_status != 0:
        raise RuntimeError(
            f"ColabFold installation failed (pip exit status {install_status}). "
            "Check the network connection and re-run this cell."
        )
    # Link the installed packages into the working directory (best effort)
    os.system("ln -s /usr/local/lib/python3.*/dist-packages/colabfold colabfold")
    os.system("ln -s /usr/local/lib/python3.*/dist-packages/alphafold alphafold")
    os.system("touch COLABFOLD_READY")

print("Dependencies are installed.")

In [None]:
#@title Run ColabFold Prediction
import sys
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from Bio import BiopythonDeprecationWarning
warnings.simplefilter(action='ignore', category=BiopythonDeprecationWarning)
from pathlib import Path
from colabfold.download import download_alphafold_params
from colabfold.utils import setup_logging
from colabfold.batch import get_queries, run, set_model_type
import os

# Warn if using a K80 GPU.
# This is a best-effort check: any failure while querying the GPU is reported and
# otherwise ignored so that the prediction itself can still run.
try:
    # Count the lines of `nvidia-smi` output that mention a Tesla K80;
    # the context manager makes sure the pipe is closed again.
    with os.popen('nvidia-smi | grep "Tesla K80" | wc -l') as gpu_query:
        K80_chk = gpu_query.read()
    # Compare numerically: a substring test for "1" would miss counts such as 2.
    if K80_chk.strip().isdigit() and int(K80_chk) > 0:
        print("WARNING: Found GPU Tesla K80: limited to total length < 1000")
except Exception as gpu_check_error:  # not a bare `except`, so Ctrl-C / kernel interrupt still works
    print(f"Note: could not check the GPU model ({gpu_check_error}).")

# Send ColabFold's log output to a file inside the job folder
result_dir = jobname
log_filename = os.path.join(result_dir, "log.txt")
setup_logging(Path(log_filename))

# Read the query file written by the first cell, then choose the model type
# ("auto" selects multimer for complexes)
queries, is_complex = get_queries(queries_path)
model_type = set_model_type(is_complex, "auto")

# Download model parameters into the current working directory
params_dir = Path(".")
download_alphafold_params(model_type, params_dir)

# All prediction settings are fixed here; the user only supplies the sequence
run_settings = dict(
    queries=queries,
    result_dir=result_dir,
    use_templates=False,
    num_relax=0,
    msa_mode="mmseqs2_uniref_env",  # fast and effective MSA generation
    model_type=model_type,
    num_models=5,
    num_recycles=3,                 # standard number of recycles
    num_seeds=1,                    # single seed for speed
    use_dropout=False,
    model_order=[1, 2, 3, 4, 5],
    is_complex=is_complex,
    data_dir=params_dir,
    keep_existing_results=False,
    rank_by="auto",                 # plddt for monomers, iptm for complexes
    pair_mode="unpaired_paired",
    stop_at_score=100.0,
    zip_results=False,              # the archive is built manually below
    user_agent="colabfold/google-colab-main",
)
results = run(**run_settings)

# Bundle the whole job folder into a single zip archive
results_zip = f"{jobname}.result.zip"
zip_command = f"zip -r {results_zip} {jobname}"
os.system(zip_command)

print("Prediction complete.")

In [None]:
#@title ▶️ Run ALyzer3D.AI Analysis
import os
import sys
import glob
from IPython.display import display, HTML

print("✅ Step 1: Installing ALyzer3D.AI and its dependencies...")
# Clone your project repository
!git clone https://github.com/petercmay89/ALyzer3D.AI.git > /dev/null 2>&1
# Add your project to the Python path to make it importable
sys.path.insert(0, '/content/ALyzer3D.AI')
# Install only the extra dependencies your tool needs
!pip install -q transformers scikit-learn joblib > /dev/null 2>&1

print("\n✅ Step 2: Loading your AI model...")
from prediction_tool import AmyloidPredictor

try:
    predictor = AmyloidPredictor(
        model_dir="/content/ALyzer3D.AI/champion_v8_ensemble_modelv3",
        scaler_path="/content/ALyzer3D.AI/champion_v8_ensemble_modelv3/scalar_scaler_v8_ensemble.joblib"
    )
    print(" ✔️ Model loaded successfully.")
except Exception as e:
    print(f"❗️ Error loading model: {e}")

print("\n✅ Step 3: Finding ColabFold output and running analysis...")

# Correctly use the `jobname` variable defined in the first cell
search_path_pdb = f"{jobname}/{jobname}_unrelaxed_rank_001*.pdb"
search_path_json = f"{jobname}/{jobname}_scores_rank_001*.json"

pdb_files = glob.glob(search_path_pdb)
json_files = glob.glob(search_path_json)

if not pdb_files or not json_files:
    print(f"❗️ Error: Could not find output files inside the '{jobname}' folder. Please check the file browser on the left.")
else:
    pdb_filename = pdb_files[0]
    json_filename = json_files[0]
    print(f" - Found PDB: {pdb_filename}")
    print(f" - Found JSON: {json_filename}")

    # Run the prediction
    result = predictor.predict(pdb_path=pdb_filename, json_path=json_filename)

    # Display the final formatted results
    if result.get("error"):
        print(f"❗️ An error occurred during analysis: {result['error']}")
    else:
        prob = result['prediction_probability']
        confidence_percent = prob * 100
        risk_level = "High Risk" if prob > 0.7 else "Medium Risk" if prob > 0.4 else "Low Risk"
        risk_color = "#D32F2F" if prob > 0.7 else "#F57C00" if prob > 0.4 else "#388E3C"
        html_output = f"""
        <div style="border: 2px solid {risk_color}; border-radius: 10px; padding: 20px; font-family: sans-serif; background-color: #f9f9f9; margin-top: 1em;">
        <h2 style="color: {risk_color}; margin-top: 0;">ANALYSIS COMPLETE: {risk_level.upper()}</h2><hr>
        <div style="display: grid; grid-template-columns: 150px 1fr; gap: 10px; align-items: center;">
        <strong style="font-size: 1.1em;">Prediction:</strong>
        <span style="font-size: 1.1em; font-weight: bold; color: {risk_color};">{result['prediction_label'].upper()}</span>
        <strong style="font-size: 1.1em;">Confidence Score:</strong>
        <div style="width: 100%; background-color: #e0e0e0; border-radius: 5px;">
        <div style="width: {confidence_percent}%; background-color: {risk_color}; color: white; text-align: center; padding: 2px 0; border-radius: 5px;">
        {confidence_percent:.2f}%
        </div>
        </div>
        <strong style="vertical-align: top;">Sequence:</strong>
        <textarea readonly style="width: 100%; height: 60px; resize: none; border: 1px solid #ccc; font-family: monospace;">{result['sequence']}</textarea>
        </div>
        </div>
        """
        display(HTML(html_output))

In [None]:
#@title Download Results
from google.colab import files

# Offer the archive built in the prediction cell as a browser download
results_archive = f"{jobname}.result.zip"
files.download(results_archive)

# Instructions <a name="Instructions"></a>
For detailed instructions, tips, and tricks on ColabFold, see the recently published paper in [Nature Protocols](https://www.nature.com/articles/s41596-024-01060-5).