In [None]:
# Notebook outline (planned analysis pipeline):
#   1. Setup and Dependencies
#   2. Input Protein Sequence
#   3. Protein Structural Modeling with ESMFold (ESM-3)
#   4. B-cell Epitope Prediction
#      a. Linear Epitope Prediction
#      b. Conformational Epitope Prediction
#   5. T-cell Epitope Prediction
#   6. Population Coverage Analysis
#   7. Molecular Docking Simulation
#   8. Visualization and Analysis

In [None]:
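# Dependencies -- install from a shell in the project directory (not from this cell):
#   poetry add biopython
#   poetry add esm
# The ESM-3 cell further down also imports `dotenv` and `huggingface_hub`;
# if they are not already installed, add them too:
#   poetry add python-dotenv huggingface_hub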

In [2]:
# import os
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
from Bio import SeqIO
# from Bio.SeqUtils.ProtParam import ProteinAnalysis
# from Bio.PDB import PDBParser, Selection
# import torch
# import esm  # For ESM-3 and ESMFold
# Additional imports for visualization and docking

In [3]:
def load_fasta(file_path):
    """
    Read every record of a FASTA file and return the sequences as strings.

    Args:
        file_path: Location of the FASTA file; handed unchanged to
            ``SeqIO.parse`` with the ``"fasta"`` format.

    Returns:
        A list with one ``str`` per record, in the order the parser yields
        them. The list is empty when the parser yields no records.
    """
    return [str(entry.seq) for entry in SeqIO.parse(file_path, "fasta")]


# Example usage: load the example FASTA file and keep its first sequence.
FASTA_PATH = "../allergen-analysis/ara-h-2/AAN77576.fasta"
sequences = load_fasta(FASTA_PATH)
if not sequences:
    # Without this guard an empty file surfaces as a bare IndexError on the
    # indexing below, which does not say which file was at fault.
    raise ValueError(f"No FASTA records found in {FASTA_PATH!r}")
protein_sequence = sequences[0]  # Use the first sequence for this example

In [None]:
import os

from dotenv import load_dotenv
from esm.models.esm3 import ESM3
from esm.sdk.api import ESM3InferenceClient, ESMProtein, GenerationConfig
from huggingface_hub import login

# Load the environment variables from the .env file
load_dotenv()

# Retrieve the Hugging Face token from the environment (None when the variable is unset)
hf_token = os.getenv("HUGGINGFACE_TOKEN")

# Authenticate with the Hugging Face Hub so the model weights can be downloaded.
# The token should be created on the Hub with "Read" permission.
# NOTE(review): with no token set, `login` is expected to prompt for one -- confirm
# this is acceptable for unattended "Run All" executions.
login(token=hf_token)

# Choose the device at runtime instead of hard-coding "cuda", so the cell also
# runs on machines without a GPU (just more slowly).
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# This will download the model weights and instantiate the model on your machine.
model: ESM3InferenceClient = ESM3.from_pretrained("esm3_sm_open_v1").to(device)

# Generate a completion for a partial Carbonic Anhydrase (2vvb).
# Underscores mark the masked positions the model is asked to fill in.
prompt = "___________________________________________________DQATSLRILNNGHAFNVEFDDSQDKAVLKGGPLDGTYRLIQFHFHWGSLDGQGSEHTVDKKKYAAELHLVHWNTKYGDFGKAVQQPDGLAVLGIFLKVGSAKPGLQKVVDVLDSIKTKGKSADFTNFDPRGLLPESLDYWTYPGSLTTPP___________________________________________________________"
protein = ESMProtein(sequence=prompt)

# Step 1: generate the sequence. This will iteratively unmask the sequence track.
protein = model.generate(
    protein, GenerationConfig(track="sequence", num_steps=8, temperature=0.7)
)

# Step 2: predict the structure for the generated sequence and save it.
protein = model.generate(protein, GenerationConfig(track="structure", num_steps=8))
protein.to_pdb("./generation.pdb")

# Step 3: round-trip design. Drop the sequence and regenerate it from the
# structure (inverse folding), then drop the coordinates and recompute the
# structure from that new sequence.
protein.sequence = None
protein = model.generate(protein, GenerationConfig(track="sequence", num_steps=8))
protein.coordinates = None
protein = model.generate(protein, GenerationConfig(track="structure", num_steps=8))
protein.to_pdb("./round_tripped.pdb")

In [None]:
# Show the masked prompt next to the sequence held by `protein`.
# The prompt is printed from the existing `prompt` variable rather than a
# second copy of the literal, so the two cannot drift apart.
print("Partial Sequence:")
print(prompt)
print("\nGenerated Sequence:")
# NOTE(review): by this point `protein` has been through the round-trip steps
# (sequence cleared and regenerated), so this is presumably the inverse-folded
# sequence rather than the first completion of the prompt -- confirm intent.
print(protein.sequence)