# Generate Fv Sequences using Annotated Inputs for Antigens

In [None]:
from peft import PeftModel, PeftConfig
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re

In [None]:
## Load model
# `model_name` points at a local PEFT checkpoint directory. Its adapter config
# is read only to discover which base model the adapter was trained on
# (alternatively, a hub id such as "microsoft/phi-4" could be used directly).
model_name = "../models/peleke-phi-4-h100runwtags-20250727/checkpoint-4000"
config = PeftConfig.from_pretrained(model_name)

# The tokenizer is loaded from the base model, not from the checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(
    config.base_model_name_or_path,
    trust_remote_code=True,
)

# Base model weights in bfloat16, moved onto the GPU.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model = model.cuda()

# NOTE(review): the adapter weights stored in `model_name` are NOT applied here,
# so everything below runs on the plain base model. If the fine-tuned model is
# intended, wrap the base model first -- confirm before enabling:
# model = PeftModel.from_pretrained(model, model_name).cuda()
# model.eval()

In [None]:
## Format Prompt with Tags
def format_prompt(antigen_sequence):
    """Build the tagged generation prompt for one annotated antigen sequence.

    Every single uppercase letter written in square brackets (e.g. ``[S]``)
    is rewritten as ``<epi>S</epi>``; all other characters are left as they
    are. The result is wrapped in ``<ag>...</ag>``, followed by
    ``<|im_end|>``, a newline and the opening ``<h__fv>`` tag.

    Args:
        antigen_sequence: Antigen sequence string, with bracketed residues
            marking the positions to tag (presumably epitope residues).

    Returns:
        The prompt string to feed to the tokenizer.
    """
    bracketed_residue = re.compile(r'\[([A-Z])\]')
    tagged_antigen = bracketed_residue.sub(r'<epi>\1</epi>', antigen_sequence)
    return f"<ag>{tagged_antigen}</ag><|im_end|>\n<h__fv>"

In [None]:
## List of Antigens to Test
# Each entry is one antigen sequence string. A single uppercase letter in
# square brackets (e.g. "[S]") is the annotation that format_prompt rewrites
# into an <epi>...</epi> tag; unbracketed letters pass through unchanged.
test_antigens = [
    ## PD-1
    "NPPTFSPALLVVTEGDNATFTCSFS[S][F][V]L[N]WYRMQ[T][D][K]LAAF[P]E[D][R][S][Q][P][G]QDSRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGA[I]S[L]AQIKESLRAELRV",
]

In [None]:
## Generate complete antibody sequences for the example antigens
# For every antigen: build the tagged prompt, sample a continuation from the
# model, and print only the newly generated text next to the antigen.
for antigen in test_antigens:
    prompt = format_prompt(antigen)
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.cuda() for k, v in inputs.items()}
    # Number of prompt tokens; generate() returns prompt + continuation for
    # decoder-only models, so this is where the new tokens start.
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            # Reuse EOS as the padding id so generate() has a pad token to use.
            pad_token_id=tokenizer.eos_token_id,
            # NOTE(review): disables the KV cache (slower generation); kept as
            # in the original -- confirm whether this is a deliberate workaround.
            use_cache=False,
        )

    # Full decoded text (prompt + continuation), kept for inspection.
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Strip the prompt at the token level instead of with str.replace():
    # decoding with skip_special_tokens=True drops any prompt piece the
    # tokenizer treats as special (e.g. "<|im_end|>"), so the decoded text may
    # not contain `prompt` verbatim and a string replace would silently leave
    # the antigen in the reported antibody sequence.
    generated_ids = outputs[0][prompt_length:]
    antibody_sequence = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    print(f"Antigen: {antigen}\nAntibody: {antibody_sequence}\n")