In [1]:
# Fine-Tune Llama for Antibody Sequence Generation

import torch
import os
import pandas as pd
import re
from datetime import datetime
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    Trainer, 
    TrainingArguments, 
    DataCollatorForSeq2Seq,
    TrainerCallback
)
from datasets import Dataset
from trl import SFTTrainer
from peft import get_peft_model, LoraConfig, TaskType

# Setup environment
#os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
torch.cuda.empty_cache()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Test GPU setup
print(f"Number of GPUs: {torch.cuda.device_count()}")
for i in range(torch.cuda.device_count()):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    print(f"Memory: {torch.cuda.get_device_properties(i).total_memory / 1e9:.1f} GB")

# Load dataset
df = pd.read_csv("../data/sabdab/sabdab_training_dataset.csv")
df = df.dropna(subset=['h_chain_seq', 'l_chain_seq', 'antigen_seqs', 'highlighted_epitope_seqs'])
print(f"Dataset size: {len(df)} samples")



  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda
Number of GPUs: 2
GPU 0: NVIDIA GeForce RTX 5090
Memory: 33.7 GB
GPU 1: NVIDIA GeForce RTX 3090 Ti
Memory: 25.3 GB
Dataset size: 9523 samples


In [2]:
# Choose Llama model - you can use different versions
 
model_name = "mistralai/Mistral-7B-Instruct-v0.2" #"meta-llama/Llama-3.1-8b-instruct"  # Change this to your preferred Llama model

# Load tokenizer and model
print(f"Loading {model_name}...")
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    use_fast=True
)

# Set padding token if not present (common for Llama models)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Load model with automatic device mapping for multi-GPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # Automatically distribute across GPUs
    torch_dtype=torch.float16,  # Use float16 for efficiency
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    max_memory={0: "30GB", 1: "23GB"}  # Adjust based on your GPU memory
)

Loading mistralai/Mistral-7B-Instruct-v0.2...


Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.32s/it]


In [3]:
# Add special tokens for epitopes
epitope_tokens = ["<epi>", "</epi>"]
task_tokens = ["Antigen", "Antibody", "Epitope"]
amino_acids = list("ACDEFGHIKLMNPQRSTVWY")
extra_tokens = amino_acids + ["|"]

# Add all custom tokens
all_new_tokens = epitope_tokens + task_tokens + [t for t in extra_tokens if t not in tokenizer.get_vocab()]
num_added_tokens = tokenizer.add_special_tokens({
    "additional_special_tokens": tokenizer.additional_special_tokens + all_new_tokens
    if tokenizer.additional_special_tokens else all_new_tokens
})


In [4]:
# Resize model embeddings
model.resize_token_embeddings(len(tokenizer))
print(f"Added {num_added_tokens} new tokens to vocabulary")

# Prepare dataset
def convert_epitope_format(sequence):
    """Convert [X] format to <epi>X</epi> format"""
    return re.sub(r'\[([A-Z])\]', r'<epi>\\1</epi>', sequence)

def format_prompt(example):
    epitope_seq = convert_epitope_format(example['highlighted_epitope_seqs'])
    # Using Llama's typical prompt format
    return {
        "text": f"### Instruction: Generate antibody sequence for the given antigen.\n\n### Input:\nAntigen: {epitope_seq}\n\n### Response:\nAntibody: {example['antibody_fv_seqs']}\n"
    }


The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Added 5 new tokens to vocabulary


In [5]:
# Create and map dataset
dataset = Dataset.from_pandas(df)
dataset = dataset.map(format_prompt)

# Tokenize dataset
def tokenize(example):
    # Llama models typically have longer context windows (4096 tokens)
    encoded = tokenizer(
        example["text"], 
        truncation=True, 
        max_length=1024,  # Adjust based on your sequences
        padding=False
    )
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded

tokenized_dataset = dataset.map(tokenize)

# Check sequence lengths
sequence_lengths = [len(tokenizer(example["text"], truncation=False)["input_ids"]) for example in dataset]
max_len = 1024
truncated = sum(1 for length in sequence_lengths if length > max_len)
print(f"Sequences truncated at max_length={max_len}: {truncated}/{len(sequence_lengths)} ({100*truncated/len(sequence_lengths):.1f}%)")

# Remove unnecessary columns
columns_to_remove = [col for col in tokenized_dataset.column_names if col not in ["input_ids", "attention_mask", "labels"]]
tokenized_dataset = tokenized_dataset.remove_columns(columns_to_remove)

Map: 100%|██████████| 9523/9523 [00:00<00:00, 18642.84 examples/s]
Map: 100%|██████████| 9523/9523 [00:02<00:00, 3467.36 examples/s]


Sequences truncated at max_length=1024: 92/9523 (1.0%)


In [6]:
# Enable gradient computation for inputs
if hasattr(model, 'enable_input_require_grads'):
    model.enable_input_require_grads()
else:
    def make_inputs_require_grad(module, input, output):
        output.requires_grad_(True)
    model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)

# Configure LoRA for efficient fine-tuning
peft_config = LoraConfig(
    r=16,  # Rank
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],  # Llama attention modules
)

# Apply LoRA
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Data collator
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    label_pad_token_id=-100,
    pad_to_multiple_of=8
)

trainable params: 13,631,488 || all params: 7,255,404,544 || trainable%: 0.1879


In [8]:
# Training arguments optimized for Llama
training_args = TrainingArguments(
    output_dir=f"../models/llama-antibody-{datetime.now().strftime('%Y%m%d_%H%M%S')}",
    per_device_train_batch_size=4,  # Adjust based on GPU memory
    gradient_accumulation_steps=2,  # Effective batch size = 8
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    warmup_steps=100,
    weight_decay=0.01,
    learning_rate=5e-5,  # Conservative learning rate for Llama
    logging_dir="../logs",
    logging_steps=50,
    save_strategy="steps",
    save_steps=500,
    eval_strategy="no",  # Add validation set if needed
    gradient_checkpointing=True,
    fp16=True,
    optim="adamw_torch",
    dataloader_num_workers=4,
    dataloader_pin_memory=True,
    remove_unused_columns=False,
    max_grad_norm=1.0,
    report_to="none",  # Change to "wandb" for logging
    seed=42,
)

In [9]:
# Test Generation Callback
class TestGenerationCallback(TrainerCallback):
    def __init__(self, model, tokenizer, test_antigens, log_every_n_steps=100):
        self.model = model
        self.tokenizer = tokenizer
        self.test_antigens = test_antigens
        self.log_every_n_steps = log_every_n_steps
        
    def create_test_prompt(self, antigen_with_epitopes):
        return f"### Instruction: Generate antibody sequence for the given antigen.\n\n### Input:\nAntigen: {antigen_with_epitopes}\n\n### Response:\nAntibody:"
    
    def generate_antibody_test(self, antigen_with_epitopes):
        prompt = self.create_test_prompt(antigen_with_epitopes)
        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
        
        self.model.eval()
        try:
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=200,
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1,
                )
                
                generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=False)
                if "Antibody:" in generated_text:
                    antibody_part = generated_text.split("Antibody:", 1)[1]
                    antibody_sequence = antibody_part.split("\n")[0].strip()
                else:
                    antibody_sequence = "Generation failed"
                
                return antibody_sequence
                
        except Exception as e:
            return f"Error: {str(e)}"
        finally:
            self.model.train()
    
    def on_log(self, args, state, control, **kwargs):
        if state.global_step % self.log_every_n_steps == 0 and state.global_step > 0:
            print(f"\n{'='*80}")
            print(f"TEST GENERATION - Step {state.global_step}")
            print(f"{'='*80}")
            
            for i, test_antigen in enumerate(self.test_antigens):
                antibody = self.generate_antibody_test(test_antigen)
                print(f"\n--- Test Case {i+1} ---")
                print(f"Input: {test_antigen[:60]}...")
                print(f"Generated: {antibody}")
            
            print(f"{'='*80}\n")

In [10]:
# Create test antigens
test_antigens = [
    convert_epitope_format("KVFGRCELAAAM[K][R]HGL[D][N][Y]RG[Y][S]LG[N]WVCAAKFESNFNTQATNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCA[K]KIVSDGNGMNAWVAWRNRCK[G][T][D]V[Q]AW[I][R]GCRL"),
    convert_epitope_format("NLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFTNVYADSFVI[R]G[N]EV[S][Q]IAPGQ[T]GNIADYNYKLPDDFTGCVIAWNSN[K]LDSKPSGNYNYLYRLLRKSKLKPFERDISTEIYQAGNKPCNGVAGPNCYSPLQSYGF[R]P[T][Y][G][V]GH[Q]PYRVVVLSFELLHAPATVCGP"),
]

# Initialize callback
test_callback = TestGenerationCallback(
    model=model,
    tokenizer=tokenizer,
    test_antigens=test_antigens,
    log_every_n_steps=100
)

# Create trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
    args=training_args,
    callbacks=[test_callback],
)

Truncating train dataset: 100%|██████████| 9523/9523 [00:00<00:00, 190865.18 examples/s]
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [11]:
# Start training
print("Starting training...")
trainer.train()

# Save the final model
trainer.save_model()
print(f"Model saved to {training_args.output_dir}")

# Save tokenizer
tokenizer.save_pretrained(training_args.output_dir)
print("Training completed!")

Starting training...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Step,Training Loss
50,4.8255
100,4.0696
150,3.6013
200,3.1438
250,3.0867
300,2.7682
350,2.8725
400,2.7567
450,2.6597
500,2.4959



TEST GENERATION - Step 100

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: EVQLVESGGGVLVQPGGSLTCTAYSISHWVRQSPGSKLEWIGEYYMVDTYAGNSSSGDPSGTSVTVSS|DISSLTQMTITPSEGSLKSYMSNGAWYQQQLQRPGKSAWYQQGYLTVSSVADSLGPEAATLTISSKNSENLSLTFDDTAVYYCQQSYSNNYLKMNNIYSSEDLAWYQGQTTKVEIK

--- Test Case 2 ---
Input: NLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFT...
Generated: EVQLVESGGGLVQPGGSLRLSCAASGFSFISSYYHWVRQAPGKGLEWVASITGSDTPKFRADSEVTISRDNAKFKKATAYLQMNLQDVVYACMEWYGRGTSVTVSS|DIVLTQSPSSVSATSGVRTISCTSGTDYLAWYQQKPWYQQGKAPKLLIYSQSNSTVGSGERSDASLTISRAELQADYFCQQYYCQQYLQMNSLQTSEDTAVYYCQQSGAETVVK


TEST GENERATION - Step 200

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QVQLVQSGAELKPGASVKLSCKASGYTFTSYWMHWVRQAPGQGLEWIGEIYNPSNKYYADSVKGRFTISRDNSKNTLYLQMSSLRSEDTAVYYCTRNYGSGTFDYWGQGTLVTVSA|DIVLTQSPSSLSASVGDRVTITCRASQDISNYLAWYQQKPGQSPRLLIYKASTRATGIPARFSGSGSGTDFTLTITSLQPEDEAYYCQQYNNPWTFGGGTKVEIK

--




TEST GENERATION - Step 600

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QVQLQQSGAELVRPGASVKLSCTGSGFNIHDDHAYWVKQRPEQGLEWIGEIYYTGDNYTYYADSVKGRFTISRDDASTAYLELSSLRSEDTAVYYCARERGTFYDYWGQGTLVTVSS|DIQMTQSPSSLSASSVEGERLTCRTSQSLSIHSWYQQKAPKLLIYAASESGPSGVSDRFSGSRAGTDFTLTISSLQAEDVAVYYCLQHYGNSPRTFGPGTKVDIK

--- Test Case 2 ---
Input: NLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFT...
Generated: QVTLKESGGDLVQPGGSLRLSCAASGFTFSSYYMSWVRQAPGKGLEWIGSIYYSGSTYYNSYADSVKGRFTISRDNAKNTLYLQMRAEDTAVYYCARDRDSSDPWYYYGMDVWGQGTTVTVSS|ELTQPPSVSGSPGQSITISCTGSGDVGKYLAWYQQKPGQAPVLVIYKRDSQRPSGIPDRFSGSNSGNTASLTISGLKTEDEAIYFCSEDDSSLHWVFGGGTKLTVL


TEST GENERATION - Step 700

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: VQLVESGGGLVKPGGSLRLSCAASGFTFSNYWMHWVRQAPGKGLEWVAVISYDGSNRYYADSVKGRFTISRDNSKNTLYLQMRAEDTAVYYCAKYGDAWNSFFDIWGQGTTVTVSS|DIQMTQSPSSLSASVGDRVTITCRASQSISSALAWYQQKPGKAPKLLIFAASSLQSGVPSRF




TEST GENERATION - Step 1100

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: EVQLVESGGGLVQPGGSLRLSCAASGFNVSYSIHWVRQAPGKGLEWVAYIYYSGGSTYYADSVKGRFTISADTSKNTAYLQMNSLRAEDTAVYYCARRRGDSGYFAYWGQGTLVTVSS|DIQMTQSPSSLSASVGDRVTITCRASQSVSSAVAWYQQKPGKAPKLLIYSASSLYSGVPSRFSGSRSGTDFTLTISSLQPEDFATYYCQQYNNYPLTFGAGTKVEIK

--- Test Case 2 ---
Input: NLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFT...
Generated: EVQLVESGGGLVQPGGSLRLSCAASGFTFSNYWMHWVRQPPGKGLEWVSGISSGSITYYADSVKGRFTISRDNAKNSLYLQMNSLRAEDTAVYYCARGDTVVGAMDYWGQGTLVTVS|DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKVLIFAASSLQSGVPSRFSGSGSGTDFALTISSLQPEDEADYYCQQYNSYPLTFGQGTKVEIK



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av


TEST GENERATION - Step 1200

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QVQLQESGPGLVKPSETLSVTCSVSGDSMNNYYWTWIRQSPGKGLEWIGYISDRESANNYNPSLNSRVVSVDTSKNQFSLKLTAADTAVYYCARDRGRGSYDFWGQGTLVTVSS|DIQMTQSPSSLSASVGDRVTITCRASKQSVSSYLAWYQQKPGKAPKLLIYAASNLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPRTFGQGTKVEIK

--- Test Case 2 ---
Input: NLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFT...
Generated: EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMHWVRQAPGKGLEWVAYISSSGSTYYADSVKGRFTISRDNAKNTLYLQMRAEDTAVYYCARDRDYWGQGTLVTVSS|DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTLALTFGGGTKVEIK


TEST GENERATION - Step 1300

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QVQLVESGGGLVQPGGSLRLSCAASGFNIKKDYMSWVRQAPGKGLEWVSVIYSSSGSTYYADSVKGRFTISADTSKNTAYLQMRAEDTAVYYCARDRPYDDWFDPWGQGTLVTVSS|EIVMTQSPSSLSASVGDRVTITCRASQSISNYLAWYQQKPGKAPKLLIYDASSLYRFSGVPDRFSGSGSGTD




TEST GENERATION - Step 1600

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: VQLVQSGAEVKKPGSSVRVSCKAYGVFTNYALPVSWVRQAPGQRLEWMGWISYSGDSDDTKYAQRFQGRVTMTRDTSISTAYMELSRLRSDDTAVYYCAREGYGDSYWGQGTLVTVSA|DIVMTQSPLSLSVTPGEPASISCRSSQNGNTYLAWYLHKPGQSPHLLIYWASTRHTGVPDRFSGSGSGTDFTLTISRVETEDLAVYYCQQYNNWLPRTFGGGTKVEIK

--- Test Case 2 ---
Input: NLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFT...
Generated: QSLEESGGDLVKPGASLTLTCTASGDSFISSYAMNWIRQTPGKGLEWIGCIYYSGGTYYNQKFKGRVTITEDTSKSTAYMELTPEDTAVYYCARAEYRGYYGMDVWGQGTTVTV|DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCLQHYDNLPLTFGGGTKVEIK


TEST GENERATION - Step 1700

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QVQLVESGGGVVQPGRSLRLSCAASGFTFSNYFMHWVRQAPGKGLEWVAVISYDGSNKYYADSVKGRFTISRDNSKNTLYLQMRAEDTAVYYCARERELRGEYFDYWGQGTTVTVSS|DIQLTQSPSFLSASVGDRVTITCRASQGISSYLAWYQQKPGKAPNLLIYAASSLQSGVPSRF




TEST GENERATION - Step 2100

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: EVQLVESGGGLVQPGGSLRLSCAASGFNVYSSSIHWVRQAPGKGLEWVSYISGSGSNTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARSRQWFDPWGQGTLVTVSS|DIQMTQSPSSLSASVGDRVTITCRASQSISNNLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYRTLALTFGPGTKVEIK

--- Test Case 2 ---
Input: NLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFT...
Generated: EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYDMHWVRQPPGKGLEWVSTITSGGSITYYNPSLKGRVTISKDSQSINFVYLQMNSLRAEDTAIYYCTRSPDYGDVWGQGTTVTVSS|DIQMTQSPSSVSASVGDRVTITCRASQGISSWLAWYQQKPGKAPNLLIYAASSLQSGVPSRFSGSGSGTEFTLTISSLQPEDFATYYCQQLKSDPPTFGGGTKVEIK


TEST GENERATION - Step 2200

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QVTLRESGPALVKPTQTLTLTCTFSGFSLSTPGGGVGWIRQPPGKALEWLALIYYAGSIDYAPSLKTRLTISKDTSKNQVVLTMTNMDPVDTATYYCVDRNWSAYWGQGTLIV|SVLTQSPATLSVSLGQRATISCKASQNVGSFMNWFQQKPGQPPKLLIYEVSNRPSGVPARFSGSGS

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av


TEST GENERATION - Step 2400

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: VQLVESGGGLVQPGGSLRLSCAASGFNVSYYSIHWVRQAPGKGLEWVASISSYYGYTYYADSVKGRFTISADTSKNTAYLQMNSLRAEDTAVYYCARYRYYGSWFDPWYFDYWGQGTLVTVSS|DIQMTQSPSSLSASVGDRVTITCRASQSVESSNLDWYQQKPGKAPKLLIYDASNLETGVPSRFSGSGSGTDFTFTISSLQPEDIATYYCQQHYEPPYTFGQGTKVEIK

--- Test Case 2 ---
Input: NLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFT...
Generated: VQLVESGGGLVQPGGSLRLSCAASGFNYYSSMHWVRQAPGKGLEWVSAISGSGDTTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARLGWRDYWGQGTLVTVSS|EIVLTQSPGTLSLSPGERATLSCRASQSVSSYLAWYQQKPGQAPRLLIYGASSRATGIPDRFSGSGSGTDFTLTISRLEPEDFAVYYCQQYGSSPPTFGQGTKVEIK


TEST GENERATION - Step 2500

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QVQLQESGPGLVAPSQSLSITCTVSGFSLTNYSVHWVRQPPGKGLEWLGVIWAGGGGTNYNSALMSRVSISRDTSKNQFFLKLSSVTAADTAVYYCAREYYRTYYMDVWGKGTTVTVSS|DIQMTQSPSSLSASVGDRVTITCRASKNIKSYLSWYQQKPGKAPKLLIYAASSLQSG




TEST GENERATION - Step 2600

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QVQLVQSGAEVKKPGASVKVSCKTSGYTFTSYNIHWVRQAPGQRLEWMGWINPNSGGTNYAQKFQGRVTMTRDTSISTAYMELSRLRSDDTAVYYCARGTTYYDSWYFDVWGQGTTVTVSS|EIVLTQSPGTLSLSPGERATLSCRASQSVSSSYLAWYQQKPGQAPRLLIYGASSRATGIPDRFSGSGSGTDFTLTISRLEPEDFAVYYCQQYGSSPLTFGGGTKVEIK

--- Test Case 2 ---
Input: NLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFT...
Generated: EVQLVESGGGLVKPGGSLRLSCSASGFTFSSYAMSWVRQAPGKGLEWVSAISSGGSTYYADSVKGRFTISRDNSKNTLYLQMRAEDTAVYYCAKDGYGNTYFDYWGQGTLVTVSS|DIVLTQSPLTSEPVPPGTQAVSITCGGSNIGGKNYVSWYQQHPGKAPKLLIYGASNRPSGVPDRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYTTPPTFGPGTRLEIK


TEST GENERATION - Step 2700

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QIQLVQSGPELKKPGETVKISCKASGYTFTNYGMNWVKQAPGKGLKWMGWINTYTGEPTYADDFKGRFAFSLETSASTAYLQIKNEDTATYFCARDRWFGAEDFFWGPGTVVTVSS|DIVMTQSPATLSVTPGDRVSLSCRASQSVIRNLAWYQQKSHESPRLLIKYASQSISG




TEST GENERATION - Step 3100

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QVQLVQSGAEVKKPGASVKVSCKASGYPFTSYGISWVRQAPGQGLEWMGWISTYNGNTNYAQKFQGRVTMTTDTSTTTGYMELRRLRSDDTAVYYCARDYTRGAWFDPWGQGTLVTVSS|DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSAPPTFGQGTKVEIK

--- Test Case 2 ---
Input: NLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFT...
Generated: QVQLVESGGGLVQPGGSLRLSCAASGFTFSSYYMNWVRQAPGKGLEWVSGINGGTTIANYADTVKGRFTISRDNAKNTLYLQMNSLRAEDTAVYYCARDVGGYDVFGMDYWGQGTLVTVSS|DIQMTQSPSAMSASVGDRVTITCRASQSISSWLAWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPPPTFGQGTKVEIK


TEST GENERATION - Step 3200

--- Test Case 1 ---
Input: KVFGRCELAAAM<epi>\1</epi><epi>\1</epi>HGL<epi>\1</epi><epi>\...
Generated: QVQLQESGPGLVKPSETLSVTCIVSGGSISRYYWWWIRQSPGKGLEWIGEIYYHSGSTNYNPSLKSRVTISVDTSKNQFSLNLNSVTAADSAVYYCARDGDYWGQGTLVTVSS|DIQLTQSPSFLSASVGDRVTITCRASQGISSYLAWYQQKPGKAPKLLIYAASILQSGVP



Model saved to ../models/llama-antibody-20250812_190413
Training completed!
