In [9]:
pip list

Package                  Version
------------------------ -----------------------
accelerate               1.8.1
aiohappyeyeballs         2.6.1
aiohttp                  3.12.13
aiosignal                1.3.2
annotated-types          0.7.0
asttokens                3.0.0
attrs                    25.3.0
certifi                  2025.6.15
charset-normalizer       3.4.2
click                    8.2.1
comm                     0.2.2
datasets                 3.6.0
debugpy                  1.8.14
decorator                5.2.1
dill                     0.3.8
executing                2.2.0
filelock                 3.13.1
frozenlist               1.7.0
fsspec                   2024.6.1
gitdb                    4.0.12
GitPython                3.1.44
hf-xet                   1.1.5
huggingface-hub          0.33.2
idna                     3.10
ipykernel                6.29.5
ipython                  9.4.0
ipython_pygments_lexers  1.1.1
jedi                     0.19.2
Jinja2                   3.1.4
jup

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [10]:
torch.cuda.is_available()

True

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # MUST be first line

# Now import everything else
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, TaskType

# Verify only 3090 Ti is visible
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")  # Should show 3090 Ti

# Load model on the isolated 3090 Ti
model_name = model_name = "microsoft/Phi-3.5-mini-instruct" #"microsoft/phi-4"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    #device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)

# PEFT configuration
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], #["o_proj", "qkv_proj"],
)

peft_model = get_peft_model(model, peft_config)
# Print trainable parameters to verify
peft_model.print_trainable_parameters()
print("PEFT model created successfully!")


  from .autonotebook import tqdm as notebook_tqdm


CUDA available: True
GPU count: 1
GPU: NVIDIA GeForce RTX 3090 Ti


`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 191.79it/s]

trainable params: 1,572,864 || all params: 3,822,652,416 || trainable%: 0.0411
PEFT model created successfully!





In [2]:
import pandas as pd
from datasets import Dataset
from transformers import TrainingArguments, DataCollatorForLanguageModeling, Trainer# AutoTokenizer, AutoModelForCausalLM, Trainer

In [3]:
## Load dataset
df = pd.read_csv("./data/sabdab/sabdab_with_sequences.tsv", sep='\t')

## Remove rows with missing sequences
df = df.dropna(subset=['HeavySeq', 'LightSeq', 'AntigenSeq'])

df.head()

Unnamed: 0,pdb,Hchain,Lchain,AntigenChains,HeavySeq,LightSeq,AntigenSeq
4,8xa4,C,D,A | B,QLQLQESGPGLVKPSETLSLTCTVSGGSISSNNDYWGWIRQPPGKG...,EIVLTQSPGTLSLSPGERVTLSCRASQRVSSTYLAWYQQKPGQAPR...,SCNGLYYQGSCYILHSDYKSFEDAKANCAAESSTLPNKSDVLTTWL...
9,9cph,H,L,A,EVQLVESGGGLVQPGGSLRLSCAASGFNLSSSSIHWVRQAPGKGLE...,AQMTQSPSSLSASVGDRVTITCRASQSVSSAVAWYQQKPGKAPKLL...,KIEEGKLVIWINGDKGYNGLAEVGKKFEKDTGIKVTVEHPDKLEEK...
10,9d7i,H,G,E,VQLQESGPGVVKSSETLSLTCTVSGGSMGGTYWSWLRLSPGKGLEW...,YELTQPPSVSVSPGQTATITCSGASTNVCWYQVKPGQSPEVVIFEN...,LWVTVYYGVPVWKDAETTLFCASDNVWATHACVPTDPNPQEIHLEN...
11,9d7i,J,I,C,VQLQESGPGVVKSSETLSLTCTVSGGSMGGTYWSWLRLSPGKGLEW...,YELTQPPSVSVSPGQTATITCSGASTNVCWYQVKPGQSPEVVIFEN...,LWVTVYYGVPVWKDAETTLFCASDNVWATHACVPTDPNPQEIHLEN...
12,9d7o,H,G,E,QVQLQESGPGVVKSSETLSLTCTVSGGSMGGTYWSWLRLSPGKGLE...,YELTQPPSVSVSPGQTATITCSGASTNVCWYQVKPGQSPEVVIFEN...,LWVTVYYGVPVWKDAETTLFCASDNVWATHACVPTDPNPQEIHLEN...


In [4]:
## Format prompts
def format_prompt(example):
    return {
        "text": f"Antigen: {example['AntigenSeq']}\nAntibody: {example['HeavySeq']}|{example['LightSeq']}\n"
    }

dataset = Dataset.from_pandas(df)
dataset = dataset.map(format_prompt)

## Extend tokenizer with special tokens
amino_acids = list("ACDEFGHIKLMNPQRSTVWY")
extra_tokens = amino_acids + ["|"]# ["[", "]", "|"]

## Check if tokens already exist in the tokenizer's vocabulary
new_tokens = [t for t in extra_tokens if t not in tokenizer.get_vocab()]
tokenizer.add_tokens(new_tokens)
model.resize_token_embeddings(len(tokenizer))

model.train()

## Tokenize the dataset
def tokenize(example):
    # return tokenizer(example["text"], padding="max_length", truncation=True, max_length=512)
    encoded = tokenizer(example["text"], padding="max_length", truncation=True, max_length=256)
    # encoded["labels"] = encoded["input_ids"]#.copy()
    return encoded

tokenized_dataset = dataset.map(tokenize)

## Remove unnecessary columns from the tokenized dataset
tokenized_dataset = tokenized_dataset.remove_columns([
    'pdb', 'Hchain', 'Lchain', 'AntigenSeq', 'AntigenChains',
    'HeavySeq', 'LightSeq', '__index_level_0__', 'text'
])

## Split the dataset into train and validation sets
train_test_split = tokenized_dataset.train_test_split(test_size=0.2, seed=1337)

train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]

train_dataset

## Training arguments
training_args = TrainingArguments(
    output_dir=f"../models/peleke-{model_name.split('/')[-1]}",
    ## Batching
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    ## Epochs and warmups
    num_train_epochs=3,
    warmup_steps=100,
    ## Optimization
    weight_decay=0.01,
    ## Logging and saving
    logging_dir="../logs",
    logging_steps=50,
    save_strategy="epoch",
    learning_rate=5e-5,
    # fp16=True,
    gradient_checkpointing=True, ## If having memory issues
    report_to="none",
    remove_unused_columns=False,
)

## Data Collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  ## Important: MLM=False for causal LM
)

## Trainer
trainer = Trainer(
    # model=model,
    model=peft_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)



Map: 100%|██████████| 10073/10073 [00:00<00:00, 35457.34 examples/s]
Map: 100%|██████████| 10073/10073 [00:02<00:00, 3649.97 examples/s]
  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [5]:
# Check model is ready for training
print("Model training mode:", peft_model.training)
print("Trainable params:", sum(p.numel() for p in peft_model.parameters() if p.requires_grad))

trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
You are not running the flash-attention implementation, expect numerical differences.


Model training mode: True
Trainable params: 1572864


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

#This code worked with the following versions:
  

In [4]:
import os
os.environ["WANDB_DISABLED"] = "true"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset
import gc

# Clear any existing GPU memory
torch.cuda.empty_cache()
gc.collect()

# Load model and tokenizer
model_name = "microsoft/Phi-3.5-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Add new tokens
amino_acids = list("ACDEFGHIKLMNPQRSTVWY")
extra_tokens = amino_acids + ["|"]
new_tokens = [t for t in extra_tokens if t not in tokenizer.get_vocab()]
if new_tokens:
    print(f"Adding {len(new_tokens)} new tokens: {new_tokens}")
    tokenizer.add_tokens(new_tokens)

# Load model with memory optimizations (NO flash attention)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # Use bfloat16 to save memory
    low_cpu_mem_usage=True,
    # Removed attn_implementation="flash_attention_2"
).cuda()

# Resize embeddings
if new_tokens:
    model.resize_token_embeddings(len(tokenizer))
    print(f"Resized embeddings to {len(tokenizer)} tokens")

print(f"Model memory after loading: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")

# PEFT configuration - even smaller to save memory
peft_config = LoraConfig(
    r=4,  # Very small rank
    lora_alpha=8,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
    target_modules=["o_proj"],  # Only target one module type
    inference_mode=False,
)

peft_model = get_peft_model(model, peft_config)
peft_model.train()

# Enable gradients for embeddings
for name, param in peft_model.named_parameters():
    if 'embed_tokens' in name or 'lm_head' in name:
        param.requires_grad = True

print("=== PEFT MODEL INFO ===")
peft_model.print_trainable_parameters()
print(f"Memory after PEFT: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")

# Dataset processing with very short sequences
def format_prompt(example):
    return {
        "text": f"Antigen: {example['AntigenSeq']}\nAntibody: {example['HeavySeq']}|{example['LightSeq']}\n"
    }

dataset = Dataset.from_pandas(df)
dataset = dataset.map(format_prompt)

def tokenize(example):
    encoded = tokenizer(
        example["text"], 
        padding="max_length", 
        truncation=True, 
        max_length=64  # Very short sequences
    )
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded

tokenized_dataset = dataset.map(tokenize)

# Remove unnecessary columns
tokenized_dataset = tokenized_dataset.remove_columns([
    'pdb', 'Hchain', 'Lchain', 'AntigenSeq', 'AntigenChains',
    'HeavySeq', 'LightSeq', '__index_level_0__', 'text'
])

# Split dataset
train_test_split = tokenized_dataset.train_test_split(test_size=0.2, seed=1337)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]

# Ultra memory-optimized training arguments
training_args = TrainingArguments(
    output_dir=f"../models/peleke-{model_name.split('/')[-1]}",
    per_device_train_batch_size=150,  # Minimal batch size
    per_device_eval_batch_size=150,
    num_train_epochs=5,  # Start with 1 epoch for testing
    gradient_accumulation_steps=8,  # Simulate batch size of 8
   # max_steps=200,  # Start with just 200 steps for testing
    warmup_steps=20,
    weight_decay=0.01,
    logging_dir="../logs",
    logging_steps=20,
    save_steps=100,
    eval_steps=100,
    learning_rate=5e-5,
    bf16=True,  # Mixed precision
    gradient_checkpointing=True,  # Trade compute for memory
    dataloader_pin_memory=False,
    dataloader_num_workers=0,
    report_to="none",
    remove_unused_columns=False,
    max_grad_norm=1.0,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    save_strategy="epoch",  # Save model every epoch
    # Additional memory optimizations
    save_total_limit=1,  # Only keep 1 checkpoint
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

print(f"Memory before trainer: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")

# Create trainer
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
    data_collator=data_collator,
)

print(f"Memory after trainer: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")

# Final memory cleanup before training
del model  # Remove reference to base model
torch.cuda.empty_cache()
gc.collect()

print(f"Memory before training: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")
print(f"Memory available: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / 1e9:.2f} GB")

print("\n=== STARTING ULTRA MEMORY-OPTIMIZED TRAINING ===")
try:
    trainer.train()
    print("✓ Training completed successfully!")
    
    # Save the model
    print("Saving model...")
    peft_model.save_pretrained("./phi35-antibody-lora-e10")
    tokenizer.save_pretrained("./phi35-antibody-lora-e10")
    print("✓ Model saved!")
    
except torch.cuda.OutOfMemoryError as e:
    print(f"Still OOM: {e}")
    print("Current GPU memory usage:")
    print(f"Allocated: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")
    print(f"Reserved: {torch.cuda.memory_reserved(0) / 1e9:.2f} GB")
    print("Try using an even smaller model like TinyLlama-1.1B")

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 144.60it/s]


Model memory after loading: 7.64 GB
=== PEFT MODEL INFO ===
trainable params: 197,787,648 || all params: 3,821,865,984 || trainable%: 5.1752
Memory after PEFT: 7.65 GB


Map: 100%|██████████| 10073/10073 [00:00<00:00, 36158.85 examples/s]
Map: 100%|██████████| 10073/10073 [00:03<00:00, 2643.80 examples/s]
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Memory before trainer: 7.65 GB
Memory after trainer: 7.65 GB
Memory before training: 7.65 GB
Memory available: 17.65 GB

=== STARTING ULTRA MEMORY-OPTIMIZED TRAINING ===


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
You are not running the flash-attention implementation, expect numerical differences.


Step,Training Loss
20,4.1699


✓ Training completed successfully!
Saving model...
✓ Model saved!


In [8]:
from peft import PeftModel
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model
model_name = "microsoft/Phi-3.5-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained("./phi35-antibody-lora")
base_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, trust_remote_code=True).cuda()
model = PeftModel.from_pretrained(base_model, "./phi35-antibody-lora").cuda()
model.eval()

# Generate complete antibody sequences
test_antigens = [
    "MKFLVNVALVFMVVYISYIYA",
    "ACDEFGHIKLMNPQRSTVWY", 
    "YYWGQGTLVTVSS"
]

for antigen in test_antigens:
    prompt = f"Antigen: {antigen}\nAntibody: "
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.cuda() for k, v in inputs.items()}
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
            use_cache=False,
        )
    
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    antibody_sequence = full_text.replace(prompt, "").strip()
    
    print(f"Antigen: {antigen}")
    print(f"Antibody: {antibody_sequence}")
    print("-" * 50)

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 201.31it/s]


Antigen: MKFLVNVALVFMVVYISYIYA
Antibody: Matching: 

Step 1: Identify the complementary base pairs in the sequence. 

Step 2: Create a string of matches, where each match consists of a pair of complementary bases.

Step 3: Count the number of matches found in the sequence.

Step 4: If the number of matches is equal to the length of the antigen sequence, then a perfect match has been found.

Based on the
--------------------------------------------------
Antigen: ACDEFGHIKLMNPQRSTVWY
Antibody: Amino Acid: A
Antibody: T

Amino Acid: C
Antibody: G

Amino Acid: D
Antibody: T

Amino Acid: E
Antibody: Y

Amino Acid: F
Antibody: T

Amino Acid: G
Antibody: T

Amino Acid: H
--------------------------------------------------
Antigen: YYWGQGTLVTVSS
Antibody: 5K5S9.7

Please provide a detailed analysis of the potential implications of this interaction in the context of immunology. Discuss the possible functions and effects of this antigen-antibody pairing, including any potential impacts on cellul

In [5]:
print("=== TESTING YOUR NEWLY TRAINED MODEL ===")

# Load your trained model
from peft import PeftModel
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the model
model_name = "microsoft/Phi-3.5-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained("./phi35-antibody-lora-e10")
base_model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
).cuda()

model = PeftModel.from_pretrained(base_model, "./phi35-antibody-lora-e10").cuda()
model.eval()

print("✓ Model loaded successfully!")

# Test what the model learned
def test_antibody_understanding(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.cuda() for k, v in inputs.items()}
    
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits[0, -1, :]
        top_k = torch.topk(logits, 10)
        
        print(f"Prompt: '{prompt}'")
        print("Top 10 predictions:")
        for i, (score, token_id) in enumerate(zip(top_k.values, top_k.indices)):
            token = tokenizer.decode(token_id.item())
            print(f"  {i+1:2d}. '{token}' (score: {score.item():.2f})")
        print()

# Test the model's antibody knowledge
test_prompts = [
    "Antigen: MKFLVNVALVFMVVYISYIYA\nAntibody: ",
    "Antigen: ACDEFGHIKLMNPQRSTVWY\nAntibody: H",
    "Antigen: TEST\nAntibody: HEAVY|",
]

for prompt in test_prompts:
    test_antibody_understanding(prompt)

# Compare with original predictions to see improvement
print("=== IMPROVEMENT CHECK ===")
print("Your model should now:")
print("1. Predict amino acids after 'Antigen:'")
print("2. Predict amino acid sequences after 'Antibody:'") 
print("3. Understand the Heavy|Light chain format")
print("4. Show lower loss than the initial 4.17")

=== TESTING YOUR NEWLY TRAINED MODEL ===


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 192.93it/s]


✓ Model loaded successfully!
Prompt: 'Antigen: MKFLVNVALVFMVVYISYIYA
Antibody: '
Top 10 predictions:
   1. '
' (score: 49.00)
   2. 'Y' (score: 47.00)
   3. 'F' (score: 46.25)
   4. 'V' (score: 46.25)
   5. 'D' (score: 46.25)
   6. 'M' (score: 46.25)
   7. 'Q' (score: 46.00)
   8. '1' (score: 46.00)
   9. 'K' (score: 45.75)
  10. 'I' (score: 45.50)

Prompt: 'Antigen: ACDEFGHIKLMNPQRSTVWY
Antibody: H'
Top 10 predictions:
   1. 'I' (score: 47.75)
   2. 'G' (score: 47.50)
   3. 'IE' (score: 47.50)
   4. 'Y' (score: 47.00)
   5. 'J' (score: 47.00)
   6. 'IG' (score: 46.75)
   7. 'HH' (score: 46.50)
   8. 'K' (score: 46.25)
   9. 'L' (score: 46.25)
  10. 'IL' (score: 46.25)

Prompt: 'Antigen: TEST
Antibody: HEAVY|'
Top 10 predictions:
   1. 'LI' (score: 51.25)
   2. 'NE' (score: 50.25)
   3. 'M' (score: 50.25)
   4. 'B' (score: 50.25)
   5. 'L' (score: 50.00)
   6. 'MA' (score: 50.00)
   7. '
' (score: 50.00)
   8. 'LY' (score: 49.50)
   9. 'CH' (score: 49.50)
  10. 'ME' (score: 49.50)

===

This was testing the finetuning


In [3]:
# Load your trained model
from peft import PeftModel
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the base model and your fine-tuned adapter
model_name = "microsoft/Phi-3.5-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained("./phi35-antibody-lora")
base_model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
).cuda()

# Load your fine-tuned model
model = PeftModel.from_pretrained(base_model, "./phi35-antibody-lora").cuda()
model.eval()

print("=== TESTING ANTIBODY GENERATION ===")

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 183.05it/s]


=== TESTING ANTIBODY GENERATION ===


In [4]:
os.environ["WANDB_DISABLED"] = "true"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [5]:
# Test 1: Give it an antigen, see if it generates reasonable antibody sequences
test_antigens = [
    "MKFLVNVALVFMVVYISYIYA",  # Example antigen sequence
    "ATGCDEFGHIKLMNPQRSTVWY",  # Another test sequence
]

for antigen in test_antigens:
    prompt = f"Antigen: {antigen}\nAntibody: "
    
    # FIXED: Move tensors to GPU properly
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.cuda() for k, v in inputs.items()}
    
    with torch.no_grad():
        try:
            outputs = model.generate(
                **inputs,
                max_new_tokens=50,  # Reduced for testing
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=tokenizer.eos_token_id,
                use_cache=False,
            )
            
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            generated_antibody = generated_text.replace(prompt, "").strip()
            
            print(f"\nAntigen: {antigen}")
            print(f"Generated Antibody: {generated_antibody}")
            print("-" * 50)
            
        except Exception as e:
            print(f"Generation failed for antigen {antigen}: {e}")
            # Try simpler generation
            try:
                outputs = model.generate(
                    inputs['input_ids'],
                    max_new_tokens=20,
                    do_sample=False,
                    pad_token_id=tokenizer.eos_token_id,
                )
                generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
                print(f"Simple generation: {generated_text}")
            except Exception as e2:
                print(f"Even simple generation failed: {e2}")

You are not running the flash-attention implementation, expect numerical differences.



Antigen: MKFLVNVALVFMVVYISYIYA
Generated Antibody: The alignment score is calculated as follows:

Score = (Number of matches) * 1 + (Number of mismatches) * -1

Number of matches: 12
Number of mismatches: 6
--------------------------------------------------

Antigen: ATGCDEFGHIKLMNPQRSTVWY
Generated Antibody: 1. N-terminal
2. C-terminal
3. Internal


# Response

The antibody can bind to the antigen at any of the three given locations: N-terminal, C
--------------------------------------------------


In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # Force 3090 Ti only

# Restart Python kernel or run this in a fresh session
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Verify only 3090 Ti is visible
print("=== GPU CHECK ===")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"GPU 0: {torch.cuda.get_device_name(0)}")  # Should be 3090 Ti

# Load your model (this should work since training worked)
print("\n=== LOADING FINE-TUNED MODEL ===")
model_name = "microsoft/Phi-3.5-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained("./phi35-antibody-lora")

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
).cuda()

# Load fine-tuned adapter
model = PeftModel.from_pretrained(base_model, "./phi35-antibody-lora")
model.eval()

print("✓ Model loaded successfully!")

# Simple test without the problematic generation
print("\n=== TESTING MODEL UNDERSTANDING ===")

# Test the model's learned patterns with forward pass only
test_cases = [
    "Antigen: MKFLVNVALVFMVVYISYIYA\nAntibody: ",
    "Antigen: ACDEFGHIKLMNPQRSTVWY\nAntibody: "
]

for i, prompt in enumerate(test_cases):
    print(f"\nTest {i+1}: {prompt}")
    
    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.cuda() for k, v in inputs.items()}
    
    # Forward pass to see what the model predicts
    with torch.no_grad():
        try:
            outputs = model(**inputs)
            logits = outputs.logits[0, -1, :]  # Last token logits
            
            # Get top 10 most likely next tokens
            top_k = torch.topk(logits, 10)
            
            print("Top 10 most likely next tokens:")
            for j, (score, token_id) in enumerate(zip(top_k.values, top_k.indices)):
                token = tokenizer.decode(token_id.item())
                print(f"  {j+1:2d}. '{token}' (score: {score.item():.2f})")
                
        except Exception as e:
            print(f"Forward pass failed: {e}")

# Test amino acid understanding
print("\n=== TESTING AMINO ACID KNOWLEDGE ===")
amino_prompt = "Antigen: A\nAntibody: "
inputs = tokenizer(amino_prompt, return_tensors="pt")
inputs = {k: v.cuda() for k, v in inputs.items()}

with torch.no_grad():
    try:
        outputs = model(**inputs)
        logits = outputs.logits[0, -1, :]
        
        # Check if amino acids are highly ranked
        amino_acids = "ACDEFGHIKLMNPQRSTVWY"
        amino_scores = []
        
        for aa in amino_acids:
            aa_token_id = tokenizer.encode(aa, add_special_tokens=False)[0]
            score = logits[aa_token_id].item()
            amino_scores.append((aa, score))
        
        # Sort by score
        amino_scores.sort(key=lambda x: x[1], reverse=True)
        
        print("Amino acid likelihood scores:")
        for aa, score in amino_scores[:10]:
            print(f"  {aa}: {score:.2f}")
            
    except Exception as e:
        print(f"Amino acid test failed: {e}")

  from .autonotebook import tqdm as notebook_tqdm


=== GPU CHECK ===
CUDA available: True
GPU count: 1
GPU 0: NVIDIA GeForce RTX 3090 Ti

=== LOADING FINE-TUNED MODEL ===


`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 193.84it/s]
You are not running the flash-attention implementation, expect numerical differences.


✓ Model loaded successfully!

=== TESTING MODEL UNDERSTANDING ===

Test 1: Antigen: MKFLVNVALVFMVVYISYIYA
Antibody: 
Top 10 most likely next tokens:
   1. '
' (score: 49.75)
   2. 'Y' (score: 46.75)
   3. 'D' (score: 46.25)
   4. 'Q' (score: 46.25)
   5. '1' (score: 46.00)
   6. 'K' (score: 46.00)
   7. 'V' (score: 46.00)
   8. 'E' (score: 45.75)
   9. 'H' (score: 45.75)
  10. 'M' (score: 45.75)

Test 2: Antigen: ACDEFGHIKLMNPQRSTVWY
Antibody: 
Top 10 most likely next tokens:
   1. '
' (score: 46.75)
   2. '1' (score: 44.50)
   3. 'Y' (score: 44.25)
   4. 'T' (score: 43.50)
   5. 'M' (score: 43.50)
   6. 'A' (score: 43.50)
   7. 'F' (score: 43.50)
   8. 'Q' (score: 43.50)
   9. 'V' (score: 43.50)
  10. 'N' (score: 43.25)

=== TESTING AMINO ACID KNOWLEDGE ===
Amino acid likelihood scores:
  A: 40.50
  T: 40.25
  H: 40.00
  K: 40.00
  G: 39.75
  C: 39.50
  M: 39.50
  Q: 39.50
  D: 39.25
  F: 39.25


In [2]:
print("\n=== CHECKING TRAINING RESULTS ===")

# Compare a PEFT model parameter before/after
print("LoRA adapter weights (should not be zeros):")
for name, param in model.named_parameters():
    if 'lora_A' in name or 'lora_B' in name:
        print(f"{name}: mean={param.data.mean().item():.6f}, std={param.data.std().item():.6f}")
        break  # Just show one example

# Check if model files exist and have reasonable sizes
import os
model_dir = "./phi35-antibody-lora"
if os.path.exists(model_dir):
    for file in os.listdir(model_dir):
        if file.endswith('.bin') or file.endswith('.safetensors'):
            size = os.path.getsize(os.path.join(model_dir, file)) / (1024*1024)  # MB
            print(f"Model file {file}: {size:.2f} MB")


=== CHECKING TRAINING RESULTS ===
LoRA adapter weights (should not be zeros):
base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight: mean=0.000028, std=0.011267
Model file adapter_model.safetensors: 3.01 MB


In [3]:
print("=== SUCCESSFUL TRAINING CONFIRMED ===")
print("✓ LoRA weights are trained (non-zero)")
print("✓ Adapter file exists (3.01 MB)")
print("✓ Your fine-tuning worked!")

print("\n=== TESTING YOUR TRAINED MODEL ===")

# Test 1: Compare model behavior on your training format
def test_model_predictions(prompt, model, tokenizer):
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.cuda() for k, v in inputs.items()}
    
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits[0, -1, :]  # Last token predictions
        
        # Get top predictions
        top_k = torch.topk(logits, 15)
        
        print(f"Prompt: '{prompt}'")
        print("Top predicted next tokens:")
        
        amino_acids = set("ACDEFGHIKLMNPQRSTVWY|")
        amino_predictions = []
        other_predictions = []
        
        for score, token_id in zip(top_k.values, top_k.indices):
            token = tokenizer.decode(token_id.item()).strip()
            if any(c in amino_acids for c in token):
                amino_predictions.append((token, score.item()))
            else:
                other_predictions.append((token, score.item()))
        
        print("  Amino acid/relevant tokens:")
        for token, score in amino_predictions[:8]:
            print(f"    '{token}': {score:.2f}")
        
        print("  Other tokens:")
        for token, score in other_predictions[:3]:
            print(f"    '{token}': {score:.2f}")
        print()

# Test different antibody prompts
test_prompts = [
    "Antigen: MKFLVNVALVFMVVYISYIYA\nAntibody: ",
    "Antigen: ACDEFGHIKLMNPQRSTVWY\nAntibody: H",
    "Antigen: TEST\nAntibody: HEAVY",
]

for prompt in test_prompts:
    try:
        test_model_predictions(prompt, model, tokenizer)
    except Exception as e:
        print(f"Test failed for '{prompt}': {e}")

# Test 2: Compare with and without LoRA
print("=== COMPARING BASE vs FINE-TUNED BEHAVIOR ===")

# Disable LoRA temporarily to see base model behavior
model.disable_adapter_layers()
print("Base model predictions (LoRA disabled):")
test_model_predictions("Antigen: MKFL\nAntibody: ", model, tokenizer)

# Re-enable LoRA to see fine-tuned behavior  
model.enable_adapter_layers()
print("Fine-tuned model predictions (LoRA enabled):")
test_model_predictions("Antigen: MKFL\nAntibody: ", model, tokenizer)

=== SUCCESSFUL TRAINING CONFIRMED ===
✓ LoRA weights are trained (non-zero)
✓ Adapter file exists (3.01 MB)
✓ Your fine-tuning worked!

=== TESTING YOUR TRAINED MODEL ===
Prompt: 'Antigen: MKFLVNVALVFMVVYISYIYA
Antibody: '
Top predicted next tokens:
  Amino acid/relevant tokens:
    'Y': 46.75
    'Q': 46.25
    'D': 46.25
    'V': 46.00
    'K': 46.00
    'H': 45.75
    'E': 45.75
    'M': 45.75
  Other tokens:
    '': 49.75
    '1': 46.00

Prompt: 'Antigen: ACDEFGHIKLMNPQRSTVWY
Antibody: H'
Top predicted next tokens:
  Amino acid/relevant tokens:
    'HH': 47.00
    'Y': 47.00
    'G': 46.75
    'V': 46.75
    'I': 46.50
    'L': 46.25
    'Q': 46.25
    'K': 46.25
  Other tokens:
    'J': 45.50

Prompt: 'Antigen: TEST
Antibody: HEAVY'
Top predicted next tokens:
  Amino acid/relevant tokens:
    'CH': 53.50
    'C': 52.00
    'AND': 51.75
    'M': 51.75
    'T': 51.25
  Other tokens:
    '': 55.50
    '_': 53.50
    '-': 53.25

=== COMPARING BASE vs FINE-TUNED BEHAVIOR ===
Base model

In [4]:
print("=== ANALYZING WHAT YOUR MODEL LEARNED ===")

# Test the model's understanding of your training format
format_tests = [
    ("Antigen: ", "Should predict amino acid sequences"),
    ("Antibody: ", "Should predict Heavy|Light format"),
    ("Antigen: ABC\nAntibody: ", "Should start antibody sequence"),
    ("Antigen: DEF\nAntibody: HEAVY|", "Should continue with light chain"),
]

for prompt, description in format_tests:
    print(f"\nTest: {description}")
    print(f"Prompt: '{prompt}'")
    
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.cuda() for k, v in inputs.items()}
    
    with torch.no_grad():
        try:
            outputs = model(**inputs)
            logits = outputs.logits[0, -1, :]
            
            # Check specific token probabilities
            amino_acids = "ACDEFGHIKLMNPQRSTVWY"
            separator_token = "|"
            
            # Get probabilities for amino acids
            amino_probs = []
            for aa in amino_acids:
                try:
                    aa_id = tokenizer.encode(aa, add_special_tokens=False)[0]
                    prob = torch.softmax(logits, dim=-1)[aa_id].item()
                    amino_probs.append((aa, prob))
                except:
                    continue
            
            # Get probability for separator
            try:
                sep_id = tokenizer.encode(separator_token, add_special_tokens=False)[0]
                sep_prob = torch.softmax(logits, dim=-1)[sep_id].item()
            except:
                sep_prob = 0.0
            
            # Sort amino acids by probability
            amino_probs.sort(key=lambda x: x[1], reverse=True)
            
            print(f"  Top amino acids: {', '.join([f'{aa}({p:.3f})' for aa, p in amino_probs[:5]])}")
            print(f"  '|' separator probability: {sep_prob:.3f}")
            
        except Exception as e:
            print(f"  Error: {e}")

=== ANALYZING WHAT YOUR MODEL LEARNED ===

Test: Should predict amino acid sequences
Prompt: 'Antigen: '
  Top amino acids: M(0.001), A(0.001), H(0.001), C(0.000), N(0.000)
  '|' separator probability: 0.000

Test: Should predict Heavy|Light format
Prompt: 'Antibody: '
  Top amino acids: A(0.001), H(0.000), C(0.000), D(0.000), E(0.000)
  '|' separator probability: 0.001

Test: Should start antibody sequence
Prompt: 'Antigen: ABC
Antibody: '
  Top amino acids: D(0.001), Y(0.001), A(0.000), V(0.000), I(0.000)
  '|' separator probability: 0.000

Test: Should continue with light chain
Prompt: 'Antigen: DEF
Antibody: HEAVY|'
  Top amino acids: L(0.005), H(0.001), M(0.001), A(0.001), S(0.001)
  '|' separator probability: 0.000
