
# BigProtein-Qwen2.5 — Step‑by‑Step Test Notebook (Colab)
This notebook lets you **test each component** of the protein‑conditioned Qwen2.5 pipeline *before* running full training.  
It mirrors the main script logic, but runs **function‑by‑function** so you can see errors early with clear tracebacks.

> **Files expected in the working directory** (upload or mount a folder containing them):  
> - `bigmodel_joint_train.py`  
> - `protein_encoder.py`  
> - `structure_encoder.py`


In [1]:
#@title Mount Google Drive
from pathlib import Path
from huggingface_hub import snapshot_download
import os, json, pickle, pandas as pd
from tqdm import tqdm
from rich import print as rprint

In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

%cd /content/drive/MyDrive/LLM/Bioreasoner/testing_pipelines

from pathlib import Path
BASE_DIR = Path("/content/drive/MyDrive/LLM/Bioreasoner/data/hf/proteinDT")
OUT_DIR  = BASE_DIR / "sft_test_demo"
print(f"Using Google Drive folder as BASE_DIR: {BASE_DIR}")


Mounted at /content/drive
/content/drive/MyDrive/LLM/Bioreasoner/testing_pipelines
Using Google Drive folder as BASE_DIR: /content/drive/MyDrive/LLM/Bioreasoner/data/hf/proteinDT



## 0) Runtime & Installs
If you're on Google Colab, run this cell to install dependencies.


In [3]:
# Check GPU
!nvidia-smi

# Fresh pip + libs (PyTorch CUDA 12.1 build + matching libs)
%pip -q install --upgrade pip
%pip install -q --index-url https://download.pytorch.org/whl/cu126 \
  torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0
%pip -q install transformers==4.56.1 huggingface_hub==0.35.0 tqdm safetensors

Thu Oct  2 03:10:40 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          Off |   00000000:00:05.0 Off |                    0 |
| N/A   37C    P0             56W /  400W |       0MiB /  81920MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:
# --- Version & import sanity checks ---
import torch, transformers, huggingface_hub
print("torch            :", torch.__version__)
print("transformers     :", transformers.__version__)
print("huggingface_hub  :", huggingface_hub.__version__)

# Top-level ESM import should work on 4.56.1
try:
    from transformers import AutoTokenizer, EsmForMaskedLM
    print("✅ Top-level EsmForMaskedLM import OK")
except Exception as e:
    print("❌ Top-level EsmForMaskedLM import failed:", repr(e))
    # Fallback check (direct module path)
    try:
        from transformers.models.esm.modeling_esm import EsmForMaskedLM as _E
        print("✅ Direct modeling_esm import OK (fallback)")
    except Exception as ee:
        print("❌ Direct modeling_esm import failed too:", repr(ee))

torch            : 2.8.0+cu126
transformers     : 4.56.1
huggingface_hub  : 0.35.0
✅ Top-level EsmForMaskedLM import OK


torch            : 2.8.0+cu126

transformers     : 4.56.1

huggingface_hub  : 0.35.0

✅ Top-level EsmForMaskedLM import OK


## 1) Loading Encoder Checkpoints

In [5]:

# === LLM & Encoders ===
MODEL_NAME         = "Qwen/Qwen2.5-0.5B-Instruct"   # Small-ish for Colab testing
PROTEIN_CONFIG = "/content/drive/MyDrive/LLM/Bioreasoner/protrek/weights/ProTrek_35M/esm2_t12_35M_UR50D"
STRUCTURE_CONFIG = "/content/drive/MyDrive/LLM/Bioreasoner/protrek/weights/ProTrek_35M/foldseek_t12_35M"
PROTREK_CKPT    = "/content/drive/MyDrive/LLM/Bioreasoner/protrek/weights/ProTrek_35M/ProTrek_35M.pt"
PROJECT_DIR = "/content/drive/MyDrive/LLM/Bioreasoner/testing_pipelines"
DATA_JSONL = "/content/drive/MyDrive/LLM/Bioreasoner/testing_pipelines/protein2desc_sft_ALLFOUR_c000-009_fullcot.jsonl"
OUT_DIR = "/content/drive/MyDrive/LLM/Bioreasoner/testing_notebooks/runs_colab_test"

for p in [PROJECT_DIR, DATA_JSONL, PROTEIN_CONFIG, STRUCTURE_CONFIG, PROTREK_CKPT, OUT_DIR]:
    print("✓ exists:", os.path.exists(p), p)



✓ exists: True /content/drive/MyDrive/LLM/Bioreasoner/testing_pipelines
✓ exists: True /content/drive/MyDrive/LLM/Bioreasoner/testing_pipelines/protein2desc_sft_ALLFOUR_c000-009_fullcot.jsonl
✓ exists: True /content/drive/MyDrive/LLM/Bioreasoner/protrek/weights/ProTrek_35M/esm2_t12_35M_UR50D
✓ exists: True /content/drive/MyDrive/LLM/Bioreasoner/protrek/weights/ProTrek_35M/foldseek_t12_35M
✓ exists: True /content/drive/MyDrive/LLM/Bioreasoner/protrek/weights/ProTrek_35M/ProTrek_35M.pt
✓ exists: True /content/drive/MyDrive/LLM/Bioreasoner/testing_notebooks/runs_colab_test


In [6]:
# === Prefix/Proj ===
SINGLE_TOKEN_PREFIX = False     # True -> 1 token; False -> soft prefix of length PREFIX_LEN
PREFIX_LEN          = 4
PROJ_HID            = 1024
DROPOUT             = 0.10

# === Training toggles ===
USE_LORA            = False
TRAIN_ENCODERS      = False    # True = end-to-end; False = freeze encoders
FREEZE_PROTEIN      = False    # only used if TRAIN_ENCODERS=True
FREEZE_STRUCTURE    = False    # only used if TRAIN_ENCODERS=True
GRAD_CHECKPOINT     = False

# === Misc ===
DEVICE              = "cuda" if torch.cuda.is_available() else "cpu"
MAX_LEN             = 512
BSZ                 = 2
ACCUM               = 1
LR                  = 5e-5
WARMUP_RATIO        = 0.03
EPOCHS              = 1
OUTPUT_DIR          = "runs/colab_smoketest"
LOG_EVERY           = 1

print("Device:", DEVICE)

Device: cuda


In [None]:
SUBSET_JSONL = os.path.join(PROJECT_DIR, "train_subset_100.jsonl")

# Write first 1000 non-empty lines to subset
count = 0
with open(DATA_JSONL, "r", encoding="utf-8") as fin, open(SUBSET_JSONL, "w", encoding="utf-8") as fout:
    for line in fin:
        if not line.strip():
            continue
        fout.write(line)
        count += 1
        if count >= 100:
            break

print("Wrote subset lines:", count, "->", SUBSET_JSONL)

Wrote subset lines: 100 -> /content/drive/MyDrive/LLM/Bioreasoner/testing_pipelines/train_subset_100.jsonl


In [7]:
from train_prefix_qwen import train, parse_args
SUBSET_JSONL = os.path.join(PROJECT_DIR, "train_subset_100.jsonl")

In [8]:
import types, os

SAVE_DIR = os.path.join(PROJECT_DIR, "runs_colab_test")

args = types.SimpleNamespace(
    # Data
    train_file   = SUBSET_JSONL,
    val_file     = None,
    batch_size   = 4,         # adjust if you want
    accum_steps  = 1,
    max_len      = 2560,       # keep modest for speed
    # Model
    model_name   = "Qwen/Qwen2.5-0.5B-Instruct",
    dtype        = "fp32",    # or "bf16" on A100 for speed
    prefix_len   = 4,         # try 1 or 4+
    prefix_gate  = 1.0,       # stabilizer on the soft prefix
    learnable_gate = False,
    freeze_llm   = False,     # True = projector-only
    train_encoders = True,   # keep ESM encoders frozen for speed
    # Encoders
    protein_config = PROTEIN_CONFIG,
    structure_config = STRUCTURE_CONFIG,
    protrek_ckpt  = PROTREK_CKPT,
    prot_slot     = 1,
    stru_slot     = 3,
    # Optim
    epochs      = 2,
    lr          = 1e-3,       # projector+LLM small LR
    weight_decay= 0.0,
    # Save/eval
    save_dir    = OUT_DIR,
    save_every  = 0,
    eval_every  = 0,
    # Misc
    seed        = 42,
)

# Kick off training
train(args)

Device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loaded LLM: Qwen/Qwen2.5-0.5B-Instruct | hidden_size=896 | dtype=torch.float32
[ProteinEncoder] loaded from slot 1 | missing=0 unexpected=0
[StructureEncoder] loaded from slot 3 | missing=0 unexpected=0
Finished epoch 1. Elapsed 16s
[ep 2] step 50 | loss=3.4017 | supervised_tokens=36468 | time=32s
Finished epoch 2. Elapsed 32s
Saved final checkpoint to /content/drive/MyDrive/LLM/Bioreasoner/testing_notebooks/runs_colab_test/final.pt


In [8]:
import os, json, pprint, torch

# Point this to your latest checkpoint
CKPT = "/content/drive/MyDrive/LLM/Bioreasoner/testing_notebooks/runs_colab_test/final.pt"  # <-- change if needed

sd = torch.load(CKPT, map_location="cpu")
print("final.pt keys:", list(sd.keys()))
print("global/final step:", sd.get("final_step"))
print("\nargs:")
pprint.pprint(sd.get("args"), indent=2)

opt_state = sd.get("optimizer", {})
print("\nOptimizer param groups:", len(opt_state.get("param_groups", [])))
num_states = sum(len(v) for v in opt_state.get("state", {}).values())
print("Total tensors tracked by optimizer state:", num_states)

# Encoders presence (None if not trained or not saved)
enc = sd.get("encoders", {})
print("\nEncoders saved?")
print("  protein :", "yes" if (isinstance(enc, dict) and enc.get("protein") is not None) else "no")
print("  structure:", "yes" if (isinstance(enc, dict) and enc.get("structure") is not None) else "no")

final.pt keys: ['projector', 'llm', 'encoders', 'prefix_gate', 'optimizer', 'args', 'final_step']
global/final step: 50

args:
{ 'accum_steps': 1,
  'batch_size': 4,
  'dtype': 'fp32',
  'epochs': 2,
  'eval_every': 0,
  'freeze_llm': False,
  'learnable_gate': False,
  'lr': 0.001,
  'max_len': 2560,
  'model_name': 'Qwen/Qwen2.5-0.5B-Instruct',
  'prefix_gate': 1.0,
  'prefix_len': 4,
  'prot_slot': 1,
  'protein_config': '/content/drive/MyDrive/LLM/Bioreasoner/protrek/weights/ProTrek_35M/esm2_t12_35M_UR50D',
  'protrek_ckpt': '/content/drive/MyDrive/LLM/Bioreasoner/protrek/weights/ProTrek_35M/ProTrek_35M.pt',
  'save_dir': '/content/drive/MyDrive/LLM/Bioreasoner/testing_notebooks/runs_colab_test',
  'save_every': 0,
  'seed': 42,
  'stru_slot': 3,
  'structure_config': '/content/drive/MyDrive/LLM/Bioreasoner/protrek/weights/ProTrek_35M/foldseek_t12_35M',
  'train_encoders': True,
  'train_file': '/content/drive/MyDrive/LLM/Bioreasoner/testing_pipelines/train_subset_100.jsonl',
  'va

In [9]:
# === Cell 2: Parameter delta checks vs fresh ===
import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM, AutoTokenizer

sd   = torch.load(CKPT, map_location="cpu")
args = sd["args"]

# Map dtype
DTYPE = {"fp32": torch.float32, "bf16": torch.bfloat16, "fp16": torch.float16}[args["dtype"]]
MODEL = args["model_name"]
P     = args["prefix_len"]

# --- Helpers ---
def compare_modules(m0, m1, name, topk=10):
    total, changed = 0, 0
    rel = []
    for (n0,p0),(n1,p1) in zip(m0.state_dict().items(), m1.state_dict().items()):
        assert n0 == n1, f"name mismatch: {n0} vs {n1}"
        a0 = p0.detach().to(torch.float32).view(-1)
        a1 = p1.detach().to(torch.float32).view(-1)
        total += a0.numel()
        diff = (a1 - a0)
        rel.append((n0, float(diff.norm() / (a0.norm() + 1e-12))))
        changed += int((~torch.isclose(a0, a1)).sum().item())
    print(f"[{name}] total_elems={total:,}  approx_changed_frac={changed/total:.4f}")
    print(f"[{name}] top-{topk} by relative Δ:")
    for n, r in sorted(rel, key=lambda x: -x[1])[:topk]:
        print(f"  {n:55s}  relΔ={r:.6f}")

# --- LLM (base vs trained) ---
tok = AutoTokenizer.from_pretrained(MODEL)
if tok.pad_token is None: tok.pad_token = tok.eos_token
base_llm = AutoModelForCausalLM.from_pretrained(MODEL, dtype=DTYPE)
H = base_llm.config.hidden_size
print(f"LLM hidden={H}, dtype={DTYPE}")

trained_llm_state = sd.get("llm")
if trained_llm_state is None:
    print("\nLLM was frozen (no llm weights saved). Skipping LLM delta.")
else:
    trained_llm = AutoModelForCausalLM.from_pretrained(MODEL, dtype=DTYPE)
    missing, unexpected = trained_llm.load_state_dict(trained_llm_state, strict=False)
    print("\nLoaded trained LLM. missing:", len(missing), "unexpected:", len(unexpected))
    compare_modules(base_llm, trained_llm, "llm", topk=15)

# --- Projector (fresh-arch vs trained) ---
def make_projector(hid=H, d_in=2048, P=P, dtype=DTYPE):
    m = nn.Sequential(
        nn.Linear(d_in, hid),
        nn.SiLU(),
        nn.Linear(hid, hid * P),
    )
    return m.to(dtype=torch.float32)  # store in fp32 for clean delta math

base_proj    = make_projector()
trained_proj = make_projector()
trained_proj.load_state_dict(sd["projector"], strict=True)
print("\n== Projector delta vs fresh init ==")
compare_modules(base_proj, trained_proj, "projector", topk=10)

# --- Encoders (only if saved) ---
enc_sd = sd.get("encoders", {})
if isinstance(enc_sd, dict) and (enc_sd.get("protein") is not None or enc_sd.get("structure") is not None):
    import protein_encoder as protein_encoder_mod
    import structure_encoder as structure_encoder_mod

    # Fresh (random-init) encoders from configs
    base_prot = protein_encoder_mod.ProteinEncoder(args["protein_config"], out_dim=1024, load_pretrained=False)
    base_stru = structure_encoder_mod.StructureEncoder(args["structure_config"], out_dim=1024, load_pretrained=False)

    if enc_sd.get("protein") is not None:
        trained_prot = protein_encoder_mod.ProteinEncoder(args["protein_config"], out_dim=1024, load_pretrained=False)
        trained_prot.load_state_dict(enc_sd["protein"], strict=False)
        print("\n== ProteinEncoder delta vs fresh init ==")
        compare_modules(base_prot, trained_prot, "protein_encoder", topk=10)
    else:
        print("\nProteinEncoder not saved (probably not trained).")

    if enc_sd.get("structure") is not None:
        trained_stru = structure_encoder_mod.StructureEncoder(args["structure_config"], out_dim=1024, load_pretrained=False)
        trained_stru.load_state_dict(enc_sd["structure"], strict=False)
        print("\n== StructureEncoder delta vs fresh init ==")
        compare_modules(base_stru, trained_stru, "structure_encoder", topk=10)
    else:
        print("\nStructureEncoder not saved (probably not trained).")
else:
    print("\nNo encoders saved in checkpoint (encoders likely frozen or not saved).")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


LLM hidden=896, dtype=torch.float32

Loaded trained LLM. missing: 0 unexpected: 0
[llm] total_elems=630,167,424  approx_changed_frac=1.0000
[llm] top-15 by relative Δ:
  model.layers.0.self_attn.v_proj.weight                   relΔ=0.546569
  model.layers.2.self_attn.v_proj.weight                   relΔ=0.538066
  model.layers.1.self_attn.v_proj.weight                   relΔ=0.522997
  model.layers.0.self_attn.o_proj.weight                   relΔ=0.459119
  model.layers.0.self_attn.v_proj.bias                     relΔ=0.421852
  model.embed_tokens.weight                                relΔ=0.414175
  lm_head.weight                                           relΔ=0.414175
  model.layers.1.self_attn.o_proj.weight                   relΔ=0.399209
  model.layers.2.self_attn.o_proj.weight                   relΔ=0.395096
  model.layers.3.self_attn.v_proj.weight                   relΔ=0.371228
  model.layers.6.self_attn.v_proj.weight                   relΔ=0.366560
  model.layers.8.self_attn.v_

In [10]:
# === Cell 3: Functional loss check (baseline zero prefix vs trained protein prefix) ===
import os, json, itertools, torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
import protein_encoder as protein_encoder_mod
import structure_encoder as structure_encoder_mod
import torch.nn as nn

sd   = torch.load(CKPT, map_location="cpu")
args = sd["args"]

MODEL = args["model_name"]
P     = args["prefix_len"]
DTYPE = {"fp32": torch.float32, "bf16": torch.bfloat16, "fp16": torch.float16}[args["dtype"]]

# Small eval slice (use your train jsonl or a held-out jsonl)
EVAL_JSONL = args["train_file"]  # or a separate val file path
N_EVAL     = 100

examples = []
with open(EVAL_JSONL, "r", encoding="utf-8") as f:
    for line in itertools.islice((l for l in f if l.strip()), N_EVAL):
        examples.append(json.loads(line))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer & LLM (trained weights if present)
tok = AutoTokenizer.from_pretrained(MODEL)
if tok.pad_token is None: tok.pad_token = tok.eos_token

llm = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=DTYPE).to(device)
if sd.get("llm") is not None:
    llm.load_state_dict(sd["llm"], strict=False)
llm.eval()
we = llm.get_input_embeddings()
H  = llm.config.hidden_size

# Projector (trained)
def make_projector(hid=H, d_in=2048, P=P):
    return nn.Sequential(
        nn.Linear(d_in, hid),
        nn.SiLU(),
        nn.Linear(hid, hid * P),
    )

projector = make_projector().to(device, dtype=DTYPE)
projector.load_state_dict(sd["projector"], strict=True)
projector.eval()

# Gate
pg = sd.get("prefix_gate")
if isinstance(pg, torch.Tensor):
    prefix_gate = pg.to(device=device, dtype=DTYPE)
else:
    prefix_gate = torch.tensor(float(pg) if pg is not None else 1.0, device=device, dtype=DTYPE)

# Encoders: load base from configs, then overlay trained encoders if saved
prot_enc = protein_encoder_mod.ProteinEncoder(args["protein_config"], out_dim=1024, load_pretrained=False).eval()
stru_enc = structure_encoder_mod.StructureEncoder(args["structure_config"], out_dim=1024, load_pretrained=False).eval()
enc_sd = sd.get("encoders", {})
if isinstance(enc_sd, dict):
    if enc_sd.get("protein") is not None:
        prot_enc.load_state_dict(enc_sd["protein"], strict=False)
    if enc_sd.get("structure") is not None:
        stru_enc.load_state_dict(enc_sd["structure"], strict=False)

# Simple batch builder for eval (text only)
def build_text_batch(batch, max_len=256):
    prompts   = [b["prompt"] for b in batch]
    responses = [b["response"] for b in batch]
    enc_p = tok(prompts, add_special_tokens=False)
    enc_r = tok([r + tok.eos_token for r in responses], add_special_tokens=False)

    ids_list, prompt_lens = [], []
    T_max = 0
    for i in range(len(batch)):
        ids_p = enc_p["input_ids"][i]
        ids_r = enc_r["input_ids"][i]
        ids   = (ids_p + ids_r)[: max_len]
        ids_list.append(ids)
        p_keep = min(len(ids_p), len(ids))
        prompt_lens.append(p_keep)
        T_max = max(T_max, len(ids))

    pad_id = tok.pad_token_id
    input_ids = torch.full((len(batch), T_max), pad_id, dtype=torch.long, device=device)
    attn_mask = torch.zeros(len(batch), T_max, dtype=torch.long, device=device)
    labels    = torch.full((len(batch), T_max), -100, dtype=torch.long, device=device)
    for i, ids in enumerate(ids_list):
        t = len(ids)
        input_ids[i, :t] = torch.tensor(ids, dtype=torch.long, device=device)
        attn_mask[i, :t] = 1
        L = [-100]*prompt_lens[i] + ids[prompt_lens[i]:]
        labels[i, :t]    = torch.tensor(L, dtype=torch.long, device=device)
    return input_ids, attn_mask, labels

@torch.no_grad()
def eval_split(examples, with_protein: bool, batch_size=8, max_len=256):
    total, count = 0.0, 0
    for i in range(0, len(examples), batch_size):
        batch = examples[i:i+batch_size]
        input_ids, attn_t, labels_t = build_text_batch(batch, max_len=max_len)
        B, T = input_ids.shape

        text_emb = we(input_ids).to(DTYPE)

        if with_protein:
            # compute protein vectors on CPU, cast to device
            aa_list   = [b.get("aa_seq") for b in batch]
            stru_list = [b.get("stru_str") for b in batch]
            # Encode only non-empty items; rebuild to Bx2048
            def encode_list(enc, xs):
                idxs = [k for k,x in enumerate(xs) if x]
                vecs = None
                if idxs:
                    seqs = [xs[k] for k in idxs]
                    vecs = enc.get_repr(seqs, batch_size=max(1,len(seqs)), verbose=False).cpu()  # (n,1024)
                full = torch.zeros(len(xs), 1024, dtype=torch.float32)
                if idxs:
                    for j,k in enumerate(idxs): full[k] = vecs[j]
                return full
            prot = encode_list(prot_enc, aa_list)
            stru = encode_list(stru_enc, stru_list)
            pvec = torch.cat([prot, stru], dim=1).to(device).to(DTYPE)  # (B,2048)
        else:
            pvec = torch.zeros(B, 2048, dtype=torch.float32, device=device).to(DTYPE)

        pref = projector(pvec).view(B, P, H) * prefix_gate
        inputs = torch.cat([pref, text_emb], dim=1)
        attn   = torch.cat([torch.ones(B, P, dtype=torch.long, device=device), attn_t], dim=1)
        labs   = torch.cat([torch.full((B, P), -100, dtype=torch.long, device=device), labels_t], dim=1)

        out  = llm(inputs_embeds=inputs, attention_mask=attn, use_cache=False)
        log  = out.logits[:, :-1, :].contiguous().float()
        lab  = labs[:, 1:].contiguous()
        loss = F.cross_entropy(log.view(-1, log.size(-1)), lab.view(-1), ignore_index=-100, reduction="mean")
        total += float(loss.detach().cpu()); count += 1
    return total / max(count, 1)

loss_zero   = eval_split(examples, with_protein=False, batch_size=8, max_len=256)
loss_trained= eval_split(examples, with_protein=True,  batch_size=8, max_len=256)
print(f"Baseline (zero prefix) loss:  {loss_zero:.4f}")
print(f"Trained  (protein prefix) loss: {loss_trained:.4f}  (lower is better)")

`torch_dtype` is deprecated! Use `dtype` instead!


Baseline (zero prefix) loss:  6.4565
Trained  (protein prefix) loss: 6.4557  (lower is better)
