In [None]:
# 1) In the very first cell of your notebook:
from huggingface_hub import notebook_login
notebook_login()  # This will prompt you to paste your HF token

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Install dependencies
!pip install -q bitsandbytes transformers datasets peft accelerate safetensors


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m33.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m37.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m125.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m102.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
!pip install -q "transformers>=4.38" datasets accelerate peft bitsandbytes safetensors

In [None]:
!pip install -U transformers accelerate bitsandbytes datasets



In [None]:
#!/usr/bin/env python3
# finetune_mistral_lora_v2.py
"""
Fine-tune Mistral-7B with LoRA on NBA-draft prompt-completion pairs.

👉  Requirements
    pip install "transformers>=4.38" datasets accelerate peft bitsandbytes

Run on Colab (A100 40 GB) ~25-30 min for 3 epochs, batch-equiv ≈ 32.
"""

# ───────────────────────────── paths ──────────────────────────────
JSONL_PATH = "/content/drive/MyDrive/nbadraft/train_samples_vorp2.jsonl"
OUTPUT_DIR = "/content/drive/MyDrive/nbadraft/mistral-lora-v13"
BASE_MODEL = "mistralai/Mistral-7B-v0.1"

# ──────────────────────────── imports ─────────────────────────────
import torch, json, math, warnings, os
from datasets import load_dataset, Dataset
from transformers import (AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,
                          TrainingArguments, Trainer, DataCollatorForLanguageModeling)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

warnings.filterwarnings("ignore")

def load_jsonl(path):
    with open(path, encoding="utf-8") as f:
        return Dataset.from_list([json.loads(l) for l in f])

def tokenize_and_pack(tokenizer, ds, block_size=2048):
    """
    Tokenises and concatenates → re-chunks into fixed blocks (GPT-style packing).
    """
    def tok(example):
        ids = tokenizer(example["text"], add_special_tokens=False)["input_ids"]
        return {"ids": ids}

    ds = ds.map(tok, remove_columns=ds.column_names, num_proc=4)

    # -- flatten then regroup
    all_ids = sum(ds["ids"], [])
    n_blocks = len(all_ids) // block_size
    blocks   = [all_ids[i*block_size:(i+1)*block_size] for i in range(n_blocks)]

    return Dataset.from_list([{"input_ids": b, "labels": b.copy()} for b in blocks])

def main():

    # 1. dataset ----------------------------------------------------
    raw_ds  = load_jsonl(JSONL_PATH)
    raw_ds  = raw_ds.shuffle(seed=42)
    split   = raw_ds.train_test_split(test_size=0.05, seed=42)
    print(f"📊  train={len(split['train'])}  eval={len(split['test'])}")

    # 2. tokenizer --------------------------------------------------
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token  # safe-guard

    # 3. tokenise & pack -------------------------------------------
    train_ds = tokenize_and_pack(tokenizer, split["train"])
    eval_ds  = tokenize_and_pack(tokenizer, split["test"])

    # 4. 4-bit base model ------------------------------------------
    bnb_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )

    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=bnb_cfg,
        device_map="auto",
        trust_remote_code=True
    )

    base = prepare_model_for_kbit_training(base)    # layernorm to fp32, etc.

    # 5. LoRA config ----------------------------------------------
    lora_cfg = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=[
            "q_proj","k_proj","v_proj","o_proj",          # attention
            "gate_proj","up_proj","down_proj"            # Mistral's MLP (SwiGLU)
        ],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    model = get_peft_model(base, lora_cfg)
    model.print_trainable_parameters()

    # 6. data-collator (already packed) ----------------------------
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )

    # 7. training args --------------------------------------------
    steps_per_epoch = math.ceil(len(train_ds)/8)    # batch=8 below
    warmup_steps    = int(0.05 * steps_per_epoch * 3)

    args = TrainingArguments(
        output_dir           = OUTPUT_DIR,
        num_train_epochs     = 5,
        per_device_train_batch_size = 8,
        per_device_eval_batch_size  = 8,
        gradient_accumulation_steps = 4,          # ⇒ effective 32
        learning_rate        = 2e-4,
        lr_scheduler_type    = "cosine",
        warmup_steps         = warmup_steps,
        logging_steps        = 25,
        eval_strategy        = "epoch",
        save_strategy        = "epoch",
        save_total_limit     = 2,
        load_best_model_at_end = True,
        fp16                 = True,
        gradient_checkpointing = True,
        optim                = "paged_adamw_32bit",
        report_to            = "none",
        #flash_attention_2    = True               # comment if GPU < A100
    )

    # 8. Trainer ---------------------------------------------------
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        tokenizer=tokenizer,
        data_collator=data_collator
    )

    trainer.train()

    # 9. save ------------------------------------------------------
    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"✅  LoRA adapter saved → {OUTPUT_DIR}")

if __name__ == "__main__":
    main()


📊  train=849  eval=45


tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Map (num_proc=4):   0%|          | 0/849 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/45 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 41,943,040 || all params: 7,283,675,136 || trainable%: 0.5758


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
0,No log,0.613526
1,0.713500,0.524524
2,0.713500,0.497372


✅  LoRA adapter saved → /content/drive/MyDrive/nbadraft/mistral-lora-v12


In [None]:
#!/usr/bin/env python3
# finetune_mistral_lora_v2.py
"""
Fine-tune Mistral-7B with LoRA on NBA-draft prompt-completion pairs.

👉  Requirements
    pip install "transformers>=4.38" datasets accelerate peft bitsandbytes

Run on Colab (A100 40 GB) ~25-30 min for 3 epochs, batch-equiv ≈ 32.
"""

# ───────────────────────────── paths ──────────────────────────────
JSONL_PATH = "/content/drive/MyDrive/nbadraft/train_samples_reason.jsonl"
OUTPUT_DIR = "/content/drive/MyDrive/nbadraft/mistral-lora-v9"
BASE_MODEL = "mistralai/Mistral-7B-v0.1"

# ──────────────────────────── imports ─────────────────────────────
import torch, json, math, warnings, os
from datasets import load_dataset, Dataset
from transformers import (AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,
                          TrainingArguments, Trainer, DataCollatorForLanguageModeling)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

warnings.filterwarnings("ignore")

def load_jsonl(path):
    with open(path, encoding="utf-8") as f:
        return Dataset.from_list([json.loads(l) for l in f])

def tokenize_and_pack(tokenizer, ds, block_size=2048):
    """
    Tokenises and concatenates → re-chunks into fixed blocks (GPT-style packing).
    """
    def tok(example):
        ids = tokenizer(example["text"], add_special_tokens=False)["input_ids"]
        return {"ids": ids}

    ds = ds.map(tok, remove_columns=ds.column_names, num_proc=4)

    # -- flatten then regroup
    all_ids = sum(ds["ids"], [])
    n_blocks = len(all_ids) // block_size
    blocks   = [all_ids[i*block_size:(i+1)*block_size] for i in range(n_blocks)]

    return Dataset.from_list([{"input_ids": b, "labels": b.copy()} for b in blocks])

def main():

    # 1. dataset ----------------------------------------------------
    raw_ds  = load_jsonl(JSONL_PATH)
    raw_ds  = raw_ds.shuffle(seed=42)
    split   = raw_ds.train_test_split(test_size=0.05, seed=42)
    print(f"📊  train={len(split['train'])}  eval={len(split['test'])}")

    # 2. tokenizer --------------------------------------------------
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token  # safe-guard

    # 3. tokenise & pack -------------------------------------------
    train_ds = tokenize_and_pack(tokenizer, split["train"])
    eval_ds  = tokenize_and_pack(tokenizer, split["test"])

    # 4. 4-bit base model ------------------------------------------
    bnb_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )

    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=bnb_cfg,
        device_map="auto",
        trust_remote_code=True
    )

    base = prepare_model_for_kbit_training(base)    # layernorm to fp32, etc.

    # 5. LoRA config ----------------------------------------------
    lora_cfg = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=[
            "q_proj","k_proj","v_proj","o_proj",          # attention
            "gate_proj","up_proj","down_proj"            # Mistral's MLP (SwiGLU)
        ],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    model = get_peft_model(base, lora_cfg)
    model.print_trainable_parameters()

    # 6. data-collator (already packed) ----------------------------
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )

    # 7. training args --------------------------------------------
    steps_per_epoch = math.ceil(len(train_ds)/8)    # batch=8 below
    warmup_steps    = int(0.05 * steps_per_epoch * 3)

    args = TrainingArguments(
        output_dir           = OUTPUT_DIR,
        num_train_epochs     = 3,
        per_device_train_batch_size = 8,
        per_device_eval_batch_size  = 8,
        gradient_accumulation_steps = 4,          # ⇒ effective 32
        learning_rate        = 2e-4,
        lr_scheduler_type    = "cosine",
        warmup_steps         = warmup_steps,
        logging_steps        = 25,
        eval_strategy        = "epoch",
        save_strategy        = "epoch",
        save_total_limit     = 2,
        load_best_model_at_end = True,
        fp16                 = True,
        gradient_checkpointing = True,
        optim                = "paged_adamw_32bit",
        report_to            = "none",
        #flash_attention_2    = True               # comment if GPU < A100
    )

    # 8. Trainer ---------------------------------------------------
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        tokenizer=tokenizer,
        data_collator=data_collator
    )

    trainer.train()

    # 9. save ------------------------------------------------------
    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"✅  LoRA adapter saved → {OUTPUT_DIR}")

if __name__ == "__main__":
    main()


📊  train=796  eval=42


Map (num_proc=4):   0%|          | 0/796 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/42 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 41,943,040 || all params: 7,283,675,136 || trainable%: 0.5758


Epoch,Training Loss,Validation Loss
1,No log,0.511983
2,0.404500,0.39757


✅  LoRA adapter saved → /content/drive/MyDrive/nbadraft/mistral-lora-v9


In [None]:
#!/usr/bin/env python3
# finetune_mistral_lora_v3.py
"""
LoRA-fine-tune Mistral-7B on NBA-draft prompt/completion pairs.
"""

# ───────────────────────── paths ─────────────────────────
JSONL_PATH = "/content/drive/MyDrive/nbadraft/train_samples_reason.jsonl"
OUTPUT_DIR = "/content/drive/MyDrive/nbadraft/mistral-lora-v6"
BASE_MODEL = "mistralai/Mistral-7B-v0.1"
EOS        = "<|endoftext|>"

# ───────────────────────── imports ───────────────────────
import json, math, warnings, torch
from datasets     import Dataset
from transformers import (AutoTokenizer, AutoModelForCausalLM,
                          BitsAndBytesConfig, TrainingArguments, Trainer,
                          DataCollatorWithPadding)
from peft         import LoraConfig, get_peft_model, prepare_model_for_kbit_training
warnings.filterwarnings("ignore")
from transformers import DataCollatorWithPadding

# ─────────────────── helpers ─────────────────────────────
def load_jsonl(path) -> Dataset:
    with open(path, encoding="utf-8") as f:
        return Dataset.from_list([json.loads(l) for l in f])

def build_example(ex, tokenizer):
    """
    • Ensure text ends with EOS
    • Produce input_ids only (labels will be added in the collator)
    """
    text = ex["text"].rstrip()
    if not text.endswith(EOS):
        text += " " + EOS
    ex["input_ids"] = tokenizer(text, add_special_tokens=False)["input_ids"]
    return ex



class CausalCollator(DataCollatorWithPadding):
    """
    • pads a batch to the longest seq
    • clones input_ids → labels
    • masks the padding tokens with -100
    This version works on *all* Transformers ≥4.28.
    """
    def __init__(self, tokenizer, pad_to_multiple_of: int | None = None):
        super().__init__(
            tokenizer            = tokenizer,
            pad_to_multiple_of   = pad_to_multiple_of,
            return_tensors       = "pt"          # older versions need it here
        )

    def __call__(self, features):
        batch  = super().__call__(features)      # ← no kwargs!
        labels = batch["input_ids"].clone()
        labels[batch["attention_mask"] == 0] = -100
        batch["labels"] = labels
        return batch

# ─────────────────── main ────────────────────────────────
def main():
    # 1️⃣  dataset --------------------------------------------------
    raw_ds = load_jsonl(JSONL_PATH).shuffle(seed=42)
    split  = raw_ds.train_test_split(test_size=0.05, seed=42)

    # 2️⃣  tokenizer ------------------------------------------------
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    # 3️⃣  tokenise -------------------------------------------------
    fn = lambda ex: build_example(ex, tokenizer)
    train_ds = split["train"].map(fn, remove_columns=split["train"].column_names)
    eval_ds  = split["test"] .map(fn, remove_columns=split["test"].column_names)
    print(f"📊  train={len(train_ds)}  eval={len(eval_ds)}")

    # 4️⃣  4-bit base model ----------------------------------------
    bnb_cfg = BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )
    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL, device_map="auto",
        quantization_config=bnb_cfg, trust_remote_code=True)
    base = prepare_model_for_kbit_training(base)

    # 5️⃣  LoRA -----------------------------------------------------
    lora_cfg = LoraConfig(
        r=16, lora_alpha=48, lora_dropout=0.05, bias="none",
        target_modules=["q_proj","k_proj","v_proj","o_proj",
                        "gate_proj","up_proj","down_proj","embed_tokens"],
        task_type="CAUSAL_LM"
    )
    model = get_peft_model(base, lora_cfg)
    model.print_trainable_parameters()

    # 6️⃣  collator -------------------------------------------------
    data_collator = CausalCollator(tokenizer, pad_to_multiple_of=8)

    # 7️⃣  training args -------------------------------------------
    EFFECTIVE_BSZ = 32
    PER_DEV_BSZ   = 8
    grad_acc      = EFFECTIVE_BSZ // PER_DEV_BSZ
    steps_per_epoch = math.ceil(len(train_ds)/PER_DEV_BSZ/grad_acc)
    warmup_steps    = int(0.02 * steps_per_epoch * 3)

    args = TrainingArguments(
        output_dir               = OUTPUT_DIR,
        num_train_epochs         = 3,
        per_device_train_batch_size = PER_DEV_BSZ,
        per_device_eval_batch_size  = PER_DEV_BSZ,
        gradient_accumulation_steps = grad_acc,
        learning_rate            = 2e-4,
        lr_scheduler_type        = "cosine",
        warmup_steps             = warmup_steps,
        logging_steps            = 25,
        eval_strategy            = "epoch",
        save_strategy            = "epoch",
        save_total_limit         = 2,
        load_best_model_at_end   = True,
        fp16                     = True,
        gradient_checkpointing   = True,
        optim                    = "paged_adamw_32bit",
        report_to                = "none",
    )

    # 8️⃣  Trainer --------------------------------------------------
    trainer = Trainer(
        model=model, args=args,
        train_dataset=train_ds, eval_dataset=eval_ds,
        tokenizer=tokenizer, data_collator=data_collator
    )
    trainer.train()

    # 9️⃣  save -----------------------------------------------------
    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"✅  adapter saved → {OUTPUT_DIR}")

if __name__ == "__main__":
    main()



Map:   0%|          | 0/796 [00:00<?, ? examples/s]

Map:   0%|          | 0/42 [00:00<?, ? examples/s]

📊  train=796  eval=42


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 42,520,576 || all params: 7,284,252,672 || trainable%: 0.5837


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
1,0.6885,0.523281
2,0.4352,0.373681
3,0.2821,0.292535


✅  adapter saved → /content/drive/MyDrive/nbadraft/mistral-lora-v6


In [None]:
import transformers, inspect
print(transformers.__version__)               # should be ≥ 4.40
print(inspect.signature(transformers.TrainingArguments))

4.51.3


In [None]:
# infer_mistral_lora.py
"""
Run inference on a fine-tuned Mistral-7B + LoRA adapter.
Print N random examples with model prediction vs. gold answer
and report exact-match accuracy.
"""

import json, random, argparse, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# ---------- defaults (edit to taste) ----------
MODEL_NAME   = "mistralai/Mistral-7B-v0.1"
DEFAULT_ADPT = "/content/drive/MyDrive/nbadraft/mistral-lora-v2"
DEFAULT_DATA = "/content/drive/MyDrive/nbadraft/test_samples_clean.jsonl"
# ---------------------------------------------

def load_lora(base_name, adapter_path):
    bnb = BitsAndBytesConfig(load_in_8bit=True,
                             llm_int8_enable_fp32_cpu_offload=True)
    base = AutoModelForCausalLM.from_pretrained(
        base_name, device_map="auto", trust_remote_code=True,
        quantization_config=bnb
    )
    model = PeftModel.from_pretrained(base, adapter_path, device_map="auto")
    tok   = AutoTokenizer.from_pretrained(base_name, trust_remote_code=True)
    tok.pad_token = tok.eos_token
    model.eval()
    return model, tok

@torch.inference_mode()
def generate(model, tok, prompt, max_new=192, temperature=0.4, top_p=0.85):
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs.input_ids.shape[1]
    out_ids = model.generate(**inputs,
                             max_new_tokens=max_new,
                             do_sample=True,
                             top_p=top_p,
                             temperature=temperature,
                             pad_token_id=tok.eos_token_id,
                             eos_token_id=tok.eos_token_id)
    return tok.decode(out_ids[0][prompt_len:], skip_special_tokens=True)

def run(args):
    # load test set
    with open(args.data, encoding="utf-8") as f:
        rows = [json.loads(l) for l in f]

    random.seed(args.seed)
    sample = random.sample(rows, min(args.num, len(rows)))

    model, tok = load_lora(MODEL_NAME, args.adapter)

    correct = 0
    for i, ex in enumerate(sample, 1):
        prompt_full = ex["prompt"] if "prompt" in ex else ex["text"].split("### Response:")[0]
        gold_pick   = ex["completion"].split("Pick:")[1].split("(")[0].strip() \
                      if "completion" in ex else "?"

        pred = generate(model, tok, prompt_full, max_new=args.max_new)

        print(f"\n=== Example {i} ===")
        print(prompt_full)
        print("\n--- Model ----")
        print(pred)
        print("--------------")
        print("GOLD:", gold_pick)

        if gold_pick and gold_pick.lower() in pred.lower():
            correct += 1

    acc = correct / len(sample)
    print(f"\nExact-match accuracy (name appears in output): {acc:.2%} "
          f"({correct}/{len(sample)})")

def cli():
    ap = argparse.ArgumentParser()
    ap.add_argument("--adapter", default=DEFAULT_ADPT, help="LoRA adapter dir")
    ap.add_argument("--data",    default=DEFAULT_DATA, help="JSONL file of test prompts")
    ap.add_argument("-n","--num", type=int, default=10, help="#examples to sample")
    ap.add_argument("--max_new", type=int, default=192)
    ap.add_argument("--seed",    type=int, default=42)
    # parse *known* to ignore the unwanted -f flag from Jupyter
    args, _ = ap.parse_known_args()
    run(args)

if __name__ == "__main__":
    cli()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]


=== Example 1 ===
Team: Washington Wizards
Position of Pick: SG
Context: The Wizards’ primary needs are at point guard and center, but more than anything, they crave a star — casting a wide net for whoever projects to be one, from Alex Sarr to Zaccharie Risacher.

Available Players:
- Cam Spencer (SG, UConn) – Height: 6 ft 4 in | Weight: 205 lb | DOB: 2000-04-06 | Awards: NCAA champion ( 2024 ), First-team All-Big East (2024), First-team All- Patriot League (2022), First-team Academic All-American ( 2024 ) | Strengths: Excellent 3-point shooter • High basketball IQ | Weaknesses: Limited athleticism • Defensive liabilities | Stats: PTS: 14.3 | 3P%: 0.44 | AST: 3.6 | TRB: 4.9 | STL: 1.5 | BLK: 0.3 | FG%: 0.484 | FT%: 0.911 | TOV: 1.0 | G: 40.0 | NBA_VORP_3yr: 0.2
- Bronny James (SG, USC) – Height: 6 ft 3 in | Weight: 210 lb | DOB: 2004-10-06 | Awards: McDonald's All-American ( 2023 ) | Strengths: Explosive and athletic combo guard • Excellent finisher through contact | Weaknesses: Lacks

In [None]:
#!/usr/bin/env python3
# run_inference.py
# ------------------------------------------------------------
# Quick deterministic test on the *first 10* examples
# of the evaluation set built earlier.
# ------------------------------------------------------------
import json, torch, re
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# ---------- paths ------------------------------------------------------------
BASE_MODEL       = "mistralai/Mistral-7B-v0.1"
LORA_ADAPTER_DIR = "/content/drive/MyDrive/nbadraft/mistral-lora-v2"
TEST_FILE        = "/content/drive/MyDrive/nbadraft/test_samples_clean.jsonl"
N_EXAMPLES       = 10                           # use first 10 rows
MAX_NEW_TOKENS   = 64

# ---------- load model -------------------------------------------------------
print("⌛  loading model …")
bnb = BitsAndBytesConfig(load_in_8bit=True,
                         llm_int8_enable_fp32_cpu_offload=True)

base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        device_map="auto",
        quantization_config=bnb,
        trust_remote_code=True)
model = PeftModel.from_pretrained(base, LORA_ADAPTER_DIR, device_map="auto")
model.eval()

tok  = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tok.pad_token = tok.eos_token

# ---------- helper -----------------------------------------------------------
def extract_gold(text_block: str) -> str:
    """
    text_block == entire training sample (prompt + answer + <|endoftext|>)
    """
    try:
        answer = text_block.split("### Response:")[1]
        pick   = re.search(r"Pick:\s*([^\(]+)", answer).group(1).strip().lower()
        return pick
    except Exception:
        return ""

@torch.inference_mode()
def predict(prompt: str) -> str:
    ids = tok(prompt, return_tensors="pt").to(model.device)
    out = model.generate(**ids,
                         max_new_tokens=MAX_NEW_TOKENS,
                         do_sample=False,               # greedy
                         eos_token_id=tok.eos_token_id)
    gen = tok.decode(out[0][ids.input_ids.shape[1]:],
                     skip_special_tokens=True).strip()

    # only keep first line that starts with "Pick:"
    m = re.search(r"Pick:\s*([^\(]+)", gen)
    return m.group(1).strip().lower() if m else ""

# ---------- read first 10 examples ------------------------------------------
with open(TEST_FILE, encoding="utf-8") as f:
    rows = [json.loads(l)["text"] for _, l in zip(range(N_EXAMPLES), f)]

correct = 0
print(f"\n=== Testing first {N_EXAMPLES} examples ===\n")

for i, full in enumerate(rows, 1):
    prompt_part  = full.split("### Response:")[0].rstrip()
    gold_pick    = extract_gold(full)

    # ----- prepend system instruction
    system = ("You are a draft analyst.  "
              "Answer ONLY in the format:\n"
              "Pick: <PLAYER> (<POS>)\n"
              "Why: <short explanation>\n\n")
    prompt = system + prompt_part + "\n### Response:"

    pred_pick = predict(prompt)

    is_ok = (pred_pick == gold_pick)
    correct += is_ok

    print(f"--- Example {i} ---")
    print(f"GOLD : {gold_pick}")
    print(f"PRED : {pred_pick}   {'✅' if is_ok else '❌'}\n")

acc = correct / len(rows)
print(f"Exact-match accuracy: {correct}/{len(rows)}  =  {acc:.1%}")


⌛  loading model …


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== Testing first 10 examples ===



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- Example 1 ---
GOLD : victor wembanyama
PRED : victor wembanyama   ✅



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- Example 2 ---
GOLD : brandon miller
PRED : leonard miller   ❌



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- Example 3 ---
GOLD : scoot henderson
PRED : scoot henderson   ✅



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- Example 4 ---
GOLD : amen thompson
PRED : yuri collins   ❌



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- Example 5 ---
GOLD : ausar thompson
PRED : jalen wilson   ❌



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- Example 6 ---
GOLD : anthony black
PRED : nick smith jr.   ❌



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- Example 7 ---
GOLD : bilal coulibaly
PRED : bilal coulibaly   ✅



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- Example 8 ---
GOLD : jarace walker
PRED : jarace walker   ✅



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- Example 9 ---
GOLD : taylor hendricks
PRED : gg jackson   ❌

--- Example 10 ---
GOLD : cason wallace
PRED : cason wallace   ✅

Exact-match accuracy: 5/10  =  50.0%


In [None]:
#!/usr/bin/env python3
# finetune_mistral_lora_v7_fix.py
"""
LoRA-fine-tune Mistral-7B with masked loss (prompt muted → labels = -100).
"""

# ───────────────────────── paths ─────────────────────────
JSONL_PATH = "/content/drive/MyDrive/nbadraft/train_samples_reason.jsonl"
OUTPUT_DIR = "/content/drive/MyDrive/nbadraft/mistral-lora-v8"
BASE_MODEL = "mistralai/Mistral-7B-v0.1"

# ───────────────────────── imports ───────────────────────
import json, math, warnings, torch, re
from datasets import Dataset
from transformers import (AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig, TrainingArguments, Trainer,
    DataCollatorWithPadding)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
warnings.filterwarnings("ignore")

# ============== helpers ======================================================

EOS = "<|endoftext|>"
PROMPT_END_RGX = re.compile(r"###\s*Response:", re.I)

def load_jsonl(fp):
    with open(fp, encoding="utf-8") as f:
        return Dataset.from_list([json.loads(l) for l in f])

def mask_example(ex, tokenizer):
    txt = ex["text"].rstrip()
    if not txt.endswith(EOS):
        txt += " " + EOS

    m = PROMPT_END_RGX.search(txt)
    assert m, "### Response: missing"
    pick_pos = txt.lower().find("pick:", m.end())
    assert pick_pos != -1, "'Pick:' not found after ### Response"

    ids_prompt = tokenizer(txt[:pick_pos], add_special_tokens=False)["input_ids"]
    ids_total  = tokenizer(txt,          add_special_tokens=False)["input_ids"]

    labels = [-100]*len(ids_prompt) + ids_total[len(ids_prompt):]
    assert len(labels) == len(ids_total)

    ex.update(
        input_ids=ids_total,
        attention_mask=[1]*len(ids_total),
        labels=labels
    )
    return ex

# ---------- smart collator ---------------------------------------------------
class SmartCollator:
    """pad input_ids & attention_mask with tokenizer.pad, pad labels with -100"""
    def __init__(self, tokenizer):
        self.pad = DataCollatorWithPadding(tokenizer, padding="longest")

    def __call__(self, features):
        batch = self.pad([{k:v for k,v in f.items() if k != "labels"} for f in features])

        max_len = batch["input_ids"].shape[1]
        padded_labels = []
        for f in features:
            lab = f["labels"] + [-100]*(max_len-len(f["labels"]))
            padded_labels.append(lab)
        batch["labels"] = torch.tensor(padded_labels)
        return batch

# ============== main =========================================================
def main():
    # 1. data
    raw   = load_jsonl(JSONL_PATH).shuffle(seed=42)
    split = raw.train_test_split(test_size=0.05, seed=42)

    # 2. tokenizer
    tok = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    tok.pad_token = tok.eos_token

    # 3. tokenise + mask
    train_ds = split["train"].map(lambda e: mask_example(e, tok),
                                  remove_columns=split["train"].column_names,
                                  num_proc=4)
    eval_ds  = split["test"] .map(lambda e: mask_example(e, tok),
                                  remove_columns=split["test"].column_names,
                                  num_proc=4)
    print(f"📊  train={len(train_ds)}  eval={len(eval_ds)}")

    # 4. base model
    bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
                             bnb_4bit_compute_dtype=torch.float16,
                             bnb_4bit_use_double_quant=True)
    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL, device_map="auto",
        quantization_config=bnb, trust_remote_code=True)
    base = prepare_model_for_kbit_training(base)

    # 5. LoRA
    lora_cfg = LoraConfig(
        r=16, lora_alpha=48, lora_dropout=0.05, bias="none",
        target_modules=["q_proj","k_proj","v_proj","o_proj",
                        "gate_proj","up_proj","down_proj"],
        task_type="CAUSAL_LM")
    model = get_peft_model(base, lora_cfg)
    model.print_trainable_parameters()

    # 6. training args
    #real_bsz, eff_bsz = 4, 32
    #grad_acc          = eff_bsz // real_bsz
    #steps_ep          = math.ceil(len(train_ds)/real_bsz/grad_acc)
    #warmup            = int(0.02*steps_ep*3)


    # 6️⃣ training-schedule helpers
    EPOCHS             = 5
    EFFECTIVE_BSZ      = 32
    BATCH              = 8                        # per-device
    GRAD_ACC           = EFFECTIVE_BSZ // BATCH   # =4
    steps_per_epoch    = math.ceil(len(train_ds) / BATCH / GRAD_ACC)
    total_steps        = steps_per_epoch * EPOCHS   # ←  compute this
    warmup_steps       = int(0.05 * total_steps)    # 5 % warm-up



    args = TrainingArguments(
        output_dir               = OUTPUT_DIR,
        num_train_epochs         = EPOCHS,
        per_device_train_batch_size = BATCH,
        per_device_eval_batch_size  = BATCH,
        gradient_accumulation_steps = GRAD_ACC,
        learning_rate            = 1e-4,      # lower LR
        max_grad_norm            = 0.3,
        lr_scheduler_type        = "cosine",
        warmup_steps             = warmup_steps,
        logging_steps            = 25,
        eval_strategy      = "epoch",
        save_strategy            = "epoch",
        save_total_limit         = 2,
        load_best_model_at_end   = True,
        fp16                     = True,
        gradient_checkpointing   = True,
        optim                    = "paged_adamw_32bit",
        report_to                = "none",
    )


    # 7. trainer
    trainer = Trainer(model=model, args=args,
                      train_dataset=train_ds, eval_dataset=eval_ds,
                      tokenizer=tok,
                      data_collator=SmartCollator(tok))
    trainer.train()

    # 8. save
    model.save_pretrained(OUTPUT_DIR)
    tok.save_pretrained(OUTPUT_DIR)
    print("✅ saved", OUTPUT_DIR)


if __name__ == "__main__":
    main()



Map (num_proc=4):   0%|          | 0/796 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/42 [00:00<?, ? examples/s]

📊  train=796  eval=42


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 41,943,040 || all params: 7,283,675,136 || trainable%: 0.5758


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
1,8.8046,5.449313
2,5.3286,5.179441
3,5.0827,5.130314
4,4.9818,5.080976
5,4.8974,5.103785


✅ saved /content/drive/MyDrive/nbadraft/mistral-lora-v8


In [None]:
#!/usr/bin/env python3
"""
Quick accuracy-plus-reasoning check for the first N examples
of our clean test file (only a 'text' field).

▪ prints GOLD pick, predicted pick, ✓/✗
▪ also shows the model’s explanation/justification
"""

import json, re, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# ───── paths & params ─────────────────────────────────────────────────
DATASET_PATH = "/content/drive/MyDrive/nbadraft/test_samples_vorp.jsonl"
MODEL_NAME   = "mistralai/Mistral-7B-v0.1"
ADAPTER_PATH = "/content/drive/MyDrive/nbadraft/mistral-lora-v11"

N_EXAMPLES   = 116       # how many to preview
MAX_NEW_TOK  = 160      # generation length
SEED         = 42
# ──────────────────────────────────────────────────────────────────────

# ---------- helper to split stored 'text' into prompt / completion ---
def load_examples(path, n):
    out = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            full = json.loads(line)["text"]
            prompt, completion = full.split("### Response:", 1)
            out.append({"prompt": prompt.strip(),
                        "completion": completion.strip()})
            if len(out) == n:
                break
    return out

examples = load_examples(DATASET_PATH, N_EXAMPLES)

# ---------- model ----------------------------------------------------
bnb = BitsAndBytesConfig(load_in_8bit=True,
                         llm_int8_enable_fp32_cpu_offload=True)
base = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            device_map="auto",
            quantization_config=bnb,
            trust_remote_code=True)
model = PeftModel.from_pretrained(base, ADAPTER_PATH, device_map="auto")
model.eval()

tok = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tok.pad_token = tok.eos_token

# ---------- regex helpers -------------------------------------------
pick_rgx = re.compile(r"Pick:\s*([^\(]+)", re.I)

def extract_name(txt):
    m = pick_rgx.search(txt)
    return m.group(1).strip().lower() if m else ""

# ---------- run ------------------------------------------------------
torch.manual_seed(SEED)
hits = 0
print(f"\n=== Testing first {N_EXAMPLES} examples ===\n")

for idx, ex in enumerate(examples, 1):
    prompt  = ex["prompt"] + "\n### Response:"
    gold    = extract_name(ex["completion"])

    inputs  = tok(prompt, return_tensors="pt").to(model.device)
    out_ids = model.generate(**inputs,
                             max_new_tokens=MAX_NEW_TOK,
                             top_p=0.85, temperature=0.4,
                             eos_token_id=tok.eos_token_id,
                             pad_token_id=tok.eos_token_id)

    gen_full = tok.decode(out_ids[0][inputs.input_ids.shape[1]:],
                          skip_special_tokens=True).strip()

    pred = extract_name(gen_full)
    correct = (pred == gold and gold != "")
    if correct: hits += 1

    print(f"--- Example {idx} ---")
    print(f"GOLD : {gold}")
    print(f"PRED : {pred or '(none)'}   {'✅' if correct else '❌'}")
    # ---------- NEW: show reasoning -------------
    why = gen_full.split("Why:",1)[-1].strip() if "Why:" in gen_full else "(no explanation found)"
    print("WHY  :", why[:400], "\n")     # truncate if very long

acc = hits / len(examples) * 100
print(f"Exact-match accuracy: {hits}/{len(examples)}  =  {acc:.1f}%")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


=== Testing first 116 examples ===

--- Example 1 ---
GOLD : victor wembanyama
PRED : colin castleton   ❌
WHY  : (no explanation found) 

--- Example 2 ---
GOLD : ausar thompson
PRED : jordan miller   ❌
WHY  : (no explanation found) 

--- Example 3 ---
GOLD : amen thompson
PRED : amen thompson   ✅
WHY  : (no explanation found) 

--- Example 4 ---
GOLD : cason wallace
PRED : cason wallace   ✅
WHY  : (no explanation found) 

--- Example 5 ---
GOLD : cam whitmore
PRED : jalen slawson   ❌
WHY  : (no explanation found) 

--- Example 6 ---
GOLD : trayce jackson-davis
PRED : anthony black   ❌
WHY  : (no explanation found) 

--- Example 7 ---
GOLD : leonard miller
PRED : jalen slawson   ❌
WHY  : (no explanation found) 

--- Example 8 ---
GOLD : jarace walker
PRED : taylor hendricks   ❌
WHY  : (no explanation found) 

--- Example 9 ---
GOLD : mouhamed gueye
PRED : mouhamed gueye   ✅
WHY  : (no explanation found) 

--- Example 10 ---
GOLD : craig porter jr.
PRED : yuri collins   ❌
WHY  : (no ex

In [None]:
#!/usr/bin/env python3
"""
Quick accuracy-plus-reasoning check for the first N examples
of our clean test file (only a 'text' field).

▪ prints GOLD pick, predicted pick, ✓/✗
▪ also shows the model’s explanation/justification
"""

import json, re, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# ───── paths & params ─────────────────────────────────────────────────
DATASET_PATH = "/content/drive/MyDrive/nbadraft/test_samples_vorp2.jsonl"
MODEL_NAME   = "mistralai/Mistral-7B-v0.1"
ADAPTER_PATH = "/content/drive/MyDrive/nbadraft/mistral-lora-v12"

N_EXAMPLES   = 60       # how many to preview
MAX_NEW_TOK  = 160      # generation length
SEED         = 42
# ──────────────────────────────────────────────────────────────────────

# ---------- helper to split stored 'text' into prompt / completion ---
def load_examples(path, n):
    out = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            full = json.loads(line)["text"]
            prompt, completion = full.split("### Response:", 1)
            out.append({"prompt": prompt.strip(),
                        "completion": completion.strip()})
            if len(out) == n:
                break
    return out

examples = load_examples(DATASET_PATH, N_EXAMPLES)

# ---------- model ----------------------------------------------------
bnb = BitsAndBytesConfig(load_in_8bit=True,
                         llm_int8_enable_fp32_cpu_offload=True)
base = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            device_map="auto",
            quantization_config=bnb,
            trust_remote_code=True)
model = PeftModel.from_pretrained(base, ADAPTER_PATH, device_map="auto")
model.eval()

tok = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tok.pad_token = tok.eos_token

# ---------- regex helpers -------------------------------------------
pick_rgx = re.compile(r"Pick:\s*([^\(]+)", re.I)

def extract_name(txt):
    m = pick_rgx.search(txt)
    return m.group(1).strip().lower() if m else ""

# ---------- run ------------------------------------------------------
torch.manual_seed(SEED)
hits = 0
print(f"\n=== Testing first {N_EXAMPLES} examples ===\n")

for idx, ex in enumerate(examples, 1):
    prompt  = ex["prompt"] + "\n### Response:"
    gold    = extract_name(ex["completion"])

    inputs  = tok(prompt, return_tensors="pt").to(model.device)
    out_ids = model.generate(**inputs,
                             max_new_tokens=MAX_NEW_TOK,
                             top_p=0.85, temperature=0.1,
                             eos_token_id=tok.eos_token_id,
                             pad_token_id=tok.eos_token_id)

    gen_full = tok.decode(out_ids[0][inputs.input_ids.shape[1]:],
                          skip_special_tokens=True).strip()

    pred = extract_name(gen_full)
    correct = (pred == gold and gold != "")
    if correct: hits += 1

    print(f"--- Example {idx} ---")
    print(f"GOLD : {gold}")
    print(f"PRED : {pred or '(none)'}   {'✅' if correct else '❌'}")
    # ---------- NEW: show reasoning -------------
    why = gen_full.split("Why:",1)[-1].strip() if "Why:" in gen_full else "(no explanation found)"
    print("WHY  :", why[:400], "\n")     # truncate if very long

acc = hits / len(examples) * 100
print(f"Exact-match accuracy: {hits}/{len(examples)}  =  {acc:.1f}%")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]




=== Testing first 60 examples ===

--- Example 1 ---
GOLD : blake griffin
PRED : jonas jerebko   ❌
WHY  : (no explanation found) 

--- Example 2 ---
GOLD : hasheem thabeet
PRED : byron mullens   ❌
WHY  : (no explanation found) 

--- Example 3 ---
GOLD : james harden
PRED : tyreke evans   ❌
WHY  : (no explanation found) 

--- Example 4 ---
GOLD : tyreke evans
PRED : tyreke evans   ✅
WHY  : (no explanation found) 

--- Example 5 ---
GOLD : brandon jennings
PRED : ricky rubio   ❌
WHY  : (no explanation found) 

--- Example 6 ---
GOLD : ty lawson
PRED : stephen curry   ❌
WHY  : (no explanation found) 

--- Example 7 ---
GOLD : stephen curry
PRED : ricky rubio   ❌
WHY  : (no explanation found) 

--- Example 8 ---
GOLD : dejuan blair
PRED : jonas jerebko   ❌
WHY  : (no explanation found) 

--- Example 9 ---
GOLD : marcus thornton
PRED : marcus thornton   ✅
WHY  : (no explanation found) 

--- Example 10 ---
GOLD : jrue holiday
PRED : ricky rubio   ❌
WHY  : (no explanation found) 

--- Exampl

KeyboardInterrupt: 

In [None]:
#!/usr/bin/env python3
# evaluate_picks_v2.py
"""
• Runs the LoRA-tuned model on the first N test examples
• Adds an explicit *system prompt* to steer the model
• Uses deterministic decoding for the Pick token
• Computes   – exact-match (Top-1)
             – soft accuracy  (gold ∈ Top-3 names the model writes)
• Still prints the model’s explanation paragraph (truncated)
"""

import json, re, torch, itertools, textwrap
from transformers import (AutoTokenizer, AutoModelForCausalLM,
                          BitsAndBytesConfig)
from peft import PeftModel

# ─────────────── paths / params ──────────────────────────────────────
DATASET_PATH = "/content/drive/MyDrive/nbadraft/test_samples_clean.jsonl"
BASE_MODEL   = "mistralai/Mistral-7B-v0.1"
ADAPTER_PATH = "/content/drive/MyDrive/nbadraft/mistral-lora-v9"

N_EXAMPLES   = 10
MAX_NEW_TOK  = 120         # plenty for 1 Pick line + 3-4 sent. “Why”
SEED         = 42
DEVICE       = "cuda:0"    # change if needed
# ─────────────────────────────────────────────────────────────────────

torch.manual_seed(SEED)

# ========== helper: load first N records =====================================
def load_examples(path, n):
    out = []
    with open(path, encoding="utf-8") as f:
        for line in itertools.islice(f, n):
            full = json.loads(line)["text"]
            prompt, completion = full.split("### Response:", 1)
            out.append({"prompt": prompt.rstrip(),
                        "completion": completion.rstrip()})
    return out

examples = load_examples(DATASET_PATH, N_EXAMPLES)

# ========== model / tokenizer ===============================================
bnb_cfg = BitsAndBytesConfig(load_in_8bit=True,
                             llm_int8_enable_fp32_cpu_offload=True)
base = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL, device_map=DEVICE,
            quantization_config=bnb_cfg, trust_remote_code=True)
model = PeftModel.from_pretrained(base, ADAPTER_PATH, device_map=DEVICE)
model.eval()

tok = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tok.pad_token = tok.eos_token

# ========== regex utilities ==================================================
pick_rgx    = re.compile(r"Pick\s*:\s*([A-Za-z .'\-–]+)", re.I)
bullet_sep  = "◼︎"          # inserted between candidates (optional)

import unicodedata

pick_line_re = re.compile(r"pick\s*:\s*([^\n]+)", re.I)

def _clean(name: str) -> str:
    """remove parenthetical, dashes, double spaces; lower-case."""
    name = name.split("(", 1)[0]      # drop "(C)" etc.
    name = name.split("–", 1)[0]      # drop "– PF" if some generations add dash
    name = unicodedata.normalize("NFKD", name)
    return re.sub(r"\s+", " ", name).strip().lower()

def extract_pick_names(text, k=3):
    """
    Return up to k candidate names in order of appearance after 'Pick:'.
    Handles lists like 'Pick: A or B / C'.
    """
    m = pick_line_re.search(text)
    if not m:
        return []
    line = m.group(1)
    # split on common separators between multiple suggestions
    parts = re.split(r"\s*(?:,|/|\bor\b|&)\s*", line)
    names = [_clean(p) for p in parts if p.strip()]
    # keep order & uniqueness
    seen, uniq = set(), []
    for n in names:
        if n and n not in seen:
            uniq.append(n); seen.add(n)
            if len(uniq) == k:
                break
    return uniq

def gold_name(text):
    m = pick_line_re.search(text)
    return _clean(m.group(1)) if m else ""

# ==========  evaluation loop  ===============================================
hits_top1 = hits_top3 = 0

system_prompt = ("You are a concise NBA draft analyst. "
                 "Reply with a single line:  "
                 "\"Pick: <player name> (<pos>)\"  "
                 "then a short paragraph beginning with \"Why:\". "
                 "Do not mention any other players.")

print(f"\n=== Testing first {N_EXAMPLES} examples ===\n")

for idx, ex in enumerate(examples, 1):
    # --- build chat-style prompt -----------------------------------
    full_prompt = (
        f"### System:\n{system_prompt}\n\n"
        f"{ex['prompt'].rstrip()}\n### Response:"
    )

    inputs  = tok(full_prompt, return_tensors="pt").to(DEVICE)

    with torch.no_grad():
        out_ids = model.generate(
            **inputs,
            max_new_tokens = MAX_NEW_TOK,
            do_sample      = False,   # deterministic
            num_beams      = 1,
            eos_token_id   = tok.eos_token_id,
            pad_token_id   = tok.eos_token_id
        )

    gen_text = tok.decode(out_ids[0][inputs.input_ids.shape[1]:],
                          skip_special_tokens=True).strip()

    preds = extract_pick_names(gen_text, k=3)
    pred1 = preds[0] if preds else ""
    gold  = gold_name(ex["completion"])

    is_hit1 = pred1 == gold
    is_hit3 = gold in preds

    hits_top1 += is_hit1
    hits_top3 += is_hit3

    # ---------- pretty print ---------------------------------------
    print(f"--- Example {idx} ---")
    print(f"GOLD : {gold}")
    print(f"PRED : {pred1 or '(none)'}   {'✅' if is_hit1 else '❌'}"
          f"   (Top-3 {'✅' if is_hit3 else '❌'})")

    why = gen_text.split("Why:",1)[-1].strip() if "Why:" in gen_text else ""
    print("WHY  :", textwrap.shorten(why, width=120, placeholder=" …"))
    print()

# ========== summary ==========================================================
n = len(examples)
print(f"Exact-match (Top-1): {hits_top1}/{n} = {hits_top1/n*100:.1f} %")
print(f"Soft accuracy (Top-3): {hits_top3}/{n} = {hits_top3/n*100:.1f} %")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


=== Testing first 10 examples ===

--- Example 1 ---
GOLD : victor wembanyama
PRED : victor wembanyama   ✅   (Top-3 ✅)
WHY  : The San Antonio Spurs should draft Victor Wembanyama. With his unprecedented size and coordination at 7 ft 3 in, he …

--- Example 2 ---
GOLD : brandon miller
PRED : jalen wilson   ❌   (Top-3 ❌)
WHY  : The Charlotte Hornets should draft Jalen Wilson. His versatile frontcourt play and strong rebounding ability, …

--- Example 3 ---
GOLD : scoot henderson
PRED : scoot henderson   ✅   (Top-3 ✅)
WHY  : Scoot Henderson is the best fit for Portland as he brings outstanding length and athleticism, which can help enhance …

--- Example 4 ---
GOLD : amen thompson
PRED : yuri collins   ❌   (Top-3 ❌)
WHY  : Yuri Collins is the best fit for Houston's rebuild due to his elite assist-to-turnover ratio and rain-or-shine clutch …

--- Example 5 ---
GOLD : ausar thompson
PRED : jalen wilson   ❌   (Top-3 ❌)
WHY  : The Detroit Pistons should draft Jalen Wilson. His versatile fron

In [None]:
#!/usr/bin/env python3
"""
quick_eval_top3.py – fast sanity-check of a LoRA model on the first N samples.

• shows GOLD name, model’s Top-3 names, ✓/✗ for Top-1 and Top-3
• prints final Top-1 and Top-3 accuracy

Assumes the JSONL test file has only a "text" field (prompt + completion).
"""

import json, re, unicodedata, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# ─────────────────────────── config ──────────────────────────────
DATASET_PATH = "/content/drive/MyDrive/nbadraft/tester_samples.jsonl"
MODEL_NAME   = "mistralai/Mistral-7B-v0.1"
ADAPTER_PATH = "/content/drive/MyDrive/nbadraft/mistral-lora-v9"

N_EXAMPLES   = 116           # how many test rows to evaluate
MAX_NEW_TOK  = 120           # generation length
SEED         = 42

# optional system-prompt (kept short because this is SFT – we just prepend it)
SYS_PROMPT = "You are an NBA draft assistant. Answer with 'Pick: <name>' only."
# ─────────────────────────────────────────────────────────────────

# ========== utilities =======================================================

pick_line_re = re.compile(r"pick\s*:\s*([^\n]+)", re.I)

def _clean(name: str) -> str:
    """
    Strip position tags, dashes, commas; normalise spaces; lowercase.
    """
    name = name.split(",", 1)[0]       # drop “, SG” or similar
    name = name.split("(", 1)[0]       # drop parentheses
    name = name.split("–", 1)[0]       # drop long dash parts
    name = unicodedata.normalize("NFKD", name)
    return re.sub(r"\s+", " ", name).strip().lower()

def extract_topk_picks(answer: str, k: int = 3) -> list[str]:
    """
    Return up to *k* candidate names from the model answer, keeping order.
    Handles 'A or B / C' etc.
    """
    m = pick_line_re.search(answer)
    if not m:
        return []
    segment = m.group(1)
    parts = re.split(r"\s*(?:,|/|\bor\b|&)\s*", segment)
    out, seen = [], set()
    for p in parts:
        n = _clean(p)
        if n and n not in seen:
            out.append(n); seen.add(n)
            if len(out) == k:
                break
    return out

def gold_name(sample_completion: str) -> str:
    m = pick_line_re.search(sample_completion)
    return _clean(m.group(1)) if m else ""

def load_examples(path: str, n: int):
    data = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            full = json.loads(line)["text"]
            prompt, completion = full.split("### Response:", 1)
            data.append({"prompt": prompt.strip(), "completion": completion.strip()})
            if len(data) == n:
                break
    return data

# ========== load model ======================================================
print("⌛  loading model …")
bnb_cfg = BitsAndBytesConfig(load_in_8bit=True,
                             llm_int8_enable_fp32_cpu_offload=True)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_cfg,
    trust_remote_code=True
)
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH, device_map="auto")
model.eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
torch.manual_seed(SEED)

# ========== run evaluation ==================================================
examples = load_examples(DATASET_PATH, N_EXAMPLES)

hits_top1 = hits_top3 = 0
print(f"\n=== Testing first {N_EXAMPLES} examples ===\n")

for idx, ex in enumerate(examples, 1):
    prompt_text = ex["prompt"]
    gold = gold_name(ex["completion"])

    # prepend system prompt
    full_prompt = SYS_PROMPT + "\n\n" + prompt_text + "\n### Response:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    out_ids = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOK,
        top_p=0.85,
        temperature=0.4,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )
    answer = tokenizer.decode(out_ids[0][inputs.input_ids.shape[1]:],
                              skip_special_tokens=True).strip()

    preds = extract_topk_picks(answer, k=3)
    top1_correct  = preds and preds[0] == gold
    top3_correct  = gold in preds

    hits_top1 += top1_correct
    hits_top3 += top3_correct

    # ---------- minimal console output ----------------------------
    pdisp = ", ".join(preds) if preds else "(none)"
    print(f"--- Example {idx:>2} ---")
    print(f"GOLD : {gold}")
    print(f"PRED : {pdisp:<40}  "
          f"{'✅' if top1_correct else '❌'}  "
          f"(Top-3 {'✅' if top3_correct else '❌'})\n")

# ========== summary =========================================================
tot = len(examples)
print(f"Top-1 exact-match accuracy : {hits_top1}/{tot}  = {hits_top1/tot*100:.1f}%")
print(f"Top-3 soft accuracy       : {hits_top3}/{tot}  = {hits_top3/tot*100:.1f}%")


⌛  loading model …


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]


=== Testing first 116 examples ===





--- Example  1 ---
GOLD : victor wembanyama
PRED : victor wembanyama                         ✅  (Top-3 ✅)

--- Example  2 ---
GOLD : brandon miller
PRED : jalen wilson                              ❌  (Top-3 ❌)

--- Example  3 ---
GOLD : scoot henderson
PRED : markquis nowell                           ❌  (Top-3 ❌)

--- Example  4 ---
GOLD : amen thompson
PRED : markquis nowell                           ❌  (Top-3 ❌)

--- Example  5 ---
GOLD : ausar thompson
PRED : keyontae johnson                          ❌  (Top-3 ❌)

--- Example  6 ---
GOLD : anthony black
PRED : jalen hood-schifino, sg)                  ❌  (Top-3 ❌)

--- Example  7 ---
GOLD : bilal coulibaly
PRED : bilal coulibaly                           ✅  (Top-3 ✅)

--- Example  8 ---
GOLD : jarace walker
PRED : jarace walker                             ✅  (Top-3 ✅)

--- Example  9 ---
GOLD : taylor hendricks
PRED : gg jackson                                ❌  (Top-3 ❌)

--- Example 10 ---
GOLD : cason wallace
PRED : yuri collins 

In [None]:
#!/usr/bin/env python3
"""
quick_eval_top3.py – fast sanity-check of a LoRA model on the first N samples.

• shows GOLD name, model’s Top-3 names, ✓/✗ for Top-1 and Top-3
• prints final Top-1 and Top-3 accuracy

Assumes the JSONL test file has only a "text" field (prompt + completion).
"""

import json, re, unicodedata, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# ─────────────────────────── config ──────────────────────────────
DATASET_PATH = "/content/drive/MyDrive/nbadraft/test_samples.jsonl"
MODEL_NAME   = "mistralai/Mistral-7B-v0.1"
ADAPTER_PATH = "/content/drive/MyDrive/nbadraft/mistral-lora-v9"

N_EXAMPLES   = 58           # how many test rows to evaluate
MAX_NEW_TOK  = 120           # generation length
SEED         = 42

# optional system-prompt (kept short because this is SFT – we just prepend it)
SYS_PROMPT = "You are an NBA draft assistant. Answer with 'Pick: <name>' only."
# ─────────────────────────────────────────────────────────────────

# ========== utilities =======================================================

pick_line_re = re.compile(r"pick\s*:\s*([^\n]+)", re.I)

def _clean(name: str) -> str:
    """
    Strip position tags, dashes, commas; normalise spaces; lowercase.
    """
    name = name.split(",", 1)[0]       # drop “, SG” or similar
    name = name.split("(", 1)[0]       # drop parentheses
    name = name.split("–", 1)[0]       # drop long dash parts
    name = unicodedata.normalize("NFKD", name)
    return re.sub(r"\s+", " ", name).strip().lower()

def extract_topk_picks(answer: str, k: int = 3) -> list[str]:
    """
    Return up to *k* candidate names from the model answer, keeping order.
    Handles 'A or B / C' etc.
    """
    m = pick_line_re.search(answer)
    if not m:
        return []
    segment = m.group(1)
    parts = re.split(r"\s*(?:,|/|\bor\b|&)\s*", segment)
    out, seen = [], set()
    for p in parts:
        n = _clean(p)
        if n and n not in seen:
            out.append(n); seen.add(n)
            if len(out) == k:
                break
    return out

def gold_name(sample_completion: str) -> str:
    m = pick_line_re.search(sample_completion)
    return _clean(m.group(1)) if m else ""

def load_examples(path: str, n: int):
    data = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            full = json.loads(line)["text"]
            prompt, completion = full.split("### Response:", 1)
            data.append({"prompt": prompt.strip(), "completion": completion.strip()})
            if len(data) == n:
                break
    return data

# ========== load model ======================================================
print("⌛  loading model …")
bnb_cfg = BitsAndBytesConfig(load_in_8bit=True,
                             llm_int8_enable_fp32_cpu_offload=True)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_cfg,
    trust_remote_code=True
)
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH, device_map="auto")
model.eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
torch.manual_seed(SEED)

# ========== run evaluation ==================================================
examples = load_examples(DATASET_PATH, N_EXAMPLES)

hits_top1 = hits_top3 = 0
print(f"\n=== Testing first {N_EXAMPLES} examples ===\n")

for idx, ex in enumerate(examples, 1):
    prompt_text = ex["prompt"]
    gold = gold_name(ex["completion"])

    # prepend system prompt
    full_prompt = SYS_PROMPT + "\n\n" + prompt_text + "\n### Response:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    out_ids = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOK,
        top_p=0.85,
        temperature=0.4,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )
    answer = tokenizer.decode(out_ids[0][inputs.input_ids.shape[1]:],
                              skip_special_tokens=True).strip()

    preds = extract_topk_picks(answer, k=3)
    top1_correct  = preds and preds[0] == gold
    top3_correct  = gold in preds

    hits_top1 += top1_correct
    hits_top3 += top3_correct

    # ---------- minimal console output ----------------------------
    pdisp = ", ".join(preds) if preds else "(none)"
    print(f"--- Example {idx:>2} ---")
    print(f"GOLD : {gold}")
    print(f"PRED : {pdisp:<40}  "
          f"{'✅' if top1_correct else '❌'}  "
          f"(Top-3 {'✅' if top3_correct else '❌'})\n")

# ========== summary =========================================================
tot = len(examples)
print(f"Top-1 exact-match accuracy : {hits_top1}/{tot}  = {hits_top1/tot*100:.1f}%")
print(f"Top-3 soft accuracy       : {hits_top3}/{tot}  = {hits_top3/tot*100:.1f}%")


⌛  loading model …


ImportError: Using `bitsandbytes` 8-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

In [None]:
# test for accuracy
#!/usr/bin/env python3
"""
Top-k accuracy checker (no reasoning) for draft-pick LoRA.

▪ Evaluates exact-match (top-1) **and** soft top-k (default k=3)
"""

import json, re, unicodedata, torch
from transformers import (AutoTokenizer, AutoModelForCausalLM,
                          BitsAndBytesConfig)
from peft import PeftModel

# ───── config ────────────────────────────────────────────────────────
DATASET_PATH = "/content/drive/MyDrive/nbadraft/test_samples.jsonl"
BASE_MODEL   = "mistralai/Mistral-7B-v0.1"
ADAPTER_PATH = "/content/drive/MyDrive/nbadraft/mistral-lora-v9"

N_EXAMPLES   = 58        # how many test rows to show
TOP_K        = 3         # soft-accuracy k
MAX_NEW      = 120
SEED         = 42
# ──────────────────────────────────────────────────────────────────────

# ---------- helpers --------------------------------------------------
def _load(path, n):
    out = []
    with open(path, encoding="utf-8") as f:
        for ln in f:
            txt = json.loads(ln)["text"]
            prompt, comp = txt.split("### Response:", 1)
            out.append((prompt.strip(), comp.strip()))
            if len(out) == n: break
    return out

def _clean(name: str) -> str:
    # drop everything after comma / parenthesis / dash
    name = name.split(",", 1)[0].split("(", 1)[0].split("–", 1)[0]
    name = unicodedata.normalize("NFKD", name)
    return re.sub(r"\s+", " ", name).strip().lower()

_pick_rgx = re.compile(r"pick:\s*([^\n]+)", re.I)
def gold_name(completion: str) -> str:
    m = _pick_rgx.search(completion)
    return _clean(m.group(1)) if m else ""

examples = _load(DATASET_PATH, N_EXAMPLES)

# ---------- model ----------------------------------------------------
bnb = BitsAndBytesConfig(load_in_8bit=True,
                         llm_int8_enable_fp32_cpu_offload=True)
base = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL, device_map="auto",
            quantization_config=bnb, trust_remote_code=True)
model = PeftModel.from_pretrained(base, ADAPTER_PATH, device_map="auto")
model.eval()

tok = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tok.pad_token = tok.eos_token

# ---------- run ------------------------------------------------------
torch.manual_seed(SEED)
hits_top1 = hits_topk = 0

print(f"\n=== Testing first {N_EXAMPLES} examples (top-{TOP_K}) ===\n")

for idx, (prompt_part, completion) in enumerate(examples, 1):
    prompt = prompt_part + "\n### Response:"
    gold   = gold_name(completion)

    inp    = tok(prompt, return_tensors="pt").to(model.device)
    outs   = model.generate(
                **inp,
                max_new_tokens=MAX_NEW,
                temperature=0.6,
                top_p=0.85,
                do_sample=True,             # sampling > greedy for diversity
                num_return_sequences=TOP_K,
                pad_token_id=tok.eos_token_id,
                eos_token_id=tok.eos_token_id
            )

    # collect K predictions
    preds = []
    for seq in outs:
        gen = tok.decode(seq[inp.input_ids.shape[1]:],
                         skip_special_tokens=True, clean_up_tokenization_spaces=True)
        m   = _pick_rgx.search(gen)
        if m:
            preds.append(_clean(m.group(1)))

    # deduplicate while preserving order
    seen = set(); preds = [p for p in preds if not (p in seen or seen.add(p))]

    top1_correct = preds and preds[0] == gold
    topk_correct = gold in preds

    if top1_correct: hits_top1 += 1
    if topk_correct: hits_topk += 1

    # ---------- pretty print ----------
    pred_show = preds[0] if preds else "(none)"
    print(f"--- Example {idx:2d} ---")
    print(f"GOLD : {gold}")
    print(f"PRED : {pred_show:<35} {'✅' if top1_correct else '❌'}"
          f"  (Top-{TOP_K} {'✅' if topk_correct else '❌'})\n")

# ---------- summary --------------------------------------------------
n = len(examples)
print(f"Top-1 exact-match accuracy : {hits_top1}/{n}  = {hits_top1/n*100:.1f}%")
print(f"Top-{TOP_K} soft accuracy   : {hits_topk}/{n}  = {hits_topk/n*100:.1f}%")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]


=== Testing first 58 examples (top-3) ===

--- Example  1 ---
GOLD : victor wembanyama
PRED : victor wembanyama                   ✅  (Top-3 ✅)

--- Example  2 ---
GOLD : brandon miller
PRED : jalen wilson                        ❌  (Top-3 ❌)

--- Example  3 ---
GOLD : scoot henderson
PRED : scoot henderson                     ✅  (Top-3 ✅)

--- Example  4 ---
GOLD : amen thompson
PRED : isaiah wong                         ❌  (Top-3 ❌)

--- Example  5 ---
GOLD : ausar thompson
PRED : leonard miller                      ❌  (Top-3 ❌)

--- Example  6 ---
GOLD : anthony black
PRED : anthony black                       ✅  (Top-3 ✅)

--- Example  7 ---
GOLD : bilal coulibaly
PRED : jalen wilson                        ❌  (Top-3 ❌)

--- Example  8 ---
GOLD : jarace walker
PRED : jarace walker                       ✅  (Top-3 ✅)

--- Example  9 ---
GOLD : taylor hendricks
PRED : kris murray                         ❌  (Top-3 ❌)

--- Example 10 ---
GOLD : cason wallace
PRED : cason wallace          

In [None]:
!pip install -U "transformers>=4.39" datasets accelerate bitsandbytes



In [None]:
#!/usr/bin/env python3
# fine_tune_mistral_lora_safe.py
"""
LoRA-fine-tune Mistral-7B on NBA-draft prompts.
– auto-handles old vs. new 🤗  Transformers versions
– trains 6 epochs with 4-bit quant + gradient-checkpointing
– optional eval split (only used on modern versions)
"""

import math, json
from datasets import load_dataset
from packaging import version
import transformers, torch
from transformers import (AutoTokenizer, AutoModelForCausalLM,
                          BitsAndBytesConfig, TrainingArguments, Trainer)
from peft import LoraConfig, get_peft_model


#──────────── helper ─────────────────────────────────────────────────────────
def is_modern(min_ver="4.27"):
    return version.parse(transformers.__version__) >= version.parse(min_ver)


#──────────── paths / hyper-params ───────────────────────────────────────────
BASE_MODEL  = "mistralai/Mistral-7B-v0.1"
DATA_FILE   = "/content/drive/MyDrive/nbadraft/train_samples_clean.jsonl"
OUTPUT_DIR  = "/content/drive/MyDrive/nbadraft/mistral-lora-v2"

EPOCHS      = 6
LR          = 1e-4
BSZ         = 2
GRAD_ACC    = 4
WARM_FRAC   = 0.05            # 5 % warm-up


#──────────── load tokenizer + 4-bit base ───────────────────────────────────
bnb_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"
         )

tok = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tok.pad_token = tok.eos_token

base = AutoModelForCausalLM.from_pretrained(
           BASE_MODEL,
           device_map="auto",
           quantization_config=bnb_cfg,
           trust_remote_code=True
       )
base.gradient_checkpointing_enable()

lora_cfg = LoraConfig(
            r=16, lora_alpha=32, lora_dropout=0.05,
            target_modules=["q_proj","k_proj","v_proj","o_proj"],
            task_type="CAUSAL_LM")
model = get_peft_model(base, lora_cfg)


#──────────── dataset – 90 / 10 split ───────────────────────────────────────
raw = load_dataset("json", data_files=DATA_FILE, split="train")

def tok_fn(example):
    out = tok(example["text"], truncation=True, max_length=512,
              padding="max_length")
    out["labels"] = out["input_ids"].copy()
    return out

ds = raw.map(tok_fn, num_proc=4)
if is_modern():
    ds = ds.shuffle(seed=42)
    split = int(0.9 * len(ds))
    ds_train, ds_val = ds.select(range(split)), ds.select(range(split, len(ds)))
else:
    # Older transformer versions: Trainer will ignore eval-dataset
    ds_train, ds_val = ds, None


#──────────── training arguments (version-aware) ────────────────────────────
steps_per_epoch = math.ceil(len(ds_train) / (BSZ * GRAD_ACC))
warmup_steps    = int(steps_per_epoch * EPOCHS * WARM_FRAC)

common_kwargs = dict(
        output_dir               = OUTPUT_DIR,
        per_device_train_batch_size = BSZ,
        gradient_accumulation_steps = GRAD_ACC,
        learning_rate            = LR,
        num_train_epochs         = EPOCHS,
        warmup_steps             = warmup_steps,
        fp16                     = True,
        logging_steps            = 20,
        save_strategy            = "epoch",
        report_to                = "none",
)

if is_modern():
    # add the new goodies
    common_kwargs.update(
        evaluation_strategy  = "epoch",
        lr_scheduler_type    = "cosine",
    )

args = TrainingArguments(**common_kwargs)


#──────────── (optional) simple accuracy on val split ───────────────────────
def acc_metric(eval_pred):
    logits, labels = eval_pred
    preds = logits.argmax(-1)
    return {"acc": (preds[:, 0] == labels[:, 0]).mean().item()}


trainer = Trainer(
        model          = model,
        args           = args,
        train_dataset  = ds_train,
        eval_dataset   = ds_val if is_modern() else None,
        tokenizer      = tok,
        compute_metrics= acc_metric if (is_modern() and ds_val) else None
)

trainer.train()
trainer.save_model(OUTPUT_DIR)
tok.save_pretrained(OUTPUT_DIR)

print("✅  Finished fine-tuning – adapter saved to:", OUTPUT_DIR)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Map (num_proc=4):   0%|          | 0/838 [00:00<?, ? examples/s]

TypeError: TrainingArguments.__init__() got an unexpected keyword argument 'evaluation_strategy'

In [None]:
#!/usr/bin/env python3
# finetune_mistral_lora_legacy_fixed.py

import math, torch
from datasets import load_dataset
from transformers import (AutoTokenizer, AutoModelForCausalLM,
                          BitsAndBytesConfig, TrainingArguments, Trainer)
from peft import (LoraConfig, get_peft_model,
                  prepare_model_for_kbit_training)

# ───────────────────── paths & basic hyper-params ───────────────────────────
BASE_MODEL = "mistralai/Mistral-7B-v0.1"
DATA_FILE  = "/content/drive/MyDrive/nbadraft/train_samples_clean.jsonl"
OUTPUT_DIR = "/content/drive/MyDrive/nbadraft/mistral-lora-legacy"

EPOCHS      = 6
LR          = 1e-4
BATCH_SIZE  = 2          # per-device; fits 4-bit on a free Colab A100
GRAD_ACC    = 4          # effective batch 8
WARMUP_FRAC = 0.05       # 5 % steps

# ───────────────────────── 4-bit loading ────────────────────────────────────
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token         # avoid warning

base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    quantization_config=bnb_cfg,
    trust_remote_code=True
)

# prepare k-bit model → restores a small FP16 head & sets requires_grad
base = prepare_model_for_kbit_training(base)
base.gradient_checkpointing_enable()

# ─────────────────────────── LoRA patch ─────────────────────────────────────
lora_cfg = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(base, lora_cfg)
print(model)                       # sanity-check trainable params

# ─────────────────────────── dataset & tokenisation ─────────────────────────
raw_ds = load_dataset("json", data_files=DATA_FILE, split="train")

def tok_fn(ex):
    enc = tokenizer(ex["text"],
                    truncation=True,
                    padding="max_length",
                    max_length=512)
    enc["labels"] = enc["input_ids"].copy()
    return enc

tokenised = raw_ds.map(tok_fn, num_proc=4)

steps_per_epoch = math.ceil(len(tokenised) / (BATCH_SIZE * GRAD_ACC))
warmup_steps    = int(steps_per_epoch * EPOCHS * WARMUP_FRAC)

# ───────────────────────── training arguments (minimal) ─────────────────────
train_args = TrainingArguments(
    output_dir                  = OUTPUT_DIR,
    num_train_epochs            = EPOCHS,
    per_device_train_batch_size = BATCH_SIZE,
    gradient_accumulation_steps = GRAD_ACC,
    learning_rate               = LR,
    warmup_steps                = warmup_steps,
    fp16                        = True,
    logging_steps               = 20,
    save_strategy               = "epoch",
    report_to                   = "none"
)

# ───────────────────────────── training loop ────────────────────────────────
trainer = Trainer(
    model         = model,
    args          = train_args,
    train_dataset = tokenised,
    tokenizer     = tokenizer
)

trainer.train()
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"✅  Finished – adapter saved to {OUTPUT_DIR}")



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_pro

Map (num_proc=4):   0%|          | 0/838 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
20,1.1228
40,0.727
60,0.6407
80,0.6234
100,0.5884
120,0.5837
140,0.5444
160,0.5142
180,0.5072
200,0.4904


✅  Finished – adapter saved to /content/drive/MyDrive/nbadraft/mistral-lora-legacy


In [None]:
#!/usr/bin/env python3
# infer_mistral_lora.py
"""
Quick interactive / batch inference for a Mistral-7B LoRA adapter.
"""

import json, re, random, argparse
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# ───────────────────────────── arguments ─────────────────────────────
ap = argparse.ArgumentParser()
ap.add_argument("--adapter", default="/content/drive/MyDrive/nbadraft/mistral_lora_v2",
                help="folder created by the training script")
ap.add_argument("--data",    default="/content/drive/MyDrive/nbadraft/test_samples_clean.jsonl",
                help="JSONL with {text: …} or CSV with column `text`")
ap.add_argument("-n", "--num", type=int, default=10, help="# random examples to test")
ap.add_argument("--max_new", type=int, default=128, help="generation length")
ap.add_argument("--seed", type=int, default=42)
args = ap.parse_args()

random.seed(args.seed)

# ─────────────────────── model / tokenizer load ─────────────────────
MODEL_NAME = "mistralai/Mistral-7B-v0.1"

print("Loading base model (8-bit)…")
bnb_cfg = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)

base = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_cfg,
    trust_remote_code=True,
)
print("Attaching LoRA adapter…")
model = PeftModel.from_pretrained(base, args.adapter, device_map="auto")
model.eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# ─────────────────────────── load data ──────────────────────────────
def stream_jsonl(path):
    with open(path, encoding="utf-8") as f:
        for line in f:
            obj = json.loads(line)
            yield obj["text"]

def stream_csv(path):
    import pandas as pd
    df = pd.read_csv(path)
    for t in df["text"]:
        yield t

if args.data.endswith(".jsonl"):
    texts = list(stream_jsonl(args.data))
else:
    texts = list(stream_csv(args.data))

print(f"Loaded {len(texts)} examples. Sampling {args.num}…")
samples = random.sample(texts, min(args.num, len(texts)))

# ───────────────────────── helpers ──────────────────────────────────
pick_pat = re.compile(r"Pick:\s*([A-Za-z .'-]+)")

def gold_pick(text):
    m = pick_pat.search(text)
    return m.group(1).strip() if m else None

def make_prompt(text):
    """Keep everything up to the '### Response:' marker."""
    return text.split("### Response:")[0] + "### Response:"

def exact_match(pred, gold):
    pred = pred.lower()
    gold = gold.lower()
    # crude: look for gold name somewhere in first 50 generated tokens
    return gold in pred

# ─────────────────────────── inference loop ─────────────────────────
hits = 0
for idx, full_text in enumerate(samples, 1):
    prompt = make_prompt(full_text)
    gold   = gold_pick(full_text) or "UNKNOWN"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out_ids = model.generate(
            **inputs,
            max_new_tokens=args.max_new,
            do_sample=True,
            top_p=0.9,
            temperature=0.5,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    gen = tokenizer.decode(out_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

    ok = "✅" if exact_match(gen, gold) else "❌"
    if ok == "✅":
        hits += 1

    print(f"\n—— Example {idx} ———————————————————————————")
    print(prompt)
    print("\n### Model answer:")
    print(gen.strip())
    print(f"\nGold pick: {gold}   {ok}")

acc = hits / len(samples) if samples else 0
print(f"\nFinished. Exact-match accuracy = {hits}/{len(samples)} = {acc:.2%}")


usage: colab_kernel_launcher.py [-h] [--adapter ADAPTER] [--data DATA] [-n NUM]
                                [--max_new MAX_NEW] [--seed SEED]
colab_kernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-2cb54a27-1811-4dc5-b83b-bce004582144.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# Save this as finetune_mistral_lora.py (or just run it directly in Colab)

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

def main():
    # 1. Configs
    model_name = "mistralai/Mistral-7B-v0.1"
    data_path = "/content/drive/MyDrive/nbadraft/train_samples_final_fixed.jsonl"  # Your JSONL file
    output_dir = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"   # Where to save LoRA adapter

    # 2. 4-bit quantization setup (bnb config)
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4"
    )

    # 3. Load model (quantized) and tokenizer
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=bnb_config,
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    # 4. Apply LoRA
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # important for Mistral
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )
    model = get_peft_model(base_model, lora_config)

    # 5. Load your dataset
    ds = load_dataset("json", data_files={"train": data_path}, split="train")

    # 6. Tokenize
    def tokenize(example):
        return tokenizer(example["prompt"], truncation=True, padding="max_length", max_length=512)

    tokenized_ds = ds.map(tokenize, batched=True)

    # 7. Fix: add labels = input_ids
    def add_labels(example):
        example["labels"] = example["input_ids"]
        return example

    final_ds = tokenized_ds.map(add_labels)

    # 8. Training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=2,  # 2 fits better on free Colab GPUs
        gradient_accumulation_steps=4,  # simulate larger batch size
        learning_rate=2e-4,
        num_train_epochs=3,
        logging_steps=10,
        save_strategy="epoch",
        #evaluation_strategy="no",
        bf16=False,
        fp16=True,
        report_to="none",  # Turn off W&B logging
    )

    # 9. Trainer
    trainer = Trainer(
        model=model,
        train_dataset=final_ds,
        args=training_args,
        tokenizer=tokenizer
    )

    # 10. Train
    trainer.train()

    # 11. Save LoRA adapter
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

if __name__ == "__main__":
    main()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/838 [00:00<?, ? examples/s]

Map:   0%|          | 0/838 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss
10,0.9115
20,0.668
30,0.6444
40,0.6249
50,0.5976
60,0.59
70,0.5945
80,0.5788
90,0.562
100,0.5347


In [None]:
!cp -r mistral-lora-colab /content/drive/MyDrive/nbadraft/

cp: cannot stat 'mistral-lora-colab': No such file or directory


In [None]:
#!/usr/bin/env python3
"""
Quick batch test: generate answers from 10 random samples using the fine-tuned Mistral-7B + LoRA.
"""

import json
import random
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# === Config ===
MODEL_NAME = "mistralai/Mistral-7B-v0.1"
ADAPTER_PATH = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"
DATASET_PATH = "/content/drive/MyDrive/nbadraft/test_samples_final_fixed.jsonl"
NUM_SAMPLES = 10

# === Load dataset ===
with open(DATASET_PATH, "r") as f:
    lines = f.readlines()

samples = [json.loads(l) for l in lines]
# Randomly shuffle and pick
test_samples = random.sample(samples, NUM_SAMPLES)

# === Load model ===
print("Loading base model...")
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
model = PeftModel.from_pretrained(
    base_model,
    ADAPTER_PATH,
    device_map="auto",
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# === Test loop ===
print(f"\n===== TESTING {NUM_SAMPLES} EXAMPLES =====\n")

for idx, sample in enumerate(test_samples, 1):
    short_prompt = sample["prompt"]

    # --- Shorten prompt to only 'Team', 'Needs', 'Players', and a few players
    lines = short_prompt.split("\n")
    header = []
    players = []
    copy_players = False
    for line in lines:
        if "Available Players:" in line:
            copy_players = True
            continue
        if copy_players:
            if line.strip() == "" or "Question:" in line:
                break
            players.append(line.strip())
        else:
            header.append(line.strip())

    # Randomly shuffle players so picked player is not always 1st
    random.shuffle(players)

    short_prompt = "\n".join(header) + "\nAvailable Players:\n" + "\n".join(players[:5]) + "\n\nQuestion: Which player should they draft, and why?\n### Response:"

    inputs = tokenizer(short_prompt, return_tensors="pt").to(model.device)
    input_length = inputs.input_ids.shape[1]

    # Generate
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            top_p=0.85,
            temperature=0.4,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    output = tokenizer.decode(generated_ids[0][input_length:], skip_special_tokens=True)

    print(f"\n--- Example {idx} ---")
    print(short_prompt)
    print("\n### Model's Answer:")
    print(output)
    print("-------------------")

print("\n===== DONE =====")


Loading base model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


===== TESTING 10 EXAMPLES =====


--- Example 1 ---
Team: Washington Wizards
Position of Pick: SG
Context: The Wizards’ primary needs are at point guard and center, but more than anything, they crave a star — casting a wide net for whoever projects to be one, from Alex Sarr to Zaccharie Risacher.

Available Players:
- Terrence Shannon Jr. (SG, Illinois) – Height: 6 ft 6 in | Weight: 225 lb | DOB: 2000-07-30 | Awards: Third-team All-American – AP ( 2024 ), 2× First-team All- Big Ten – Coaches ( 2023, 2024 ), First-team All-Big Ten – Media (2024), Second-team All-Big Ten – Media (2023), Third-team All- Big 12 ( 2021 ), Big 12 All-Freshman Team ( 2020 ), Big Ten tournament MOP ( 2024 ), No. 0 jersey honored by Illinois Fighting Illini | Strengths: elite slasher and finisher with power, contact absorbance, and dunking ability • three-level scorer shooting 48% FG, 36% 3PT, and 80% FT for volume efficiency | Weaknesses: inconsistent shooting consistency and shot selection, occasional streak

In [None]:
#!/usr/bin/env python3
"""
Quick sanity test on 5 prompts taken straight from the fine-tuning file.
"""

import json, torch, random
from pathlib import Path
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)
from peft import PeftModel

# ----------------------------------------------------------------------
# CONFIGURATION
# ----------------------------------------------------------------------
MODEL_NAME     = "mistralai/Mistral-7B-v0.1"
ADAPTER_PATH   = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"
TRAIN_JSONL    = "/content/drive/MyDrive/nbadraft/test_samples_final_fixed.jsonl"
N_EXAMPLES     = 5                     # <-- use first 5 records
MAX_NEW        = 128                   # longest completion in data ≈ 90 tokens
TEMPERATURE    = 0.25
TOP_P          = 0.85
NUM_BEAMS      = 4                     # beam search for determinism
DEVICE_MAP     = "auto"
# ----------------------------------------------------------------------

# 1) Load first N_EXAMPLES from the fine-tune file
with open(TRAIN_JSONL, "r") as f:
    samples = [json.loads(next(f)) for _ in range(N_EXAMPLES)]

# 2) Load base model + LoRA in 8-bit
print("⌛  Loading model …")
bnb_cfg = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)
base = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_cfg,
    device_map=DEVICE_MAP,
    trust_remote_code=True
)
model = PeftModel.from_pretrained(base, ADAPTER_PATH, device_map=DEVICE_MAP)
model.eval()

tok = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tok.pad_token = tok.eos_token

# 3) Test loop
print(f"\n===== TESTING {N_EXAMPLES} TRAINING PROMPTS =====\n")

for i, ex in enumerate(samples, 1):
    prompt = ex["prompt"]                     # use full prompt as-is

    enc = tok(prompt, return_tensors="pt").to(model.device)
    in_len = enc.input_ids.shape[1]

    with torch.inference_mode():
        out_ids = model.generate(
            **enc,
            max_new_tokens=MAX_NEW,
            do_sample= False,                 # deterministic with beams
            num_beams = NUM_BEAMS,
            temperature = TEMPERATURE,
            top_p = TOP_P,
            eos_token_id = tok.eos_token_id,
            pad_token_id = tok.eos_token_id
        )

    new_tokens = out_ids[0][in_len:]          # only the completion part
    answer = tok.decode(new_tokens, skip_special_tokens=True)

    print(f"\n--- Example {i} (generated {len(new_tokens)} new tokens) ---")
    print(prompt)
    print("\n### Model’s Answer:")
    print(answer)
    print("------------------------------------------------------------")

print("\n✅  Done")


⌛  Loading model …


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


===== TESTING 5 TRAINING PROMPTS =====


--- Example 1 (generated 128 new tokens) ---
Team: San Antonio Spurs
Position of Pick: C
Context: With Victor Wembanyama in place, the Spurs will seek complementary backcourt talents, emphasizing dynamic wings and playmaking guards to build around their star.

Available Players:
- Oscar Tshiebwe (C, Kentucky) – Height: 6 ft 8 in | Weight: 255 lb | DOB: 1999-11-27 | Awards: National college player of the year ( 2022 ), Consensus first-team All-American ( 2022 ), Consensus second-team All-American ( 2023 ), Pete Newell Big Man Award ( 2022 ), Kareem Abdul-Jabbar Award ( 2022 ), 2× NCAA rebounding leader (2022, 2023), SEC Player of the Year (2022), 2× First-team All- SEC (2022, 2023), Second-team All- Big 12 (2020), SEC All-Defensive Team (2022), Big 12 All-Newcomer Team (2020), McDonald's All-American ( 2019 ), Nike Hoop Summit (2019) | Strengths: Elite rebounding ability • Strong hands and wingspan | Weaknesses: Lacks ideal size for center • Slo

In [None]:
%%writefile inference_mistral_8bit.py
#!/usr/bin/env python3
"""
Inference for LoRA‐tuned Mistral-7B using 8-bit quantization (no CPU offload).
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

def main():
    model_name      = "mistralai/Mistral-7B-v0.1"
    adapter_folder  = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"

    # 1) 8-bit quantization config, no CPU offload
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=False
    )

    # 2) Load base model in 8-bit onto GPU
    base = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        use_auth_token=True
    )

    # 3) Attach LoRA adapters (they’re tiny—load straight to GPU)
    model = PeftModel.from_pretrained(
        base,
        adapter_folder,
        device_map="auto"
    )
    model.eval()

    # 4) Tokenizer setup
    tok = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
    tok.pad_token = tok.eos_token

    # 5) Inference
    prompt = (
        "Team: New York Knicks\n"
        "Needs: PG, SF\n\n"
        "Question: Which player should they draft, and why?\n"
        "### Response:"
    )
    inputs = tok(prompt, return_tensors="pt").to(model.device)

    out_ids = model.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        pad_token_id=tok.eos_token_id,
        eos_token_id=tok.eos_token_id
    )

    print(tok.decode(out_ids[0], skip_special_tokens=True))

if __name__ == "__main__":
    main()


Overwriting inference_mistral_8bit.py


In [None]:
%%writefile inference_mistral_8bit.py
#!/usr/bin/env python3
"""
Inference for LoRA‐tuned Mistral-7B using 8-bit quantization (no CPU offload).
This version merges the adapters into the base model and prints only
newly generated text (not the prompt).
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel


def main():
    model_name     = "mistralai/Mistral-7B-v0.1"
    adapter_folder = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"

    # 1) 8-bit quantization config, no CPU offload
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=False
    )

    # 2) Load base model in 8-bit onto GPU
    base = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        use_auth_token=True
    )

    # 3) Attach LoRA adapters (they’re tiny—load straight to GPU)
    peft_model = PeftModel.from_pretrained(
        base,
        adapter_folder,
        device_map="auto"
    )

    # 4) Merge LoRA weights into the base 8-bit model
    model = peft_model.merge_and_unload()
    model.eval()

    # 5) Tokenizer setup
    tok = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
    tok.pad_token = tok.eos_token

    # 6) Inference prompt
    prompt = (
        "Team: New York Knicks\n"
        "Needs: PG, SF\n\n"
        "Question: Which player should they draft, and why?\n"
        "### Response:"
    )
    inputs = tok(prompt, return_tensors="pt").to(model.device)

    # 7) Generate only within a no_grad context
    with torch.no_grad():
        out_ids = model.generate(
            **inputs,
            max_new_tokens=128,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
            pad_token_id=tok.eos_token_id,
            eos_token_id=tok.eos_token_id
        )

    # 8) Print full generation (prompt + response)
    print(tok.decode(out_ids[0], skip_special_tokens=True))


if __name__ == "__main__":
    main()



Overwriting inference_mistral_8bit.py


In [None]:
!python inference_mistral_8bit.py


2025-04-27 06:38:21.060706: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745735901.080834   10354 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745735901.086986   10354 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-27 06:38:21.107311: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Loading checkpoint shards: 100% 2/2 [01:09<00:00, 34.63s/it]



In [None]:
!ls /content/drive/MyDrive/nbadraft/mistral-lora-colab

adapter_config.json	   runs			    tokenizer.model
adapter_model.safetensors  special_tokens_map.json  training_args.bin
checkpoint-312		   tokenizer_config.json
README.md		   tokenizer.json


In [None]:
%%writefile inference_mistral_8bit.py
#!/usr/bin/env python3
"""
Inference for LoRA‐tuned Mistral-7B using 8-bit quantization (no CPU offload).
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

def main():
    model_name      = "mistralai/Mistral-7B-v0.1"
    adapter_folder  = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"

    # 1) 8-bit quantization config, no CPU offload
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=False
    )

    # 2) Load base model in 8-bit onto GPU
    base = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        token=True  # token replaces deprecated use_auth_token
    )

    # 3) Attach LoRA adapters
    model = PeftModel.from_pretrained(
        base,
        adapter_folder,
        device_map="auto"
    )
    model.eval()

    # 4) Tokenizer setup
    tok = AutoTokenizer.from_pretrained(model_name, token=True)
    tok.pad_token = tok.eos_token

    # 5) Inference
    prompt = (
        "Team: New York Knicks\n"
        "Needs: PG, SF\n\n"
        "Question: Which player should they draft, and why?\n"
        "### Response:"
    )
    inputs = tok(prompt, return_tensors="pt").to(model.device)

    out_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        top_p=0.95,
        temperature=0.8,
        repetition_penalty=1.1,
        pad_token_id=tok.eos_token_id,
        eos_token_id=tok.eos_token_id
    )

    # only print generated text (without copying the prompt)
    print(tok.decode(out_ids[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True))

if __name__ == "__main__":
    main()


Overwriting inference_mistral_8bit.py


In [None]:
# Save as: inference_mistral_simple.py

#!/usr/bin/env python3
"""
Simple inference for LoRA-tuned Mistral-7B (8-bit quantization, minimal decoding restrictions).
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

def main():
    model_name      = "mistralai/Mistral-7B-v0.1"
    adapter_folder  = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"

    # 1) Quantization config: load in 8-bit, no CPU offload
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=False
    )

    # 2) Load base model in 8-bit onto GPU
    base = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        use_auth_token=True
    )

    # 3) Attach LoRA adapters
    model = PeftModel.from_pretrained(
        base,
        adapter_folder,
        device_map="auto"
    )
    model.eval()

    # 4) Tokenizer
    tok = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
    tok.pad_token = tok.eos_token

    # 5) Prompt and show prompt
    prompt = "Suggest a player the New York Knicks should draft."
    print("\n===== PROMPT =====")
    print(prompt)
    print("==================\n")

    # 6) Tokenize prompt
    inputs = tok(prompt, return_tensors="pt").to(model.device)

    # 7) Generate
    out_ids = model.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=True,
        top_p=1.0,            # No nucleus cutoff
        temperature=1.0,       # Normal randomness
        pad_token_id=tok.eos_token_id,
        eos_token_id=tok.eos_token_id
    )

    # 8) Decode and print output
    output = tok.decode(out_ids[0], skip_special_tokens=True)
    print("\n===== RESPONSE =====")
    print(output)
    print("====================\n")

if __name__ == "__main__":
    main()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]




===== PROMPT =====
Suggest a player the New York Knicks should draft.


===== RESPONSE =====
Suggest a player the New York Knicks should draft.



In [None]:
# inference_mistral_8bit_final.py

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

def main():
    model_name      = "mistralai/Mistral-7B-v0.1"
    adapter_folder  = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"

    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=False
    )

    base = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        token=True  # <-- using 'token' instead of deprecated 'use_auth_token'
    )

    model = PeftModel.from_pretrained(
        base,
        adapter_folder,
        device_map="auto"
    )
    model.eval()

    tok = AutoTokenizer.from_pretrained(model_name, token=True)
    tok.pad_token = tok.eos_token

    prompt = (
        "The New York Knicks are looking for a new player to draft in the NBA draft.\n"
        "Their biggest team needs are: Point Guard (PG) and Small Forward (SF).\n"
        "Based on these needs, suggest a player they should pick and explain briefly why."
    )
    print("\n===== PROMPT =====")
    print(prompt)
    print("==================\n")

    inputs = tok(prompt, return_tensors="pt").to(model.device)

    out_ids = model.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=False,
        temperature=1.0,
        pad_token_id=tok.eos_token_id,
        eos_token_id=tok.eos_token_id
    )

    output = tok.decode(out_ids[0], skip_special_tokens=True)
    print("\n===== RESPONSE =====")
    print(output)
    print("====================\n")

if __name__ == "__main__":
    main()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


===== PROMPT =====
The New York Knicks are looking for a new player to draft in the NBA draft.
Their biggest team needs are: Point Guard (PG) and Small Forward (SF).
Based on these needs, suggest a player they should pick and explain briefly why.


===== RESPONSE =====
The New York Knicks are looking for a new player to draft in the NBA draft.
Their biggest team needs are: Point Guard (PG) and Small Forward (SF).
Based on these needs, suggest a player they should pick and explain briefly why.



In [None]:

#!/usr/bin/env python3
"""
Inference for LoRA‐tuned Mistral-7B using 8-bit quantization (fixed version).
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

def main():
    model_name      = "mistralai/Mistral-7B-v0.1"
    adapter_folder  = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"

    # 1) 8-bit quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=False
    )

    # 2) Load base model in 8-bit onto GPU
    base = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        token=True  # no need use_auth_token=True
    )

    # 3) Attach LoRA adapters
    model = PeftModel.from_pretrained(
        base,
        adapter_folder,
        device_map="auto"
    )
    model.eval()

    # 4) Tokenizer
    tok = AutoTokenizer.from_pretrained(model_name, token=True)
    tok.pad_token = tok.eos_token

    # 5) Prompt
    prompt = (
        "The New York Knicks are looking for a new player to draft in the NBA draft.\n"
        "Their biggest team needs are: Point Guard (PG) and Small Forward (SF).\n"
        "Based on these needs, suggest a player they should pick and explain briefly why.\n"
        "### Response:"
    )

    print("\n===== PROMPT =====")
    print(prompt)
    print("==================\n")

    inputs = tok(prompt, return_tensors="pt").to(model.device)

    # 6) Inference
    with torch.no_grad():
        out_ids = model.generate(
            **inputs,
            max_new_tokens=150,            # more space to generate
            do_sample=True,                 # random sampling
            top_p=0.9,                      # nucleus sampling
            temperature=0.7,                # slightly creative
            pad_token_id=tok.eos_token_id,
            eos_token_id=tok.eos_token_id
        )

    output = tok.decode(out_ids[0], skip_special_tokens=True)

    print("\n===== RESPONSE =====")
    print(output)
    print("====================\n")

if __name__ == "__main__":
    main()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


===== PROMPT =====
The New York Knicks are looking for a new player to draft in the NBA draft.
Their biggest team needs are: Point Guard (PG) and Small Forward (SF).
Based on these needs, suggest a player they should pick and explain briefly why.
### Response:


===== RESPONSE =====
The New York Knicks are looking for a new player to draft in the NBA draft.
Their biggest team needs are: Point Guard (PG) and Small Forward (SF).
Based on these needs, suggest a player they should pick and explain briefly why.
### Response:



In [None]:
!pip install -q transformers accelerate bitsandbytes

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

def main():
    model_name = "mistralai/Mistral-7B-v0.1"

    # 1) 8-bit quantization config (no LoRA)
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=False
    )

    # 2) Load base model ONLY
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    # 3) Define a simple prompt
    prompt = (
        "The New York Knicks are looking for a new player to draft in the NBA draft.\n"
        "Their biggest team needs are: Point Guard (PG) and Small Forward (SF).\n"
        "Based on these needs, suggest a player they should pick and explain briefly why.\n"
        "### Response:"
    )

    print("\n===== PROMPT =====")
    print(prompt)
    print("==================\n")

    # 4) Tokenize and generate
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    print("===== RESPONSE =====")
    print(response)
    print("====================")

if __name__ == "__main__":
    main()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


===== PROMPT =====
The New York Knicks are looking for a new player to draft in the NBA draft.
Their biggest team needs are: Point Guard (PG) and Small Forward (SF).
Based on these needs, suggest a player they should pick and explain briefly why.
### Response:

===== RESPONSE =====
The New York Knicks are looking for a new player to draft in the NBA draft.
Their biggest team needs are: Point Guard (PG) and Small Forward (SF).
Based on these needs, suggest a player they should pick and explain briefly why.
### Response:

The New York Knicks should select Malik Monk with their 8th overall pick in the 2017 NBA Draft.

Monk is a talented guard who can score from anywhere on the court. He has a quick release and can knock down shots from deep. He is also a good ball handler and passer. He has the potential to be a star in the NBA.

The Knicks need a point guard who can score and create opportunities for others. Monk can do both. He is also a good defender and rebounder. He would be a great

In [None]:
import json

def fix_dataset(input_path, output_path):
    fixed_data = []
    with open(input_path, "r", encoding="utf-8") as f:
        for line in f:
            sample = json.loads(line)
            if "completion" in sample:
                sample["response"] = sample.pop("completion")
            fixed_data.append(sample)

    with open(output_path, "w", encoding="utf-8") as f:
        for sample in fixed_data:
            f.write(json.dumps(sample, ensure_ascii=False) + "\n")

if __name__ == "__main__":
    # Adjust the paths below if needed
    fix_dataset(
        input_path="/content/drive/MyDrive/nbadraft/train_samples.jsonl",
        output_path="/content/drive/MyDrive/nbadraft/train_samples_fixed.jsonl"
    )


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model
from datasets import load_dataset

def main():
    model_name = "mistralai/Mistral-7B-v0.1"
    data_path = "/content/drive/MyDrive/nbadraft/train_samples_fixed.jsonl"
    output_dir = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"

    # 1) 4-bit quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    )

    # 2) Load model
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )

    # 3) Attach LoRA adapters
    lora_config = LoraConfig(
        r=64,
        lora_alpha=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )
    model = get_peft_model(base_model, lora_config)

    # 4) Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    # 5) Dataset loading
    ds = load_dataset("json", data_files={"train": data_path}, split="train")

    def tokenize(batch):
        prompt_response = batch["prompt"] + "\n" + batch["response"]
        return tokenizer(prompt_response, padding="max_length", truncation=True, max_length=1024)

    ds = ds.map(tokenize)

    # 6) Training args
    args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        num_train_epochs=3,
        logging_steps=10,
        save_total_limit=2,
        bf16=True,
        optim="paged_adamw_8bit",
    )

    trainer = Trainer(
        model=model,
        train_dataset=ds,
        args=args,
        tokenizer=tokenizer
    )

    model.config.use_cache = False  # Important for gradient checkpointing
    trainer.train()
    model.save_pretrained(output_dir)

if __name__ == "__main__":
    main()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/838 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.

In [None]:
!pip install -U transformers




In [None]:
import os
os.environ["WANDB_DISABLED"] = "true"


In [None]:
# Save this in a new cell in Colab (or in a file like inference_lora_mistral.py)

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

def main():
    model_name = "mistralai/Mistral-7B-v0.1"
    adapter_path = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"  # your adapter folder

    # 1) 8-bit quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=True
    )

    # 2) Load the base model
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    # 3) Load the LoRA fine-tuned adapter
    model = PeftModel.from_pretrained(
        base_model,
        adapter_path,
        device_map="auto",
    )
    model.eval()

    # 4) Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    # 5) Your custom prompt
    prompt = """
Team: Los Angeles Clippers
Needs: SF, SG
Context: The Clippers have plenty of big men with Zach Randolph, Chris Kaman, Marcus Camby, DeAndre Jordan and Al Thornton (can play either forward position). At point guard, they have Baron Davis, Eric Gordon, Mike Taylor and Mardy Collins (Gordon, Taylor and Collins can all play either guard position). At small forward, the Clippers only have Thornton. They need a small forward and/or a big shooting guard.

Available Players:
1. Sam Young (SF) – team: Pittsburgh - strengths: Great athlete, improved jumper - weaknesses: ball handling.
2. Chase Budinger (SF) – team: Arizona - strengths: Versatile scorer - weaknesses: Defense.
3. DeMarre Carroll (SF) – team: Missouri - strengths: Hustle, energy - weaknesses: Shot creation.
4. Earl Clark (SF) – team: Louisville - strengths: Length, versatility - weaknesses: Shooting.
5. Robert Dozier (SF) – team: Memphis - strengths: Defense, wingspan - weaknesses: Post play.
6. Jermaine Taylor (SG) – team: Central Florida - strengths: Shooting, clutch - weaknesses: Size.
7. Jodie Meeks (SG) – team: Kentucky - strengths: Shooting - weaknesses: Ball-handling.
8. Marcus Thornton (SG) – team: LSU - strengths: Scoring touch - weaknesses: Defense.
9. James Harden (SG) – team: Arizona State - strengths: Playmaking, scoring - weaknesses: Speed.
10. Dionte Christmas (SG) – team: Temple - strengths: Shooting, IQ - weaknesses: Turnovers.

Question: Which player should they draft, and why?
### Response:
"""

    # 6) Tokenize
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # 7) Generate response
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    print("\n===== OUTPUT =====")
    print(output)
    print("==================")

if __name__ == "__main__":
    main()


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]


===== OUTPUT =====

Team: Los Angeles Clippers
Needs: SF, SG
Context: The Clippers have plenty of big men with Zach Randolph, Chris Kaman, Marcus Camby, DeAndre Jordan and Al Thornton (can play either forward position). At point guard, they have Baron Davis, Eric Gordon, Mike Taylor and Mardy Collins (Gordon, Taylor and Collins can all play either guard position). At small forward, the Clippers only have Thornton. They need a small forward and/or a big shooting guard.

Available Players:
1. Sam Young (SF) – team: Pittsburgh - strengths: Great athlete, improved jumper - weaknesses: ball handling.
2. Chase Budinger (SF) – team: Arizona - strengths: Versatile scorer - weaknesses: Defense.
3. DeMarre Carroll (SF) – team: Missouri - strengths: Hustle, energy - weaknesses: Shot creation.
4. Earl Clark (SF) – team: Louisville - strengths: Length, versatility - weaknesses: Shooting.
5. Robert Dozier (SF) – team: Memphis - strengths: Defense, wingspan - weaknesses: Post play.
6. Jermaine Taylo

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

def main():
    model_name = "mistralai/Mistral-7B-v0.1"
    adapter_path = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"  # Update to your path

    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=True
    )

    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    model = PeftModel.from_pretrained(
        base_model,
        adapter_path,
        device_map="auto",
    )
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    prompt = """
Team: Charlotte Hornets
Needs: SG, SF
Context: The Hornets will either go for a top prospect or package their picks, but are in need of talent across the board, including athletic wings and backcourt talent to complement LaMelo Ball and stop prolonged rebuild processes.

Available Players:
1. Brandin Podziemski (SG) – team: Santa Clara	strengths: Elite shooting ability with great range • Excellent vision and passing skills • Strong rebounding for a guard	weaknesses: Lacks elite speed and athleticism • Struggled against higher competition levels • Shot release may be too slow for NBA	listed height: 6 ft 4 in	listed weight: 205 lb	date of birth: 2003-02-25	awards: WCC co-Player of the Year (2023), First-team All- WCC (2023), WCC Newcomer of the Year (2023), Wisconsin Mr. Basketball (2021)	ls_g: 32.0	ls_gs: 32.0	ls_mp: 36.0	ls_fg: 6.9	ls_fga: 14.4	ls_fg%: 0.483	ls_3p: 2.5	ls_3pa: 5.8	ls_3p%: 0.438	ls_2p: 4.4	ls_2pa: 8.6	ls_2p%: 0.513	ls_efg%: 0.571	ls_ft: 3.5	ls_fta: 4.5	ls_ft%: 0.771	ls_orb: 1.8	ls_drb: 7.0	ls_trb: 8.8	ls_ast: 3.7	ls_stl: 1.8	ls_blk: 0.5	ls_tov: 2.3	ls_pf: 1.9	ls_pts: 19.9
2. Elijah Harkless (SG) – team: UNLV	strengths: Top defender in the G League with strong on-ball defense and blocks • breakout scorer with a 31-point, five‑3PT game and efficient G League production	weaknesses: Undersized at 6'3" with limited strength and low offensive consistency • needs to add physicality to finish through contact	listed height: 6 ft 3 in	listed weight: 195 lb	date of birth: 2000-02-03	awards: Second-team All- Mountain West (2023)	ls_g: 32.0	ls_gs: 32.0	ls_mp: 30.9	ls_fg: 6.0	ls_fga: 14.6	ls_fg%: 0.414	ls_3p: 1.7	ls_3pa: 5.8	ls_3p%: 0.286	ls_2p: 4.4	ls_2pa: 8.8	ls_2p%: 0.498	ls_efg%: 0.471	ls_ft: 5.4	ls_fta: 6.9	ls_ft%: 0.778	ls_orb: 0.8	ls_drb: 4.3	ls_trb: 5.1	ls_ast: 3.4	ls_stl: 1.6	ls_blk: 0.2	ls_tov: 2.8	ls_pf: 2.8	ls_pts: 19.1
3. Ben Sheppard (SG) – team: Belmont	strengths: Impactful 3-and-D wing with playmaking skills • Efficient shooter from beyond the arc • Agile and disruptive defender	weaknesses: Not the most explosive athlete • Faced low-level collegiate competition • Limited upside due to age	listed height: 6 ft 6 in	listed weight: 190 lb	date of birth: 2001-07-16	awards: First-team All- MVC (2023), First-team All- OVC (2022)	ls_g: 32.0	ls_gs: 32.0	ls_mp: 34.4	ls_fg: 6.8	ls_fga: 14.2	ls_fg%: 0.475	ls_3p: 2.5	ls_3pa: 6.0	ls_3p%: 0.415	ls_2p: 4.3	ls_2pa: 8.2	ls_2p%: 0.519	ls_efg%: 0.563	ls_ft: 2.8	ls_fta: 4.2	ls_ft%: 0.684	ls_orb: 0.6	ls_drb: 4.6	ls_trb: 5.2	ls_ast: 2.9	ls_stl: 1.4	ls_blk: 0.2	ls_tov: 2.2	ls_pf: 2.1	ls_pts: 18.8
4. Jalen Pickett (SG) – team: Penn State	strengths: Mature, crafty point guard with high IQ • Excellent post skills and court vision • Strong, physical play and leadership	weaknesses: Lacks size and elite athleticism • Older than typical draft prospects • Struggles with NBA pace and length	listed height: 6 ft 2 in	listed weight: 202 lb	date of birth: 1999-10-22	awards: Consensus second-team All-American ( 2023 ), First-team All- Big Ten (2023), MAAC Player of the Year (2020), 3× First-team All- MAAC (2019–2021), MAAC Rookie of the Year (2019)	ls_g: 37.0	ls_gs: 37.0	ls_mp: 36.6	ls_fg: 7.0	ls_fga: 13.8	ls_fg%: 0.508	ls_3p: 1.2	ls_3pa: 3.2	ls_3p%: 0.381	ls_2p: 5.8	ls_2pa: 10.6	ls_2p%: 0.546	ls_efg%: 0.552	ls_ft: 2.4	ls_fta: 3.2	ls_ft%: 0.763	ls_orb: 1.2	ls_drb: 6.2	ls_trb: 7.4	ls_ast: 6.6	ls_stl: 0.9	ls_blk: 0.5	ls_tov: 2.3	ls_pf: 2.1	ls_pts: 17.7
5. D'Moi Hodge (SG) – team: Missouri	strengths: Disruptive on-ball defender with quick hands and high steal rate • smooth shot mechanics and solid perimeter shooting ability	weaknesses: Lacks primary creation skills and advanced passing vision • undersized wing with subpar wingspan limits defensive versatility	listed height: 6 ft 3 in	listed weight: 185 lb	date of birth: 1998-12-20	awards: NBA Cup champion ( 2023 ), First-team All- Horizon League (2022), Horizon League Defensive Player of the Year (2022), Horizon League All-Defensive Team (2022)	ls_g: 35.0	ls_gs: 35.0	ls_mp: 29.1	ls_fg: 5.1	ls_fga: 10.7	ls_fg%: 0.477	ls_3p: 2.9	ls_3pa: 7.1	ls_3p%: 0.4	ls_2p: 2.2	ls_2pa: 3.5	ls_2p%: 0.634	ls_efg%: 0.611	ls_ft: 1.7	ls_fta: 2.3	ls_ft%: 0.734	ls_orb: 0.7	ls_drb: 3.1	ls_trb: 3.9	ls_ast: 1.6	ls_stl: 2.6	ls_blk: 0.5	ls_tov: 0.8	ls_pf: 2.5	ls_pts: 14.7
6. Jalen Wilson (SF) – team: Kansas	strengths: Versatile frontcourt player with physical play • Strong rebounder and improved free throw shooter • High motor and competitive on the glass	weaknesses: Average size and reach for NBA standards • Inconsistent perimeter shooting • Limited defensive impact and athleticism	listed height: 6 ft 6 in	listed weight: 220 lb	date of birth: 2000-11-04	awards: NCAA champion (2022), Consensus first-team All-American ( 2023 ), Julius Erving Award (2023), Big 12 Player of the Year (2023), First-team All- Big 12 (2023), Third-team All-Big 12 (2022), Big 12 All-Newcomer Team (2021), Big 12 All-Freshman Team (2021)	ls_g: 36.0	ls_gs: 36.0	ls_mp: 35.4	ls_fg: 6.9	ls_fga: 16.0	ls_fg%: 0.43	ls_3p: 1.9	ls_3pa: 5.8	ls_3p%: 0.337	ls_2p: 4.9	ls_2pa: 10.2	ls_2p%: 0.482	ls_efg%: 0.49	ls_ft: 4.4	ls_fta: 5.5	ls_ft%: 0.799	ls_orb: 1.6	ls_drb: 6.7	ls_trb: 8.3	ls_ast: 2.2	ls_stl: 0.9	ls_blk: 0.5	ls_tov: 2.3	ls_pf: 2.1	ls_pts: 20.1
7. Leonard Miller (SF) – team: NBA G League Ignite	strengths: Effective rim-runner and finisher • Strong rebounder, especially offensively • Good defensive potential with length	weaknesses: Struggles with shooting mechanics • Limited offensive creator in halfcourt • Needs improvement in defensive fundamentals	listed height: 6 ft 10 in	listed weight: 220 lb	date of birth: 2003-11-26	awards: NBA G League Next Up Game ( 2023 )	ls_g: 24.0	ls_gs: 19.0	ls_mp: 30.5	ls_fg: 7.3	ls_fga: 13.0	ls_fg%: 0.556	ls_3p: 0.7	ls_3pa: 2.2	ls_3p%: 0.327	ls_2p: 6.5	ls_2pa: 10.9	ls_2p%: 0.602	ls_efg%: nan	ls_ft: 1.6	ls_fta: 2.0	ls_ft%: 0.792	ls_orb: 3.3	ls_drb: 7.8	ls_trb: 11.0	ls_ast: 1.6	ls_stl: 0.9	ls_blk: 0.8	ls_tov: 1.5	ls_pf: 2.5	ls_pts: 18.0
8. Ausar Thompson (SF) – team: City Reapers (Overtime Elite)	strengths: Explosive athleticism and speed • High defensive potential and motor • Effective transition player and passer	weaknesses: Relies too much on athleticism • Underdeveloped jump shot • High turnover rate and poor shot selection	listed height: 6 ft 6 in	listed weight: 205 lb	date of birth: 2003-01-30	awards: 2× OTE champion (2022, 2023), 2× OTE Finals MVP (2022, 2023), OTE Most Valuable Player (2023), All-OTE First Team (2023)	ls_g: 16.0	ls_gs: nan	ls_mp: 30.1	ls_fg: 6.2	ls_fga: 12.7	ls_fg%: 0.487	ls_3p: 1.1	ls_3pa: 3.7	ls_3p%: 0.298	ls_2p: 5.1	ls_2pa: 9.0	ls_2p%: 0.567	ls_efg%: 0.53	ls_ft: 2.7	ls_fta: 4.1	ls_ft%: 0.662	ls_orb: 1.8	ls_drb: 5.3	ls_trb: 7.1	ls_ast: 6.1	ls_stl: 2.3	ls_blk: 1.1	ls_tov: 2.8	ls_pf: 2.5	ls_pts: 16.3
9. Hunter Tyson (SF) – team: Clemson	strengths: Elite rebounder and efficient scorer • High release, shoots well over closeouts • Experienced, intelligent floor spacer	weaknesses: Struggles with shot creation • Defensive liability, lacks athleticism • Limited impact if shot isn't falling	listed height: 6 ft 8 in	listed weight: 215 lb	date of birth: 2000-06-13	awards: First-team All-ACC ( 2023 )	ls_g: 34.0	ls_gs: 34.0	ls_mp: 34.7	ls_fg: 5.1	ls_fga: 10.6	ls_fg%: 0.479	ls_3p: 2.4	ls_3pa: 6.0	ls_3p%: 0.405	ls_2p: 2.6	ls_2pa: 4.5	ls_2p%: 0.578	ls_efg%: 0.595	ls_ft: 2.7	ls_fta: 3.3	ls_ft%: 0.838	ls_orb: 1.3	ls_drb: 8.3	ls_trb: 9.6	ls_ast: 1.5	ls_stl: 0.9	ls_blk: 0.1	ls_tov: 1.0	ls_pf: 1.8	ls_pts: 15.3
10. Brandon Miller (SF) – team: Alabama	strengths: Extremely talented 6'9 versatile forward • Excellent shooter with deep range • Good rebounder and defender	weaknesses: Lacks great burst and explosiveness • Struggles with finishing through contact • Not a strong playmaker for others	listed height: 6 ft 7 in	listed weight: 200 lb	date of birth: 2002-11-22	awards: Consensus second-team All-American ( 2023 ), Wayman Tisdale Award (2023), NABC Freshman of the Year (2023), SEC Player of the Year (2023), SEC Rookie of the Year (2023), First-team All- SEC (2023), SEC tournament MVP ( 2023 ), McDonald's All-American ( 2022 ), Jordan Brand Classic (2022), Tennessee Mr. Basketball (2022)	ls_g: 17.0	ls_gs: 1.0	ls_mp: 2.9	ls_fg: 0.1	ls_fga: 0.9	ls_fg%: 0.133	ls_3p: 0.0	ls_3pa: 0.2	ls_3p%: 0.0	ls_2p: 0.1	ls_2pa: 0.6	ls_2p%: 0.182	ls_efg%: 0.133	ls_ft: 0.3	ls_fta: 0.5	ls_ft%: 0.556	ls_orb: 0.8	ls_drb: 0.2	ls_trb: 1.0	ls_ast: 0.3	ls_stl: 0.1	ls_blk: 0.0	ls_tov: 0.1	ls_pf: 0.2	ls_pts: 0.5

Question: Which player should they draft, and why?
### Response:
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            top_p=0.85,             # slightly lower for sharper focus
            temperature=0.4,         # lower temp for more confident answers
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    print("\n===== OUTPUT =====")
    print(output)
    print("==================")

if __name__ == "__main__":
    main()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


===== OUTPUT =====

Team: Charlotte Hornets
Needs: SG, SF
Context: The Hornets will either go for a top prospect or package their picks, but are in need of talent across the board, including athletic wings and backcourt talent to complement LaMelo Ball and stop prolonged rebuild processes.

Available Players:
1. Brandin Podziemski (SG) – team: Santa Clara	strengths: Elite shooting ability with great range • Excellent vision and passing skills • Strong rebounding for a guard	weaknesses: Lacks elite speed and athleticism • Struggled against higher competition levels • Shot release may be too slow for NBA	listed height: 6 ft 4 in	listed weight: 205 lb	date of birth: 2003-02-25	awards: WCC co-Player of the Year (2023), First-team All- WCC (2023), WCC Newcomer of the Year (2023), Wisconsin Mr. Basketball (2021)	ls_g: 32.0	ls_gs: 32.0	ls_mp: 36.0	ls_fg: 6.9	ls_fga: 14.4	ls_fg%: 0.483	ls_3p: 2.5	ls_3pa: 5.8	ls_3p%: 0.438	ls_2p: 4.4	ls_2pa: 8.6	ls_2p%: 0.513	ls_efg%: 0.571	ls_ft: 3.5	ls_fta: 

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

def main():
    model_name = "mistralai/Mistral-7B-v0.1"
    adapter_path = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"  # Update to your path

    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=True
    )

    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    model = PeftModel.from_pretrained(
        base_model,
        adapter_path,
        device_map="auto",
    )
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    prompt = """
Team: Charlotte Hornets
Needs: SG, SF
Context: The Hornets will either go for a top prospect or package their picks, but are in need of talent across the board, including athletic wings and backcourt talent to complement LaMelo Ball and stop prolonged rebuild processes.

Available Players:
1. Brandin Podziemski (SG) – team: Santa Clara	strengths: Elite shooting ability with great range • Excellent vision and passing skills • Strong rebounding for a guard	weaknesses: Lacks elite speed and athleticism • Struggled against higher competition levels • Shot release may be too slow for NBA	listed height: 6 ft 4 in	listed weight: 205 lb	date of birth: 2003-02-25	awards: WCC co-Player of the Year (2023), First-team All- WCC (2023), WCC Newcomer of the Year (2023), Wisconsin Mr. Basketball (2021)	ls_g: 32.0	ls_gs: 32.0	ls_mp: 36.0	ls_fg: 6.9	ls_fga: 14.4	ls_fg%: 0.483	ls_3p: 2.5	ls_3pa: 5.8	ls_3p%: 0.438	ls_2p: 4.4	ls_2pa: 8.6	ls_2p%: 0.513	ls_efg%: 0.571	ls_ft: 3.5	ls_fta: 4.5	ls_ft%: 0.771	ls_orb: 1.8	ls_drb: 7.0	ls_trb: 8.8	ls_ast: 3.7	ls_stl: 1.8	ls_blk: 0.5	ls_tov: 2.3	ls_pf: 1.9	ls_pts: 19.9
2. Elijah Harkless (SG) – team: UNLV	strengths: Top defender in the G League with strong on-ball defense and blocks • breakout scorer with a 31-point, five‑3PT game and efficient G League production	weaknesses: Undersized at 6'3" with limited strength and low offensive consistency • needs to add physicality to finish through contact	listed height: 6 ft 3 in	listed weight: 195 lb	date of birth: 2000-02-03	awards: Second-team All- Mountain West (2023)	ls_g: 32.0	ls_gs: 32.0	ls_mp: 30.9	ls_fg: 6.0	ls_fga: 14.6	ls_fg%: 0.414	ls_3p: 1.7	ls_3pa: 5.8	ls_3p%: 0.286	ls_2p: 4.4	ls_2pa: 8.8	ls_2p%: 0.498	ls_efg%: 0.471	ls_ft: 5.4	ls_fta: 6.9	ls_ft%: 0.778	ls_orb: 0.8	ls_drb: 4.3	ls_trb: 5.1	ls_ast: 3.4	ls_stl: 1.6	ls_blk: 0.2	ls_tov: 2.8	ls_pf: 2.8	ls_pts: 19.1
3. Ben Sheppard (SG) – team: Belmont	strengths: Impactful 3-and-D wing with playmaking skills • Efficient shooter from beyond the arc • Agile and disruptive defender	weaknesses: Not the most explosive athlete • Faced low-level collegiate competition • Limited upside due to age	listed height: 6 ft 6 in	listed weight: 190 lb	date of birth: 2001-07-16	awards: First-team All- MVC (2023), First-team All- OVC (2022)	ls_g: 32.0	ls_gs: 32.0	ls_mp: 34.4	ls_fg: 6.8	ls_fga: 14.2	ls_fg%: 0.475	ls_3p: 2.5	ls_3pa: 6.0	ls_3p%: 0.415	ls_2p: 4.3	ls_2pa: 8.2	ls_2p%: 0.519	ls_efg%: 0.563	ls_ft: 2.8	ls_fta: 4.2	ls_ft%: 0.684	ls_orb: 0.6	ls_drb: 4.6	ls_trb: 5.2	ls_ast: 2.9	ls_stl: 1.4	ls_blk: 0.2	ls_tov: 2.2	ls_pf: 2.1	ls_pts: 18.8
4. Jalen Pickett (SG) – team: Penn State	strengths: Mature, crafty point guard with high IQ • Excellent post skills and court vision • Strong, physical play and leadership	weaknesses: Lacks size and elite athleticism • Older than typical draft prospects • Struggles with NBA pace and length	listed height: 6 ft 2 in	listed weight: 202 lb	date of birth: 1999-10-22	awards: Consensus second-team All-American ( 2023 ), First-team All- Big Ten (2023), MAAC Player of the Year (2020), 3× First-team All- MAAC (2019–2021), MAAC Rookie of the Year (2019)	ls_g: 37.0	ls_gs: 37.0	ls_mp: 36.6	ls_fg: 7.0	ls_fga: 13.8	ls_fg%: 0.508	ls_3p: 1.2	ls_3pa: 3.2	ls_3p%: 0.381	ls_2p: 5.8	ls_2pa: 10.6	ls_2p%: 0.546	ls_efg%: 0.552	ls_ft: 2.4	ls_fta: 3.2	ls_ft%: 0.763	ls_orb: 1.2	ls_drb: 6.2	ls_trb: 7.4	ls_ast: 6.6	ls_stl: 0.9	ls_blk: 0.5	ls_tov: 2.3	ls_pf: 2.1	ls_pts: 17.7
5. D'Moi Hodge (SG) – team: Missouri	strengths: Disruptive on-ball defender with quick hands and high steal rate • smooth shot mechanics and solid perimeter shooting ability	weaknesses: Lacks primary creation skills and advanced passing vision • undersized wing with subpar wingspan limits defensive versatility	listed height: 6 ft 3 in	listed weight: 185 lb	date of birth: 1998-12-20	awards: NBA Cup champion ( 2023 ), First-team All- Horizon League (2022), Horizon League Defensive Player of the Year (2022), Horizon League All-Defensive Team (2022)	ls_g: 35.0	ls_gs: 35.0	ls_mp: 29.1	ls_fg: 5.1	ls_fga: 10.7	ls_fg%: 0.477	ls_3p: 2.9	ls_3pa: 7.1	ls_3p%: 0.4	ls_2p: 2.2	ls_2pa: 3.5	ls_2p%: 0.634	ls_efg%: 0.611	ls_ft: 1.7	ls_fta: 2.3	ls_ft%: 0.734	ls_orb: 0.7	ls_drb: 3.1	ls_trb: 3.9	ls_ast: 1.6	ls_stl: 2.6	ls_blk: 0.5	ls_tov: 0.8	ls_pf: 2.5	ls_pts: 14.7
6. Jalen Wilson (SF) – team: Kansas	strengths: Versatile frontcourt player with physical play • Strong rebounder and improved free throw shooter • High motor and competitive on the glass	weaknesses: Average size and reach for NBA standards • Inconsistent perimeter shooting • Limited defensive impact and athleticism	listed height: 6 ft 6 in	listed weight: 220 lb	date of birth: 2000-11-04	awards: NCAA champion (2022), Consensus first-team All-American ( 2023 ), Julius Erving Award (2023), Big 12 Player of the Year (2023), First-team All- Big 12 (2023), Third-team All-Big 12 (2022), Big 12 All-Newcomer Team (2021), Big 12 All-Freshman Team (2021)	ls_g: 36.0	ls_gs: 36.0	ls_mp: 35.4	ls_fg: 6.9	ls_fga: 16.0	ls_fg%: 0.43	ls_3p: 1.9	ls_3pa: 5.8	ls_3p%: 0.337	ls_2p: 4.9	ls_2pa: 10.2	ls_2p%: 0.482	ls_efg%: 0.49	ls_ft: 4.4	ls_fta: 5.5	ls_ft%: 0.799	ls_orb: 1.6	ls_drb: 6.7	ls_trb: 8.3	ls_ast: 2.2	ls_stl: 0.9	ls_blk: 0.5	ls_tov: 2.3	ls_pf: 2.1	ls_pts: 20.1
7. Leonard Miller (SF) – team: NBA G League Ignite	strengths: Effective rim-runner and finisher • Strong rebounder, especially offensively • Good defensive potential with length	weaknesses: Struggles with shooting mechanics • Limited offensive creator in halfcourt • Needs improvement in defensive fundamentals	listed height: 6 ft 10 in	listed weight: 220 lb	date of birth: 2003-11-26	awards: NBA G League Next Up Game ( 2023 )	ls_g: 24.0	ls_gs: 19.0	ls_mp: 30.5	ls_fg: 7.3	ls_fga: 13.0	ls_fg%: 0.556	ls_3p: 0.7	ls_3pa: 2.2	ls_3p%: 0.327	ls_2p: 6.5	ls_2pa: 10.9	ls_2p%: 0.602	ls_efg%: nan	ls_ft: 1.6	ls_fta: 2.0	ls_ft%: 0.792	ls_orb: 3.3	ls_drb: 7.8	ls_trb: 11.0	ls_ast: 1.6	ls_stl: 0.9	ls_blk: 0.8	ls_tov: 1.5	ls_pf: 2.5	ls_pts: 18.0
8. Ausar Thompson (SF) – team: City Reapers (Overtime Elite)	strengths: Explosive athleticism and speed • High defensive potential and motor • Effective transition player and passer	weaknesses: Relies too much on athleticism • Underdeveloped jump shot • High turnover rate and poor shot selection	listed height: 6 ft 6 in	listed weight: 205 lb	date of birth: 2003-01-30	awards: 2× OTE champion (2022, 2023), 2× OTE Finals MVP (2022, 2023), OTE Most Valuable Player (2023), All-OTE First Team (2023)	ls_g: 16.0	ls_gs: nan	ls_mp: 30.1	ls_fg: 6.2	ls_fga: 12.7	ls_fg%: 0.487	ls_3p: 1.1	ls_3pa: 3.7	ls_3p%: 0.298	ls_2p: 5.1	ls_2pa: 9.0	ls_2p%: 0.567	ls_efg%: 0.53	ls_ft: 2.7	ls_fta: 4.1	ls_ft%: 0.662	ls_orb: 1.8	ls_drb: 5.3	ls_trb: 7.1	ls_ast: 6.1	ls_stl: 2.3	ls_blk: 1.1	ls_tov: 2.8	ls_pf: 2.5	ls_pts: 16.3
9. Hunter Tyson (SF) – team: Clemson	strengths: Elite rebounder and efficient scorer • High release, shoots well over closeouts • Experienced, intelligent floor spacer	weaknesses: Struggles with shot creation • Defensive liability, lacks athleticism • Limited impact if shot isn't falling	listed height: 6 ft 8 in	listed weight: 215 lb	date of birth: 2000-06-13	awards: First-team All-ACC ( 2023 )	ls_g: 34.0	ls_gs: 34.0	ls_mp: 34.7	ls_fg: 5.1	ls_fga: 10.6	ls_fg%: 0.479	ls_3p: 2.4	ls_3pa: 6.0	ls_3p%: 0.405	ls_2p: 2.6	ls_2pa: 4.5	ls_2p%: 0.578	ls_efg%: 0.595	ls_ft: 2.7	ls_fta: 3.3	ls_ft%: 0.838	ls_orb: 1.3	ls_drb: 8.3	ls_trb: 9.6	ls_ast: 1.5	ls_stl: 0.9	ls_blk: 0.1	ls_tov: 1.0	ls_pf: 1.8	ls_pts: 15.3
10. Brandon Miller (SF) – team: Alabama	strengths: Extremely talented 6'9 versatile forward • Excellent shooter with deep range • Good rebounder and defender	weaknesses: Lacks great burst and explosiveness • Struggles with finishing through contact • Not a strong playmaker for others	listed height: 6 ft 7 in	listed weight: 200 lb	date of birth: 2002-11-22	awards: Consensus second-team All-American ( 2023 ), Wayman Tisdale Award (2023), NABC Freshman of the Year (2023), SEC Player of the Year (2023), SEC Rookie of the Year (2023), First-team All- SEC (2023), SEC tournament MVP ( 2023 ), McDonald's All-American ( 2022 ), Jordan Brand Classic (2022), Tennessee Mr. Basketball (2022)	ls_g: 17.0	ls_gs: 1.0	ls_mp: 2.9	ls_fg: 0.1	ls_fga: 0.9	ls_fg%: 0.133	ls_3p: 0.0	ls_3pa: 0.2	ls_3p%: 0.0	ls_2p: 0.1	ls_2pa: 0.6	ls_2p%: 0.182	ls_efg%: 0.133	ls_ft: 0.3	ls_fta: 0.5	ls_ft%: 0.556	ls_orb: 0.8	ls_drb: 0.2	ls_trb: 1.0	ls_ast: 0.3	ls_stl: 0.1	ls_blk: 0.0	ls_tov: 0.1	ls_pf: 0.2	ls_pts: 0.5

Question: Which player should they draft, and why?
### Response:
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            top_p=0.85,             # slightly lower for sharper focus
            temperature=0.4,         # lower temp for more confident answers
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    print("\n===== OUTPUT =====")
    print(output)
    print("==================")

if __name__ == "__main__":
    main()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


===== OUTPUT =====

Team: Charlotte Hornets
Needs: SG, SF
Context: The Hornets will either go for a top prospect or package their picks, but are in need of talent across the board, including athletic wings and backcourt talent to complement LaMelo Ball and stop prolonged rebuild processes.

Available Players:
1. Brandin Podziemski (SG) – team: Santa Clara	strengths: Elite shooting ability with great range • Excellent vision and passing skills • Strong rebounding for a guard	weaknesses: Lacks elite speed and athleticism • Struggled against higher competition levels • Shot release may be too slow for NBA	listed height: 6 ft 4 in	listed weight: 205 lb	date of birth: 2003-02-25	awards: WCC co-Player of the Year (2023), First-team All- WCC (2023), WCC Newcomer of the Year (2023), Wisconsin Mr. Basketball (2021)	ls_g: 32.0	ls_gs: 32.0	ls_mp: 36.0	ls_fg: 6.9	ls_fga: 14.4	ls_fg%: 0.483	ls_3p: 2.5	ls_3pa: 5.8	ls_3p%: 0.438	ls_2p: 4.4	ls_2pa: 8.6	ls_2p%: 0.513	ls_efg%: 0.571	ls_ft: 3.5	ls_fta: 

In [None]:
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# === Load one sample ===
with open("/content/drive/MyDrive/nbadraft/train_samples_20232024.jsonl", "r") as f:
    lines = f.readlines()

# Pick a sample (e.g., first sample)
sample = json.loads(lines[10])
prompt = sample["prompt"]

# === Load model and tokenizer ===
model_name = "mistralai/Mistral-7B-v0.1"
adapter_path = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

model = PeftModel.from_pretrained(
    base_model,
    adapter_path,
    device_map="auto",
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# === Prepare inputs ===
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
input_length = inputs.input_ids.shape[1]

# === Generate ===
with torch.no_grad():
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        top_p=0.85,
        temperature=0.4,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

# === Only decode the newly generated tokens ===
output = tokenizer.decode(generated_ids[0][input_length:], skip_special_tokens=True)

print("\n===== MODEL OUTPUT =====")
print(output)
print("=========================")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


===== MODEL OUTPUT =====

Isaiah Wong (PG, Miami (Florida) (2019–2023)) – Nationality: United States | Height: 6 ft 3 in | Weight: 185 lb | DOB: 2001-01-28 | Awards: Third-team All-American – NABC ( 2023 ), ACC Player of the Year (2023), First-team All-ACC (2023), 2× Third-team All-ACC (2021, 2022) | Strengths: Versatile scorer with strong shooting skills • Effective in using ball screens | Weaknesses: Average physical profile for NBA • Reliant on ball screens, streaky shooter | Stats: PTS: 16.2 | 3P%: 0.384 | AST: 3.2 | TRB: 4.3 | STL: 1.4 | BLK: 0.4 | FG%: 0.445 | FT%: 0.845 | TOV: 2.


In [None]:
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

def main():
    # === Load one small test prompt manually ===
    prompt = """
Team: Detroit Pistons
Needs: PG, SF
Context: Need a playmaker to support Cade Cunningham.

Available Players:
1. Scoot Henderson (PG) – Elite athleticism and playmaking upside.
2. Brandon Miller (SF) – Smooth scoring wing with versatility.

Question: Which player should they draft, and why?
### Response:
    """.strip()

    # === Load model and tokenizer ===
    model_name = "mistralai/Mistral-7B-v0.1"
    adapter_path = "/content/drive/MyDrive/nbadraft/mistral-lora-colab"  # Adjust if needed

    print("Loading base model...")
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=True
    )

    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

    print("Attaching LoRA adapter...")
    model = PeftModel.from_pretrained(
        base_model,
        adapter_path,
        device_map="auto",
    )
    model.print_trainable_parameters()
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True
    )
    tokenizer.pad_token = tokenizer.eos_token

    # === Prepare inputs ===
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_length = inputs.input_ids.shape[1]

    # === Generate ===
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            top_p=0.9,
            temperature=0.4,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    # === Decode only the newly generated text ===
    output = tokenizer.decode(generated_ids[0][input_length:], skip_special_tokens=True)

    print("\n===== GENERATED ANSWER =====")
    print(output)
    print("=============================")

if __name__ == "__main__":
    main()


Loading base model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Attaching LoRA adapter...
trainable params: 0 || all params: 7,255,363,584 || trainable%: 0.0000

===== GENERATED ANSWER =====

Scoot Henderson is the clear choice for the Pistons. His elite athleticism and playmaking ability would complement Cunningham perfectly, and he has the potential to be a two-way threat on both ends of the court. Henderson’s size and strength may be a concern, but he has the work ethic and drive to overcome any physical limitations. The Pistons need a dynamic guard who can create his own shots and distribute the ball effectively, and Henderson has the potential to be that player.


In [None]:
#!/usr/bin/env python3
"""
quick_eval_top3.py – fast sanity-check of a LoRA model on the first N samples.

• shows GOLD name, model’s Top-3 names, ✓/✗ for Top-1 and Top-3
• prints final Top-1 and Top-3 accuracy

Assumes the JSONL test file has only a "text" field (prompt + completion).
"""

import json, re, unicodedata, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# ─────────────────────────── config ──────────────────────────────
DATASET_PATH = "/content/drive/MyDrive/nbadraft/test_samples_vorp2.jsonl"
MODEL_NAME   = "mistralai/Mistral-7B-v0.1"
ADAPTER_PATH = "/content/drive/MyDrive/nbadraft/mistral-lora-v12"

N_EXAMPLES   = 58           # how many test rows to evaluate
MAX_NEW_TOK  = 120           # generation length
SEED         = 42

# optional system-prompt (kept short because this is SFT – we just prepend it)
SYS_PROMPT = "You are an NBA draft assistant. Answer with 'Pick: <name>' only."
# ─────────────────────────────────────────────────────────────────

# ========== utilities =======================================================

pick_line_re = re.compile(r"pick\s*:\s*([^\n]+)", re.I)

def _clean(name: str) -> str:
    """
    Strip position tags, dashes, commas; normalise spaces; lowercase.
    """
    name = name.split(",", 1)[0]       # drop “, SG” or similar
    name = name.split("(", 1)[0]       # drop parentheses
    name = name.split("–", 1)[0]       # drop long dash parts
    name = unicodedata.normalize("NFKD", name)
    return re.sub(r"\s+", " ", name).strip().lower()

def extract_topk_picks(answer: str, k: int = 3) -> list[str]:
    """
    Return up to *k* candidate names from the model answer, keeping order.
    Handles 'A or B / C' etc.
    """
    m = pick_line_re.search(answer)
    if not m:
        return []
    segment = m.group(1)
    parts = re.split(r"\s*(?:,|/|\bor\b|&)\s*", segment)
    out, seen = [], set()
    for p in parts:
        n = _clean(p)
        if n and n not in seen:
            out.append(n); seen.add(n)
            if len(out) == k:
                break
    return out

def gold_name(sample_completion: str) -> str:
    m = pick_line_re.search(sample_completion)
    return _clean(m.group(1)) if m else ""

def load_examples(path: str, n: int):
    data = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            full = json.loads(line)["text"]
            prompt, completion = full.split("### Response:", 1)
            data.append({"prompt": prompt.strip(), "completion": completion.strip()})
            if len(data) == n:
                break
    return data

# ========== load model ======================================================
print("⌛  loading model …")
bnb_cfg = BitsAndBytesConfig(load_in_8bit=True,
                             llm_int8_enable_fp32_cpu_offload=True)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_cfg,
    trust_remote_code=True
)
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH, device_map="auto")
model.eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
torch.manual_seed(SEED)

# ========== run evaluation ==================================================
examples = load_examples(DATASET_PATH, N_EXAMPLES)

hits_top1 = hits_top3 = 0
print(f"\n=== Testing first {N_EXAMPLES} examples ===\n")

for idx, ex in enumerate(examples, 1):
    prompt_text = ex["prompt"]
    gold = gold_name(ex["completion"])

    # prepend system prompt
    full_prompt = SYS_PROMPT + "\n\n" + prompt_text + "\n### Response:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    out_ids = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOK,
        top_p=0.85,
        temperature=0.4,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )
    answer = tokenizer.decode(out_ids[0][inputs.input_ids.shape[1]:],
                              skip_special_tokens=True).strip()

    preds = extract_topk_picks(answer, k=3)
    top1_correct  = preds and preds[0] == gold
    top3_correct  = gold in preds

    hits_top1 += top1_correct
    hits_top3 += top3_correct

    # ---------- minimal console output ----------------------------
    pdisp = ", ".join(preds) if preds else "(none)"
    print(f"--- Example {idx:>2} ---")
    print(f"GOLD : {gold}")
    print(f"PRED : {pdisp:<40}  "
          f"{'✅' if top1_correct else '❌'}  "
          f"(Top-3 {'✅' if top3_correct else '❌'})\n")

# ========== summary =========================================================
tot = len(examples)
print(f"Top-1 exact-match accuracy : {hits_top1}/{tot}  = {hits_top1/tot*100:.1f}%")
print(f"Top-3 soft accuracy       : {hits_top3}/{tot}  = {hits_top3/tot*100:.1f}%")


⌛  loading model …


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]


=== Testing first 58 examples ===





--- Example  1 ---
GOLD : blake griffin
PRED : jonas jerebko                             ❌  (Top-3 ❌)

--- Example  2 ---
GOLD : hasheem thabeet
PRED : byron mullens                             ❌  (Top-3 ❌)

--- Example  3 ---
GOLD : james harden
PRED : jodie meeks                               ❌  (Top-3 ❌)

--- Example  4 ---
GOLD : tyreke evans
PRED : tyreke evans                              ✅  (Top-3 ✅)

--- Example  5 ---
GOLD : brandon jennings
PRED : ricky rubio                               ❌  (Top-3 ❌)

--- Example  6 ---
GOLD : ty lawson
PRED : jrue holiday                              ❌  (Top-3 ❌)

--- Example  7 ---
GOLD : stephen curry
PRED : ricky rubio                               ❌  (Top-3 ❌)

--- Example  8 ---
GOLD : dejuan blair
PRED : jonas jerebko                             ❌  (Top-3 ❌)

--- Example  9 ---
GOLD : marcus thornton
PRED : marcus thornton                           ✅  (Top-3 ✅)

--- Example 10 ---
GOLD : jrue holiday
PRED : ricky rubio                

In [None]:
#!/usr/bin/env python3
# finetune_mistral_lora_v2.py
"""
Fine-tune Mistral-7B with LoRA on NBA-draft prompt-completion pairs.

👉  Requirements
    pip install "transformers>=4.38" datasets accelerate peft bitsandbytes

Run on Colab (A100 40 GB) ~25-30 min for 3 epochs, batch-equiv ≈ 32.
"""

# ───────────────────────────── paths ──────────────────────────────
JSONL_PATH = "/content/drive/MyDrive/nbadraft/trainer_samples.jsonl"
OUTPUT_DIR = "/content/drive/MyDrive/nbadraft/mistral-lora-v15"
BASE_MODEL = "mistralai/Mistral-7B-v0.1"

# ──────────────────────────── imports ─────────────────────────────
import torch, json, math, warnings, os
from datasets import load_dataset, Dataset
from transformers import (AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,
                          TrainingArguments, Trainer, DataCollatorForLanguageModeling)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

warnings.filterwarnings("ignore")

def load_jsonl(path):
    with open(path, encoding="utf-8") as f:
        return Dataset.from_list([json.loads(l) for l in f])

def tokenize_and_pack(tokenizer, ds, block_size=2048):
    """
    Tokenises and concatenates → re-chunks into fixed blocks (GPT-style packing).
    """
    def tok(example):
        ids = tokenizer(example["text"], add_special_tokens=False)["input_ids"]
        return {"ids": ids}

    ds = ds.map(tok, remove_columns=ds.column_names, num_proc=4)

    # -- flatten then regroup
    all_ids = sum(ds["ids"], [])
    n_blocks = len(all_ids) // block_size
    blocks   = [all_ids[i*block_size:(i+1)*block_size] for i in range(n_blocks)]

    return Dataset.from_list([{"input_ids": b, "labels": b.copy()} for b in blocks])

def main():

    # 1. dataset ----------------------------------------------------
    raw_ds  = load_jsonl(JSONL_PATH)
    raw_ds  = raw_ds.shuffle(seed=42)
    split   = raw_ds.train_test_split(test_size=0.05, seed=42)
    print(f"📊  train={len(split['train'])}  eval={len(split['test'])}")

    # 2. tokenizer --------------------------------------------------
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token  # safe-guard

    # 3. tokenise & pack -------------------------------------------
    train_ds = tokenize_and_pack(tokenizer, split["train"])
    eval_ds  = tokenize_and_pack(tokenizer, split["test"])

    # 4. 4-bit base model ------------------------------------------
    bnb_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )

    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=bnb_cfg,
        device_map="auto",
        trust_remote_code=True
    )

    base = prepare_model_for_kbit_training(base)    # layernorm to fp32, etc.

    # 5. LoRA config ----------------------------------------------
    lora_cfg = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=[
            "q_proj","k_proj","v_proj","o_proj",          # attention
            "gate_proj","up_proj","down_proj"            # Mistral's MLP (SwiGLU)
        ],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

    model = get_peft_model(base, lora_cfg)
    model.print_trainable_parameters()

    # 6. data-collator (already packed) ----------------------------
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )

    # 7. training args --------------------------------------------
    steps_per_epoch = math.ceil(len(train_ds)/8)    # batch=8 below
    warmup_steps    = int(0.05 * steps_per_epoch * 3)

    args = TrainingArguments(
        output_dir           = OUTPUT_DIR,
        num_train_epochs     = 3,
        per_device_train_batch_size = 8,
        per_device_eval_batch_size  = 8,
        gradient_accumulation_steps = 4,          # ⇒ effective 32
        learning_rate        = 2e-4,
        lr_scheduler_type    = "cosine",
        warmup_steps         = warmup_steps,
        logging_steps        = 25,
        eval_strategy        = "epoch",
        save_strategy        = "epoch",
        save_total_limit     = 2,
        load_best_model_at_end = True,
        fp16                 = True,
        gradient_checkpointing = True,
        optim                = "paged_adamw_32bit",
        report_to            = "none",
        #flash_attention_2    = True               # comment if GPU < A100
    )

    # 8. Trainer ---------------------------------------------------
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        tokenizer=tokenizer,
        data_collator=data_collator
    )

    trainer.train()

    # 9. save ------------------------------------------------------
    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"✅  LoRA adapter saved → {OUTPUT_DIR}")

if __name__ == "__main__":
    main()


📊  train=796  eval=42


Map (num_proc=4):   0%|          | 0/796 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/42 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 41,943,040 || all params: 7,283,675,136 || trainable%: 0.5758


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
1,No log,0.568943
2,0.667000,0.465704


✅  LoRA adapter saved → /content/drive/MyDrive/nbadraft/mistral-lora-v15


In [None]:
#!/usr/bin/env python3
"""
quick_eval_top3.py – fast sanity-check of a LoRA model on the first N samples.

• shows GOLD name, model’s Top-3 names, ✓/✗ for Top-1 and Top-3
• prints final Top-1 and Top-3 accuracy

Assumes the JSONL test file has only a "text" field (prompt + completion).
"""

import json, re, unicodedata, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# ─────────────────────────── config ──────────────────────────────
DATASET_PATH = "/content/drive/MyDrive/nbadraft/tester_samples.jsonl"
MODEL_NAME   = "mistralai/Mistral-7B-v0.1"
ADAPTER_PATH = "/content/drive/MyDrive/nbadraft/mistral-lora-v15"

N_EXAMPLES   = 116           # how many test rows to evaluate
MAX_NEW_TOK  = 120           # generation length
SEED         = 42

# optional system-prompt (kept short because this is SFT – we just prepend it)
SYS_PROMPT = "You are an NBA draft assistant. Answer with 'Pick: <name>' only."
# ─────────────────────────────────────────────────────────────────

# ========== utilities =======================================================

pick_line_re = re.compile(r"pick\s*:\s*([^\n]+)", re.I)

def _clean(name: str) -> str:
    """
    Strip position tags, dashes, commas; normalise spaces; lowercase.
    """
    name = name.split(",", 1)[0]       # drop “, SG” or similar
    name = name.split("(", 1)[0]       # drop parentheses
    name = name.split("–", 1)[0]       # drop long dash parts
    name = unicodedata.normalize("NFKD", name)
    return re.sub(r"\s+", " ", name).strip().lower()

def extract_topk_picks(answer: str, k: int = 3) -> list[str]:
    """
    Return up to *k* candidate names from the model answer, keeping order.
    Handles 'A or B / C' etc.
    """
    m = pick_line_re.search(answer)
    if not m:
        return []
    segment = m.group(1)
    parts = re.split(r"\s*(?:,|/|\bor\b|&)\s*", segment)
    out, seen = [], set()
    for p in parts:
        n = _clean(p)
        if n and n not in seen:
            out.append(n); seen.add(n)
            if len(out) == k:
                break
    return out

def gold_name(sample_completion: str) -> str:
    m = pick_line_re.search(sample_completion)
    return _clean(m.group(1)) if m else ""

def load_examples(path: str, n: int):
    data = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            full = json.loads(line)["text"]
            prompt, completion = full.split("### Response:", 1)
            data.append({"prompt": prompt.strip(), "completion": completion.strip()})
            if len(data) == n:
                break
    return data

# ========== load model ======================================================
print("⌛  loading model …")
bnb_cfg = BitsAndBytesConfig(load_in_8bit=True,
                             llm_int8_enable_fp32_cpu_offload=True)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_cfg,
    trust_remote_code=True
)
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH, device_map="auto")
model.eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
torch.manual_seed(SEED)

# ========== run evaluation ==================================================
examples = load_examples(DATASET_PATH, N_EXAMPLES)

hits_top1 = hits_top3 = 0
print(f"\n=== Testing first {N_EXAMPLES} examples ===\n")

for idx, ex in enumerate(examples, 1):
    prompt_text = ex["prompt"]
    gold = gold_name(ex["completion"])

    # prepend system prompt
    full_prompt = SYS_PROMPT + "\n\n" + prompt_text + "\n### Response:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    out_ids = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOK,
        top_p=0.85,
        temperature=0.4,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )
    answer = tokenizer.decode(out_ids[0][inputs.input_ids.shape[1]:],
                              skip_special_tokens=True).strip()

    preds = extract_topk_picks(answer, k=3)
    top1_correct  = preds and preds[0] == gold
    top3_correct  = gold in preds

    hits_top1 += top1_correct
    hits_top3 += top3_correct

    # ---------- minimal console output ----------------------------
    pdisp = ", ".join(preds) if preds else "(none)"
    print(f"--- Example {idx:>2} ---")
    print(f"GOLD : {gold}")
    print(f"PRED : {pdisp:<40}  "
          f"{'✅' if top1_correct else '❌'}  "
          f"(Top-3 {'✅' if top3_correct else '❌'})\n")

# ========== summary =========================================================
tot = len(examples)
print(f"Top-1 exact-match accuracy : {hits_top1}/{tot}  = {hits_top1/tot*100:.1f}%")
print(f"Top-3 soft accuracy       : {hits_top3}/{tot}  = {hits_top3/tot*100:.1f}%")


⌛  loading model …


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]




=== Testing first 116 examples ===

--- Example  1 ---
GOLD : victor wembanyama
PRED : victor wembanyama                         ✅  (Top-3 ✅)

--- Example  2 ---
GOLD : brandon miller
PRED : jalen wilson                              ❌  (Top-3 ❌)

--- Example  3 ---
GOLD : scoot henderson
PRED : scoot henderson                           ✅  (Top-3 ✅)

--- Example  4 ---
GOLD : amen thompson
PRED : yuri collins                              ❌  (Top-3 ❌)

--- Example  5 ---
GOLD : ausar thompson
PRED : jalen wilson                              ❌  (Top-3 ❌)

--- Example  6 ---
GOLD : anthony black
PRED : anthony black, sg)                        ✅  (Top-3 ✅)

--- Example  7 ---
GOLD : bilal coulibaly
PRED : jalen wilson                              ❌  (Top-3 ❌)

--- Example  8 ---
GOLD : jarace walker
PRED : kris murray                               ❌  (Top-3 ❌)

--- Example  9 ---
GOLD : taylor hendricks
PRED : gg jackson                                ❌  (Top-3 ❌)

--- Example 10 ---
GOLD