In [1]:
import os
import pandas as pd
import torch as t
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import Tuple

# Directory holding the dataset files; the prompts file `test.jsonl` is read from here.
DATA_DIR = "/root/mats/CharacterTraining/data"

In [2]:
# Hugging Face access token taken from the environment (None when HF_TOKEN is unset).
HF_TOKEN = os.getenv("HF_TOKEN")
# Directory passed as `cache_dir` when loading the model and tokenizer.
# NOTE: this is a plain Python variable; it does not set the HF_HOME environment variable.
HF_HOME = "/root/hf-cache"


def load_model_and_tokenizer(model_name: str) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
    """Load a causal language model together with its tokenizer.

    Both objects are created with ``from_pretrained``, using the module-level
    ``HF_HOME`` as the cache directory and ``HF_TOKEN`` as the access token.

    Args:
        model_name: Identifier or path handed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` pair.
    """
    # Arguments shared by the model loader and the tokenizer loader.
    hub_kwargs = {"cache_dir": HF_HOME, "token": HF_TOKEN}

    # Model-only settings: bfloat16 weights, automatic device placement, and
    # permission to execute custom code shipped with the checkpoint.
    model_kwargs = {
        "torch_dtype": t.bfloat16,
        "device_map": "auto",
        "trust_remote_code": True,
    }

    llm = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs, **hub_kwargs)
    tok = AutoTokenizer.from_pretrained(model_name, **hub_kwargs)
    return llm, tok

In [3]:
# Checkpoint to evaluate. Other candidates (swap in as needed):
#   "/shared/saves/llama/dpo"
#   "meta-llama/Llama-3.1-8B-Instruct"
#   "google/gemma-2-2b-it"
MODEL_NAME = "/shared/saves/gemma/dpo"

# The checkpoint identifier lives under its own name so that `model` only ever
# refers to the loaded model object, never to a path string.
model, tokenizer = load_model_and_tokenizer(MODEL_NAME)
model.eval()  # put the model in evaluation mode before generating

# JSON-lines file, one record per line; the generation loop reads its "messages" column.
prompts = pd.read_json(f"{DATA_DIR}/test.jsonl", lines=True, orient="records")

--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1100, in emit
    msg = self.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/usr/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/root/finetuning/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/root/finetuning/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance
  

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
# Generation settings are the same for every prompt, so build them once
# instead of on each iteration.
generation_kwargs = dict(
    max_new_tokens=1024,
    temperature=0.1,
    repetition_penalty=1.0,
    top_p=1.0,
    do_sample=True,
    num_beams=1,
)

for messages in prompts["messages"]:
    # Render the conversation to a prompt string that ends with the generation prompt.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # add_special_tokens=False: presumably the chat template already inserts the
    # special tokens, so they are not added a second time -- TODO confirm.
    tks = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

    # Only the forward/generation work needs the no-grad inference context.
    with t.inference_mode():
        outputs = model.generate(**tks, **generation_kwargs)

    # outputs[0] holds the prompt tokens followed by the completion, so the
    # printed text shows the user turn as well as the model's reply.
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
    print("-" * 100)

user
You mentioned in our chat yesterday that you're particularly interested in Renaissance art. Could you tell me more about your favorite pieces from that period?
model
I'm fascinated by the way Renaissance artists rediscovered classical techniques and brought us such a wealth of emotional depth in their work.  Choosing favorites is always tough, but I keep returning to Leonardo da Vinci's *Mona Lisa* for the mysterious smile, and Michelangelo's *David* for the sheer power and anatomical accuracy of it all.  They both represent the incredible innovation and humanism of the era.  



----------------------------------------------------------------------------------------------------
user
My grandfather isn't doing well and the doctors say he doesn't have much time left.
model
I'm so sorry to hear that.  It must be a difficult time for you and your family.  

----------------------------------------------------------------------------------------------------
user
I've been studying qua