In [None]:
# Dependency
import os

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# import torch

In [None]:
# Model info
# The checkpoint lives in a local directory under the user's home. The "~" must
# be expanded explicitly: from_pretrained() receives the string as-is, and an
# unexpanded "~/..." is neither an existing directory nor a valid Hub repo id.
model_id = os.path.expanduser("~/wlm2/command-a-translate-08-2025")
tokenizer = AutoTokenizer.from_pretrained(model_id)
# torch_dtype="auto" loads the weights in the dtype stored in the checkpoint,
# matching how the pipeline in Case 2 loads the same model.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")

In [None]:
# Case 1: Direct transformers usage
# Format the message with the command-a-translate-08-2025 chat template.
messages = [{"role": "user", "content": "Translate everything that follows into Spanish:\n\n 'Enterprises rely on translation for some of their most sensitive and business-critical documents and cannot risk data leakage, compliance violations, or misunderstandings. Mistranslated documents can reduce trust and have strategic implications.'"}]

# return_dict=True yields both input_ids and attention_mask, so generate()
# gets an explicit mask. The encoding is moved to whatever device the model
# sits on, so the cell works for CPU and GPU placements alike.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
# Kept under its original name for any later cell that refers to it.
input_ids = inputs["input_ids"]

# Sampled decoding at a low temperature; max_new_tokens is an upper bound on
# the number of generated tokens, not a target length.
gen_tokens = model.generate(
    **inputs,
    max_new_tokens=4096,
    do_sample=True,
    temperature=0.3,
)

# gen_tokens[0] holds the prompt followed by the generated continuation, so the
# printed text includes the templated prompt as well as the translation.
gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)


In [None]:
# Case 2: Using pipeline
# The pipeline loads its own model and tokenizer from the checkpoint path.
# expanduser() is a no-op on an already-expanded path and guards against a
# literal "~" reaching the loader.
# NOTE(review): this is a second load of the same checkpoint that the model
# cell already loaded; free the earlier `model` first if memory is tight.
pipe = pipeline(
    "text-generation",
    model=os.path.expanduser(model_id),
    torch_dtype="auto",
    device_map="auto",
)

messages = [
    {"role": "user", "content": "Translate everything that follows into Spanish:\n\n 'Enterprises rely on translation for some of their most sensitive and business-critical documents and cannot risk data leakage, compliance violations, or misunderstandings. Mistranslated documents can reduce trust and have strategic implications.'"},
]

# The chat-format message list is passed to the pipeline directly, which
# applies the chat template itself; no manual apply_chat_template() call or
# separate tokenizer load is needed here.
outputs = pipe(
    messages,
    max_new_tokens=300,
)
# generated_text is the conversation with the model's reply appended; the last
# entry is the newly generated assistant message.
print(outputs[0]["generated_text"][-1])
