In [None]:
# pip install transformers torch
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "HuggingFaceTB/SmolLM-135M"
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Pad with the EOS token: the `generate` helper below pads batched prompts to the longest one.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
# `model` is used by the generation cells below, so it has to be loaded here for
# the notebook to survive Restart Kernel -> Run All.
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device).eval()

In [None]:
# Display the tokenizer's chat template as the cell output.
tokenizer.chat_template

In [None]:
# Batched generation helper for prompting workflows: samples at temperature 0.7 by default, greedy when temperature <= 0.
def generate(model, tokenizer, device, prompts, system_prompt=None, max_new_tokens=50, return_full_text=False, **generate_kwargs):
    """Generate one completion per prompt with a causal language model.

    Each prompt is prefixed with ``system_prompt`` (separated by a blank line),
    the batch is tokenized with left padding and truncated to 512 tokens, and
    the result of ``model.generate`` is decoded with special tokens skipped.

    Args:
        model: Object exposing ``generate(**inputs, ...)``.
        tokenizer: Callable tokenizer that also provides ``decode``.
        device: Device the tokenized batch is moved to via ``.to(device)``.
        prompts: List of prompt strings; a single string is treated as a
            one-element list.
        system_prompt: Text placed before every prompt. ``None`` selects a
            default sentence-completion instruction.
        max_new_tokens: Forwarded to ``model.generate``.
        return_full_text: When true, decode the whole sequence (prompt and
            continuation) instead of only the newly generated tokens.
        **generate_kwargs: Extra options forwarded to ``model.generate``.
            ``temperature`` (default 0.7) is handled here: a value above zero
            enables sampling; otherwise decoding is greedy and the temperature
            is not forwarded at all.

    Returns:
        A list of stripped strings, one per prompt, in input order.
    """
    if isinstance(prompts, str):
        # A bare string would otherwise be iterated character by character.
        prompts = [prompts]
    if system_prompt is None:
        system_prompt = "You are a helpful assistant that completes sentences."
    prompts = [f"{system_prompt}\n\n{prompt}".strip() for prompt in prompts]
    if not prompts:
        # Nothing to tokenize or generate for an empty batch.
        return []

    inputs = tokenizer(prompts, return_tensors="pt", padding="longest", truncation=True, max_length=512, padding_side='left').to(device)

    temperature = generate_kwargs.pop("temperature", 0.7)
    do_sample = temperature > 0.0
    generate_kwargs["do_sample"] = do_sample
    if do_sample:
        # Temperature only applies to sampling; forwarding a non-positive value
        # together with do_sample=False is an invalid/ignored combination.
        generate_kwargs["temperature"] = temperature

    # The tokenizer is handed to `generate` so that options such as `stop_strings` can use it.
    output_ids = model.generate(**inputs, tokenizer=tokenizer, max_new_tokens=max_new_tokens, **generate_kwargs)

    # With left padding every row shares the same (padded) prompt width, so the
    # continuation of each row starts at that column.
    prompt_length = inputs["input_ids"].shape[-1]
    start = 0 if return_full_text else prompt_length
    return [
        tokenizer.decode(output_ids[i][start:], skip_special_tokens=True).strip()
        for i in range(len(prompts))
    ]

In [None]:
# Smoke-test the helper on two short prompts; temperature=0.0 selects greedy decoding.
generate(
    model=model,
    tokenizer=tokenizer,
    device=device,
    prompts=["Once upon a time in", "In a galaxy far, far away"],
    system_prompt="You are a helpful assistant",
    max_new_tokens=20,
    temperature=0.0,
)

In [None]:
from datasets import load_dataset

# Dataset used for evaluation below; later cells read ds["test"] and its "EN" / "ES" columns.
ds = load_dataset("loresiensis/corpus-en-es")

In [None]:
# pip install evaluate sacrebleu
import evaluate
from tqdm.auto import tqdm

# Metrics shared by the evaluation helpers below; each is read via compute(...)["score"].
sacrebleu = evaluate.load("sacrebleu")
chrf = evaluate.load("chrf")

# Few-shot demonstrations; every Spanish side ends with the END marker that the
# instruction asks the model to emit.
examples = [
    {
        "en": "Hello, how are you?",
        "es": "Hola, ¿cómo estás? END"
    },
    {
        "en": "What is your name?",
        "es": "¿Cuál es tu nombre? END"
    },
    {
        "en": "I love programming.",
        "es": "Me encanta programar. END"
    },

]

# Demonstrations rendered as "English: ...\nSpanish: ...\n\n" blocks.
examples_str = "".join(
    f"English: {ex['en']}\nSpanish: {ex['es']}\n\n" for ex in examples
)
instruction_prompt = "Translate the following English sentences to Spanish and end each translation with the token END.\n\n"


def prompt_translate_en_to_es(text, few_shot=False):
    """Build the English-to-Spanish translation prompt for one sentence.

    Args:
        text: English source sentence; surrounding whitespace is removed.
        few_shot: When true, insert the demonstrations from ``examples_str``
            between the instruction and the sentence to translate. The default
            (``False``) yields the instruction-only prompt.

    Returns:
        The prompt string, ending in ``"Spanish:"`` so the model continues
        with the translation.
    """
    demonstrations = examples_str if few_shot else ""
    return f"{instruction_prompt}{demonstrations}English: {text.strip()}\nSpanish:"


prompt_translate_en_to_es("My name is John.")  # Example prompt

In [None]:
# Single-sentence check of the translation prompt: greedy decoding that halts at the "END" marker.
generate(
    model=model,
    tokenizer=tokenizer,
    device=device,
    prompts=[prompt_translate_en_to_es("my name is John.")],
    system_prompt="You are a very accurate English to Spanish translator.",
    max_new_tokens=1024,
    temperature=0.0,
    repetition_penalty=1.2,
    stop_strings=["END"],
    top_p=1.0,
)

In [None]:
def evaluate_translation_split(split, max_samples=100, batch_size=16):
    """Translate the first rows of ``split`` from English to Spanish and score them.

    Relies on names defined elsewhere in the notebook: ``model``, ``tokenizer``,
    ``device``, ``generate``, ``prompt_translate_en_to_es``, ``sacrebleu``,
    ``chrf`` and ``tqdm``.

    Args:
        split: Dataset split supporting ``len(split)`` and column access via
            ``split["EN"]`` / ``split["ES"]``.
        max_samples: Maximum number of rows to evaluate; ``None`` evaluates
            the whole split.
        batch_size: Number of sentences sent to ``generate`` per call.

    Returns:
        Dict with ``num_eval_samples``, ``sacrebleu`` and ``chrf``.
    """
    if max_samples is not None:
        subset_size = min(len(split), max_samples)
    else:
        subset_size = len(split)

    # Get lists of source texts and references
    src_texts = split["EN"][:subset_size]
    tgt_texts = split["ES"][:subset_size]

    # Marker the prompt asks the model to emit after each translation; used both
    # to stop generation and to trim the decoded text.
    stop_marker = "END"

    predictions = []
    # run generation in batches for efficiency
    for i in tqdm(range(0, subset_size, batch_size), desc="Translating EN→ES"):
        batch_texts = src_texts[i : i + batch_size]
        prompts = [prompt_translate_en_to_es(text) for text in batch_texts]

        # `generate` accepts a list of prompts and returns one string per prompt.
        batch_preds = generate(model=model,
                               tokenizer=tokenizer,
                               device=device,
                               prompts=prompts,
                               system_prompt="You are a very accurate English to Spanish translator.",
                               max_new_tokens=1024,
                               temperature=0.0,
                               repetition_penalty=1.2,
                               stop_strings=[stop_marker],
                               top_p=1.0)

        # Keep only the text before the first marker. `str.strip("END")` would
        # instead remove any leading/trailing 'E', 'N' or 'D' characters and
        # corrupt translations such as "El ..." or "MADRID".
        predictions.extend(p.partition(stop_marker)[0].strip() for p in batch_preds)

    # prepare references in the format expected by the metrics: list[list[str]]
    references = [[r] for r in tgt_texts]

    sacrebleu_score = sacrebleu.compute(predictions=predictions, references=references)["score"]
    chrf_score = chrf.compute(predictions=predictions, references=references)["score"]

    return {
        "num_eval_samples": subset_size,
        "sacrebleu": sacrebleu_score,
        "chrf": chrf_score,
    }


In [None]:
# test_eval_metrics = evaluate_translation_split(ds["test"], max_samples=100)
# test_eval_metrics

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# Translation pipeline that `evaluate_pipeline_translation` below calls through the global `pipe`.
pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")

In [None]:
def evaluate_pipeline_translation(split, src="ES", tgt="EN", max_samples=100, batch_size=16):
    """Score the notebook-level ``pipe`` translation pipeline on a dataset split.

    The globals ``pipe``, ``sacrebleu``, ``chrf`` and ``tqdm`` must already exist.
    Texts are read from column ``src`` and compared against column ``tgt``; the
    defaults evaluate ES -> EN, so pass ``src``/``tgt`` matching the direction
    the loaded pipeline actually translates.

    Returns a dict with the number of evaluated rows, the sacreBLEU and chrF
    scores, and the first five predictions for quick inspection.
    """
    total_rows = len(split)
    n_eval = total_rows if max_samples is None else min(total_rows, max_samples)

    # Source sentences and their reference translations.
    sources = split[src][:n_eval]
    targets = split[tgt][:n_eval]

    hypotheses = []
    # Feed the pipeline one slice of `batch_size` sentences at a time.
    for start in tqdm(range(0, n_eval, batch_size), desc="Translating with pipeline"):
        translated = pipe(sources[start : start + batch_size])  # list of dicts keyed by 'translation_text'
        hypotheses += [item["translation_text"].strip() for item in translated]

    # Both metrics take references as list[list[str]] (one reference list per prediction).
    wrapped_refs = [[target] for target in targets]

    bleu = sacrebleu.compute(predictions=hypotheses, references=wrapped_refs)["score"]
    chrf_value = chrf.compute(predictions=hypotheses, references=wrapped_refs)["score"]

    report = {"num_eval_samples": n_eval}
    report["sacrebleu"] = bleu
    report["chrf"] = chrf_value
    report["example_predictions"] = hypotheses[:5]  # a few examples for quick inspection
    return report

# Score the pipeline on up to 100 rows of the test split, EN -> ES, in batches of 32
# (adjust max_samples/batch_size as desired).
pipeline_test_metrics = evaluate_pipeline_translation(
    ds["test"],
    src="EN",
    tgt="ES",
    max_samples=100,
    batch_size=32,
)
pipeline_test_metrics

In [None]:
# from peft import PeftModel

# model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM-135M", torch_dtype="auto", device_map="auto")
# model = PeftModel.from_pretrained(model, "outputs/smol-lora/lora-adapter/")

# # Now we can evaluate the LoRA model using the same function as before
# lora_test_eval_metrics = evaluate_translation_split(ds["test"], max_samples=100)
# lora_test_eval_metrics

In [None]:
# {'num_eval_samples': 100,
#  'sacrebleu': 4.738854543392033,
#  'chrf': 32.4413022633244}

# {'num_eval_samples': 100,
#  'sacrebleu': 7.065010142972971,
#  'chrf': 37.08565733996998}

In [None]:
from vllm import LLM, SamplingParams

model_name = "HuggingFaceTB/SmolLM-135M"  # or any non-instruct HF model

llm = LLM(model=model_name)
# vLLM's SamplingParams takes stop sequences through `stop`; `stop_strings` is the
# Hugging Face `generate` argument name and is not accepted here.
sampling_params = SamplingParams(max_tokens=512, temperature=0.0, top_p=1.0, repetition_penalty=1.2, stop=["END"])

prompts = [
    "The capital of France is",
    "Once upon a time in Ethiopia,",
]

# One result object is returned per prompt.
outputs = llm.generate(prompts, sampling_params)


In [None]:
# Re-runs generation with the same two prompts as the previous cell, rebinding `prompts` and `outputs`.
prompts = [
    "The capital of France is",
    "Once upon a time in Ethiopia,",
]

outputs = llm.generate(prompts, sampling_params)

In [None]:
# Text of the first completion returned for the first prompt.
outputs[0].outputs[0].text