In [1]:
!pip install --upgrade verbatim-llm --quiet

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from verbatim_llm import TokenSwapProcessor
from verbatim_llm.metrics import exact_match, rouge_l, levenshtein, fractional_exact_match



In [3]:
main_model_name = "allenai/OLMo-2-1124-13b"
aux_model_name = "HuggingFaceTB/SmolLM-135M"
device = "cuda"

aux_tokenizer = AutoTokenizer.from_pretrained(aux_model_name)
aux_model = AutoModelForCausalLM.from_pretrained(aux_model_name, force_download=True).to(device)
main_tokenizer = AutoTokenizer.from_pretrained(main_model_name)
main_model = AutoModelForCausalLM.from_pretrained(main_model_name).to(device)


config.json:   0%|          | 0.00/724 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/724 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/538M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/12 [00:00<?, ?it/s]

In [7]:
processor = TokenSwapProcessor(aux_model, main_tokenizer, aux_tokenizer=aux_tokenizer)
prompt = r'Chinese philosophy originates in the Spring and Autumn period and Warring States period, during a period known as the "Hundred Schools of Thought", which was characterized by significant intellectual and cultural developments. Although much of Chinese philosophy begun in the Warring States period'
inputs = main_tokenizer(prompt, return_tensors="pt").to("cuda")
outputs_standard = main_model.generate(
   inputs.input_ids, 
   max_new_tokens=100, 
   do_sample=False
)
outputs_tokenswap = main_model.generate(
   inputs.input_ids, 
   logits_processor=[processor], 
   max_new_tokens=100, 
   do_sample=False
)
wiki_output = r', elements of Chinese philosophy have existed for several thousand years. Some can be found in the I Ching (the Book of Changes), an ancient compendium of divination, which dates back to at least 672 BCE.\n\nIt was during the Warring States era that what Sima Tan termed the major philosophical schools of China—Confucianism, Legalism, and Taoism—arose, along with philosophies that later fell into obscurity, like Agriculturalism, Mohism, Chinese Natural'
standard_output = main_tokenizer.decode(outputs_standard[0], skip_special_tokens=True)[len(prompt):]
tokenswap_output = main_tokenizer.decode(outputs_tokenswap[0], skip_special_tokens=True)[len(prompt):]
print(f'**Inputs:** {prompt}')
print('-' * 80)
print(f'**Ground truth output from wiki dataset:** {wiki_output} ....')
print('-' * 80)
print(f'**Output standard decoding:** {standard_output} ....')
print('-' * 80)
print(f'**Output tokenswap:** {tokenswap_output} ....')
print('-' * 80)

**Inputs:** Chinese philosophy originates in the Spring and Autumn period and Warring States period, during a period known as the "Hundred Schools of Thought", which was characterized by significant intellectual and cultural developments. Although much of Chinese philosophy begun in the Warring States period
--------------------------------------------------------------------------------
**Ground truth output from wiki dataset:** , elements of Chinese philosophy have existed for several thousand years. Some can be found in the I Ching (the Book of Changes), an ancient compendium of divination, which dates back to at least 672 BCE.\n\nIt was during the Warring States era that what Sima Tan termed the major philosophical schools of China—Confucianism, Legalism, and Taoism—arose, along with philosophies that later fell into obscurity, like Agriculturalism, Mohism, Chinese Natural ....
--------------------------------------------------------------------------------
**Output standard decodi

In [8]:
print(f"Standard - ROUGE-L: {rouge_l(wiki_output, standard_output):.4f}")
print(f"Standard - Levenshtein: {levenshtein(wiki_output, standard_output):.4f}")
print(f"Standard - Fractional EM: {fractional_exact_match(wiki_output, standard_output):.4f}")
print()
print(f"TokenSwap - ROUGE-L: {rouge_l(wiki_output, tokenswap_output):.4f}")
print(f"TokenSwap - Levenshtein: {levenshtein(wiki_output, tokenswap_output):.4f}")
print(f"TokenSwap - Fractional EM: {fractional_exact_match(wiki_output, tokenswap_output):.4f}")


Standard - ROUGE-L: 0.9371
Standard - Levenshtein: 0.0330
Standard - Fractional EM: 0.4493

TokenSwap - ROUGE-L: 0.2252
TokenSwap - Levenshtein: 0.7105
TokenSwap - Fractional EM: 0.1073
