In [1]:
import torch
from dethcod.token import compression, decompression
from transformers import AutoTokenizer

# Single checkpoint name shared by both models and the tokenizer.
MODEL_ID = "google-t5/t5-small"

# Use the GPU when torch can see one; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load each model from the checkpoint first, then move it to the chosen device.
compressor = compression.CompressionModel.from_pretrained(MODEL_ID)
compressor = compressor.to(device)

# NOTE: per the recorded output of this cell, `critic_head.*` is not in the
# checkpoint and is freshly initialised on load.
decompressor = decompression.DecompressionModel.from_pretrained(MODEL_ID)
decompressor = decompressor.to(device)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

Some weights of DecompressionModel were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['critic_head.bias', 'critic_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
from transformers import GenerationConfig

# Seed for the sampling step below; change it to draw a different sample.
SEED = 0

generation_config = GenerationConfig(
    do_sample=True,     # sample tokens instead of decoding greedily
    num_beams=1,        # no beam search
    max_new_tokens=20,  # cap on the number of generated tokens
)

# Task prefix followed by the sentence to process.
input_seq = """
translate English to German: I need a cup of water.
"""
input_ids = tokenizer.encode(input_seq, return_tensors="pt").to(device)

# Sampling is stochastic: seed torch's RNG right before generating so that
# re-running this cell (same device, same library versions) yields the same
# tokens. Results may still differ between CPU and GPU.
torch.manual_seed(SEED)
generated_output = compressor.generate(
    input_ids=input_ids,
    generation_config=generation_config,
)

# repr() keeps special tokens and any surrounding whitespace visible.
for seq in tokenizer.batch_decode(generated_output):
    print(repr(seq))

'<pad> Mir ist eine Tasse Wasser gefallen.</s>'


In [3]:
# Round-trip check: decode the encoded prompt back to text to see exactly what
# the model receives (special tokens are kept, since none are skipped here).
tokenizer.batch_decode(input_ids)

['translate English to German: I need a cup of water.</s>']

In [5]:
# Feed the compressor's generated token ids to the decompressor, using the
# original prompt ids as labels.
# Call the module itself rather than `.forward(...)`: `nn.Module.__call__`
# runs any registered hooks, which a direct `forward` call silently skips.
decompression_output = decompressor(
    input_ids=generated_output,
    labels=input_ids,
)

In [7]:
# Shape of the decompressor's logits.
# NOTE(review): presumably (batch, label sequence length, vocabulary size) — confirm.
decompression_output.logits.shape

torch.Size([1, 14, 32128])

In [8]:
# Shape of the decompressor's value predictions.
# NOTE(review): presumably one scalar per position from the critic head — confirm.
decompression_output.value_predictions.shape

torch.Size([1, 14, 1])