## paper: [Contrastive Decoding: Open-ended Text Generation as Optimization](https://arxiv.org/abs/2210.15097)


In [39]:
import torch
import transformers

device = 'cuda' if torch.cuda.is_available() else 'cpu'

tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2')
amateur_lm = transformers.AutoModelForCausalLM.from_pretrained('gpt2').to(device)
expert_lm = transformers.AutoModelForCausalLM.from_pretrained('gpt2-medium').to(device)

In [41]:
prompt = "Barack Obama was born in Honolulu, Hawaii. He was born in"

def generate_builtin(model, prompt, max_len=100, temperature = 0.8):
    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)

    gen_tokens = model.generate(
        **model_inputs,
        do_sample=True,
        temperature=temperature,
        max_length=max_len,
    )
    return tokenizer.batch_decode(gen_tokens)[0]

print(
    f"""built-in generate for prompt: {prompt}
    responses: {generate_builtin(expert_lm, prompt)}"""
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


built-in generate for prompt: Barack Obama was born in Honolulu, Hawaii. He was born in
    responses: Barack Obama was born in Honolulu, Hawaii. He was born in Chicago, Illinois. He was born in New York City, New York. He was born in Arlington, Virginia. He was born in Washington, Washington. He was born in St. John's, Newfoundland. He was born in New York City, New York. He was born in Chicago, Illinois. He was born in Dallas, Texas. He was born in New York City, New York. He was born in Chicago,


In [42]:
def my_generate(model, prompt, max_len=100, temperature = 0.8):
    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
    print(model_inputs)


print(
    f"""My generate for prompt: {prompt}
    responses: {my_generate(expert_lm, prompt)}"""
)

{'input_ids': tensor([[10374,   441,  2486,   373,  4642,   287, 43296,    11, 13708,    13,
           679,   373,  4642,   287]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}
My generate for prompt: Barack Obama was born in Honolulu, Hawaii. He was born in
    responses: None


TODO:
- generation for plain model
- comp between expert and amature lm
- generation for contrastive decoding without restraint
- generation for contrastive decoding with restraint
