# Model Playground

A guide to getting the logits of a llama model. These logits can then be used for postraining techniques such as RLVF. 

In [54]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from gsm8k import GSM8K_Env
from utils.strings import check_eos
from utils.math import softmax_temp

In [55]:

model_name = 'meta-llama/Llama-3.2-1B'  # Replace with your desired model variant
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
print("model loaded")


model loaded


In [56]:
## get the probs of eacht token from the model..

inputs = tokenizer("I just want the logits of the next token", return_tensors='pt')
##What does the inputs attention mask do ? attention mask indicates if a token should be attended or not 
tensor = inputs['input_ids']
print(tensor)
## why ** ? to upack the inputs dictionary, which includes ids = tokens and the input mask that specifies what tokens to attend to. 
outputs = model(tensor)

tensor([[128000,     40,   1120,   1390,    279,  61888,    315,    279,   1828,
           4037]])


In [57]:
#so the output is the probs of the next token?? that means that the last probs are the ones that matter for RL 
logits = outputs.logits
m = torch.nn.Softmax(dim=0)
#what is the shape of logits 
softmaxed = m(logits[0,-1])
print(softmaxed.shape)

torch.Size([128256])


In [58]:
dist = torch.distributions.Categorical(softmaxed)
action = dist.sample()

In [59]:
print(action.shape)

torch.Size([])


In [60]:
## figure out how to decode action to get output 
decoded = tokenizer.decode(action)

print(decoded)

response = model.generate(**inputs)
print(response)
print(tokenizer.decode(response[0]))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


 -
tensor([[128000,     40,   1120,   1390,    279,  61888,    315,    279,   1828,
           4037,    304,    279,  11914,    311,    387,    279,   1988,    311,
            279,   1828,   6324,    304,    279,    432,   9944,     13,    358,
           1097,   1701,    279]])
<|begin_of_text|>I just want the logits of the next token in the sentence to be the input to the next layer in the RNN. I am using the


In [61]:


test = torch.tensor([1,4,5,2], dtype=torch.float16)
print(softmax_temp(test, 1.5))
print(m(test))

tensor([0.0404, 0.2986, 0.5825, 0.0787], dtype=torch.float16)
tensor([0.0128, 0.2561, 0.6963, 0.0347], dtype=torch.float16)


In [66]:
## run inference until model finishes generating 
env = GSM8K_Env(tokenizer)
state = env.reset()
env.render()
term = False
while not term:
    outputs = model(state)
    probs = softmax_temp(outputs.logits[0, -1], 1.5)
    dist = torch.distributions.Categorical(probs)
    action = dist.sample()
    action_tensor = action.unsqueeze(0).unsqueeze(0)
    obs, reward = env.step(action_tensor)
    print(f'reward was {reward}')
    response = env.get_state()
    term = action == tokenizer.eos_token_id
print(env.state)
print(response)

<|begin_of_text|>Frankie's parents let him have many pets. He has six more snakes than he has cats. He has one less parrot than cats. Six of his pets have four legs. He has 2 dogs. How many pets does he have in total?
reward was 0.1
reward was 0.1
reward was 0.1
reward was 0.1
tensor([[128000,  38426,    648,    596,   6699,   1095,   1461,    617,   1690,
          26159,     13,   1283,    706,   4848,    810,  57196,   1109,    568,
            706,  19987,     13,   1283,    706,    832,   2753,   1370,   4744,
           1109,  19987,     13,  19198,    315,    813,  26159,    617,   3116,
          14535,     13,   1283,    706,    220,     17,  12875,     13,   2650,
           1690,  26159,   1587,    568,    617,    304,   2860,     30,   5560,
           1283,    220, 128001]])
<|begin_of_text|>Frankie's parents let him have many pets. He has six more snakes than he has cats. He has one less parrot than cats. Six of his pets have four legs. He has 2 dogs. How many pets does h

In [63]:
print(response)

<|begin_of_text|>Frankie's parents let him have many pets. He has six more snakes than he has cats. He has one less parrot than cats. Six of his pets have four legs. He has 2 dogs. How many pets does he have in total? Surely Frankie Could Equation The Essentially Branch Identify Thousands ( -- Canpk All How He Might There Explain Options When ENTER Determines Quant<|end_of_text|>


In [64]:
res = model.generate(tokenizer.encode("Frankie's parents let him have many pets. He has six more snakes than he has cats. He has one less parrot than cats. Six of his pets have four legs. He has 2 dogs. How many pets does he have in total?", return_tensors='pt'))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [65]:
print(res)
print(tokenizer.decode(res[0]))


tensor([[128000,  38426,    648,    596,   6699,   1095,   1461,    617,   1690,
          26159,     13,   1283,    706,   4848,    810,  57196,   1109,    568,
            706,  19987,     13,   1283,    706,    832,   2753,   1370,   4744,
           1109,  19987,     13,  19198,    315,    813,  26159,    617,   3116,
          14535,     13,   1283,    706,    220,     17,  12875,     13,   2650,
           1690,  26159,   1587,    568,    617,    304,   2860,     30, 128001]])
<|begin_of_text|>Frankie's parents let him have many pets. He has six more snakes than he has cats. He has one less parrot than cats. Six of his pets have four legs. He has 2 dogs. How many pets does he have in total?<|end_of_text|>
