In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "meta-llama/Meta-Llama-3-8B"
print(f"Loading model: {model_name}")
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.to("cuda")

Loading model: meta-llama/Meta-Llama-3-8B


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [40]:
def tokenize_llama_chat(
    tokenizer,
    user_input: str,
    model_output: str = None,
    system_prompt: str = None,
):
    input_content = ""
    if system_prompt is not None:
        input_content += "[INST]" + system_prompt + "[/INST]"
    input_content += f"[INST] {user_input.strip()} [/INST]"
    if model_output is not None:
        input_content += f" {model_output.strip()}"
    print(input_content)
    return tokenizer.encode(input_content)

In [41]:
user_input="<P> The series stars Bob Neuwirth as Shaun Murphy , a young surgical resident with autism and savant syndrome at San Jose St. Bonaventure Hospital . Antonia Thomas , Nicholas Gonzalez , Chuku Modu , Beau Garrett , Hill Harper , Richard Schiff , and Tamlyn Tomita also star in the show . The series received a put pilot commitment at ABC after a previous attempted series did not move forward at CBS Television Studios in 2015 ; The Good Doctor was ordered to series in May 2017 . On October 3 , 2017 , ABC picked up the series for a full season of 18 episodes . </P> who is the actor that plays dr. sean murphy?\n\nChoices:\n (A) Bob Neuwirth\n (B) Freddie Highmore"

In [42]:
model_output = "("

In [43]:
tokens = tokenize_llama_chat(tokenizer=tokenizer, user_input=user_input, model_output=model_output)
tokens = torch.tensor(tokens).unsqueeze(0).to("cuda")

[INST] <P> The series stars Bob Neuwirth as Shaun Murphy , a young surgical resident with autism and savant syndrome at San Jose St. Bonaventure Hospital . Antonia Thomas , Nicholas Gonzalez , Chuku Modu , Beau Garrett , Hill Harper , Richard Schiff , and Tamlyn Tomita also star in the show . The series received a put pilot commitment at ABC after a previous attempted series did not move forward at CBS Television Studios in 2015 ; The Good Doctor was ordered to series in May 2017 . On October 3 , 2017 , ABC picked up the series for a full season of 18 episodes . </P> who is the actor that plays dr. sean murphy?

Choices:
 (A) Bob Neuwirth
 (B) Freddie Highmore [/INST] (


In [44]:
tokens

tensor([[128000,     58,  65562,     60,    366,     47,     29,    578,   4101,
           9958,  14596,   4275,  43210,   4914,    439,  77371,  31053,   1174,
            264,   3995,  34933,  19504,    449,  38281,    323,  11427,    519,
          28439,    520,   5960,  11097,    800,     13,  13789,    402,    306,
            554,  15429,    662,  17958,    689,  11355,   1174,  40796,  52186,
           1174,    921,  36834,   5768,     84,   1174,  80327,  58716,   1174,
           8270,  33107,   1174,  12131,  88683,   1174,    323,  29988,  18499,
           8529,   6388,   1101,   6917,    304,    279,   1501,    662,    578,
           4101,   4036,    264,   2231,  18178,  15507,    520,  19921,   1306,
            264,   3766,  17644,   4101,   1550,    539,   3351,   4741,    520,
          24991,  41445,  31362,    304,    220,    679,     20,   2652,    578,
           7839,  19150,    574,  11713,    311,   4101,    304,   3297,    220,
            679,     22,    

In [45]:
logits = model(tokens).logits[0,-1,:]

In [46]:
import torch.nn.functional as F
probs = F.softmax(logits)
probs.shape

  probs = F.softmax(logits)


torch.Size([128256])

In [47]:
torch.argmax(probs).item(), tokenizer.convert_ids_to_tokens(34),probs[34].item()

(34, 'C', 0.7085361480712891)

In [48]:
tokenizer.convert_tokens_to_ids("A"),tokenizer.convert_tokens_to_ids("B"),tokenizer.convert_tokens_to_ids("C")

(32, 33, 34)

In [49]:
probs[32],probs[33],probs[34]

(tensor(0.0268, device='cuda:0', grad_fn=<SelectBackward0>),
 tensor(0.0331, device='cuda:0', grad_fn=<SelectBackward0>),
 tensor(0.7085, device='cuda:0', grad_fn=<SelectBackward0>))

--------------------------------------------------------------

In [58]:
user_input="<P> The series stars Bob Neuwirth as Shaun Murphy , a young surgical resident with autism and savant syndrome at San Jose St. Bonaventure Hospital . Antonia Thomas , Nicholas Gonzalez , Chuku Modu , Beau Garrett , Hill Harper , Richard Schiff , and Tamlyn Tomita also star in the show . The series received a put pilot commitment at ABC after a previous attempted series did not move forward at CBS Television Studios in 2015 ; The Good Doctor was ordered to series in May 2017 . On October 3 , 2017 , ABC picked up the series for a full season of 18 episodes . </P> \nwho is the actor that plays dr. sean murphy?\n\nChoices:\n (A) Bob Neuwirth\n (B) Freddie Highmore."
model_output = "Ans: ("
tokens = tokenize_llama_chat(tokenizer=tokenizer, user_input=user_input, model_output=model_output)
tokens = torch.tensor(tokens).unsqueeze(0).to("cuda")
logits = model(tokens).logits[0,-1,:]
probs = F.softmax(logits)

[INST] <P> The series stars Bob Neuwirth as Shaun Murphy , a young surgical resident with autism and savant syndrome at San Jose St. Bonaventure Hospital . Antonia Thomas , Nicholas Gonzalez , Chuku Modu , Beau Garrett , Hill Harper , Richard Schiff , and Tamlyn Tomita also star in the show . The series received a put pilot commitment at ABC after a previous attempted series did not move forward at CBS Television Studios in 2015 ; The Good Doctor was ordered to series in May 2017 . On October 3 , 2017 , ABC picked up the series for a full season of 18 episodes . </P> 
who is the actor that plays dr. sean murphy?

Choices:
 (A) Bob Neuwirth
 (B) Freddie Highmore. [/INST] Ans: (


  probs = F.softmax(logits)


In [59]:
torch.argmax(probs)

tensor(33, device='cuda:0')

In [63]:
probs[33]

tensor(0.9098, device='cuda:0', grad_fn=<SelectBackward0>)

In [61]:
tokenizer.convert_ids_to_tokens(33)

'B'

-----------------

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
print(f"Loading model: {model_name}")
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.to("cuda")

Loading model: meta-llama/Meta-Llama-3.1-8B-Instruct


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (n

In [2]:
from transformers import PreTrainedTokenizer

B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
BASE_INPUT = "Input:"
BASE_RESPONSE = "\nResponse:"

ADD_FROM_POS_CHAT = E_INST
ADD_FROM_POS_BASE = BASE_RESPONSE

B_TEXT = "<|begin_of_text|>"
B_HEADER, E_HEADER = "<|start_header_id|>","<|end_header_id|>"
EOT_ID = "<|eot_id|>"

def tokenize_llama_chat(
    tokenizer: PreTrainedTokenizer,
    user_input: str,
    model_output: str = None,
    system_prompt: str = None,
    use_latest:bool=False
):
    if not use_latest:
        input_content = ""
        if system_prompt is not None:
            input_content += B_SYS + system_prompt + E_SYS
        input_content += f"{B_INST} {user_input.strip()} {E_INST}"
        if model_output is not None:
            input_content += f" {model_output.strip()}"
    else:
        # input_content = B_TEXT
        input_content = ""
        if system_prompt is not None:
            input_content += B_HEADER + "system" + E_HEADER + "\n\n" + system_prompt + EOT_ID
        input_content += f"{B_HEADER}user{E_HEADER}\n\n{user_input.strip()}{EOT_ID}\n{B_HEADER}assistant{E_HEADER}\n\n"
        if model_output is not None:
            input_content += f" {model_output.strip()}"
    return tokenizer.encode(input_content)

In [3]:
import torch.nn.functional as F

user_input="<P> The series stars Bob Neuwirth as Shaun Murphy , a young surgical resident with autism and savant syndrome at San Jose St. Bonaventure Hospital . Antonia Thomas , Nicholas Gonzalez , Chuku Modu , Beau Garrett , Hill Harper , Richard Schiff , and Tamlyn Tomita also star in the show . The series received a put pilot commitment at ABC after a previous attempted series did not move forward at CBS Television Studios in 2015 ; The Good Doctor was ordered to series in May 2017 . On October 3 , 2017 , ABC picked up the series for a full season of 18 episodes . </P> \nwho is the actor that plays dr. sean murphy?\n\nChoices:\n (A) Bob Neuwirth\n (B) Freddie Highmore."
model_output = "("
tokens = tokenize_llama_chat(tokenizer=tokenizer, user_input=user_input, model_output=model_output,use_latest=True)
tokens = torch.tensor(tokens).unsqueeze(0).to("cuda")
logits = model(tokens).logits[0,-1,:]
probs = F.softmax(logits)

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
  probs = F.softmax(logits)


In [4]:
torch.argmax(probs)

tensor(33, device='cuda:0')

In [5]:
print(tokenizer.decode(tokens[0].tolist()))

<|begin_of_text|><|start_header_id|>user<|end_header_id|>

<P> The series stars Bob Neuwirth as Shaun Murphy, a young surgical resident with autism and savant syndrome at San Jose St. Bonaventure Hospital. Antonia Thomas, Nicholas Gonzalez, Chuku Modu, Beau Garrett, Hill Harper, Richard Schiff, and Tamlyn Tomita also star in the show. The series received a put pilot commitment at ABC after a previous attempted series did not move forward at CBS Television Studios in 2015 ; The Good Doctor was ordered to series in May 2017. On October 3, 2017, ABC picked up the series for a full season of 18 episodes. </P> 
who is the actor that plays dr. sean murphy?

Choices:
 (A) Bob Neuwirth
 (B) Freddie Highmore.<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>

 (


In [7]:
tokenizer.convert_ids_to_tokens(33)

'B'

In [36]:
user_input="Context: <P> The series stars Bob Neuwirth as Shaun Murphy , a young surgical resident with autism and savant syndrome at San Jose St. Bonaventure Hospital . Antonia Thomas , Nicholas Gonzalez , Chuku Modu , Beau Garrett , Hill Harper , Richard Schiff , and Tamlyn Tomita also star in the show . The series received a put pilot commitment at ABC after a previous attempted series did not move forward at CBS Television Studios in 2015 ; The Good Doctor was ordered to series in May 2017 . On October 3 , 2017 , ABC picked up the series for a full season of 18 episodes . </P> \nQuestion: who is the actor that plays dr. shaun murphy?\n\nChoices:\n (A) Based on the context, Bob Neuwirth plays Dr. Shaun Murphy in 'The Good Doctor'.\n (B) The context is incorrect. Freddie Highmore plays Dr. Shaun Murphy in 'The Good Doctor'."
model_output = "(A"
tokens = tokenize_llama_chat(tokenizer=tokenizer, user_input=user_input, model_output=model_output,use_latest=True)
# print(tokenizer.decode(tokens),end="")
tokens = torch.tensor(tokens).unsqueeze(0).to("cuda")
generated_tokens = model.generate(tokens,max_new_tokens=100)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [37]:
print(tokenizer.decode(generated_tokens[0].tolist()))

<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Context: <P> The series stars Bob Neuwirth as Shaun Murphy, a young surgical resident with autism and savant syndrome at San Jose St. Bonaventure Hospital. Antonia Thomas, Nicholas Gonzalez, Chuku Modu, Beau Garrett, Hill Harper, Richard Schiff, and Tamlyn Tomita also star in the show. The series received a put pilot commitment at ABC after a previous attempted series did not move forward at CBS Television Studios in 2015 ; The Good Doctor was ordered to series in May 2017. On October 3, 2017, ABC picked up the series for a full season of 18 episodes. </P> 
Question: who is the actor that plays dr. shaun murphy?

Choices:
 (A) Based on the context, Bob Neuwirth plays Dr. Shaun Murphy in 'The Good Doctor'.
 (B) The context is incorrect. Freddie Highmore plays Dr. Shaun Murphy in 'The Good Doctor'.<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>

 (A) Based on the context, Bob Neuwirth plays Dr. Shaun Murphy in 'The Good 

In [None]:
generated_tokens[0].tolist()

In [88]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (n

In [42]:
tokenizer.convert_ids_to_tokens(128004)

'<|finetune_right_pad_id|>'

In [40]:
tokenizer.pad_token = "<|finetune_right_pad_id|>"

In [41]:
tokenizer.pad_token_id

128004