In [1]:
from generation import Llama
import torch
import gc
import os
os.environ['RANK'] = '0'  # Example rank, adjust accordingly
os.environ['WORLD_SIZE'] = '1'  # Example world size, adjust accordingly
os.environ['MASTER_ADDR'] = 'localhost'  # Example master address
os.environ['MASTER_PORT'] = '12355'  # Example master port


In [2]:

llama = Llama.build(ckpt_dir= "../llama3/Meta-Llama-3-8B-Instruct/", tokenizer_path="../llama3/Meta-Llama-3-8B-Instruct/tokenizer.model", max_seq_len= 512, max_batch_size = 4, model_parallel_size=1)

> initializing model parallel with size 1
> initializing ddp with size 1
> initializing pipeline with size 1


  _C._set_default_tensor_type(t)


Loaded in 13.32 seconds


In [3]:
model = llama.model

In [4]:
tokenizer = llama.tokenizer

In [14]:



prompt_template = '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{user_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'



def get_prompt(prompt: str):
    return torch.tensor(tokenizer.encode(prompt_template.format(user_prompt = prompt), eos= False, bos = False), dtype= torch.long).unsqueeze(0)




prompt_tokens = tokenizer.encode_prompt("I came up with a new saying: 'stop and smell the roses' what you think of it")

In [10]:
len(prompt_tokens)

46

In [11]:
print(tokenizer.decode(prompt_tokens))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Please try to provide useful, helpful and actionable answers.<|eot_id|><|start_header_id|>user<|end_header_id|>

I came up with a new saying: 'stop and smell the roses' what you think of it<|eot_id|><|start_header_id|>assistant<|end_header_id|>




In [15]:
out_tokens = llama.generate([prompt_tokens], max_gen_len= 256, temperature = 0)

 59%|█████▊    | 150/256 [00:05<00:03, 28.33it/s]


In [16]:
print(tokenizer.decode(prompt_tokens+ out_tokens[0][0]))

<|begin_of_text|><|start_header_id|>user<|end_header_id|>

I came up with a new saying: 'stop and smell the roses' what you think of it<|eot_id|><|start_header_id|>assistant<|end_header_id|>

I think your new saying is... already a saying! "Stop and smell the roses" is a well-known English idiom that means to slow down and appreciate the small pleasures in life. It's often used to encourage people to take a break from their busy schedules and enjoy the beauty around them.

That being said, it's still a lovely phrase, and I'm sure you came up with it independently! Idioms and phrases are often rephrased or reworded in different cultures and languages, so it's possible that your version is a unique twist on the original.

If you'd like to create a new saying, I'd be happy to help you brainstorm some ideas. What theme or message would you like your saying to convey?


In [9]:
from model import CollectionHook

hook = CollectionHook()

In [10]:
from time import time
logits = model(prompt_tokens,0 , hook)

In [11]:
hook.cache.shape

torch.Size([1, 47, 4096])

In [12]:
message = {"role": "user", "content": "Whats 1-1?"}

In [13]:
from tokenizer import ChatFormat

formatter = ChatFormat(tokenizer)

In [14]:
tokens = formatter.encode_dialog_prompt([message])

In [15]:
tokenizer.decode(tokens)

'<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nWhats 1-1?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'

In [23]:
len(tokens)

16

In [24]:
prompt_tokens.tolist()[0]

[27,
 91,
 7413,
 3659,
 4424,
 91,
 1822,
 91,
 2527,
 8932,
 851,
 91,
 29,
 882,
 27,
 91,
 408,
 8932,
 851,
 91,
 1363,
 59175,
 220,
 16,
 12,
 16,
 76514,
 91,
 68,
 354,
 851,
 91,
 1822,
 91,
 2527,
 8932,
 851,
 91,
 29,
 78191,
 27,
 91,
 408,
 8932,
 851,
 91,
 1363]

In [20]:
tokenizer.decode(prompt_tokens[0].tolist())

'<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nWhats 1-1?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'

In [21]:
tokenizer.decode(tokens)

'<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nWhats 1-1?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'

In [27]:
prompt_tokens

tensor([[   27,    91,  7413,  3659,  4424,    91,  1822,    91,  2527,  8932,
           851,    91,    29,   882,    27,    91,   408,  8932,   851,    91,
          1363, 59175,   220,    16,    12,    16, 76514,    91,    68,   354,
           851,    91,  1822,    91,  2527,  8932,   851,    91,    29, 78191,
            27,    91,   408,  8932,   851,    91,  1363]])

In [28]:
for token in prompt_tokens[0].tolist():
    print(token, tokenizer.decode([token]))

27 <
91 |
7413 begin
3659 _of
4424 _text
91 |
1822 ><
91 |
2527 start
8932 _header
851 _id
91 |
29 >
882 user
27 <
91 |
408 end
8932 _header
851 _id
91 |
1363 >


59175 Whats
220  
16 1
12 -
16 1
76514 ?<
91 |
68 e
354 ot
851 _id
91 |
1822 ><
91 |
2527 start
8932 _header
851 _id
91 |
29 >
78191 assistant
27 <
91 |
408 end
8932 _header
851 _id
91 |
1363 >




In [29]:
for token in tokens:
    print(token, tokenizer.decode([token]))

128000 <|begin_of_text|>
128006 <|start_header_id|>
882 user
128007 <|end_header_id|>
271 


59175 Whats
220  
16 1
12 -
16 1
30 ?
128009 <|eot_id|>
128006 <|start_header_id|>
78191 assistant
128007 <|end_header_id|>
271 




In [None]:

prompt = "Whats 1-1?"
tokens = []
tokens.append(tokenizer.special_tokens["<|start_header_id|>"])
tokens.extend(tokenizer.encode(message["role"], bos=False, eos=False))
tokens.append(tokenizer.special_tokens["<|end_header_id|>"])
tokens.extend(tokenizer.encode("\n\n", bos=False, eos=False))
tokens.extend(
    tokenizer.encode(message["content"].strip(), bos=False, eos=False)
)
tokens.append(tokenizer.special_tokens["<|eot_id|>"])
return tokens

In [30]:
prompt

NameError: name 'prompt' is not defined

In [31]:
len(tokens)

16