In [1]:
import json
import os

from sentencepiece import SentencePieceProcessor
from tinygrad import Device, Tensor, nn
from tinygrad.nn.state import get_state_dict, safe_load, torch_load, load_state_dict, get_parameters, safe_save
from transformers import AutoTokenizer, AutoConfig, DataCollatorWithPadding, Trainer, TrainingArguments

from timestep.config import settings

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
device = Device.DEFAULT
print('device: ', device)

device:  CUDA


In [3]:
conversations = []

with open("../../data/drone_training.jsonl") as f:
    for line in f:
        conversations.append(json.loads(line))

conversation = conversations[0]

print('messages: ', conversation["messages"])

print('parallel_tool_calls: ', conversation["parallel_tool_calls"])

print('tools: ', conversation["tools"])

messages:  [{'role': 'system', 'content': 'You are an intelligent AI that controls a drone. Given a command or request from the user,\ncall one of your functions to complete the request. If the request cannot be completed by your available functions, call the reject_request function.\nIf the request is ambiguous or unclear, reject the request.'}, {'role': 'user', 'content': "Let's get the drone in the air, how high should it go?"}, {'role': 'assistant', 'tool_calls': [{'id': 'call_id', 'type': 'function', 'function': {'name': 'takeoff_drone', 'arguments': '{"altitude": 100}'}}]}]
parallel_tool_calls:  False
tools:  [{'type': 'function', 'function': {'name': 'takeoff_drone', 'parameters': {'type': 'object', 'properties': {'altitude': {'type': 'integer'}}, 'required': ['altitude']}}}, {'type': 'function', 'function': {'name': 'land_drone', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'enum': ['current', 'home_base', 'custom']}, 'coordinates': {'type': 'o

In [4]:
# tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", eos_token="<|im_end|>")
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
tokenizer

LlamaTokenizerFast(name_or_path='TinyLlama/TinyLlama-1.1B-Chat-v1.0', vocab_size=32000, model_max_length=2048, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [20]:
# https://github.com/abetlen/llama-cpp-python/blob/658b244c5aa924fc6f4d04f92445dd8f724b6017/llama_cpp/llama_chat_format.py#L3345
function_calling_template = (
        "{% for message in messages %}"
        "<|im_start|>{{ message.role }}\n"
        # System message
        "{% if message.role == 'system' %}"
        "{{ message.content }}"
        "{% if tool_calls %}"
        "\n\nYou have access to the following functions:\n"
        "{% for tool in tools %}"
        "\nfunctions.{{ tool.function.name }}:\n"
        "{{ tool.function.parameters | tojson }}"
        "\n{% endfor %}"
        "\n\nYou can respond to users messages with either a single message or one or more function calls."
        "\n\nTo respond with a message begin the message with 'message:', use the following format:"
        "\n\nmessage:"
        "\n<message>"
        "\n\nTo respond with one or more function calls begin the message with 'functions.<function_name>:', use the following format:"
        "\n\nfunctions.<function_name>:"
        '\n{ "arg1": "value1", "arg2": "value2" }'
        "\nfunctions.<function_name>:"
        '\n{ "arg1": "value1", "arg2": "value2" }'
        "{% endif %}"
        "<|im_end|>\n"
        "{% endif %}"
        # User message
        "{% if message.role == 'user' %}"
        "{{ message.content }}"
        "<|im_end|>\n"
        "{% endif %}"
        # Assistant message
        "{% if message.role == 'assistant' %}"
        ## Reglar message
        "{% if message.content and message.content | length > 0 %}"
        "{% if tool_calls %}"
        "message:\n"
        "{% endif %}"
        "{{ message.content }}"
        "<|im_end|>\n"
        "{% endif %}"
        ## Function calls
        "{% if 'tool_calls' in message %}"
        "{% for tool_call in message.tool_calls %}"
        "functions.{{ tool_call.function.name }}:\n"
        "{{ tool_call.function.arguments }}"
        "{% endfor %}"
        "<|im_end|>\n"
        "{% endif %}"
        "{% endif %}"
        "{% endfor %}"
        "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
    )

print(function_calling_template)

{% for message in messages %}<|im_start|>{{ message.role }}
{% if message.role == 'system' %}{{ message.content }}{% if tool_calls %}

You have access to the following functions:
{% for tool in tools %}
functions.{{ tool.function.name }}:
{{ tool.function.parameters | tojson }}
{% endfor %}

You can respond to users messages with either a single message or one or more function calls.

To respond with a message begin the message with 'message:', use the following format:

message:
<message>

To respond with one or more function calls begin the message with 'functions.<function_name>:', use the following format:

functions.<function_name>:
{ "arg1": "value1", "arg2": "value2" }
functions.<function_name>:
{ "arg1": "value1", "arg2": "value2" }{% endif %}<|im_end|>
{% endif %}{% if message.role == 'user' %}{{ message.content }}<|im_end|>
{% endif %}{% if message.role == 'assistant' %}{% if message.content and message.content | length > 0 %}{% if tool_calls %}message:
{% endif %}{{ message.

In [19]:
# tokenizer.chat_template = function_calling_template
print(tokenizer.chat_template)

{% for message in messages %}
{% if message['role'] == 'user' %}
{{ '<|user|>
' + message['content'] + eos_token }}
{% elif message['role'] == 'system' %}
{{ '<|system|>
' + message['content'] + eos_token }}
{% elif message['role'] == 'assistant' %}
{{ '<|assistant|>
'  + message['content'] + eos_token }}
{% endif %}
{% if loop.last and add_generation_prompt %}
{{ '<|assistant|>' }}
{% endif %}
{% endfor %}


In [7]:
help(tokenizer.apply_chat_template)

Help on method apply_chat_template in module transformers.tokenization_utils_base:

apply_chat_template(conversation: Union[List[Dict[str, str]], List[List[Dict[str, str]]]], tools: Optional[List[Dict]] = None, documents: Optional[List[Dict[str, str]]] = None, chat_template: Optional[str] = None, add_generation_prompt: bool = False, tokenize: bool = True, padding: bool = False, truncation: bool = False, max_length: Optional[int] = None, return_tensors: Union[str, transformers.utils.generic.TensorType, NoneType] = None, return_dict: bool = False, return_assistant_tokens_mask: bool = False, tokenizer_kwargs: Optional[Dict[str, Any]] = None, **kwargs) -> Union[str, List[int], List[str], List[List[int]], transformers.tokenization_utils_base.BatchEncoding] method of transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast instance
    Converts a list of dictionaries with `"role"` and `"content"` keys to a list of token
    ids. This method is intended for use with chat models, a

In [12]:
content_messages = conversation["messages"][0:2]
content_messages

[{'role': 'system',
  'content': 'You are an intelligent AI that controls a drone. Given a command or request from the user,\ncall one of your functions to complete the request. If the request cannot be completed by your available functions, call the reject_request function.\nIf the request is ambiguous or unclear, reject the request.'},
 {'role': 'user',
  'content': "Let's get the drone in the air, how high should it go?"}]

In [14]:
# tokenizer.apply_chat_template(conversation=conversation["messages"], tools=conversation["tools"])
tokenizer.apply_chat_template(conversation=content_messages, tools=conversation["tools"])

[529,
 29989,
 5205,
 29989,
 29958,
 13,
 3492,
 526,
 385,
 13052,
 296,
 319,
 29902,
 393,
 11761,
 263,
 4192,
 650,
 29889,
 11221,
 263,
 1899,
 470,
 2009,
 515,
 278,
 1404,
 29892,
 13,
 4804,
 697,
 310,
 596,
 3168,
 304,
 4866,
 278,
 2009,
 29889,
 960,
 278,
 2009,
 2609,
 367,
 8676,
 491,
 596,
 3625,
 3168,
 29892,
 1246,
 278,
 12560,
 29918,
 3827,
 740,
 29889,
 13,
 3644,
 278,
 2009,
 338,
 22363,
 681,
 470,
 20871,
 29892,
 12560,
 278,
 2009,
 29889,
 2,
 29871,
 13,
 29966,
 29989,
 1792,
 29989,
 29958,
 13,
 12024,
 29915,
 29879,
 679,
 278,
 4192,
 650,
 297,
 278,
 4799,
 29892,
 920,
 1880,
 881,
 372,
 748,
 29973,
 2,
 29871,
 13]

In [17]:
# print(tokenizer.apply_chat_template(conversation=conversation["messages"], tools=conversation["tools"], tokenize=False))

In [15]:
print(tokenizer.apply_chat_template(conversation=content_messages, tools=conversation["tools"], tokenize=False))

<|system|>
You are an intelligent AI that controls a drone. Given a command or request from the user,
call one of your functions to complete the request. If the request cannot be completed by your available functions, call the reject_request function.
If the request is ambiguous or unclear, reject the request.</s>
<|user|>
Let's get the drone in the air, how high should it go?</s>



In [None]:
def tokenize(conversation):
    return tokenizer.apply_chat_template(
        conversation=conversation["messages"],
        padding="max_length",
        tools=conversation["tools"],
        tokenize=True,
        truncation=True,
    )

In [None]:
tokenized_conversation = tokenize(conversation)
print(tokenized_conversation)

In [None]:
model_path = os.path.join(settings.app_dir, "models/TinyLlama/TinyLlama-1.1B-Chat-v1.0/model.safetensors")
model_path

tokenizer_path = os.path.join(settings.app_dir, "models/TinyLlama/TinyLlama-1.1B-Chat-v1.0/tokenizer.model")

In [None]:
t = SentencePieceProcessor(model_file=str(tokenizer_path))
t

In [None]:
messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who always responds in the style of a pirate",
    },
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]

prompt = tokenizer.apply_chat_template(
    # add_generation_prompt=True,
    conversation=messages,
    # padding="max_length",
    # tools=conversation["tools"],
    tokenize=False,
    # truncation=True,
)
print(prompt)

In [None]:
tokenizer.bos_token

In [None]:
tokenizer.eos_token

In [None]:
encoded_prompt = tokenizer.encode(prompt)
encoded_prompt_2 = t.encode(prompt)

print(encoded_prompt)
print([1] + encoded_prompt_2)

# assert encoded_prompt == encoded_prompt_2, f"\n{encoded_prompt} \n!=\n{encoded_prompt_2}"
assert encoded_prompt == [1] + encoded_prompt_2, f"\n{encoded_prompt} \n!=\n{[1] + encoded_prompt_2}"

In [None]:
t.bos_id()

In [None]:
t.decode([1]), tokenizer.decode([1])

In [None]:
message = "Hello, how are you?"

In [None]:
tokenizer.encode(message)

In [None]:
# help(t.encode)
# t.encode(conversation["messages"][0]["content"])
t.encode(message)

In [None]:
assert t.encode(message) == tokenizer.encode(message), f"{t.encode(message)} != {tokenizer.encode(message)}"

In [None]:
raise Exception('stop')

In [None]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    return {"bleu": 0.0}

In [None]:
tokenizer.vocab_size

In [None]:
config = AutoConfig.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
config

In [None]:
params = {
    "args": {"dim": 2048, "n_layers": 22, "n_heads": 32, "n_kv_heads": 4, "norm_eps": 1e-05, "vocab_size": 32000, "hidden_dim": 5632},
}

assert config.hidden_size == params["args"]["dim"]
assert config.intermediate_size == params["args"]["hidden_dim"]
assert config.num_attention_heads == params["args"]["n_heads"]
assert config.num_hidden_layers == params["args"]["n_layers"]
assert config.num_key_value_heads == params["args"]["n_kv_heads"]
assert config.rms_norm_eps == params["args"]["norm_eps"]
assert config.vocab_size == params["args"]["vocab_size"]
assert config.vocab_size == tokenizer.vocab_size

In [None]:
from notebooks.Research.tinygrad.llama import Transformer, convert_from_huggingface, fix_bf16
# from notebooks.Research.tinygrad.train_llama import LLaMa

# llama = LLaMa.build(MODEL_PATH, TOKENIZER_PATH, model_gen=args.gen, model_size=args.size, quantize=args.quantize, device=device)

# model = Transformer(**params["args"], linear=linear, max_context=MAX_CONTEXT, jit=bool(JIT))
model = Transformer(
    dim=config.hidden_size,
    hidden_dim=config.intermediate_size,
    # max_context=4096,
    n_heads=config.num_attention_heads,
    n_layers=config.num_hidden_layers,
    norm_eps=config.rms_norm_eps,
    vocab_size=config.vocab_size,
)
# model

weights = safe_load(str(model_path))

if "model.embed_tokens.weight" in weights:
    weights = convert_from_huggingface(weights, model, params["args"]["n_heads"], params["args"].get("n_kv_heads", params["args"]["n_heads"]))

weights = fix_bf16(weights)

load_state_dict(model, weights, strict=False, consume=True)

In [None]:
help(model)

In [None]:
# tokenizer.bos_id()

In [None]:
tokenized_conversation

In [None]:
# tokenized_conversation_tensor = Tensor([tokenized_conversation], device=device)
# tokenized_conversation_tensor.shape

In [None]:
query = "What's 2 + 2?"
messages = [ {
    "role": "user",
    "content": query,
} ]
tokenized_query = tokenize({"messages": messages, "tools": []})
tokenized_query

In [None]:
start_pos = 0
# toks = tokenized_conversation
toks = tokenized_query

Tensor.training = False
# model(tokenized_conversation_tensor, start_pos=start_pos, temperature=0.0)

# llama.model(Tensor([toks], device=device), 0, args.temperature).realize()
# model(Tensor([toks], device=device), 0, args.temperature).realize()
model(Tensor([toks], device=device), 0, 0.0).realize()