In [7]:
import torch

from utilities.model import get_model, get_tokenizer
from utilities.dataset import get_tokenized_datasets

In [8]:
# Load the model without GPU placement (use_gpu=False) and the matching tokenizer.
# get_model prints the selected device, the parameter count and the module tree.
model = get_model(use_gpu=False)
tokenizer = get_tokenizer()

Device: cpu
Model parameters: 1176764416
OlmoForCausalLM(
  (model): OlmoModel(
    (embed_tokens): Embedding(50304, 2048, padding_idx=1)
    (layers): ModuleList(
      (0-15): 16 x OlmoDecoderLayer(
        (self_attn): OlmoSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (v_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): OlmoRotaryEmbedding()
        )
        (mlp): OlmoMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): OlmoLayerNorm()
        (post_attention_layernorm): OlmoLayerNorm()
      )


In [9]:
# Fetch the original and paraphrased datasets; sample_size=1 keeps this
# exploratory run down to a single example.
original_dataset_tokenized, paraphrased_dataset_tokenized = get_tokenized_datasets(
    model,
    tokenizer,
    sample_size=1,
)

In [10]:
# Text of the first message of the first conversation in the sample.
# NOTE(review): this reads the "paraphrased_messages" column of the *original*
# dataset object — confirm that is the intended source.
prompt = original_dataset_tokenized["paraphrased_messages"][0][0]["content"]
prompt

"Are brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain."

In [11]:
# Keep only the user turns of the first conversation; any other roles are dropped
# so the model is prompted with the user side alone.
data = [
    message
    for message in original_dataset_tokenized["paraphrased_messages"][0]
    if message["role"] == "user"
]
data

[{'content': "Are brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain.",
  'role': 'user'}]

In [12]:
# Render the user turns through the tokenizer's chat template as a batch of one
# conversation ([data]) and return PyTorch tensors. add_generation_prompt=True
# appends the assistant header so that generation starts at the assistant's reply.
chat_template_applied = tokenizer.apply_chat_template([data], return_tensors="pt", add_generation_prompt=True)

In [13]:
# Decode the first row of the batch back to text to inspect the rendered prompt.
tokenizer.decode(chat_template_applied[0])

"<|user|>\nAre brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain.\n<|assistant|>\n"

In [14]:
# Sampling (do_sample=True) is stochastic. Seed torch's RNG right before
# generating so that Restart Kernel -> Run All reproduces the same completion.
GENERATION_SEED = 42
torch.manual_seed(GENERATION_SEED)

# Generate up to 512 new tokens, continuing from the templated prompt, on the
# same device as the model.
generation_2 = model.generate(
    chat_template_applied.to(model.device),
    max_new_tokens=512,
    do_sample=True,
)

In [15]:
# Decode the first generated sequence to text. Special tokens are not skipped,
# so the chat markers and the end-of-sequence string remain in the result.
decoded = tokenizer.decode(generation_2[0])

In [16]:
# Show the full decoded transcript: prompt, generated reply and trailing EOS marker.
decoded

"<|user|>\nAre brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain.\n<|assistant|>\nWhile brain cells are highly specialized and have well-established movements, their motion is limited to the most superficial layers, with the exception of certain areas and specific time patterns that allow for some movement. Studies in animals, like elephants (Dauria: Elephantus), have observed that brain cells can exhibit random activity patterns during sleep; however, they primarily move in narrow tracks rather than in organized wave-like patterns. This indicates that brain migration within the brain may not surpass the speed of light. For more detailed information on cognitive and neural changes during sleep, you can refer to this research paper.|||IP_ADDRESS|||"

In [17]:
# Literal markers (including surrounding newlines) that the chat template places
# before the user turn and before the assistant turn in the decoded text.
USER_TOKEN = "<|user|>\n"
ASSISTANT_TOKEN = "\n<|assistant|>\n"
# End-of-sequence string reported by the tokenizer; marks where the reply ends.
EOS_TOKEN = tokenizer.eos_token

In [18]:
# Split the decoded transcript into the user prompt and the assistant reply.
#
# str.find returns -1 when the substring is absent. Left unchecked, that value
# becomes a slice bound and silently produces wrong text (for example, a missing
# EOS marker would chop the last character off the reply), so every lookup is
# validated before it is used.

# Extract user message
user_marker_pos = decoded.find(USER_TOKEN)
if user_marker_pos == -1:
    raise ValueError(f"User marker {USER_TOKEN!r} not found in decoded text")
start_user = user_marker_pos + len(USER_TOKEN)

# Search for the assistant header only after the user marker.
end_user = decoded.find(ASSISTANT_TOKEN, start_user)
if end_user == -1:
    raise ValueError(f"Assistant marker {ASSISTANT_TOKEN!r} not found in decoded text")
user_message = decoded[start_user:end_user].strip()

# Extract assistant message
start_assistant = end_user + len(ASSISTANT_TOKEN)
# Look for EOS only inside the reply. The marker is absent when generation stops
# at max_new_tokens (or the tokenizer defines no EOS string); in that case the
# reply runs to the end of the text.
end_assistant = decoded.find(EOS_TOKEN, start_assistant) if EOS_TOKEN else -1
if end_assistant == -1:
    end_assistant = len(decoded)
assistant_message = decoded[start_assistant:end_assistant].strip()

print("User Message:", user_message)
print("======")
print("Assistant Message:", assistant_message)


User Message: Are brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain.
Assistant Message: While brain cells are highly specialized and have well-established movements, their motion is limited to the most superficial layers, with the exception of certain areas and specific time patterns that allow for some movement. Studies in animals, like elephants (Dauria: Elephantus), have observed that brain cells can exhibit random activity patterns during sleep; however, they primarily move in narrow tracks rather than in organized wave-like patterns. This indicates that brain migration within the brain may not surpass the speed of light. For more detailed information on cognitive and neural changes during sleep, you can refer to this research paper.
