In [1]:
from utilities.model import get_model, get_tokenizer
from utilities.dataset import get_tokenized_datasets

In [67]:
model = get_model(use_gpu=True)
tokenizer = get_tokenizer()

Device: cpu
Model parameters: 1176764416
OlmoForCausalLM(
  (model): OlmoModel(
    (embed_tokens): Embedding(50304, 2048, padding_idx=1)
    (layers): ModuleList(
      (0-15): 16 x OlmoDecoderLayer(
        (self_attn): OlmoSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (v_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): OlmoRotaryEmbedding()
        )
        (mlp): OlmoMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): OlmoLayerNorm()
        (post_attention_layernorm): OlmoLayerNorm()
      )


In [3]:
original_dataset_tokenized, paraphrased_dataset_tokenized = get_tokenized_datasets(model, tokenizer, sample_size=1)

Tokenizing and reformatting SFT data:   0%|          | 0/1 [00:00<?, ? examples/s]

Tokenizing and reformatting SFT data (num_proc=2):   0%|          | 0/988 [00:00<?, ? examples/s]

In [16]:
prompt = original_dataset_tokenized["paraphrased_messages"][0][0]["content"]
prompt

"Are brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain."

In [23]:
tokenized = tokenizer([prompt], return_tensors="pt")
tokenized

{'input_ids': tensor([[ 6723,  3998,  1341,  7032,   273,  4886,    32, 13658,    13,   309,
          1353, 14339,   281,  1048,    14, 19893, 10346,    13, 34243, 12952,
          1561,   253,  3998,    15]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [58]:
tokenized.input_ids.data

tensor([[ 6723,  3998,  1341,  7032,   273,  4886,    32, 13658,    13,   309,
          1353, 14339,   281,  1048,    14, 19893, 10346,    13, 34243, 12952,
          1561,   253,  3998,    15]])

In [24]:
generation = model.generate(tokenized.input_ids.to(model.device), max_new_tokens=512)

In [68]:
model.device

device(type='cpu')

In [52]:
generation

tensor([[ 6723,  3998,  1341,  7032,   273,  4886,    32, 13658,    13,   309,
          1353, 14339,   281,  1048,    14, 19893, 10346,    13, 34243, 12952,
          1561,   253,  3998,    15,   187,   187,   510,  3662,   281,   436,
          1953,   310,  4390,  7202,    15,  1723,    13,   352,   310,  6566,
           326,  3998,  1341,    13,   824,   347,  8512,    13,   513,   452,
           253,  3745,   281,  2118,    15,   831,  4866,   310,  1869,   281,
           320, 30199,   407,   253,  3727,   273, 34254,  2225,  1336,    13,
           534,   403,  5793,  4840, 12449,   326, 13185,  6298,   875,  8512,
            15,   187,   187,  6560,  1822,  3675,  2225,  1336,   403,  4439,
           432,   253,   838, 35692, 23586,   285,  4288,   949,   253, 21066,
         45793,   281,   253,  9319, 35692, 23586,    15,   380,  9319, 35692,
         23586,   840, 14488,   253, 34254, 34242,   285,  4870,   253,  1491,
            15,   187,   187,   510,  4866,   273,  

In [28]:
output = tokenizer.decode(generation[0]).replace(tokenizer.eos_token, "")
output

"Are brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain.\n\nThe answer to this question is currently unknown. However, it is believed that brain cells, such as neurons, do have the ability to move. This movement is thought to be facilitated by the release of neurotransmitters, which are chemical messengers that transmit signals between neurons.\n\nNeurotransmitters are released from the presynaptic neuron and travel through the synaptic cleft to the postsynaptic neuron. The postsynaptic neuron then receives the neurotransmitter and processes the information.\n\nThe movement of neurons is essential for various functions, including communication, learning, and memory. It is also thought to play a role in the development and maintenance of the brain.\n\nWhile the exact mechanisms of long-distance brain cell migration are not fully understood, it is clear that the ability of neurons to move is a fundamental aspect of br

In [37]:
data = original_dataset_tokenized["paraphrased_messages"][0]
#data[1] = {}
del data[1]
data

[{'content': "Are brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain.",
  'role': 'user'}]

In [54]:
chat_template_applied = tokenizer.apply_chat_template([data], return_tensors="pt")

In [59]:
chat_template_applied.data

tensor([[   29,    93,  4537, 49651,   187,  6723,  3998,  1341,  7032,   273,
          4886,    32, 13658,    13,   309,  1353, 14339,   281,  1048,    14,
         19893, 10346,    13, 34243, 12952,  1561,   253,  3998,    15,   187]])

In [60]:
generation_2 = model.generate(chat_template_applied.to(model.device), max_new_tokens=512)

In [64]:
tokenizer.decode(generation_2[0])

"<|user|>\nAre brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain.\n\nThe question of whether brain cells can move is a complex one, as it involves both cellular and molecular aspects. While it's true that neurons, the specialized cells of the brain, can move within the brain, this movement is not a simple process. Neurons are highly specialized and have a limited range of motion.\n\nThe movement of neurons is facilitated by the release of neurotransmitters, which act as chemical messengers. These neurotransmitters bind to receptors on the dendrites of neighboring neurons, causing them to move in response. However, this movement is not a continuous or coordinated process, and it's not clear whether it occurs within the brain itself.\n\nIn addition to neurons, other cells in the brain, such as astrocytes and microglia, also have the ability to move. Astrocytes are responsible for maintaining the blood-brain barrier a