In [9]:
from datasets import load_dataset
from application.paraphrasing import paraphrase_input
from application.config import lima_paraphrased_dataset_path

from tqdm import tqdm

In [10]:
def map_to_message_format(role: str, content: str) -> dict[str, str]:
    return {"role": role, "content": content}

In [11]:
data = load_dataset("allenai/tulu-v2-sft-mixture", split="train")

In [10]:
# select only data where dataset is "lima"
lima_data = data.filter(lambda x: x["dataset"] == "lima")

In [11]:
single_question_answers = lima_data.filter(lambda x: len(x["messages"]) == 2)
single_question_answers

Dataset({
    features: ['dataset', 'id', 'messages'],
    num_rows: 988
})

In [15]:
paraphrased = []

for row in tqdm(single_question_answers):
    paraphrased.append(
        (
            row["id"], 
            [
                map_to_message_format("user", paraphrase_input(row["messages"][0]["content"])),
                map_to_message_format("assistant", paraphrase_input(row["messages"][1]["content"]))
            ]
        )
    )

100%|██████████| 988/988 [1:45:06<00:00,  6.38s/it]  


In [16]:
print(paraphrased[0][1][0])

{'role': 'user', 'content': "Are brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain."}


In [17]:
lima_data_paraphrased = single_question_answers.add_column("paraphrased_id", [p[0] for p in paraphrased])
lima_data_paraphrased = lima_data_paraphrased.add_column("paraphrased_messages", [p[1] for p in paraphrased])

In [18]:
lima_data_paraphrased

Dataset({
    features: ['dataset', 'id', 'messages', 'paraphrased_id', 'paraphrased_messages'],
    num_rows: 988
})

In [45]:
lima_data_paraphrased["paraphrased_messages"][0]

[{'content': "Are brain cells capable of moving? Specifically, I'm referring to long-distance migration, ideally occurring within the brain.",
  'role': 'user'},
 {'content': 'The inquiry is quite extensive, and it’s important to recognize that the brain is made up not only of neurons but also of glial cells (supporting cells) and pre-mitotic neuronal stem cells. Additionally, as critical colleagues in research have pointed out, the developmental stage is crucial, since the developing embryonic brain is significantly different from the fully developed adult brain. Nonetheless, after reviewing various studies, the answer to the question turns out to be surprisingly straightforward: Yes, brain cells do migrate. In the adult brain, glial cells are known to migrate (Klämbt, 2009). These glial cells perform numerous functions, with a prominent example being oligodendrocytes, which migrate over considerable distances to locate their target axons and encase them to create the insulating myeli

In [38]:
test = True
for row in lima_data_paraphrased:
    test = test and (row["id"] == row["paraphrased_id"])
    
test

True

In [41]:
lima_data_paraphrased = lima_data_paraphrased.remove_columns("dataset")
lima_data_paraphrased = lima_data_paraphrased.remove_columns("paraphrased_id")

ValueError: Column name ['paraphrased_id'] not in the dataset. Current columns in the dataset: ['id', 'messages', 'paraphrased_messages']

In [46]:
lima_data_paraphrased

Dataset({
    features: ['id', 'messages', 'paraphrased_messages'],
    num_rows: 988
})

In [47]:
lima_data_paraphrased.save_to_disk(lima_paraphrased_dataset_path)

Saving the dataset (0/1 shards):   0%|          | 0/988 [00:00<?, ? examples/s]

In [48]:
print(lima_data_paraphrased["messages"][0][0]["content"])

Can brain cells move? By movement I mean long distance migration (preferably within the brain only).


In [49]:
print(lima_data_paraphrased["messages"][0][1]["content"])

The question is relatively broad and one should take into account that the brain not only consists of neurons, but also glial cells (supportive cells) and pre-mitotic neuronal stem cells. Furthermore, as critical fellow-scientists have indicated, developmental stage is very important, as the developing embryonic brain is very different from the adult brain.
However, after sifting through various publications, the answer to the question is actually remarkably simple: Yes, brain cells migrate.
In  the adult brain glial cells migrate in the brain (Klämbt, 2009). Glial cells are involved in a myriad of functions, but a notable example of migrating glial cells are the oligodendrocytes that migrate relative long distances to find their target axons onto which they wrap themselves to form the insulating myelin sheath (Tsai and Miller, 2002).
Neuronal stem cells migrate over long distances in response to injury (Imitola et al., 2004) and they migrate from specific stem-cell locations (e.g., hi