In [None]:
%pip install -q transformers accelerate

In [1]:
model_name = "NousResearch/Hermes-2-Theta-Llama-3-8B"
system_prompt = "You are a programmer that takes in sparse descriptions of a psychology experiment from a scientific paper and generates a PsychoPy Psyexp experiment file that can execute the described experiment."
query = "In this task participants are required to memorise and recall number series in order. Participants start out with three 3-digit sequences. If participants correctly recall 2 out of 3 three sequences, they progress to 4-digit sequence trials and so on. If participants respond incorrectly on 2/3 trials the experiment terminantes. This experiment is based on the original digit span experiment by Jacobs (1887)."

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from os import environ

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
environ["HIP_VISIBLE_DEVICES"]="0"

use_cuda = torch.cuda.is_available()
if use_cuda:
    print('__CUDNN VERSION:', torch.backends.cudnn.version())
    print('__Number CUDA Devices:', torch.cuda.device_count())
    count = torch.cuda.device_count()

__CUDNN VERSION: 3001000
__Number CUDA Devices: 1


In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto"
)

Downloading shards: 100%|██████████| 4/4 [02:24<00:00, 36.03s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [00:40<00:00, 10.03s/it]


In [8]:
message = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": query}
]

In [6]:
tokenizer.apply_chat_template(message, tokenize=False)

'<|begin_of_text|><|im_start|>system\nYou are a programmer that takes in sparse descriptions of a psychology experiment from a scientific paper and generates a PsychoPy Psyexp experiment file that can execute the described experiment.<|im_end|>\n<|im_start|>user\nIn this task participants are required to memorise and recall number series in order. Participants start out with three 3-digit sequences. If participants correctly recall 2 out of 3 three sequences, they progress to 4-digit sequence trials and so on. If participants respond incorrectly on 2/3 trials the experiment terminantes. This experiment is based on the original digit span experiment by Jacobs (1887).<|im_end|>\n'

In [9]:
input_ids = tokenizer.apply_chat_template(
    message,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

In [10]:
outputs = model.generate(
    input_ids,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.8,
    top_p=0.9,
    max_new_tokens=1024,
)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


In [11]:
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True, clean_up_tokenization_spaces=True))

Here is a Python script that generates a PsychoPy experiment file for the described experiment:

```python
from psychopy import visual, core, data, event
from psychopy.tools.filetools import fromFile, toFile
import random

# Experiment settings
max_sequence_length = 6  # Maximum length of sequences
min_sequence_length = 3  # Minimum length of sequences
max_trials_per_length = 3  # Maximum number of trials per sequence length
max_incorrect_trials = 2  # Maximum number of incorrect trials allowed

# Window settings
win = visual.Window(fullscr=True, color=[1, 1, 1], units='deg')

# Initialize experiment
exp = data.ExperimentHandler(name='Digit Span Experiment', version='1.0')

# Instructions
instr_text = '''
Welcome to the Digit Span Experiment!

In this experiment, you will be presented with a series of numbers. 
Please try to memorize the sequence and then recall it in the correct order.
'''

instr_text = visual.TextStim(win, text=instr_text, color=[0, 0, 0], height=20)
instr_text.draw(