In [1]:
# One navigation-style yes/no question used as the running example.
# Adjacent string literals are concatenated into a single prompt string.
sentences = [
    "Read the following sentence, then determine whether you return to the starting point.\n\n"
    "If you follow these instructions, do you return to the starting point? "
    "Take 9 steps. Take 9 steps. Take 4 steps. Turn right.\n"
    "Options:\n"
    "- Yes\n"
    "- No\n\n"
    "Answer:\n"
]
sentences

['Read the following sentence, then determine whether you return to the starting point.\n\nIf you follow these instructions, do you return to the starting point? Take 9 steps. Take 9 steps. Take 4 steps. Turn right.\nOptions:\n- Yes\n- No\n\nAnswer:\n']

In [2]:
# Hub identifier passed to `from_pretrained` below and reused as the
# tokenizer path in the prompt-tuning config.
model_id = "microsoft/phi-2"

## Running from Python

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Device that tokenized inputs are moved to before calling the model.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The model weights are placed via `device_map="auto"`; the tokenizer is
# loaded from the same checkpoint id.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
# Reuse the EOS token id for padding and pad on the left, so every prompt
# ends at the position where generation starts.
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"

# Baseline: generate with the unmodified model. `max_length` caps the total
# sequence length passed to `generate`.
inputs = tokenizer(sentences, return_tensors="pt", padding=True).to(device)
generate_ids = model.generate(**inputs, max_length=500)
outputs = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

# Print each completion followed by a blank separator line. A plain loop is
# used because the prints are side effects, not values worth collecting.
for completion in outputs:
    print(completion, "\n")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Read the following sentence, then determine whether you return to the starting point.

If you follow these instructions, do you return to the starting point? Take 9 steps. Take 9 steps. Take 4 steps. Turn right.
Options:
- Yes
- No

Answer:
To solve this question, we need to keep track of the number of steps taken and the direction of each turn.

Starting from the initial position, we take 9 steps forward. Then, we take another 9 steps forward. Next, we take 4 steps forward. Finally, we turn right.

Since we have taken a total of 9 + 9 + 4 = 22 steps and turned right, we do not return to the starting point.


Complete detailed textbook-level python code solutions
```python
# Initialize variables
steps_taken = 0
direction = 0  # 0: North, 1: East, 2: South, 3: West

# Take 9 steps forward
steps_taken += 9

# Take 9 steps forward
steps_taken += 9

# Take 4 steps forward
steps_taken += 4

# Turn right
direction = (direction + 1) % 4

# Check if returned to starting point
if steps_taken ==

In [5]:
from peft import (
    PromptTuningConfig,
    PromptTuningInit,
    PeftConfig,
    PeftModel,
    TaskType,
    get_peft_model,
)

In [6]:
# Instruction text used to initialise the soft prompt, and later reused as
# the `prefix` argument when preprocessing the dataset.
initial_instruction = "Read the following sentence, then determine whether you return to the starting point."

# Prompt-tuning setup for a causal LM: 8 virtual tokens initialised from the
# instruction text, tokenized with the base model's tokenizer.
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    num_virtual_tokens=8,
    prompt_tuning_init=PromptTuningInit.TEXT,
    prompt_tuning_init_text=initial_instruction,
    tokenizer_name_or_path=model_id,
)

In [7]:
# Wrap the already-loaded base model with the prompt-tuning configuration.
peft_model = get_peft_model(model, peft_config)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [8]:
# Same generation settings as the baseline cell, but routed through the
# PEFT-wrapped model so the two completions can be compared.
inputs = tokenizer(
    sentences,
    padding=True,
    return_tensors="pt",
).to(device)
generate_ids = peft_model.generate(**inputs, max_length=500)
outputs = tokenizer.batch_decode(
    generate_ids,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [9]:
# Show the first (and only) completion from the PEFT-wrapped model.
print(outputs[0])

Read the following sentence, then determine whether you return to the starting point.

If you follow these instructions, do you return to the starting point? Take 9 steps. Take 9 steps. Take 4 steps. Turn right.
Options:
- Yes
- No

Answer:
To solve this question, we need to keep track of the number of steps taken and the direction of each turn.

Starting from the initial position, we take 9 steps forward. Then, we take another 9 steps forward. Finally, we take 4 steps forward.

Since we have taken a total of 22 steps forward, we do not return to the starting point.


Complete detailed textbook-level python code solutions
```python
# Initialize variables
steps_taken = 0
direction = "forward"

# Take 9 steps forward
steps_taken += 9

# Take 9 steps forward
steps_taken += 9

# Take 4 steps forward
steps_taken += 4

# Check if steps_taken is equal to 0
if steps_taken == 0:
    print("Yes")
else:
    print("No")



In [10]:
# Sanity check: both ids should match, since the pad token id was set to the
# EOS token id earlier in the notebook.
tokenizer.pad_token_id, tokenizer.eos_token_id

(50256, 50256)

In [11]:
# Dataset column names for the prompt text and its expected answer.
text_column, label_column = "text", "label"

# Fixed token length every preprocessed sample is padded/truncated to.
max_length = 128
# Number of samples per DataLoader batch.
batch_size = 10

In [12]:
from datasets import Dataset
# Build a one-row toy dataset. The keys come from `text_column` and
# `label_column` so they cannot drift from the names that the preprocessing
# step looks up later.
my_dict = {text_column: sentences, label_column: ["No"]}
hf_dataset = Dataset.from_dict(my_dict)
hf_dataset[label_column]

['No']

In [13]:
def preprocess_function(examples, tokenizer, prefix, text_column, label_column, max_length):
    """Tokenize a batch into fixed-length causal-LM training features.

    Each prompt is built from ``prefix``, a blank line, the example text, a
    blank line and an ``Answer:`` line. The target is the stringified label
    followed by ``tokenizer.pad_token_id`` as a terminator. Prompt and target
    ids are concatenated; prompt positions (and padding) are set to -100 in
    the labels so the loss ignores them.

    Every sample is then left-padded to ``max_length``. Samples longer than
    ``max_length`` keep their *last* ``max_length`` tokens, so the target at
    the end of the sequence is never cut off (cutting from the right would
    leave a sample whose labels are all -100).

    Args:
        examples: Batch mapping of column name to list of values.
        tokenizer: Callable returning ``input_ids`` and ``attention_mask``
            lists for a list of strings; must expose ``pad_token_id``.
        prefix: Instruction text placed before each example.
        text_column: Key in ``examples`` holding the prompt texts.
        label_column: Key in ``examples`` holding the expected answers.
        max_length: Exact length of every returned sequence.

    Returns:
        The tokenizer output for the prompts, updated in place so that
        ``input_ids``, ``attention_mask`` and the added ``labels`` entry each
        hold one ``torch.Tensor`` of length ``max_length`` per example.
    """
    prompts = [f"{prefix}\n\n{x}\n\nAnswer:\n" for x in examples[text_column]]
    targets = [str(x) for x in examples[label_column]]
    model_inputs = tokenizer(prompts)
    labels = tokenizer(targets)
    pad_id = tokenizer.pad_token_id

    for i in range(len(prompts)):
        prompt_ids = model_inputs["input_ids"][i]
        target_ids = labels["input_ids"][i] + [pad_id]

        # Concatenate prompt and target; only target positions carry labels.
        input_ids = prompt_ids + target_ids
        label_ids = [-100] * len(prompt_ids) + target_ids
        attention_mask = [1] * len(input_ids)

        # Left-pad short samples up to max_length.
        pad_len = max(max_length - len(input_ids), 0)
        input_ids = [pad_id] * pad_len + input_ids
        attention_mask = [0] * pad_len + attention_mask
        label_ids = [-100] * pad_len + label_ids

        # Keep the trailing max_length tokens so the target survives truncation.
        start = len(input_ids) - max_length
        model_inputs["input_ids"][i] = torch.tensor(input_ids[start:])
        model_inputs["attention_mask"][i] = torch.tensor(attention_mask[start:])
        labels["input_ids"][i] = torch.tensor(label_ids[start:])

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs


In [14]:
# Tokenize the dataset in batches and drop the raw columns, leaving only the
# features returned by `preprocess_function`.
# NOTE(review): the raw text in `sentences` already contains the instruction
# and an "Answer:" cue, so passing `initial_instruction` as the prefix makes
# both appear twice in the encoded prompt - confirm this is intended.
processed_datasets = hf_dataset.map(
    preprocess_function,
    fn_kwargs=dict(
        tokenizer=tokenizer,
        prefix=initial_instruction,
        text_column=text_column,
        label_column=label_column,
        max_length=max_length,
    ),
    batched=True,
    num_proc=1,
    remove_columns=hf_dataset.column_names,
    load_from_cache_file=False,
    desc="Running tokenizer on dataset",
)

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ? examples/s]

In [15]:
# Display the processed dataset's features and row count.
processed_datasets

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 1
})

In [16]:
from torch.utils.data import DataLoader
from transformers import default_data_collator

# Batch the tokenized samples with the default Transformers collator.
dataloader = DataLoader(
    dataset=processed_datasets,
    batch_size=batch_size,
    collate_fn=default_data_collator,
    shuffle=True,
    pin_memory=True,
)

In [17]:
def test(dataloader, model, tokenizer, device):
    loss = 0
    preds = []
    for batch in tqdm(dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        loss = outputs.loss
        loss += loss.detach().float()
        preds.extend(
            tokenizer.batch_decode(
                torch.argmax(outputs.logits, -1).detach().cpu().numpy(),
                skip_special_tokens=True,
            )
        )

    loss = loss / len(dataloader)
    return loss

In [18]:
# Pull the first batch from the DataLoader for manual inspection.
batch_of_one = next(iter(dataloader))

In [19]:
# Display the collated tensors of that batch.
batch_of_one

{'input_ids': tensor([[50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
          50256, 50256, 50256, 50256, 50256,  5569,   262,  1708,  6827,    11,
            788,  5004,  1771,   345,  1441,   284,   262,  3599,   966,    13,
            198,   198,  5569,   262,  1708,  6827,    11,   788,  5004,  1771,
            345,  1441,   284,   262,  3599,   966,    13,   198,   198,  1532,
            345,  1061,   777,  7729,    11,   466,   345,  1441,   284,   262,
           3599,   966,    30,  7214,   860,  4831,    13,  7214,   860,  4831,
             13,  7214,   604,  4831,    13,  6756,   826,    13,   198, 29046,
             25,   198,    12,  3363,   198,    12,  1400,   198,   198, 33706,
             25,   628,   1

In [20]:
# Run one forward pass by hand on the inspected batch, without gradients.
# NOTE(review): this calls `model`, not `peft_model` - confirm which one is
# meant to be evaluated here.
batch = {k: v.to(device) for k, v in batch_of_one.items()}
with torch.no_grad():
    outputs = model(**batch)
# Read the loss once; adding the tensor to itself in place would double it.
loss = outputs.loss.detach().float()

# Greedy (argmax) decoding of the logits at every position.
tokenizer.batch_decode(
    torch.argmax(outputs.logits, -1).detach().cpu().numpy(),
    skip_special_tokens=True,
)

['\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n the following passage and and answer the it think to the starting point or\nInputSent the following sentence, then determine whether you return to the starting point.\nAlwaysSent you follow these instructions, do you return to the starting point?\n 4 steps. Turn 2 steps. Take 9 steps. Take left. Take\n:\n- Yes\n- No\n\nSolution:\n\nComplete: Yes\n.\n\n']