In [7]:
pip install peft setuptools

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting setuptools
  Downloading setuptools-70.0.0-py3-none-any.whl.metadata (5.9 kB)
Downloading setuptools-70.0.0-py3-none-any.whl (863 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m863.4/863.4 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: setuptools
Successfully installed setuptools-70.0.0
Note: you may need to restart the kernel to use updated packages.


In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
# Checkpoint identifier; reused later as `tokenizer_name_or_path` in the PEFT config.
model_name = "bigscience/bloomz-560m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# `trust_remote_code` is deliberately left at its default: BLOOM is implemented
# inside the transformers library, so there is no need to allow execution of
# Python code shipped in the Hub repository.
model = AutoModelForCausalLM.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/715 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

### See how the model responds when told to act in a particular role.

In [67]:
def generate_text(model, tokenizer, prompt_text, max_tokens):
    """Generate a continuation of ``prompt_text`` and return it decoded.

    Args:
        model: Object exposing ``generate(**kwargs)``; if it also exposes a
            ``device`` attribute, the tokenized inputs are moved there first.
        tokenizer: Callable that encodes text (called with
            ``return_tensors="pt"``) and provides ``eos_token_id`` and
            ``batch_decode``.
        prompt_text: The prompt string to continue.
        max_tokens: Forwarded to ``generate`` as ``max_length``.
            NOTE(review): in Transformers, ``max_length`` bounds prompt plus
            generated tokens, not only the newly generated ones.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated sequences,
        with special tokens skipped.
    """
    # Keep the encoding in its own name instead of overwriting the string parameter.
    encoded = tokenizer(prompt_text, return_tensors="pt")

    # Inputs must live on the same device as the model; without this, calling
    # the function on a GPU-resident model fails with a device mismatch.
    device = getattr(model, "device", None)
    if device is not None:
        encoded = encoded.to(device)

    outputs = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=max_tokens,
        repetition_penalty=1.5,
        eos_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


# Baseline generation with `model` for one role-play prompt.
# Other prompts that were tried here (swap in as needed):
#   generate_text(model, tokenizer, "I want you to act as a logistician. ", 100)
#   generate_text(model, tokenizer, "I want you to act as a physician. ", 200)
# NOTE(review): the tuned-model cell further down uses the physician prompt
# with a limit of 200; use the same prompt and limit in both cells for a
# like-for-like comparison.
initial_output = generate_text(
    model,
    tokenizer,
    "I want you to act as a Linux terminal. ",
    100,
)
print("Initial model output:", initial_output)

Initial model output: ['I want you to act as a Linux terminal.  This is the only way I can do this.']


In [61]:
from datasets import load_dataset

# Identifier of the prompt dataset passed to `load_dataset`.
dataset_prompt = "fka/awesome-chatgpt-prompts"

# Load the dataset, then tokenize its "prompt" column in batches.
data_prompt = load_dataset(dataset_prompt).map(
    lambda batch: tokenizer(batch["prompt"]),
    batched=True,
)

# Only the first 50 rows of the "train" split are used for training.
train_prompts = data_prompt["train"].select(range(50))

### Tune with Soft Prompts!

In [9]:
from peft import get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit

# Prompt-tuning configuration for a causal (text-generating) language model.
tuning_config = PromptTuningConfig(
    tokenizer_name_or_path=model_name,
    # Number of virtual tokens that are added and trained.
    num_virtual_tokens=4,
    # The added virtual tokens start from random values.
    prompt_tuning_init=PromptTuningInit.RANDOM,
    # CAUSAL_LM: the model will generate text.
    task_type=TaskType.CAUSAL_LM,
)

# Wrap the loaded model with PEFT using the configuration above.
peft_model = get_peft_model(model, tuning_config)

In [10]:
from transformers import TrainingArguments
# Settings for the training run.
training_args = TrainingArguments(
    output_dir="./",
    # Needed when running on CPU-only clusters.
    use_cpu=True,
    # Let the Trainer search for a batch size that fits in memory;
    # a fixed batch size could be supplied instead.
    auto_find_batch_size=True,
    num_train_epochs=5,
    learning_rate=0.005,
)

In [30]:
from transformers import Trainer, DataCollatorForLanguageModeling
# mlm=False: do not use masked language modeling when collating batches.
causal_lm_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    # The PEFT-wrapped version of the foundation model (bloomz-560m).
    model=peft_model,
    # Training settings defined in `training_args`.
    args=training_args,
    # The tokenized prompt subset used for training.
    train_dataset=train_prompts,
    data_collator=causal_lm_collator,
)
trainer.train()

  0%|          | 0/35 [00:00<?, ?it/s]

{'train_runtime': 708.3699, 'train_samples_per_second': 0.353, 'train_steps_per_second': 0.049, 'train_loss': 3.490439060756138, 'epoch': 5.0}


TrainOutput(global_step=35, training_loss=3.490439060756138, metrics={'train_runtime': 708.3699, 'train_samples_per_second': 0.353, 'train_steps_per_second': 0.049, 'total_flos': 50454909222912.0, 'train_loss': 3.490439060756138, 'epoch': 5.0})

### Look at the results after tuning with soft prompts!

In [68]:
# Generation with the trained model held by the Trainer, for one role-play prompt.
# Other prompts that were tried here (swap in as needed):
#   generate_text(trainer.model, tokenizer, "I want you to act as a logistician. ", 100)
#   generate_text(trainer.model, tokenizer, "I want you to act as a Linux terminal. ", 300)
tuned_output = generate_text(
    trainer.model,
    tokenizer,
    "I want you to act as a physician. ",
    200,
)
print("Tuned model output:", tuned_output)

Tuned model output: ['I want you to act as a physician.  You will be responsible for diagnosing and treating patients.']
