In [None]:
from peft import PeftModel, PromptTuningConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base causal language model and its tokenizer
model_name = "meta-llama/Llama-2-7b-hf"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define soft prompt configuration.
# task_type has to be one of PEFT's TaskType values and has to match the head
# of the model loaded above (AutoModelForCausalLM -> CAUSAL_LM).
# "TEXT_CLASSIFICATION" is not a PEFT task type.
prompt_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,     # Task type (matches AutoModelForCausalLM)
    num_virtual_tokens=20,            # Number of trainable prompt tokens
    tokenizer_name_or_path=model_name,
)

# Add soft prompts to the model.
# get_peft_model selects the task-specific PEFT wrapper for the configured
# task_type; instantiating the generic PeftModel class directly bypasses that
# wrapper.
peft_model = get_peft_model(model, prompt_config)


In [None]:
from transformers import Trainer, TrainingArguments
from datasets import load_dataset

# Load a dataset
dataset = load_dataset("imdb")

# padding="max_length" below requires the tokenizer to have a padding token.
# Llama-2 tokenizers typically do not define one, so fall back to the EOS
# token when it is missing (no-op if a pad token is already set).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


# Preprocess the dataset
def preprocess_function(examples):
    """Tokenize a batch of reviews and attach causal-LM training labels.

    Args:
        examples: A batch from the dataset; ``examples["text"]`` is the list
            of raw review strings.

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``) padded or
        truncated to 512 tokens, plus a ``labels`` entry that copies
        ``input_ids`` with padded positions set to -100 so they are ignored
        by the loss.
    """
    encodings = tokenizer(
        examples["text"], truncation=True, padding="max_length", max_length=512
    )
    # Mask by attention_mask rather than by token id: the pad token may be the
    # same id as EOS, and real EOS tokens should still contribute to the loss.
    encodings["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(encodings["input_ids"], encodings["attention_mask"])
    ]
    return encodings


# Drop the raw columns: the integer sentiment `label` would otherwise be
# collated as `labels` and clash with the token-level labels built above.
tokenized_dataset = dataset.map(
    preprocess_function, batched=True, remove_columns=["text", "label"]
)

# Hyperparameters for the prompt-tuning run
training_args = TrainingArguments(
    # Output locations
    output_dir="./soft_prompt_tuning",
    logging_dir="./logs",
    # Optimisation settings; the learning rate is deliberately higher than a
    # usual fine-tuning rate because only the soft prompts are being trained
    learning_rate=5e-4,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    # Run evaluation at the end of every epoch
    evaluation_strategy="epoch",
)

# Wire the PEFT-wrapped model, the tokenized splits and the tokenizer into a Trainer
trainer = Trainer(
    args=training_args,
    model=peft_model,
    tokenizer=tokenizer,
    eval_dataset=tokenized_dataset["test"],
    train_dataset=tokenized_dataset["train"],
)

# Run the training loop
trainer.train()

# Write the tuned PEFT model to disk (same directory as the Trainer output_dir)
peft_model.save_pretrained("./soft_prompt_tuning")
