In [None]:
!pip install --upgrade pip

In [None]:
!pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple

In [None]:
!!pip install --disable-pip-version-check torch==1.13.1 torchdata==0.5.1 --quiet

In [None]:
!pip install transformers==4.27.2 datasets==2.11.0 evaluate==0.4.0 rouge_score==0.1.2 peft==0.3.0 --quiet
# Installing the Reinforcement Learning library directly from github.
!pip install git+https://github.com/lvwerra/trl.git
#!pip install trl

In [None]:
!pip install git+https://github.com/huggingface/trl.git

In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM, GenerationConfig
from datasets import load_dataset
from peft import PeftModel, PeftConfig, LoraConfig, TaskType

In [None]:
# Supervised fine-tuning example built on TRL's SFTTrainer.
from datasets import load_dataset
from trl import SFTTrainer

# Training split of the IMDB dataset.
dataset = load_dataset("imdb", split="train")

# The trainer is given the checkpoint name, the dataset column that holds the raw
# text, and the maximum sequence length.
trainer = SFTTrainer(
    model="facebook/opt-350m",
    train_dataset=dataset,
    max_seq_length=512,
    dataset_text_field="text",
)

# Run the fine-tuning loop.
trainer.train()

In [None]:
# trl: Transformer Reinforcement Learning library
from trl import PPOTrainer, PPOConfig, AutoModelForSeq2SeqLMWithValueHead
from trl import create_reference_model
from trl.core import LengthSampler

import torch
import evaluate

import numpy as np
import pandas as pd

# tqdm wraps iterables to show a progress meter while a loop runs.
# tqdm.pandas() enables tqdm's pandas integration; no pandas progress call is
# visible in this part of the notebook. Presumably used further down; verify.
from tqdm import tqdm
tqdm.pandas()


In [None]:
# Load the CodeLlama tokenizer and model.
# NOTE(review): this is a 34B checkpoint, and `model` is reassigned by a later cell
# (AutoModelForSeq2SeqLM.from_pretrained(model_name, ...)). Confirm this load is
# actually needed before running it.

from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-34b-Instruct-hf")
model = AutoModelForCausalLM.from_pretrained("codellama/CodeLlama-34b-Instruct-hf")

# Base checkpoint name consumed by later cells (build_dataset and the seq2seq/PEFT
# model load). It was referenced but never defined, which raised NameError.
# NOTE(review): assumed to be FLAN-T5 base because the LoRA config is tagged FLAN-T5,
# targets "q"/"v" modules and uses a seq2seq task type. Confirm it matches the PEFT
# checkpoint being loaded.
model_name = "google/flan-t5-base"

huggingface_dataset_name = "knkarthick/dialogsum"
dataset_original = load_dataset(huggingface_dataset_name)
dataset_original

In [None]:
# Preprocess the dataset

def build_dataset(model_name,
    dataset_name,
    input_min_text_length,
    input_max_text_length):
    '''
    Preprocess the dataset and split it into train and test parts.

    Parameters:
    - model_name (str): Tokenizer model name.
    - dataset_name (str): Name of the dataset to load.
    - input_min_text_length (int): Exclusive lower bound on dialogue length, in characters.
    - input_max_text_length (int): Inclusive upper bound on dialogue length, in characters.

    Returns:
    - dataset_splits (datasets.dataset_dict.DatasetDict): Preprocessed dataset containing
      train and test parts. Each sample gains "input_ids" (the tokenized prompt) and
      "query" (the prompt decoded back to text).
    '''
    # Only the "train" split is loaded; the train/test parts are carved out of it below.
    dataset = load_dataset(dataset_name, split="train")

    # Keep dialogues whose length in characters (not tokens) lies in
    # (input_min_text_length, input_max_text_length].
    dataset = dataset.filter(
        lambda x: input_min_text_length < len(x["dialogue"]) <= input_max_text_length,
        batched=False,
    )

    # NOTE(review): device_map is a model-loading option; it is probably a no-op for a
    # tokenizer. Confirm and drop it.
    tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto")

    def tokenize(sample):
        # Wrap each dialogue with the summarization instruction.
        dialogue = sample["dialogue"]
        prompt = f"""
        Summarize the following conversation.
        {dialogue}
        Summary:
        """
        sample["input_ids"] = tokenizer.encode(prompt)
        # This must be called "query", which is a requirement of our PPO library.
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample

    # Tokenize each dialogue, one sample at a time.
    dataset = dataset.map(tokenize, batched=False)
    dataset.set_format(type="torch")

    # Split into train and test parts. With shuffle=False the split is positional, so
    # the seed is not expected to influence the result.
    dataset_splits = dataset.train_test_split(test_size=0.2, shuffle=False, seed=42)
    return dataset_splits
# Build the train/test splits from dialogues longer than 200 and at most 1000
# characters, then show the resulting structure.
dataset = build_dataset(
    model_name=model_name,
    dataset_name=huggingface_dataset_name,
    input_min_text_length=200,
    input_max_text_length=1000,
)
print(dataset)


In [None]:
# Count the trainable and total model parameters
def print_number_of_trainable_model_parameters(model):
    """Summarize how many of a model's parameters are trainable.

    Despite its name, this function does not print anything; it returns the
    summary so the caller can embed it in its own message.

    Parameters:
    - model: Object exposing ``named_parameters()`` that yields ``(name, param)``
      pairs, where each ``param`` provides ``numel()`` and ``requires_grad``.

    Returns:
    - str: Multi-line summary (with a leading newline) listing the trainable
      parameter count, the total parameter count and the trainable percentage
      formatted to two decimals.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        param_count = param.numel()
        all_model_params += param_count
        if param.requires_grad:
            trainable_model_params += param_count

    # A model without parameters used to raise ZeroDivisionError; report 0% instead.
    if all_model_params:
        trainable_percentage = 100 * trainable_model_params / all_model_params
    else:
        trainable_percentage = 0.0

    return (
        f"\ntrainable model parameters: {trainable_model_params}"
        f"\nall model parameters: {all_model_params}"
        f"\npercentage of trainable model parameters: {trainable_percentage:.2f}%"
    )

  使用Meta AI基于RoBERTa的仇恨言论模型（https://huggingface.co/facebook/roberta-hate-speech-dynabench-r4-target）作为奖励模型。该模型输出logits，并据此预测两个类别的概率：nothate和hate。其中nothate类别的logits将被用作正奖励。随后，模型将利用这些奖励值通过PPO进行微调。

In [None]:
# LoRA adapter settings handed to the PEFT model load in the next cell, which also
# sets is_trainable=True.
# NOTE(review): the original comment spoke of a Salesforce code-generation model, but
# the seq2seq task type and the FLAN-T5 tag below point at FLAN-T5. Confirm.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,  # FLAN-T5
    r=32,  # Rank
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q", "v"],
    bias="none",
)

In [None]:
# Load the base seq2seq checkpoint in bfloat16. The original line contained a
# duplicated, unclosed `from_pretrained(` call and did not parse.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name,
                                              torch_dtype=torch.bfloat16)

# Attach the adapter weights stored at the checkpoint path below; is_trainable=True
# is requested so the adapter can be updated afterwards.
# NOTE(review): the path is an absolute Kaggle input directory and will not exist
# elsewhere. Consider making it configurable.
peft_model = PeftModel.from_pretrained(model, 
                                       '/kaggle/input/generative-ai-with-llms-lab-3/lab_3/peft-dialogue-summary-checkpoint-from-s3/',
                                       lora_config=lora_config,
                                       torch_dtype=torch.bfloat16, 
                                       device_map="auto",                                       
                                       is_trainable=True)
print(f'PEFT model parameters to be updated:\n{print_number_of_trainable_model_parameters(peft_model)}\n')

In [None]:
# Wrap the PEFT model in TRL's seq2seq value-head model for PPO.
ppo_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(
    peft_model,
    is_trainable=True,
    torch_dtype=torch.bfloat16,
)

# Report the trainable-parameter summary, then show the value head module itself.
print(f'PPO model parameters to be updated (ValueHead + 769 params):\n{print_number_of_trainable_model_parameters(ppo_model)}\n')
print(ppo_model.v_head)

In [None]:
# Derive the reference model from the PPO model and report its parameter summary.
ref_model = create_reference_model(model=ppo_model)
print(f'Reference model parameters to be updated:\n{print_number_of_trainable_model_parameters(ref_model)}\n')

In [None]:
# Hate-speech classifier used as the reward model, together with its tokenizer.
toxicity_model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
toxicity_tokenizer = AutoTokenizer.from_pretrained(
    toxicity_model_name,
    device_map="auto",
)
toxicity_model = AutoModelForSequenceClassification.from_pretrained(
    toxicity_model_name,
    device_map="auto",
)

# Show the class-index to label mapping from the model config.
print(toxicity_model.config.id2label)

In [None]:
# Score a harmless sentence with the toxicity model.
# Index of the "not hate" class in the model output; its logit is the reward.
not_hate_index = 0
non_toxic_text = "#Person 1# tells Tommy that he didn't like the movie."

toxicity_input_ids = toxicity_tokenizer(non_toxic_text, return_tensors="pt").input_ids
logits = toxicity_model(input_ids=toxicity_input_ids).logits
print(f'logits [not hate, hate]: {logits[0].tolist()}')

# Softmax over the class dimension turns the logits into probabilities.
probabilities = torch.softmax(logits, dim=-1)[0].tolist()
print(f'probabilities [not hate, hate]: {probabilities}')

# The raw "not hate" logit for each input row is used as the reward.
nothate_reward = logits[:, not_hate_index].tolist()
print(f'reward (high): {nothate_reward}')


In [None]:
# Load the "toxicity" measurement from the evaluate library, configured with the
# reward model's name and with "hate" as the toxic label.
toxicity_evaluator = evaluate.load(
    "toxicity",
    toxicity_model_name,
    toxic_label="hate",
    module_type="measurement",
)