In [1]:
import torch
from tqdm import tqdm
import pandas as pd

tqdm.pandas()

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
from datasets import load_dataset

from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from trl.core import LengthSampler

In [2]:
config = PPOConfig(
	model_name="/workspace/Emotion_Intent_Chat/calm2-7b",
	learning_rate=1.41e-5,
	log_with="wandb"
)

sent_kwargs = {"top_k": None, "function_to_apply": "none", "batch_size": 16}

In [3]:
import wandb
from datetime import datetime

current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
wandb.init(project=f"test_emotion_lora_tuning", name=f"{current_time}")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnaga-lcw-ld-0203[0m ([33mnaga-lcw-ld-0203-keio.jp[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
def build_dataset(config, dataset_name="shunk031/wrime", ver="ver1", input_min_text_length=0, input_max_text_length=8):
	tokenizer = AutoTokenizer.from_pretrained(config.model_name)
	# tokenizer.pad_token = tokenizer.pad_token
	tokenizer.pad_token = tokenizer.eos_token

	ds = load_dataset(dataset_name, ver, split="train")
	ds = ds.remove_columns(["user_id", "datetime", "writer", "reader1", "reader2", "reader3", "avg_readers"])

	def tokenize(sample):
		stc_length = len(tokenizer.encode(sample["sentence"]))
		if stc_length < input_max_text_length:
			input_size = stc_length
		else :
			input_size = input_max_text_length
		sample["input_ids"] = tokenizer.encode(sample["sentence"])[: input_size]
		sample["query"] = tokenizer.decode(sample["input_ids"])
		return sample

	ds = ds.map(tokenize, batched=False)
	ds.set_format(type="torch")
	return ds

In [5]:
dataset = build_dataset(config)

In [6]:
def collator(data):
	return dict((key, [d[key] for d in data]) for key in data[0])

In [7]:
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType

peft_config = LoraConfig(
	task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1
)

In [8]:
# Load LLM models
lora_model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name, low_cpu_mem_usage=True, device_map="auto", peft_config=peft_config)
# base_model = AutoModelForSeq2SeqLM.from_pretrained(config.model_name, low_cpu_mem_usage=True, device_map="auto")
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name, low_cpu_mem_usage=True, device_map="auto")
# lora_model = get_peft_model(base_model, peft_config)

tokenizer = AutoTokenizer.from_pretrained(config.model_name)

tokenizer.pad_token = tokenizer.pad_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
# lora_model.print_trainable_parameters()

In [11]:
print(tokenizer.pad_token)
print(tokenizer.pad_token_id)
print(tokenizer.eos_token)
print(tokenizer.eos_token_id)

<|padding|>
1
<|endoftext|>
0


In [12]:
# initialize PPOTrainer
ppo_trainer = PPOTrainer(config, lora_model, ref_model, tokenizer, dataset=dataset, data_collator=collator)

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112665762710902, max=1.0…

In [13]:
device = ppo_trainer.accelerator.device
if ppo_trainer.accelerator.num_processes == 1:
	device = 0 if torch.cuda.is_available() else "cpu"


In [14]:
emotion_pipe = pipeline("text-classification", model="/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_reward_model/tuned_model/20240801_044013_bert-base-japanese-v3_cosine_with_restarts/checkpoint-9621", device=device)

In [15]:
text = "彼はとてもたよりになる"
emotion_pipe(text, **sent_kwargs)

[{'label': 'Anticipation', 'score': 1.488558292388916},
 {'label': 'Joy', 'score': -0.3886352479457855},
 {'label': 'Trust', 'score': -3.6500744819641113},
 {'label': 'Sadness', 'score': -4.7005510330200195},
 {'label': 'Disgust', 'score': -4.830318450927734},
 {'label': 'Fear', 'score': -5.197143077850342},
 {'label': 'Surprise', 'score': -5.298903465270996},
 {'label': 'Anger', 'score': -5.537960529327393}]

In [16]:
gen_kwargs = {"min_length": 0, "top_k": 500, "top_p": 0.95, "do_sample": True, "pad_token_id": tokenizer.eos_token_id}

In [17]:
# select emotion
emotion = "Trust"

emotion_dict = {
    "Joy": 0,
    "Sadness": 1,
    "Anticipation": 2,
    "Surprise": 3,
    "Anger": 4,
    "Fear": 5,
    "Disgust": 6,
    "Trust": 7
}

emotion_id = emotion_dict[emotion]

In [19]:
generation_kwargs = {
    "min_length": -1,
    # "min_length": 0,
    "top_k": 500,
    "top_p": 0.95,
    "do_sample": True,
    # "pad_token_id": tokenizer.pad_token_id,
    "pad_token_id": tokenizer.eos_token_id,
    "temperature": 1
}


for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    query_tensors = batch["input_ids"]

    #### Get response from calm
    response_tensors = []
    for query in query_tensors:
        # gen_len = output_length_sampler()
        gen_len = 10
        generation_kwargs["max_new_tokens"] = gen_len
        response = ppo_trainer.generate(query, **generation_kwargs)
        if len(response.squeeze()) < gen_len:
            gen_len = len(response.squeeze())
        response_tensors.append(response.squeeze()[-gen_len:])
    batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

    #### Compute sentiment score
    texts = [q + r for q, r in zip(batch["query"], batch["response"])]
    pipe_outputs = emotion_pipe(texts, **sent_kwargs)
    rewards = [torch.tensor(output[emotion_id]["score"]) for output in pipe_outputs]

    #### Run PPO step
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
    ppo_trainer.log_stats(stats, batch, rewards)

312it [6:49:28, 78.74s/it]


In [21]:
#### get a batch from the dataset
bs = 32
game_data = dict()
dataset.set_format("pandas")
df_batch = dataset[:].sample(bs)
game_data["query"] = df_batch["query"].tolist()
query_tensors = df_batch["input_ids"].tolist()

response_tensors_ref, response_tensors = [], []

#### get response from gpt2 and gpt2_ref
for i in range(bs):
    # gen_len = output_length_sampler()
    gen_len = 10
    output = ref_model.generate(
        torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device), max_new_tokens=gen_len, **gen_kwargs
    ).squeeze()[-gen_len:]
    response_tensors_ref.append(output)
    output = lora_model.generate(
        torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device), max_new_tokens=gen_len, **gen_kwargs
    ).squeeze()[-gen_len:]
    response_tensors.append(output)

#### decode responses
game_data["response (before)"] = [tokenizer.decode(response_tensors_ref[i]) for i in range(bs)]
game_data["response (after)"] = [tokenizer.decode(response_tensors[i]) for i in range(bs)]

#### sentiment analysis of query/response pairs before/after
texts = [q + r for q, r in zip(game_data["query"], game_data["response (before)"])]
game_data["rewards (before)"] = [output[emotion_id]["score"] for output in emotion_pipe(texts, **sent_kwargs)]

texts = [q + r for q, r in zip(game_data["query"], game_data["response (after)"])]
game_data["rewards (after)"] = [output[emotion_id]["score"] for output in emotion_pipe(texts, **sent_kwargs)]

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results

Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,(´-`).｡oO(,あちち。。)\nん？\nなぜか,そんなことしないですっ・・・)\nそんなことはさて,-5.756476,-5.992166
1,行ったことのない県あったっけ...？(,混乱)\n愛知県、岐阜県、三重県、和歌山県,ﾟдﾟ)\n■　４月,-5.020241,-5.992216
2,リリックビデオ作ってみたい。,\nこのリリックビデオを作った人は、どうして映画,\nコロナが憎いです。\n本当は会いたい,-5.37267,-5.012854
3,ハウルってぐぅイケメンだしイケ,ボ(イケメンボイスね)だし、,メンが魔法使いになった時点ですごい\nん,-5.728635,-4.454085
4,野田に住んでるといきなり霊波,之光の黒い御揃いのスーツの男性たちに,が出てビックリ。なんてこともあるらしい」とか言われて驚いた,-4.997355,-4.445333
5,久しぶりにテニミュの曲聴いてる。\,n\n09-10 20,n懐かしさで涙出てきた。pic.twitter.,-6.126385,-5.902471
6,ご家庭で声かけお願いします。ご家庭,の方の声かけで、時間厳守を心がけて,でも少しでもいいから、子どもに関心を持ってあげてください,-5.069574,-4.885078
7,7割くらいのスペース使ってないけど,、\n2016年10月,。」\nと言っていました。\n私はそれが凄く不思議,-5.97243,-4.410717
8,工場で派遣やった時に出稼ぎの中国人から,半日１週間早出してくれや！と言われた,聞いたわ\n何ヶ月か働いてきて中国人だと,-5.317713,-4.111549
9,一色いろはに完全にやられた,のか、俺は見てないと言い出して...\nもし,、これ凄いな\n今さっきびっくりしたことが起きた,-5.065411,-4.241178


In [22]:
print("mean:")
display(df_results[["rewards (before)", "rewards (after)"]].mean())
print()
print("median:")
display(df_results[["rewards (before)", "rewards (after)"]].median())

mean:


rewards (before)   -5.525233
rewards (after)    -4.915827
dtype: float64


median:


rewards (before)   -5.458299
rewards (after)    -4.932174
dtype: float64

In [28]:
import os

save_dir = f"/workspace/Emotion_Intent_Chat/emo_int_chat/lora_tuning/tuned_model/{config.model_name.split('/')[-1]}-{emotion}"
os.makedirs(save_dir, exist_ok=True)

lora_model.save_pretrained(save_dir, push_to_hub=False)
tokenizer.save_pretrained(save_dir, push_to_hub=False)

('/workspace/Emotion_Intent_Chat/emo_int_chat/lora_tuning/tuned_model/calm2-7b-Trust/tokenizer_config.json',
 '/workspace/Emotion_Intent_Chat/emo_int_chat/lora_tuning/tuned_model/calm2-7b-Trust/special_tokens_map.json',
 '/workspace/Emotion_Intent_Chat/emo_int_chat/lora_tuning/tuned_model/calm2-7b-Trust/tokenizer.json')