In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftConfig, PeftModel

config  = PeftConfig.from_pretrained("/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_lora_tuning/tuned_model/emotion_lora_Swallow-7b-instruct-v0.1_20240908_135426_Joy")
base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, device_map="auto", load_in_8bit=True).eval()
tokenizer = AutoTokenizer.from_pretrained("/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_lora_tuning/tuned_model/emotion_lora_Swallow-7b-instruct-v0.1_20240908_135426_Joy")

device = "cuda"

model = PeftModel.from_pretrained(base_model, "/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_lora_tuning/tuned_model/emotion_lora_Swallow-7b-instruct-v0.1_20240908_135426_Joy", adapter_name="joy", is_trainable=False)
_ = model.load_adapter("/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_lora_tuning/tuned_model/emotion_lora_Swallow-7b-instruct-v0.1_20240909_154950_Sadness", adapter_name="sadness", is_trainable=False)
_ = model.load_adapter("/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_lora_tuning/tuned_model/emotion_lora_Swallow-7b-instruct-v0.1_20240911_000645_Anticipation", adapter_name="anticipation", is_trainable=False)
_ = model.load_adapter("/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_lora_tuning/tuned_model/emotion_lora_Swallow-7b-instruct-v0.1_20240912_074207_Surprise", adapter_name="surprise", is_trainable=False)
_ = model.load_adapter("/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_lora_tuning/tuned_model/emotion_lora_Swallow-7b-instruct-v0.1_20240913_143120_Anger", adapter_name="anger", is_trainable=False)
_ = model.load_adapter("/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_lora_tuning/tuned_model/emotion_lora_Swallow-7b-instruct-v0.1_20240915_053752_Fear", adapter_name="fear", is_trainable=False)
_ = model.load_adapter("/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_lora_tuning/tuned_model/emotion_lora_Swallow-7b-instruct-v0.1_20240916_090719_Disgust", adapter_name="disgust", is_trainable=False)
_ = model.load_adapter("/workspace/Emotion_Intent_Chat/emo_int_chat/emotion_lora_tuning/tuned_model/emotion_lora_Swallow-7b-instruct-v0.1_20240917_202644_Trust", adapter_name="trust", is_trainable=False)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [14]:
messages = [
    {"role": "system", "content": "ユーザの発話に対して共感し、寄り添うような返答を日本語でしてください。その際、一言から二言程度で短く端的に答えてください。"},
    {"role": "user", "content": "新しく買ったお皿を空けたら髪の毛が入っていました、、"}
]

model.add_weighted_adapter(
    adapters=['joy', 'sadness', 'anticipation', 'surprise', 'anger', 'fear', 'disgust', 'trust'],
    weights=[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    adapter_name='merge',
    density=0.2,
    combination_type='ties'
)

model.set_adapter('merge')
# model.merge_adapter()


In [15]:
list(model.state_dict())

['base_model.model.model.embed_tokens.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.base_layer.SCB',
 'base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight_format',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.joy.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.sadness.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.anticipation.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.surprise.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.anger.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.fear.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.disgust.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.trust.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.merge.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.joy.weight

In [16]:

encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

model_inputs = encodeds.to(device)
# model.to(device)

generated_ids = model.generate(model_inputs, max_new_tokens=512, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

<s>[INST] <<SYS>>
ユーザの発話に対して共感し、寄り添うような返答を日本語でしてください。その際、一言から二言程度で短く端的に答えてください。
<</SYS>>

新しく買ったお皿を空けたら髪の毛が入っていました、、 [/INST] それは、�����sorry to hear that. 新しいお皿を開けたら髪の毛が入っていたとのこと、、 それはちょっと不愉快な思いをさせてしまいました。 私たちは、そのような状況を防ぐために、生産過程で十分な注意を払っています。 もし、同じ問題が再び発生した場合、返品または交換をお勧めします。 申し訳ありません、このようなことが起きてしまいました。</s>
