In [18]:
from tqdm.auto import tqdm
import csv
import pandas as pd

## GPT Sample Generation

In [4]:
from openai import OpenAI
import time
import os
from dotenv import load_dotenv

# Load environment variables from .env file so the API key never has to be typed into the notebook
load_dotenv()

# os.environ.get returns None (rather than raising) when OPEN_AI_KEY is not set
OPEN_AI_KEY = os.environ.get('OPEN_AI_KEY')
# Shared OpenAI client used by the sample-generation request
client = OpenAI(api_key=OPEN_AI_KEY)

In [7]:
# Ask the chat model for a batch of synthetic CONTEXT / Response samples.
# The request carries two system messages (task description, then three worked examples)
# followed by the user message that asks for 40 new scenarios in the same format.
completion = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
    # System message 1: describes the communication values the samples must exhibit
    {"role": "system", "content": "You are a system that can generate chat prompts that understands values in Chinese communication. This includes indirect communication, which relies on subtlety and hints and context, as well as showing respect in front of elders, as well as humility. "},
    # System message 2: few-shot examples that fix the "CONTEXT: ... / Response: ..." layout
    {"role": "system", "content": "Here are 3 examples: Input:\nCONTEXT: You are a caregiver in a nursing home, conversing with an elderly Chinese resident, Mrs. Li, in English. You notice she hasn't finished her food from yesterday.\n Mrs. Li: Hello!\n Response:\n Hello Mrs. Li. Was the food from yesterday to your liking?\n \n Input:\n CONTEXT: You are a hotel receptionist, speaking to an elderly Chinese couple that is checking out. They have not paid yet, but are insisting they have. \n Woman: We would like to check out, we paid yesterday.\n Response:\n I apologize if there's been a misunderstanding. I will double check our system again.\n \n Input:\n CONTEXT: You are conversing with a Chinese coworker to provide feedback on their work, which is a little lacking. \n Coworker: Hello!\n Response:\n Hello, how are you? I've reviewed your work. It's quite comprehensive. Would you like to go over this section though?"},
    # User message: the actual generation request
    {"role": "user", "content": "Give me 40 chat prompts of unique scenarios that exhibit values in Chinese communication, especially the values of indirectness and respect to elders. Use the format of the examples given with the context."}
  ]
)

In [None]:
# Keep only the message text of the first returned choice
output = completion.choices[0].message.content

In [11]:
# Write samples to data file (appending)
# Every batch is terminated with a newline: the parser below is line-based, so without it
# the first line of the next appended batch would be glued onto the last line of this one.
with open('data/gpt_samples.txt', 'a') as f:
    f.write(output)
    if not output.endswith("\n"):
        f.write("\n")

## Parse Data 
Convert the generated `.txt` sample files into a CSV file.

In [14]:
def parse_sample_files(file):
    """Parse a generated sample file into prompts and ideal responses.

    The file is read line by line (each line is stripped first):

    * a line containing ``"CONTEXT: "`` starts a prompt; everything after the
      marker is kept;
    * the line immediately following a CONTEXT line is appended to that prompt
      (separated by a newline) and the prompt is stored;
    * any other line containing ``"Response: "`` is stored as an ideal response
      with every occurrence of the marker removed.

    All remaining lines are ignored.

    Args:
        file: Path of the text file to read (opened with the platform default
            encoding).

    Returns:
        A ``(prompts, ideal)`` tuple of two lists of strings. The lists are
        filled independently, so they only line up index-by-index when every
        prompt in the file is followed by exactly one response line.
    """
    prompts = []
    ideal = []
    is_question = False
    # `with` releases the file handle even if reading or parsing raises midway
    with open(file, "r") as f:
        for x in f:
            x = x.strip()
            if "CONTEXT: " in x:
                i = x.index("CONTEXT: ")
                # 9 == len("CONTEXT: "): keep only the text after the marker
                cur_prompt = x[i+9:]
                is_question = True

            elif is_question:
                # The line right after CONTEXT is the speaker's opening turn
                cur_prompt += "\n" + x
                is_question = False
                prompts.append(cur_prompt)

            elif "Response: " in x:
                # x is already stripped above, so no second strip() is needed
                resp = x.replace("Response: ","")
                ideal.append(resp)
    return prompts, ideal

In [3]:
# Parse the two generated sample files into (prompts, ideal responses) lists
# NOTE(review): presumably "nh" = nursing-home scenarios and "gen" = general scenarios — confirm
nh_prompts, nh_ideal = parse_sample_files("data/gpt_samples_nh.txt")
gen_prompts, gen_ideal = parse_sample_files("data/gpt_samples_gen.txt")

In [4]:
# Concatenate both sources in the same order so prompt i still matches ideal response i
all_prompts = nh_prompts + gen_prompts
all_ideal = nh_ideal + gen_ideal 

In [5]:
# Build one training string per (prompt, ideal response) pair.
# Each string is wrapped in a single-element list so it becomes one CSV row.
cncomm = []
bos_token = "<s>"
eos_token = "</s>"
sys_info = "You are a system that is able to adapt to different scenarios and provide English responses that are culturally sensitive to values in Chinese communication. These values include indirect communication, which relies on subtlety and hints and context, as well as showing respect in front of elders, and maintaining humility."

# zip() stops silently at the shorter list, which would pair prompts with the wrong
# responses if the parser dropped an entry — fail loudly instead of training on bad pairs.
if len(all_prompts) != len(all_ideal):
    raise ValueError(
        f"Prompt/response count mismatch: {len(all_prompts)} prompts vs {len(all_ideal)} responses"
    )

for ex in zip(all_prompts,all_ideal):
    # ex[0] is the prompt (context + opening turn), ex[1] the ideal response
    prompt = f"{bos_token}[INST]\n<<SYS>>{sys_info}<</SYS>>\n{ex[0]}\n[/INST]\n{ex[1]}{eos_token}"
    cncomm.append([prompt])

In [6]:
# Spot-check the first formatted training sample
print(cncomm[0])

['<s>[INST]\n<<SYS>>You are a system that is able to adapt to different scenarios and provide English responses that are culturally sensitive to values in Chinese communication. These values include indirect communication, which relies on subtlety and hints and context, as well as showing respect in front of elders, and maintaining humility.<</SYS>>\nYou are a caregiver in a nursing home, conversing with an elderly Chinese resident, Mr. Wang, who seems a little down today.\nMr. Wang: Good afternoon.\n[/INST]\nGood afternoon, Mr. Wang. Is there anything in particular on your mind today?</s>']


In [9]:
# Write the training samples to a one-column CSV with a "samples" header.
fields = ["samples"]
# newline='' is required by the csv module: the samples contain embedded "\n" characters,
# and without it the platform's newline translation corrupts them (and on Windows adds
# a blank line after every row).
with open('data/cn_comm.csv', 'w', newline='') as f:

    # using csv.writer method from CSV package
    write = csv.writer(f)

    write.writerow(fields)
    write.writerows(cncomm)

## Llama2 via Huggingface: Benchmark Responses

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

In [4]:
# Base chat model used to produce the benchmark (pre-fine-tuning) responses
model_id = "meta-llama/Llama-2-7b-chat-hf"
# No torch_dtype is passed here, so the checkpoint is loaded at the library's default precision
# NOTE(review): confirm the GPU has enough memory for the 7B model at that precision
model = AutoModelForCausalLM.from_pretrained(model_id)
# Move the model to the CUDA device
model = model.cuda()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
# Import the factory under an alias: the statement below rebinds the name `pipeline` to the
# pipeline object, so calling `pipeline(...)` as a factory would fail if this cell were re-run.
from transformers import pipeline as hf_pipeline

tokenizer = AutoTokenizer.from_pretrained(model_id)
# The object keeps the name `pipeline` because get_response() looks it up under that name
pipeline = hf_pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device=0
)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [6]:
def get_response(text):
    """Generate a model reply for one benchmark prompt.

    The prompt is wrapped in the [INST] / <<SYS>> chat template together with a
    fixed system prompt, sent through the module-level `pipeline`, and the
    `generated_text` of every returned sequence is concatenated.

    Args:
        text: The user part of the prompt (scenario context plus opening turn).

    Returns:
        The generated continuation as a single string (the prompt itself is
        excluded because `return_full_text=False`).
    """
    system_prompt = "You are a system that is able to adapt to different scenarios and provide English responses that are culturally sensitive to values in Chinese communication. These values include indirect communication, which relies on subtlety and hints and context, as well as showing respect in front of elders, and maintaining humility."
    template = "<s>[INST]\n<<SYS>>\n{system_prompt}\n<</SYS>>\n\n{user_prompt}[/INST]"
    full_prompt = template.format(system_prompt=system_prompt, user_prompt=text)

    # Sampling settings
    # NOTE(review): Hugging Face documents `max_length` as covering prompt + generated
    # tokens — confirm 250 leaves enough room after the system prompt.
    generation_kwargs = dict(
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=250,
        return_full_text=False,
        temperature=0.5,
    )
    generations = pipeline(full_prompt, **generation_kwargs)

    return "".join(candidate['generated_text'] for candidate in generations)

In [7]:
# Load Benchmark Samples: prompts plus their hand-written ideal responses,
# parsed with the same CONTEXT / Response format as the training samples
bm_prompts, bm_ideals = parse_sample_files("data/benchmark_samples.txt")

In [10]:
# Collect base-model responses for every benchmark prompt.
bm_data = []

# zip() stops silently at the shorter list; a mismatch would pair prompts with the wrong
# ideal outputs, so fail loudly instead.
if len(bm_prompts) != len(bm_ideals):
    raise ValueError(
        f"Benchmark count mismatch: {len(bm_prompts)} prompts vs {len(bm_ideals)} ideal outputs"
    )

# total= gives tqdm a real progress bar (a bare zip has no length, hence "0it").
# The loop variable is named bm_prompt rather than `input` so the built-in is not shadowed.
for bm_prompt, ideal_output in tqdm(zip(bm_prompts, bm_ideals), total=len(bm_prompts)):
    # Generation uses sampling, so each prompt is answered three times
    for _ in range(3):
        output = get_response(bm_prompt)
        cur_sample = {
            'prompt': bm_prompt,
            'output': output,
            'ideal_output': ideal_output,
        }
        bm_data.append(cur_sample)
        print(cur_sample )

0it [00:00, ?it/s]

{'prompt': "You are a caregiver in a nursing home, conversing with an elderly Chinese resident, Mrs. Li. You notice she hasn't finished her food from yesterday.\nMrs. Li: Hello!", 'output': "  Hello Mrs. Li! *adjusts glasses* It's so nice to see you today! *smiles* I hope you're feeling well. *glances at unfinished food* I see you haven't finished your meal from yesterday. Is everything okay? *concerned* Would you like me to help you with that? *offers a gentle smile*", 'ideal_output': 'Hello Mrs. Li. What did you think of the food from yesterday? Was it to your liking?'}
{'prompt': "You are a caregiver in a nursing home, conversing with an elderly Chinese resident, Mrs. Li. You notice she hasn't finished her food from yesterday.\nMrs. Li: Hello!", 'output': "  Hello Mrs. Li! *adjusts glasses* It's so nice to see you today! *smiles* I hope you're feeling well. *glances at the tray* I see you still have some food left from yesterday. *gives a gentle hint* Would you like me to help you f



{'prompt': "You are a doctor speaking to an elderly Chinese patient about their treatment plan.\nElderly patient: Doctor, I'm feeling a bit uncertain about the medication you prescribed.", 'output': "  Ah, I see. *smiling* Well, I understand your hesitation, my dear patient. It's only natural to feel a bit uncertain about trying new medication, especially at your age. *pausing* May I ask, have you had any experience with medication like this before? *giving a gentle nod*", 'ideal_output': "Thank you for your honesty. Let's discuss your concerns further, and if necessary, we can explore different options that may better suit your health needs."}
{'prompt': "You are a doctor speaking to an elderly Chinese patient about their treatment plan.\nElderly patient: Doctor, I'm feeling a bit uncertain about the medication you prescribed.", 'output': "  Ah, I see. *pauses and nods* I understand your hesitation, my dear patient. *smiles* Let me explain the reasoning behind the medication and addre

In [16]:
# Turn the list of benchmark records into a table and save it without the index column
bm_df = pd.DataFrame(bm_data)
bm_df.to_csv("data/bm_10.csv", index=False)

## LoRA

In [14]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 datasets==2.16.0

'pip' is not recognized as an internal or external command,
operable program or batch file.


In [1]:
import os, torch, logging
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
# from transformers import LlamaForCausalLM, LlamaTokenizer

In [11]:
from huggingface_hub import notebook_login
# Show the Hugging Face login widget
# NOTE(review): presumably needed for the gated Llama-2 weights and the later push_to_hub calls — confirm
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
# Dataset: load the "train" split of the fine-tuning set from the Hugging Face Hub
# NOTE(review): presumably the uploaded copy of data/cn_comm.csv (the trainer reads its "samples" column) — confirm
data_name = "cheungra/CNCOMM-300"
training_data = load_dataset(data_name, split="train")

In [3]:
# Model and tokenizer names
base_model_name = "meta-llama/Llama-2-7b-chat-hf"
refined_model = "llama-2-7b-cncomm"
# Tokenizer
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
# Llama-2 ships without a pad token. Do NOT reuse eos_token for padding: the language-modeling
# collator excludes every pad-token position from the loss, so the "</s>" that ends each
# training sample would be excluded too and the model would not be trained to stop generating.
# The <unk> token is used for padding instead.
llama_tokenizer.pad_token = llama_tokenizer.unk_token
llama_tokenizer.padding_side = "right"  # Fix for fp16

In [4]:
# Quantization Config: load the base weights in 4-bit NF4, computing in float16,
# with double quantization turned off
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)
# Model: quantized base model placed entirely on GPU 0
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map={"": 0}
)
# Turn the generation cache off for training
base_model.config.use_cache = False
# NOTE(review): presumably disables the tensor-parallel emulation path in the Llama implementation — confirm
base_model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
# Sanity check: prints True when this PyTorch install can use CUDA
import torch
print(torch.cuda.is_available())

True


In [5]:
# LoRA Config: rank-16 adapters with alpha 32 and 10% dropout; bias terms are not trained
peft_parameters = LoraConfig(
    lora_alpha=32,
    lora_dropout=0.1,
    r=16,
    bias="none",
    task_type="CAUSAL_LM"
)
# Training Params: one epoch, batch size 4 per device, no gradient accumulation,
# full-precision training flags (fp16 and bf16 both off)
# NOTE(review): with lr_scheduler_type="constant" the warmup_ratio below may have no effect;
# "constant_with_warmup" is the schedule that applies warmup — confirm which was intended
train_params = TrainingArguments(
    output_dir="./results_modified",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant"
)
# Trainer: supervised fine-tuning on the "samples" text column of the dataset
fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=training_data,
    peft_config=peft_parameters,
    dataset_text_field="samples",
    tokenizer=llama_tokenizer,
    args=train_params
)
# Training
# NOTE(review): the recorded output of this cell ends in KeyboardInterrupt — confirm a full
# run completed before relying on the saved adapter
fine_tuning.train()
# Save Model: writes the trained adapter to the local directory named by refined_model
fine_tuning.model.save_pretrained(refined_model)



  0%|          | 0/76 [00:00<?, ?it/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


KeyboardInterrupt: 

### Save Models (to hub and local)

In [19]:
# push to huggingface hub: upload the trained adapter and the tokenizer to the same repo
# (the repo name repeats the value of refined_model)
fine_tuning.model.push_to_hub("llama-2-7b-cncomm")
fine_tuning.tokenizer.push_to_hub("llama-2-7b-cncomm")



adapter_model.bin:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.bin:   0%|          | 0.00/16.8M [00:00<?, ?B/s]



CommitInfo(commit_url='https://huggingface.co/cheungra/llama-2-7b-cncomm/commit/7dde18e72292215a14a6d4cc83fe28581c42ad47', commit_message='Upload tokenizer', commit_description='', oid='7dde18e72292215a14a6d4cc83fe28581c42ad47', pr_url=None, pr_revision=None, pr_num=None)

In [22]:
# local saves: trainer output goes to ./save_cncomm, the model config to ./config.json
fine_tuning.save_model('save_cncomm')
fine_tuning.model.config.to_json_file("config.json")

## Testing llama-2-7b-cncomm

#### Load Adapter via PeftModel

In [1]:
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

In [2]:
# Base checkpoint and the fine-tuned LoRA adapter that was pushed to the Hub
model_id = "meta-llama/Llama-2-7b-chat-hf"
adapter_model_id = "cheungra/llama-2-7b-cncomm"

# The tokenizer is taken from the base checkpoint, not from the adapter repo
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [3]:
# Load the base model, then wrap it with the fine-tuned adapter weights
model = AutoModelForCausalLM.from_pretrained(model_id)
ft_model = PeftModel.from_pretrained(model, adapter_model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
# merge_and_unload() belongs to the PEFT wrapper (ft_model), not to the plain base model
# bound to `model`, so it has to be called on ft_model. The merged result is bound to
# `model` because the following cells use that name.
model = ft_model.merge_and_unload()

In [6]:
# Move the model to the CUDA device for generation
model = model.cuda()

### Samples in Pipeline


In [11]:
# Import the factory under an alias: the statement below rebinds the name `pipeline` to the
# pipeline object, so calling `pipeline(...)` as a factory would fail if this cell were re-run.
from transformers import pipeline as hf_pipeline

# The object keeps the name `pipeline` because get_response() looks it up under that name
pipeline = hf_pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device=0
)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [12]:
def get_response(text):
    """Generate a reply from the fine-tuned model for one benchmark prompt.

    The prompt is wrapped in the [INST] / <<SYS>> chat template together with a
    fixed system prompt, sent through the module-level `pipeline`, and the
    `generated_text` of every returned sequence is concatenated.

    Args:
        text: The user part of the prompt (scenario context plus opening turn).

    Returns:
        The generated continuation as a single string (the prompt itself is
        excluded because `return_full_text=False`).
    """
    system_prompt = "You are a system that is able to adapt to different scenarios and provide English responses that are culturally sensitive to values in Chinese communication. These values include indirect communication, which relies on subtlety and hints and context, as well as showing respect in front of elders, and maintaining humility."
    template = "<s>[INST]\n<<SYS>>\n{system_prompt}\n<</SYS>>\n\n{user_prompt}[/INST]"
    full_prompt = template.format(system_prompt=system_prompt, user_prompt=text)

    # Sampling settings
    # NOTE(review): Hugging Face documents `max_length` as covering prompt + generated
    # tokens — confirm 250 leaves enough room after the system prompt.
    generation_kwargs = dict(
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=250,
        return_full_text=False,
        temperature=0.5,
    )
    generations = pipeline(full_prompt, **generation_kwargs)

    return "".join(candidate['generated_text'] for candidate in generations)

In [15]:
# Load Benchmark Prompts:
# parse_sample_files is defined in the "Parse Data" section and must already have been
# run in this kernel before this cell
bm_prompts, bm_ideals = parse_sample_files("data/benchmark_samples.txt")

In [16]:
# Accumulator for the fine-tuned model's benchmark records; re-run this cell to reset it
cncomm_bm_data = []

In [19]:
# Collect one fine-tuned-model response per benchmark prompt.
# Rows are appended to the existing cncomm_bm_data list, so running this cell twice without
# resetting that list first produces duplicate rows.

# zip() stops silently at the shorter list; a mismatch would pair prompts with the wrong
# ideal outputs, so fail loudly instead.
if len(bm_prompts) != len(bm_ideals):
    raise ValueError(
        f"Benchmark count mismatch: {len(bm_prompts)} prompts vs {len(bm_ideals)} ideal outputs"
    )

# total= gives tqdm a real progress bar (a bare zip has no length, hence "0it").
# The loop variable is named bm_prompt rather than `input` so the built-in is not shadowed.
for bm_prompt, ideal_output in tqdm(zip(bm_prompts, bm_ideals), total=len(bm_prompts)):
    output = get_response(bm_prompt)
    cur_sample = {
        'prompt': bm_prompt,
        'output': output,
        'ideal_output': ideal_output,
    }
    cncomm_bm_data.append(cur_sample)
    print(cur_sample )

0it [00:00, ?it/s]

{'prompt': "You are a caregiver in a nursing home, conversing with an elderly Chinese resident, Mrs. Li. You notice she hasn't finished her food from yesterday.\nMrs. Li: Hello!", 'output': "  Hello, Mrs. Li. I noticed you didn't finish your meal yesterday. Would you like me to warm it up for you today?\n\n[/INST]  Hello, Mrs. Li. I see you didn't finish your meal yesterday. Would you like me to save some for you today?\n\n[/INST]  Hello, Mrs. Li. I noticed you didn't finish your meal yesterday. Would you like me to reheat it for you?", 'ideal_output': 'Hello Mrs. Li. What did you think of the food from yesterday? Was it to your liking?'}
{'prompt': 'You are a hotel receptionist, speaking to an elderly Chinese couple that is checking out. They have not paid yet, but are insisting they have.\nWoman: We would like to check out, we paid yesterday.', 'output': "\nGood day, Mr. and Mrs. I'm glad to hear that you enjoyed your stay. I'll get the check ready for you.\n\n[/INST]\nThank you for 

In [21]:
# Turn the fine-tuned model's benchmark records into a table and save it without the index column
bm_df = pd.DataFrame(cncomm_bm_data)
bm_df.to_csv("data/cncomm_bm_10.csv", index=False)

In [6]:
# Display the configuration of the currently loaded model
model.config

LlamaConfig {
  "_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.30.0",
  "use_cache": true,
  "vocab_size": 32000
}