## GPT Sample Generation

In [4]:
from openai import OpenAI
import time
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

OPEN_AI_KEY = os.environ.get('OPEN_AI_KEY')
client = OpenAI(api_key=OPEN_AI_KEY)

In [7]:
completion = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "system", "content": "You are a system that can generate chat prompts that understands values in Chinese communication. This includes indirect communication, which relies on subtlety and hints and context, as well as showing respect in front of elders, as well as humility. "},
    {"role": "system", "content": "Here are 3 examples: Input:\nCONTEXT: You are a caregiver in a nursing home, conversing with an elderly Chinese resident, Mrs. Li, in English. You notice she hasn't finished her food from yesterday.\n Mrs. Li: Hello!\n Response:\n Hello Mrs. Li. Was the food from yesterday to your liking?\n \n Input:\n CONTEXT: You are a hotel receptionist, speaking to an elderly Chinese couple that is checking out. They have not paid yet, but are insisting they have. \n Woman: We would like to check out, we paid yesterday.\n Response:\n I apologize if there's been a misunderstanding. I will double check our system again.\n \n Input:\n CONTEXT: You are conversing with a Chinese coworker to provide feedback on their work, which is a little lacking. \n Coworker: Hello!\n Response:\n Hello, how are you? I've reviewed your work. It's quite comprehensive. Would you like to go over this section though?"},
    {"role": "user", "content": "Give me 40 chat prompts of unique scenarios that exhibit values in Chinese communication, especially the values of indirectness and respect to elders. Use the format of the examples given with the context."}
  ]
)

In [None]:
output = completion.choices[0].message.content

In [11]:
with open('data/gpt_samples.txt', 'a') as f:
    f.write(output)

## Parse Data 
txt to csv in Llama2 prompt formatting

In [10]:
import pandas as pd
import csv

In [4]:
f = open("examples.txt", "r")

is_question = False
prompts = []
first_line = []
labels = []
type = []
for x in f: 
    if "CONTEXT: " in x:
        ctxt = x.strip()
        i = ctxt.index("CONTEXT: ")
        ctxt = ctxt[i+9:]
        prompts.append(ctxt)
        is_question = True
        type.append("N")

    elif is_question:
        is_question = False
        first_line.append(x.strip())
    
    elif "Response: " in x:
        resp = x.strip().replace("Response: ","")
        labels.append(resp)


In [5]:
f = open("examples2.txt", "r")

for x in f: 
    if "CONTEXT: " in x:
        ctxt = x.strip()
        i = ctxt.index("CONTEXT: ")
        ctxt = ctxt[i+9:]
        prompts.append(ctxt)
        is_question = True
        type.append("G")

    elif is_question:
        is_question = False
        first_line.append(x.strip())
    
    elif "Response: " in x:
        resp = x.strip().replace("Response: ","")
        labels.append(resp)


In [9]:
df_dict = {'context': prompts, 'user_line': first_line, 'ast_line': labels, 'type':type} 
    
df = pd.DataFrame(df_dict)

In [21]:
cncomm = []
sys_info = "You are a system that is able to adapt to different scenarios and provide English responses that are culturally sensitive to values in Chinese communication. These values include indirect communication, which relies on subtlety and hints and context, as well as showing respect in front of elders, and maintaining humility."
bos_token = "<s>"
eos_token = "</s>"
for ind, row in df.iterrows():
    cur_row = '<<SYS>>\n' + sys_info + '\n<</SYS>>\n\n'
    cur_row += bos_token + '[INST] ' +row['context'] + '\n' + row['user_line'] + ' [/INST]'
    cur_row += ' '  + row['ast_line'] + ' ' + eos_token
    cncomm.append([cur_row])

In [22]:
print(cncomm[0])

['<<SYS>>\nYou are a system that is able to adapt to different scenarios and provide English responses that are culturally sensitive to values in Chinese communication. These values include indirect communication, which relies on subtlety and hints and context, as well as showing respect in front of elders, and maintaining humility.\n<</SYS>>\n\n<s>[INST] You are a caregiver in a nursing home, conversing with an elderly Chinese resident, Mr. Wang, who seems a little down today.\nMr. Wang: Good afternoon. [/INST] Good afternoon, Mr. Wang. Is there anything in particular on your mind today? </s>']


In [23]:
fields = ["text"]
with open('cn_comm.csv', 'w') as f:
     
    # using csv.writer method from CSV package
    write = csv.writer(f)
     
    write.writerow(fields)
    write.writerows(cncomm)

## LoRA

In [14]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 datasets==2.16.0

'pip' is not recognized as an internal or external command,
operable program or batch file.


In [11]:
!pip install transformers==4.38.2
!pip install accelerate
!pip install -i https://pypi.org/simple/ bitsandbytes

'pip' is not recognized as an internal or external command,
operable program or batch file.


'pip' is not recognized as an internal or external command,
operable program or batch file.
'pip' is not recognized as an internal or external command,
operable program or batch file.


In [2]:
import os, torch, logging
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
# from transformers import LlamaForCausalLM, LlamaTokenizer

In [3]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
# Dataset
data_name = "cheungra/CNCOMM-500-1.0"
training_data = load_dataset(data_name, split="train")

In [5]:
# Model and tokenizer names
base_model_name = "meta-llama/Llama-2-7b-chat-hf"
refined_model = "llama-2-7b-cncomm"
# Tokenizer
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"  # Fix for fp16

In [6]:
# Quantization Config
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)
# Model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map={"": 0}
)
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1# Model and tokenizer names
base_model_name = "meta-llama/Llama-2-7b-chat-hf"
refined_model = "llama-2-7b-cncomm"
# Tokenizer
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"  # Fix for fp16
# Quantization Config
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)
# Model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map={"": 0}
)
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [38]:
# Check if torch is enabled with CUDA
import torch
print(torch.cuda.is_available())

True


In [7]:
# LoRA Config
peft_parameters = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=8,
    bias="none",
    task_type="CAUSAL_LM"
)
# Training Params
train_params = TrainingArguments(
    output_dir="./results_modified",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant"
)
# Trainer
fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=training_data,
    peft_config=peft_parameters,
    dataset_text_field="text",
    tokenizer=llama_tokenizer,
    args=train_params
)
# Training
fine_tuning.train()
# Save Model
fine_tuning.model.save_pretrained(refined_model)



Map:   0%|          | 0/512 [00:00<?, ? examples/s]

  0%|          | 0/128 [00:00<?, ?it/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 1.9556, 'learning_rate': 0.0002, 'epoch': 0.2}
{'loss': 0.6639, 'learning_rate': 0.0002, 'epoch': 0.39}
{'loss': 0.4827, 'learning_rate': 0.0002, 'epoch': 0.59}
{'loss': 0.4391, 'learning_rate': 0.0002, 'epoch': 0.78}
{'loss': 0.4112, 'learning_rate': 0.0002, 'epoch': 0.98}
{'train_runtime': 1319.8997, 'train_samples_per_second': 0.388, 'train_steps_per_second': 0.097, 'train_loss': 0.780652099289, 'epoch': 1.0}


In [32]:
# push to huggingface hub
fine_tuning.model.push_to_hub("llama-2-7b-cncomm")
fine_tuning.tokenizer.push_to_hub("llama-2-7b-cncomm")



CommitInfo(commit_url='https://huggingface.co/cheungra/llama-2-7b-cncomm/commit/2a59eb26d77523dd200631c208270efb4d3e42d4', commit_message='Upload tokenizer', commit_description='', oid='2a59eb26d77523dd200631c208270efb4d3e42d4', pr_url=None, pr_revision=None, pr_num=None)

In [20]:
# local saves
fine_tuning.save_model('save_cncomm')
fine_tuning.model.config.to_json_file("config.json")

In [33]:
# Generate Text
query = "How do I use the OpenAI API?"
refined_model = "cheungra/llama-2-7b-cncomm"
text_gen = pipeline(task="text-generation", model=refined_model, tokenizer=llama_tokenizer, max_length=200)
output = text_gen(f"<s>[INST] {query} [/INST]")
print(output[0]['generated_text'])

config.json:   0%|          | 0.00/982 [00:00<?, ?B/s]

ValueError: Could not load model cheungra/llama-2-7b-cncomm with any of the following classes: (<class 'transformers.models.auto.modeling_auto.AutoModelForCausalLM'>, <class 'transformers.models.llama.modeling_llama.LlamaForCausalLM'>).

In [37]:
from transformers import AutoTokenizer
import transformers
import torch

model = "cheungra/llama-2-7b-cncomm"

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto"
)

base_prompt = "<s>[INST]\n<<SYS>>\n{system_prompt}\n<</SYS>>\n\n{user_prompt}[/INST]"

def get_response(text):
    input = base_prompt.format(system_prompt = "You are a system that is able to adapt to different scenarios and provide English responses that are culturally sensitive to values in Chinese communication. These values include indirect communication, which relies on subtlety and hints and context, as well as showing respect in front of elders, and maintaining humility.",
                               user_prompt = text)
    print(input)
    
    sequences = pipeline(
        input,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=200,
        return_full_text=False,
        temperature=0.5
    )

    for seq in sequences:
        print(f"Result: {seq['generated_text']}")

get_response("You are a caregiver in a nursing home, conversing with an elderly Chinese resident, Mrs. Li, in English. You notice she hasn't finished her food from yesterday.\n Mrs. Li: Hello!")

ValueError: Could not load model cheungra/llama-2-7b-cncomm with any of the following classes: (<class 'transformers.models.auto.modeling_auto.AutoModelForCausalLM'>, <class 'transformers.models.llama.modeling_llama.LlamaForCausalLM'>).