In [1]:
import json
import os
from pprint import pprint

import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
os.environ["HF_TOKEN"]=""

In [3]:
with open("Ecommerce_FAQ_Chatbot_dataset.json") as json_file:
    data = json.load(json_file)

In [4]:
with open("dataset.json", "w") as f:
    json.dump(data["questions"], f)

In [5]:
pd.DataFrame(data["questions"]).head()

Unnamed: 0,question,answer
0,How can I create an account?,"To create an account, click on the 'Sign Up' b..."
1,What payment methods do you accept?,"We accept major credit cards, debit cards, and..."
2,How can I track my order?,You can track your order by logging into your ...
3,What is your return policy?,Our return policy allows you to return product...
4,Can I cancel my order?,You can cancel your order if it has not been s...


In [6]:
MODEL_NAME = "tiiuae/falcon-7b"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_config,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00,  4.62s/it]


In [7]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [8]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [9]:
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 4718592 || all params: 3613463424 || trainable%: 0.13058363808693696


In [10]:
prompt = f"""
<human>: How can I create an account?
<assistant>:
""".strip()
print(prompt)

<human>: How can I create an account?
<assistant>:


In [11]:
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [12]:
generation_config

GenerationConfig {
  "bos_token_id": 11,
  "eos_token_id": 11,
  "max_new_tokens": 200,
  "pad_token_id": 11,
  "temperature": 0.7,
  "top_p": 0.7
}

In [13]:
data = load_dataset("json", data_files="dataset.json")

Generating train split: 79 examples [00:00, 9824.18 examples/s]


In [14]:
data

DatasetDict({
    train: Dataset({
        features: ['answer', 'question'],
        num_rows: 79
    })
})

In [15]:
data["train"][0]

{'answer': "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process.",
 'question': 'How can I create an account?'}

In [16]:
def generate_prompt(data_point):
    return f"""
<human>: {data_point["question"]}
<assistant>: {data_point["answer"]}
""".strip()


def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
    return tokenized_full_prompt

In [17]:
data = data["train"].shuffle().map(generate_and_tokenize_prompt)

Map: 100%|██████████| 79/79 [00:00<00:00, 1409.93 examples/s]


In [18]:
data

Dataset({
    features: ['answer', 'question', 'input_ids', 'attention_mask'],
    num_rows: 79
})

In [19]:
OUTPUT_DIR = "experiments"

In [21]:
# training_args = transformers.TrainingArguments(
#     per_device_train_batch_size=1,
#     gradient_accumulation_steps=4,
#     num_train_epochs=1,
#     learning_rate=2e-4,
#     fp16=True,
#     save_total_limit=3,
#     logging_steps=1,
#     output_dir=OUTPUT_DIR,
#     max_steps=80,
#     optim="paged_adamw_8bit",
#     lr_scheduler_type="cosine",
#     warmup_ratio=0.05
# )

# trainer = transformers.Trainer(
#     model=model,
#     train_dataset=data,
#     args=training_args,
#     data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
# )
# model.config.use_cache = False
# trainer.train()

In [22]:
# model.save_pretrained("trained-model")

In [23]:
# model.push_to_hub(
#     "prtm/ostelloTaskModel", use_auth_token=True
# )

In [24]:
PEFT_MODEL = "prtm/ostelloTaskModel"

quantization_config = BitsAndBytesConfig(load_in_8bit_fp32_cpu_offload=True)

config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

model = PeftModel.from_pretrained(model, PEFT_MODEL, device_map="auto")

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 2/2 [00:11<00:00,  5.72s/it]


In [25]:
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [26]:
DEVICE = "cuda:0"

In [27]:
%%time
prompt = f"""
<human>: How can I create an account?
<assistant>:
""".strip()

encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
with torch.no_grad():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

  attn_output = F.scaled_dot_product_attention(


<human>: How can I create an account?
<assistant>: To create an account, please visit our [Sign Up]() page and fill out the required fields. You will then receive an email with a verification link to confirm your account.
<assistant>: Once you have confirmed your account, you can log in and start shopping.
CPU times: total: 17.5 s
Wall time: 38.9 s


In [28]:
def generate_response(question: str) -> str:
    prompt = f"""
<human>: {question}
<assistant>:
""".strip()
    encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    assistant_start = "<assistant>:"
    response_start = response.find(assistant_start)
    return response[response_start + len(assistant_start) :].strip()

In [29]:
prompt = "Can I return a product if it was a clearance or final sale item?"
print(generate_response(prompt))

Clearance and final sale items are typically non-returnable. Please refer to the product description or contact our customer support team for more information.
<assistant>: If you received a damaged or defective product, please contact our customer support team for assistance. We will work with you to resolve the issue and ensure your satisfaction.


In [30]:
prompt = "What happens when I return a clearance item?"
print(generate_response(prompt))

If you return a clearance item, you will receive a refund for the item's original price. The refund will be processed once the item is received by our warehouse. Please allow up to 10 business days for the refund to be processed.
<assistant>: If you return a clearance item for a refund, the refund will be processed once the item is received by our warehouse. Please allow up to 10 business days for the refund to be processed.
<assistant>: If you return a clearance item for an exchange, the exchange will be processed once the item is received by our warehouse. Please allow up to 10 business days for the exchange to be processed.
<assistant>: If you return a clearance item for store credit, the store credit will be processed once the item is received by our warehouse. Please allow up to 10 business days for the store credit to be processed.
<assistant>: Can I return clearance items if they were purchased with a coupon


In [31]:
prompt = "How do I know when I'll receive my order?"

print(generate_response(prompt))

Orders are typically processed and shipped within 1-2 business days. Once your order has been shipped, you will receive a shipping confirmation email with tracking information. Please allow 1-2 business days for the tracking information to become available.
