<a href="https://colab.research.google.com/github/subithaps/Demo/blob/master/sales.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
dataset_name = "pssubitha/formatted_data_sales"

model_name = "NousResearch/Llama-2-7b-chat-hf"
new_model = "sales-llama2-chat-hf"

In [2]:
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops wandb

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [3]:
from huggingface_hub import notebook_login
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [5]:
# Load dataset
dataset = load_dataset(dataset_name, split="train")

In [6]:
len(dataset)

120

In [7]:
dataset[2]

{'text': "<s>[INST] When is the right time to transition to a platform like WhatsApp? [/INST] It's recommended to be at least at Stage 3 in the opportunity sales cycle before attempting to transition to WhatsApp. This ensures that a foundational relationship and understanding have been established first. </s>"}

In [8]:


bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
#Load Base Model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True
)
model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [9]:
#Load Llama tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [13]:
prompt = "What is HH Insurance"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""

In [14]:
input_ids = tokenizer(prompt_template,return_tensors ='pt').input_ids.cuda()
output = model.generate(inputs = input_ids, temperature =0.7, max_new_tokens = 512)
print(tokenizer.decode(output[0]))



<s> [INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
What is HH Insurance[/INST]  Hello! As a Salesforce Assistant, I'm happy to help you with your query. HH Insurance is a term that I'm not familiar with, could you please provide more context or information about what HH Insurance refers to? This will help me better understand your question and provide a more accurate response.</s>


In [15]:
print(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )


In [16]:
from peft import LoraConfig

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
    # target_modules=["q_proj","v_proj"]
    target_modules=['q_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'k_proj', 'v_proj'] # Choose all linear layers from the model
)

In [80]:
from transformers import TrainingArguments

output_dir = "./results"
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 10
logging_steps = 1
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 500
warmup_ratio = 0.03
lr_scheduler_type = "cosine"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    ddp_find_unused_parameters=False,
    report_to = ["tensorboard"],
    push_to_hub = True,
    resume_from_checkpoint = True


)


In [81]:
from trl import SFTTrainer

max_seq_length = 512

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    # train_dataset=data['train'],
    peft_config=peft_config,
    dataset_text_field="text",
    # dataset_text_field="prediction",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)



In [51]:
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

In [None]:
#key="7270234dab3ba8965e58fecc82a88a77da97adea"

In [83]:
import torch
torch.cuda.empty_cache()

import gc
gc.collect()

2877

In [84]:
trainer.train()
# Save trained model
trainer.model.save_pretrained(new_model)

Step,Training Loss
1,3.4603
2,3.3635
3,3.818
4,3.808
5,3.8094
6,3.7628
7,3.5752
8,2.9742
9,3.1313
10,3.046


In [None]:
#trainer.train(resume_from_checkpoint=true)
resume_from_checkpoint("checkpoint-71")

In [86]:
import transformers
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    trust_remote_code=True,

    device_map="auto",
     max_length=500,
    do_sample=True,
    top_p=0.7,
    top_k = 40,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    temperature = 0.7,
    repetition_penalty = 1.176


)

In [87]:
prompt = "tell me about HH Insurance"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""

In [88]:
print(pipeline(prompt_template)[0]['generated_text'])



[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
tell me about HH Insurance[/INST] As an insurance salesperson, I can provide you with general information about HH Insurance. Please note that I don't have access to detailed customer data or account information unless specified by the client. Would you like to know more about their health insurance offerings?


In [89]:
prompt = "what is the purpose of value engineering session"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""

In [90]:
print(pipeline(prompt_template)[0]['generated_text'])

[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
what is the purpose of value engineering session[/INST] A value engineering session aims to provide a detailed ROI study based on the client's business case. It showcases how our solution can deliver tangible, positive results for their needs and goals. During the session, we offer a real-world cost-benefit analysis using case studies and testimonials from similar projects, demonstrating the value they can expect from our solution. By offering a comprehensive ROI study, we can effectively address the client's main concerns: cost and return on investment. This session provides a clear indication of the value they can expect from our solution, aiding in building trust and increasing the likelihood of closing the deal.


In [91]:
prompt = "Why is it essential to remove negative opportunities from stage 1?"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""

In [None]:
prompt = "Why is it essential to remove negative opportunities from stage 1?"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""

In [92]:
print(pipeline(prompt_template)[0]['generated_text'])

[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
Why is it essential to remove negative opportunities from stage 1?[/INST] Removing negative opportunities from stage 1 is vital. They can skew the overall pipeline value and mislead in the broader analysis. It's crucial to maintain a clean and relevant pipeline to ensure accurate reporting and planning. Is there anything else I can assist you with?


In [93]:
prompt = "what is cancellation of convenience?"

prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""
print(pipeline(prompt_template)[0]['generated_text'])

[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
what is cancellation of convenience?[/INST] Cancellation for cause, also known as cancellation for convenience, allows a party to terminate the contract under specific circumstances. These circumnstances might include a breach of the agreement by the other party or a change in business strategy that no longer aligns with the contract. Upon termination, the contract details any consequences, including how unused services or credits should be handled. It's essential to have a clear and comprehensive cancellation clause to protect both parties' interests.


In [95]:
prompt = "When is the right time to transition to a platform like WhatsApp?"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""
print(pipeline(prompt_template)[0]['generated_text'])

[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
When is the right time to transition to a platform like WhatsApp?[/INST] It's recommended to be at least at Stage 3 in the opportunity sales cycle before attempting to transition to WhatsApp. This ensures that a foundational relationship and understanding have been established first. Transitioning too early can result in losing the deal or it not being as successful as expected.


In [96]:
prompt = "let me know the main sources of stage 1 lead?"

prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""
print(pipeline(prompt_template)[0]['generated_text'])

[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
let me know the main sources of stage 1 lead?[/INST] Stage 1 leads primarily originate from marketing activities, website visits, SDR/BDR outreach, and several other sources. Would you like a detailed breakdown of each source?


In [99]:
prompt = "What happens to the contact details of opportunities we delete from stage 1?"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""
print(pipeline(prompt_template)[0]['generated_text'])

[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
What happens to the contact details of opportunities we delete from stage 1?[/INST] They are removed from the CRM, but they don't get deleted permanently. You can still access them by moving the opportunity to stage 2 or higher. If you need to preserve the contact details for historical reasons, you can set up a data privacy policy that outlines how old leads should be managed. This ensures that even though the opportunity is deleted, the contact details remain for reference.


In [100]:
prompt = "good, What is a CRM"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""
print(pipeline(prompt_template)[0]['generated_text'])



[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
good, What is a CRM[/INST] A Customer Relationship Management (CRM) system is a software application that enables organizations to manage their interactions with clients and prospects. It provides a centralized platform for storing and tracking leads, opportunities, and customer data, ensuring smooth communication across departments. Would you like more information about CRM?


In [103]:
prompt = "good, What is HH Insurance"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""
print(pipeline(prompt_template)[0]['generated_text'])

[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
good, What is HH Insurance[/INST] Hello! HH Insurance doesn't seem to exist in our database. Can you provide more context or details about this insurance? This helps ensure a genuine response.


In [102]:
prompt = " Is HH Insurance a type of insurance agent"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""
print(pipeline(prompt_template)[0]['generated_text'])

[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
 Is HH Insurance a type of insurance agent[/INST] Yes, HH Insurance is an insurance agency that offers various types of policies, including health, life, and disability insurance. They have a wide range of products and services catering to different client needs. Would you like more information about their offerings?


In [105]:
prompt = "Can we make internal solution review optional"
prompt_template = f"""[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
{prompt}[/INST]"""
print(pipeline(prompt_template)[0]['generated_text'])

[INST]<<SYS>>
You are a salesforce assistant. please respond to the queries
<</SYS>>
Can we make internal solution review optional[/INST] Certainly! While having an internal solution review is crucial, offering an optional path for those who have already accepted the solution can be beneficial. This option could include a shorter review process with a more limited scope, ensuring it offers genuine value to the client while aligning with our sales methodology. Would you like me to schedule the review or provide the clients' contact details?
