In [None]:
!pip install -Uqqq pip
# !pip install -qqq bitsandbytes==0.39.0
# !pip install -qqq torch==2.0.1
!pip uninstall -y torch torchvision torchaudio
!pip install torch==2.6.0+cu124 torchvision==0.21.0+cu124 torchaudio==2.6.0+cu124 --index-url https://download.pytorch.org/whl/cu124

# !pip install -qqq -U git+https://github.com/huggingface/transformers
!pip uninstall -y transformers accelerate
!pip install transformers==4.41.0 accelerate
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f
!pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71
!pip install -qqq datasets==2.12.0
!pip install -qqq loralib==0.1.1
!pip install -qqq einops==0.6.1
!pip uninstall bitsandbytes
!pip install bitsandbytes
!pip install --upgrade pip
!pip install datasets
!pip install --upgrade transformers
!pip install 'accelerate>=0.26.0'

In [None]:
!pip uninstall -y nvidia-nvjitlink-cu12  # Uninstall the conflicting version
!pip install nvidia-nvjitlink-cu12==12.4.127  # Install the correct version
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from accelerate import Accelerator
# import accelerate
from datasets import load_dataset, DownloadMode
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModel,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
notebook_login()
# !huggingface-cli login


In [None]:
# Base checkpoint for fine-tuning; the commented lines are alternative candidates.
# MODEL_NAME = "tiiuae/falcon-7b-instruct"
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
# MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
# NOTE(review): `accelerator` is created here but not referenced by the cells shown.
accelerator = Accelerator()
# Prints the installed accelerate package metadata for troubleshooting.
!pip show accelerate

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the base causal LM with the quantization settings above.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=True,
    # device_map="auto",
    # NOTE(review): trust_remote_code allows code shipped with the checkpoint to run;
    # confirm it is actually needed for the selected MODEL_NAME.
    trust_remote_code=True,
    quantization_config=bnb_config
)
# Select the "eager" attention implementation via the config's private attribute.
model.config._attn_implementation = "eager"

# Tokenizer for the same checkpoint; the EOS token is reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def print_trainable_parameters(model):
    """
    Print a one-line summary of how many of ``model``'s parameters are trainable.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` flag.

    The summary is printed exactly once, after all parameters have been
    counted. A model without parameters reports 0% instead of dividing by zero.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # Guard the ratio so an empty model does not raise ZeroDivisionError.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
# Turn on gradient checkpointing on the quantized base model.
model.gradient_checkpointing_enable()
# PEFT helper for k-bit (here 4-bit) models; it is applied before the LoRA
# adapters are attached in the next cell.
model = prepare_model_for_kbit_training(model)

In [None]:
# LoRA settings: rank 16, alpha 32, 5% dropout, no bias terms trained.
# Adapters are attached to the modules named q_proj, k_proj and v_proj.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the prepared base model with the LoRA adapters, then report parameter counts.
model = get_peft_model(model, config)
print_trainable_parameters(model)

In [None]:
# Sample negotiation used to smoke-test generation before fine-tuning.
# The prompt ends with "Seller:" so the model continues with the seller's reply.
# The round-1 seller offer is ₹3365.02 (the value used for this same
# conversation in the inference section); it must stay between the minimum
# and listed prices for the history to be coherent.
prompt = """
Product: Linen Shirts
Description: Lightweight linen, summer wear, breathable fabric.
Listed Price: ₹3569
Minimum Price: ₹2782

Conversation History:
Round 1:
Buyer: I’ll take it for ₹2920. Design feels outdated.
Seller: I value your feedback on design feels outdated. With current demand, I can offer ₹3365.02. Interested?

Round 2:
Buyer: I’ll take it for ₹2932. Color is dull.

Seller:
""".strip()

In [None]:
# `generation_config` is the model's own object (not a copy), so the
# assignments below change the model's default generation settings in place.
generation_config = model.generation_config
generation_config.max_new_tokens = 256
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# The EOS token id doubles as the padding id, matching tokenizer.pad_token above.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [None]:
%%time
# Smoke test: generate a seller reply for the sample prompt and time the cell.
device = "cuda:0"

# Tokenize the prompt and move the resulting tensors to the GPU.
encoding = tokenizer(prompt, return_tensors="pt").to(device)
# inference_mode disables autograd tracking for the generation call.
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config
    )
    # Decodes the first returned sequence with special tokens removed.
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
import json
from datasets import Dataset, DatasetDict
# data = load_dataset("json", data_files="bargaining_finetune_100_examples.json")

# Load the JSON data.
# The encoding is given explicitly because the prompts in this notebook contain
# non-ASCII characters (e.g. ₹ and ’), and the platform default encoding
# is not guaranteed to be UTF-8.
with open("dynamic_negotiation_conversations.json", "r", encoding="utf-8") as f:
    data_list = json.load(f)
    # all_data = data.to_dict()

# Convert to a Hugging Face Dataset and expose it as the "train" split.
data = Dataset.from_list(data_list)
dataset_dict = DatasetDict({
    "train": data
})
# print(all_data)


In [None]:
data

In [None]:
dataset_dict["train"][0]

In [None]:
import math

def calculate_decayed_price(listed_price, min_price, round_number, decay_rate):
    """
    Decay an asking price exponentially from the listed price toward a floor.

    Evaluates ``min_price + (listed_price - min_price) * exp(-(round_number * decay_rate))``.

    Args:
        listed_price: Starting price; returned unchanged when ``round_number`` is 0.
        min_price: Floor that the result approaches as the exponent grows.
        round_number: Negotiation round used to scale the exponent.
        decay_rate: Per-round decay constant.

    Returns:
        The decayed price for the given round.
    """
    negotiable_margin = listed_price - min_price
    remaining_fraction = math.exp(-(round_number * decay_rate))
    return min_price + negotiable_margin * remaining_fraction


# Decay rate selected from a round's "buyer_sentiment" label. Any label not
# listed here (including "neutral" or a missing label) uses the default rate.
_DECAY_RATE_BY_SENTIMENT = {"slightly negative": 0.2, "negative": 0.3}
_DEFAULT_DECAY_RATE = 0.1


def generate_prompt(data_point, verbose=False):
    """
    Build the seller-side training prompt for one negotiation record.

    The prompt contains a fixed instruction line, a header with the product
    metadata, and the full conversation history. When the record's metadata
    carries both ``listed_price`` and ``min_price``, each round's seller line
    is replaced by the exponentially decayed price for that round (the decay
    rate is chosen from the round's ``buyer_sentiment``). Otherwise the
    recorded ``seller_response`` is used verbatim.

    Args:
        data_point: Mapping that may contain ``metadata`` (dict),
            ``conversation_history`` (list of per-round dicts) and
            ``conversation_id``. Missing or empty entries fall back to
            placeholder text instead of raising.
        verbose: When True, print the per-round decay inputs and result
            (debugging aid; off by default so ``Dataset.map`` stays quiet).

    Returns:
        The prompt as a single stripped string.
    """
    metadata = data_point.get("metadata") or {}
    history = data_point.get("conversation_history") or []
    conversation_id = data_point.get("conversation_id", "N/A")

    listed_price = metadata.get("listed_price")
    min_price = metadata.get("min_price")
    has_prices = listed_price is not None and min_price is not None

    conversation = []
    for round_number, round_data in enumerate(history, start=1):
        buyer_offer = round_data.get("buyer_offer", "N/A")
        negotiation_reason = round_data.get("negotiation_reason", "No reason provided")

        if has_prices:
            decay_rate = _DECAY_RATE_BY_SENTIMENT.get(
                round_data.get("buyer_sentiment", "neutral"), _DEFAULT_DECAY_RATE
            )
            decayed_price = calculate_decayed_price(listed_price, min_price, round_number, decay_rate)
            conversation.append(
                f"Buyer: I’ll take it for ₹{buyer_offer}. Reason: {negotiation_reason}.\n"
                f"Seller: ₹{decayed_price:.2f}\n"
                f"Decayed Price: ₹{decayed_price:.2f}"
            )
            if verbose:
                print(f"conversation_id: {conversation_id}, Listed Price: {listed_price}, Min Price: {min_price}, Round: {round_number}, Decay Rate: {decay_rate}")
                print(f"Decayed Price for round {round_number}: ₹{decayed_price:.2f}")
        else:
            # Without both prices there is nothing to decay; keep the recorded reply.
            seller_response = round_data.get("seller_response", "No response provided")
            conversation.append(
                f"Buyer: I’ll take it for ₹{buyer_offer}. {negotiation_reason}.\n"
                f"Seller: {seller_response}"
            )

    if not conversation:
        conversation.append("No conversation history available.")
    conversation_history = "\n".join(conversation).strip()

    # Header fields: a value stored in metadata wins; the buyer price and the
    # negotiation reason otherwise fall back to the most recent round, so the
    # header describes the current state of the negotiation.
    last_round = history[-1] if history else {}
    product_name = metadata.get("product_name", "Unknown Product")
    description_short = metadata.get("description_short", "No description available")
    header_listed_price = metadata.get("listed_price", "N/A")
    header_min_price = metadata.get("min_price", "N/A")
    buyer_price = metadata.get("buyer_offer", last_round.get("buyer_offer", "N/A"))
    negotiation_reason = metadata.get(
        "negotiation_reason", last_round.get("negotiation_reason", "No reason provided")
    )

    lines = [
        "You are a seller. Follow the exponential decay function to determine your counter-offer in each negotiation round.",
        f"Product Name: {product_name}",
        f"Description: {description_short}",
        f"Listed Price: ₹{header_listed_price}",
        f"Minimum Price: ₹{header_min_price}",
        f"Buyer Price: ₹{buyer_price}",
        f"Round: {len(history)}",
        f"Negotiation Reason: {negotiation_reason}",
        "",
        "Conversation History:",
        conversation_history,
    ]
    return "\n".join(lines).strip()

In [None]:
def generate_and_tokenize_prompt(data_point, max_length=None):
    """
    Render one record as a prompt and tokenize it for causal-LM training.

    Args:
        data_point: One dataset record, passed straight to ``generate_prompt``.
        max_length: Optional token limit used for truncation. ``None`` keeps
            the tokenizer's own maximum length.

    Returns:
        Dict with ``input_ids``, ``attention_mask`` and ``labels`` as plain
        lists. ``labels`` is a copy of ``input_ids``.

    No padding is applied here: ``padding="max_length"`` without an explicit
    ``max_length`` either pads to the tokenizer's full maximum or is ignored,
    depending on the tokenizer. Padding is left to the data collator used at
    batch time.
    NOTE(review): with per-device batch sizes above 1 the variable-length
    ``labels`` column may need to be dropped or padded by the collator - confirm.
    """
    prompt = generate_prompt(data_point)
    # Plain lists are returned; Dataset.map stores list columns either way,
    # so converting to tensors first adds nothing.
    encodings = tokenizer(prompt, truncation=True, max_length=max_length)
    return {
        "input_ids": encodings["input_ids"],
        "attention_mask": encodings["attention_mask"],
        "labels": list(encodings["input_ids"]),
    }

# Tokenize every record of the train split. Note that `data` is rebound here:
# before this line it named the raw dataset, afterwards the mapped one.
data = dataset_dict["train"].map(generate_and_tokenize_prompt)

In [None]:
data

In [None]:
# Training run for the negotiation prompts.
# Packages are installed in the first cell only: upgrading transformers at this
# point, after it has already been imported, can leave the running kernel with
# a mix of old in-memory modules and new on-disk ones.
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # effective batch of 4 sequences per optimizer step
    num_train_epochs=1,
    learning_rate=2e-4,
    # NOTE(review): the quantization config computes in bfloat16 while this run
    # uses fp16 mixed precision - confirm which one the target GPU should use.
    fp16=True,
    save_total_limit=3,
    logging_steps=1,
    output_dir="experiments",
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    # remove auto_find_batch_size, it is causing conflict
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    # mlm=False selects causal-LM collation.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
# Disable the generation cache on the model config before training starts.
model.config.use_cache = False
# model.config._attn_implementation = None
trainer.train()



No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mprakashbio2021[0m ([33mprakashbio2021-techno-india-group[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
1,1.7255
2,1.688
3,1.7715
4,1.4892
5,1.4579
6,1.3672
7,1.252
8,1.1685
9,1.2288
10,1.1255


TrainOutput(global_step=25, training_loss=1.0059381484985352, metrics={'train_runtime': 253.7275, 'train_samples_per_second': 0.394, 'train_steps_per_second': 0.099, 'total_flos': 1251046955114496.0, 'train_loss': 1.0059381484985352, 'epoch': 1.0})

In [None]:
# PEFT_MODEL = "prakashLlama/Llama-2-7b-chat-hf-v3"

# model.push_to_hub(
#     PEFT_MODEL, use_auth_token=True
# )

In [None]:
notebook_login()

In [None]:
# Multi-task fine-tuning, stage 2: continue training the negotiation adapter
# so that it also predicts the buyer-sentiment decay rate.

BASE_MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
PEFT_MODEL = "prakashLlama/Llama-2-7b-chat-hf-v3"
# NOTE(review): `accelerator` is created here but not referenced by the cells shown.
accelerator = Accelerator()

# Same 4-bit NF4 quantization settings as the first training stage.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the base model with quantization
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME, # Use the explicitly defined base model name
    return_dict=True,
    quantization_config=bnb_config,
    trust_remote_code=True
)

# Use the base model name for the tokenizer as well
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

# Load the PEFT adapters onto the base model. is_trainable=True loads them in
# training mode; the default (False) loads the adapter frozen for inference,
# which is not what a continued fine-tuning run needs.
model = PeftModel.from_pretrained(model, PEFT_MODEL, is_trainable=True)
print("PEFT config:", model.peft_config)

# Set attention implementation after loading PEFT model
model.config._attn_implementation = "eager"

In [None]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# At this point `model` is already a PeftModel with adapters attached, and
# prepare_model_for_kbit_training sets requires_grad=False on every parameter
# it iterates over - the LoRA weights included. Re-enable gradients on the
# adapter matrices so the optimizer has parameters to update.
for param_name, param in model.named_parameters():
    if "lora_" in param_name:
        param.requires_grad = True

In [None]:
# Same LoRA hyperparameters as the first training stage.
# NOTE(review): in the cells shown this `config` is never passed to
# get_peft_model, and the name is reassigned later from PeftConfig.from_pretrained;
# the adapters in use are the ones loaded with PeftModel.from_pretrained. Confirm
# whether this cell is still needed.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [None]:
# Load the JSONL data (one JSON object per line).
# - UTF-8 is requested explicitly so non-ASCII text decodes the same way on
#   every platform.
# - Blank lines (for example a trailing empty line at the end of the file) are
#   skipped instead of crashing json.loads.
with open("buyer_sentiment_decay_dataset_modified.jsonl", "r", encoding="utf-8") as f:
    data_list = [json.loads(line) for line in map(str.strip, f) if line]

# Convert to Hugging Face Dataset
data = Dataset.from_list(data_list)
dataset_dict = DatasetDict({
    "train": data
})
# import json
# from datasets import Dataset, DatasetDict
# # data = load_dataset("json", data_files="bargaining_finetune_100_examples.json")

# # Load the JSON data
# with open("negotiation_decay_dataset.json", "r") as f:
#     data_list = json.load(f)
#     # all_data = data.to_dict()

# # Convert to a Hugging Face Dataset
# data = Dataset.from_list(data_list)
# dataset_dict = DatasetDict({
#     "train": data
# })
# # print(all_data)


In [None]:
data

In [None]:
def generate_prompt(data_point):
    """
    Format one sentiment record as a single training sentence.

    Reads ``data_point['input']`` (the buyer message) and
    ``data_point['output']`` (the target decay rate) and places the target
    directly after the instruction text.
    """
    buyer_message = data_point['input']
    target_rate = data_point['output']
    return f"Given this buyer message: '{buyer_message}', predict the buyer sentiment decay rate: {target_rate}"

def generate_and_tokenize_prompt(data_point):
    """
    Tokenize the sentiment prompt for one record.

    The prompt is padded or truncated to 128 tokens and returned as PyTorch
    tensors. ``labels`` is an independent clone of ``input_ids``.
    """
    encoded = tokenizer(
        generate_prompt(data_point),
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )

    # The tokenizer returns a batch of one; index 0 selects that single example.
    token_ids = encoded["input_ids"][0]
    mask = encoded["attention_mask"][0]

    return dict(input_ids=token_ids, attention_mask=mask, labels=token_ids.clone())

# Tokenize the train split and drop the original columns, so the result holds
# only what generate_and_tokenize_prompt returns.
# Note: the result is stored in `tokenized_dataset`; `data` still names the raw dataset.
tokenized_dataset = dataset_dict["train"].map(
    generate_and_tokenize_prompt,
    remove_columns=dataset_dict["train"].column_names # Remove original columns
)

In [None]:
data

In [None]:
# Training run for the buyer-sentiment decay-rate task.
# Packages are installed in the first cell only: upgrading transformers at this
# point, after it has already been imported, can leave the running kernel with
# a mix of old in-memory modules and new on-disk ones.
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # effective batch of 4 sequences per optimizer step
    num_train_epochs=1,
    learning_rate=1e-4,
    save_total_limit=3,
    logging_steps=1,
    # NOTE(review): same output_dir as the first training run, so checkpoints
    # from both runs land in one folder - confirm that is intended.
    output_dir="experiments",
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    # remove auto_find_batch_size, it is causing conflict
    # The tokenized dataset already contains only model inputs, so the
    # Trainer's automatic column pruning is switched off.
    remove_unused_columns=False,
    # bf16 mixed precision, matching bnb_4bit_compute_dtype=torch.bfloat16.
    fp16=False,
    bf16=True
)


trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_dataset, # Use the tokenized dataset
    args=training_args,
    # mlm=False selects causal-LM collation.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
# Disable the generation cache on the model config before training starts.
model.config.use_cache = False
# model.config._attn_implementation = None
trainer.train()

In [None]:
# Hub repository that receives the fine-tuned adapter.
PEFT_MODEL = "prakashLlama/Llama-2-7b-chat-hf-v4-sentiment"

# Upload the adapter. Authentication uses the token stored by notebook_login(),
# which push_to_hub picks up by default; the `use_auth_token` keyword is
# deprecated in current transformers releases and is therefore not passed.
model.push_to_hub(PEFT_MODEL)

In [None]:
# Reload the base model plus the pushed sentiment adapter for inference.
# Define the base model name explicitly
PEFT_MODEL = "prakashLlama/Llama-2-7b-chat-hf-v4-sentiment"
BASE_MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf" # This should match the MODEL_NAME used for training


# NOTE(review): `config` is loaded but not read again in the cells shown.
config = PeftConfig.from_pretrained(PEFT_MODEL)
# `bnb_config` is not defined in this cell; it must already exist from an earlier cell.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME, # Use the explicitly defined base model name
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto", # You can uncomment this if you prefer device_map
    # device="cuda:0", # Remove this argument
    trust_remote_code=True
)

# Use the base model name for the tokenizer as well
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned adapter weights to the quantized base model.
model = PeftModel.from_pretrained(model, PEFT_MODEL)

In [None]:
# `generation_config` is the reloaded model's own object (not a copy), so the
# assignments below change that model's default generation settings in place.
generation_config = model.generation_config
generation_config.max_new_tokens = 256
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# The EOS token id doubles as the padding id, matching tokenizer.pad_token.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [None]:
%%time
import math
import re
device = "cuda:0"

# Step 1: Last buyer message (from conversation history)
last_buyer_prompt = "I want to take it above ₹2007. Durability concerns."

In [None]:
# Ask the fine-tuned model for the decay rate, using the same sentence prefix
# the sentiment training prompts start with.
decay_prompt = f"Given this buyer message: '{last_buyer_prompt}', predict the buyer sentiment decay rate:"

decay_input = tokenizer(decay_prompt, return_tensors="pt").to(device)

with torch.inference_mode():
    decay_output = model.generate(
        input_ids=decay_input.input_ids,
        attention_mask=decay_input.attention_mask,
        max_new_tokens=10,
        # Greedy decoding; sampling knobs such as temperature do not apply.
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )

# generate() returns the prompt tokens followed by the continuation. Decode
# only the continuation, so a decimal number quoted in the buyer message
# (e.g. a price such as 2920.50) can never be mistaken for the predicted rate.
prompt_length = decay_input.input_ids.shape[1]
decay_response = tokenizer.decode(decay_output[0][prompt_length:], skip_special_tokens=True)
print("Raw model output:", repr(decay_response))

# First decimal number in the continuation, or 0.2 when none is found.
matches = re.findall(r"0?\.\d{1,3}|\d{1,3}\.\d{1,3}", decay_response)
decay_rate = float(matches[0]) if matches else 0.2

print(f"Extracted decay rate: {decay_rate}")


In [None]:
# Set pad_token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Ask the model for the decay rate implied by the latest buyer message.
decay_prompt = f"Given this buyer message: '{last_buyer_prompt}', output only the decay rate as a decimal number:"

decay_input = tokenizer(decay_prompt, return_tensors="pt").to(device)

with torch.inference_mode():
    decay_output = model.generate(
        input_ids=decay_input.input_ids,
        attention_mask=decay_input.attention_mask,
        max_new_tokens=5,   # keep short to avoid long explanations
        # Greedy decoding; sampling knobs such as temperature do not apply.
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

# generate() returns the prompt tokens followed by the continuation. Decode
# only the continuation, so a decimal number quoted in the buyer message can
# never be mistaken for the predicted rate.
prompt_length = decay_input.input_ids.shape[1]
decay_response = tokenizer.decode(decay_output[0][prompt_length:], skip_special_tokens=True)

print("Raw model output:", repr(decay_response))

# Extract only decimal numbers from the output
matches = re.findall(r"0?\.\d{1,3}|\d{1,3}\.\d{1,3}", decay_response)
decay_rate = float(matches[0]) if matches else 0.2  # fallback default

print(f"Extracted Decay Rate: {decay_rate}")


# Step 5: Set pricing details
listed_price = 3569
min_price = 2782

# Step 6: Construct negotiation prompt using dynamic decay_rate
# The product header and conversation so far are written once and reused for
# both the round count and the final prompt.
conversation_context = f"""
Product: Linen Shirts
Description: Lightweight linen, summer wear, breathable fabric.
Listed Price: ₹{listed_price}
Minimum Price: ₹{min_price}

Conversation History:
Round 1:
Buyer: I’ll take it for ₹2920. Design feels outdated.
Seller: I value your feedback on design feels outdated. With current demand, I can offer ₹3365.02. Interested?

Round 2:
Buyer: I’ll take it for ₹2932. Color is dull.
Seller: I understand your concern regarding color is dull. Given the craftsmanship, I can do ₹3309.54. Would that be okay?

Buyer: {last_buyer_prompt}
""".strip()

# Count completed rounds from the "Round N:" header lines only, so the word
# "round" inside a buyer or seller message cannot inflate the count.
round_matchs = re.findall(r"^Round\s+\d+:", conversation_context, re.IGNORECASE | re.MULTILINE)
round_number = len(round_matchs) + 1

# Exponential decay from the listed price toward the minimum price.
counter_offer = min_price + (listed_price - min_price) * math.exp(-(round_number * decay_rate))

print(f"Next Round is : {round_number}")
print(f"Decay Rate is : {decay_rate}")
print(f"Counter Offer Price is : {counter_offer:.2f}")

# Final prompt: the conversation so far, the computed counter-offer, and a
# trailing "Seller:" cue for the model to continue from.
prompt = f"""
{conversation_context}

Based on the above conversation, negotiate with the buyer by considering the buyer sentiment and previous offers.
The calculated counter-offer price for this round is ₹{counter_offer:.2f}.
Respond as the seller with your new price offer and a brief justification.

Seller:
""".strip()

In [None]:
# Step 7: Generate negotiation response using LLM
# Tokenize the final prompt and move the tensors to the GPU.
encoding = tokenizer(prompt, return_tensors="pt").to(device)
# inference_mode disables autograd tracking for the generation call.
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config
    )
    # Decodes the first returned sequence with special tokens removed.
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
