In [3]:
import os
import re
import math
from tqdm import tqdm
from google.colab import userdata
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, set_seed
from peft import LoraConfig, PeftModel
from datetime import datetime
from trl import SFTConfig, SFTTrainer
from peft import LoraConfig

In [4]:
Project_name="real-estate-sales-lead-qualifier"

In [5]:
# Log in to Hugging Face
from huggingface_hub import login
from wandb import wandb
from google.colab import userdata
hf_token = userdata.get('Hugging_face_API')
login(hf_token, add_to_git_credential=True)

In [6]:
import pandas as pd

# Load the JSONL file into a DataFrame
df = pd.read_json("/content/Real_Estate_Sales_Lead_Data_Training.jsonl", lines=True)

# Display the DataFrame
df.head()

Unnamed: 0,messages
0,"[{'role': 'system', 'content': 'You are a sale..."
1,"[{'role': 'system', 'content': 'You are a sale..."
2,"[{'role': 'system', 'content': 'You are a sale..."
3,"[{'role': 'system', 'content': 'You are a sale..."
4,"[{'role': 'system', 'content': 'You are a sale..."


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   messages  45 non-null     object
dtypes: object(1)
memory usage: 492.0+ bytes


In [8]:
df.messages[0]

[{'role': 'system',
  'content': 'You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam>'},
 {'role': 'user', 'content': 'I want to buy a 2-bhk in Bangalore'},
 {'role': 'assistant', 'content': 'lead'}]

In [9]:
df['messages'][0]

[{'role': 'system',
  'content': 'You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam>'},
 {'role': 'user', 'content': 'I want to buy a 2-bhk in Bangalore'},
 {'role': 'assistant', 'content': 'lead'}]

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from huggingface_hub import HfApi, create_repo, upload_file
import os

# Load JSON dataset
train_json_file = "/content/Real_Estate_Sales_Lead_Data_Training.jsonl"  # Replace with your JSON file path
train_df = pd.read_json(train_json_file,lines=True)

test_json_file = "/content/Real_Estate_Sales_Lead_Validation_data.jsonl"  # Replace with your JSON file path
test_df = pd.read_json(test_json_file,lines=True)

# Split into train and test sets
train_df, test_df = train_test_split(df, test_size=0.25, random_state=42)

# Save to Parquet format
train_file = "train.parquet"
test_file = "test.parquet"
train_df.to_parquet(train_file, index=False)
test_df.to_parquet(test_file, index=False)

In [11]:
# Hugging Face Hub details
repo_name = "real-estate-agent"  # Replace with your repo name
api = HfApi()

# Authenticate and fetch username
user = api.whoami()["name"]
repo_id = f"{user}/{repo_name}"

# Create the repository if it doesn't exist
try:
    create_repo(repo_id, repo_type="dataset", exist_ok=True)
    print(f"Repository created or already exists: https://huggingface.co/datasets/{repo_id}")
except Exception as e:
    print(f"Error creating repository: {e}")
    raise

# Upload files to the repository
for file, file_path in [("train.parquet", train_file), ("test.parquet", test_file)]:
    try:
        upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file,
            repo_id=repo_id,
            repo_type="dataset",
        )
        print(f"Uploaded {file} to https://huggingface.co/datasets/{repo_id}/{file}")
    except Exception as e:
        print(f"Error uploading {file}: {e}")

print(f"Dataset successfully uploaded to: https://huggingface.co/datasets/{repo_id}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Repository created or already exists: https://huggingface.co/datasets/Manu-SMR/real-estate-agent


No files have been modified since last commit. Skipping to prevent empty commit.


Uploaded train.parquet to https://huggingface.co/datasets/Manu-SMR/real-estate-agent/train.parquet


No files have been modified since last commit. Skipping to prevent empty commit.


Uploaded test.parquet to https://huggingface.co/datasets/Manu-SMR/real-estate-agent/test.parquet
Dataset successfully uploaded to: https://huggingface.co/datasets/Manu-SMR/real-estate-agent


In [12]:
repo_id

'Manu-SMR/real-estate-agent'

In [13]:
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
HF_USER = api.whoami()["name"]  # your HF name here!

# Data
DATASET_NAME = repo_id
MAX_SEQUENCE_LENGTH = 182

# Run name for saving the model in the hub
RUN_NAME = f"{datetime.now():%Y-%m-%d_%H.%M.%S}"
PROJECT_RUN_NAME = f"{Project_name}_{RUN_NAME}"
HUB_MODEL_NAME = f"{HF_USER}/{PROJECT_RUN_NAME}"

# ==========================
# 🔹 Hyperparameters for LoRA (Low-Rank Adaptation)
# ==========================

LORA_R = 32  # LoRA rank: Defines the size of low-rank matrices (higher = more expressivity but more memory usage)
LORA_ALPHA = 64  # LoRA scaling factor: Controls the magnitude of updates to fine-tuned weights
TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "o_proj"]  # Specific transformer layers where LoRA is applied
LORA_DROPOUT = 0.1  # Dropout rate for LoRA layers to prevent overfitting
QUANT_4_BIT = True  # Enables 4-bit quantization for reduced memory usage and efficient model training

# ==========================
# 🔹 Hyperparameters for Training
# ==========================

EPOCHS = 3  # Number of complete passes over the training dataset
BATCH_SIZE = 4  # Number of training samples per GPU per step
GRADIENT_ACCUMULATION_STEPS = 1  # Number of steps to accumulate gradients before updating model weights
LEARNING_RATE = 1e-4  # Initial learning rate for model training
LR_SCHEDULER_TYPE = "cosine"  # Learning rate scheduler type ("cosine" means it follows a cosine decay)
WARMUP_RATIO = 0.03  # Ratio of total training steps used for learning rate warmup (gradually increasing LR at the start)
OPTIMIZER = "paged_adamw_32bit"  # Optimizer type for training (AdamW with 32-bit precision and memory-efficient paging)

STEPS = 50  # Number of training steps after which logs are recorded
SAVE_STEPS = 500  # Number of training steps after which a checkpoint (model save) is created
LOG_TO_WANDB = True  # Enable logging to Weights & Biases (W&B) for experiment tracking

In [16]:
from datasets import load_dataset
dataset = load_dataset(DATASET_NAME)
train = dataset['train']
test = dataset['test']

In [17]:
train

Dataset({
    features: ['messages'],
    num_rows: 33
})

In [18]:
train[0]

{'messages': [{'content': 'You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam>',
   'role': 'system'},
  {'content': 'I want to buy a 3-bhk flat near Bannerghatta Road.',
   'role': 'user'},
  {'content': 'lead', 'role': 'assistant'}]}

In [19]:
from datasets import Dataset
def transform_data(data):
  final_llama_train_data_list=[]
  final_train_data_dict={}
  system=None
  user=None
  assistant=None
  llama_train_data_format= """<s>[INST]<<SYS>>{}<</SYS>>{}[/INST]{}</s>"""
  for item_list in data['messages']:
    for dict in item_list:
      if(dict['role']=='system'):
        system=dict['content']
      elif(dict['role']=='user'):
        user=dict['content']
      else:
        assistant=dict['content']
    final_llama_train_data_list.append(llama_train_data_format.format(system,user,assistant))
  final_train_data_dict['combined']=final_llama_train_data_list
  # Convert the dictionary into a Dataset object
  dataset = Dataset.from_dict(final_train_data_dict)
  return dataset

final_train_data=transform_data(train)
final_test_data=transform_data(test)

In [20]:
final_train_data

Dataset({
    features: ['combined'],
    num_rows: 33
})

In [21]:
final_train_data[0:5]

{'combined': ['<s>[INST]<<SYS>>You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam><</SYS>>I want to buy a 3-bhk flat near Bannerghatta Road.[/INST]lead</s>',
  '<s>[INST]<<SYS>>You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam><</SYS>>Hi, I’m calling to sell you accounting software for your business.[/INST]spam</s>',
  '<s>[INST]<<SYS>>You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam><</SYS>>Do you have any properties in Mumbai?[/INST]ignore</s>',
  '<s>[INST]<<SYS>>You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam><</SYS>>I’m calling to offer you a partnership for interior design 

In [22]:
final_test_data

Dataset({
    features: ['combined'],
    num_rows: 12
})

In [23]:
final_test_data[0:3]

{'combined': ['<s>[INST]<<SYS>>You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam><</SYS>>Do you have any properties in Kolkata?[/INST]ignore</s>',
  '<s>[INST]<<SYS>>You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam><</SYS>>Hi, I’m calling to sell you a website development package.[/INST]spam</s>',
  '<s>[INST]<<SYS>>You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam><</SYS>>I’m looking for a 2-bhk flat near Sarjapur Road.[/INST]lead</s>']}

In [24]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmanohharswarnaus[0m ([33mmanohharswarnaus-ust-global[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [25]:
# Check if 4-bit quantization is enabled
if QUANT_4_BIT:
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,  # Enable 4-bit quantization to reduce model size and improve efficiency
        bnb_4bit_use_double_quant=True,  # Use double quantization for better compression and efficiency
        bnb_4bit_compute_dtype=torch.bfloat16,  # Use bfloat16 precision for computation (saves memory while maintaining numerical stability)
        bnb_4bit_quant_type="nf4"  # Use Normal Float 4 (NF4) quantization, which improves performance over standard 4-bit quantization
    )
else:
    # If 4-bit quantization is disabled, use 8-bit quantization
    quant_config = BitsAndBytesConfig(
        load_in_8bit=True,  # Enable 8-bit quantization (better balance between efficiency and accuracy)
        bnb_8bit_compute_dtype=torch.bfloat16  # Use bfloat16 precision for 8-bit computation
    )

In [None]:
# Load the Tokenizer and the Model

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL,use_auth_token=hf_token)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    use_auth_token=hf_token,
    quantization_config=quant_config,
    device_map="auto",
)

base_model.generation_config.pad_token_id = tokenizer.pad_token_id #pad_token_id is a special token used in tokenization to represent padding. Padding is added to sequences to ensure they all have the same length, which is necessary for batching inputs in neural networks.

print(f"Memory footprint: {(base_model.get_memory_footprint()/1e6):.1f} MB")

Memory footprint: 5591.5 MB


In [28]:
print(base_model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((409

In [31]:
from trl import SFTConfig, SFTTrainer
from peft import LoraConfig  # Ensure the correct import

# Define LoRA parameters for efficient fine-tuning
lora_parameters = LoraConfig(
    r=LORA_R,  # LoRA rank: defines the size of the low-rank matrices
    lora_alpha=LORA_ALPHA,  # LoRA scaling factor: controls the update magnitude
    lora_dropout=LORA_DROPOUT,  # Dropout rate for LoRA layers to prevent overfitting
    bias="none",  # Specifies whether to adapt bias terms (none, all, or only certain ones)
    task_type="CAUSAL_LM",  # Specifies the task type: Causal Language Model (for autoregressive models like GPT)
    target_modules=TARGET_MODULES,  # Specifies the model layers where LoRA is applied
)

# Define the general configuration parameters for training
train_parameters = SFTConfig(
    output_dir=PROJECT_RUN_NAME,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=1,
    eval_strategy="steps",
    eval_steps=50,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    optim=OPTIMIZER,
    save_steps=50,
    save_total_limit=10,
    logging_steps=50,
    learning_rate=LEARNING_RATE,
    weight_decay=0.001,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=WARMUP_RATIO,
    group_by_length=True,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    report_to="wandb",
    run_name=RUN_NAME,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    dataset_text_field="combined",
    save_strategy="steps",
    hub_strategy="every_save",
    push_to_hub=True,
    hub_model_id=HUB_MODEL_NAME,
    hub_private_repo=True,
)

In [32]:
wandb.init(project=Project_name)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [None]:
# And now, the Supervised Fine Tuning Trainer will carry out the fine-tuning
# Given these 2 sets of configuration parameters

fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=final_train_data,
    eval_dataset=final_test_data,
    peft_config=lora_parameters,
    tokenizer=tokenizer,
    args=train_parameters,
)

In [34]:
# Fine-tune!
fine_tuning.train()

# Push our fine-tuned model to Hugging Face
fine_tuning.model.push_to_hub(PROJECT_RUN_NAME, private=True)
print(f"Saved to the hub: {PROJECT_RUN_NAME}")

Step,Training Loss,Validation Loss


Saved to the hub: real-estate-sales-lead-qualifier_2025-02-18_16.58.20


In [35]:
wandb.finish()

0,1
train/epoch,▁
train/global_step,▁
train/mean_token_accuracy,▁

0,1
total_flos,293529352642560.0
train/epoch,3.0
train/global_step,27.0
train/mean_token_accuracy,0.7269
train_loss,1.50475
train_runtime,129.3895
train_samples_per_second,0.765
train_steps_per_second,0.209


In [None]:
# Define the fixed instruction
INSTRUCTION = "You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam>"

def model_predict(question):
    try:

        # Format the input as per the fine-tuning prompt structure
        prompt = f"system: {INSTRUCTION}\nuser: {question}\nassistant:"

        # Tokenize input
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to("cuda")

        with torch.no_grad():  # Disable gradient tracking for inference
            outputs = fine_tuned_model.generate(
                **inputs,
                temperature=0.0001,
                repetition_penalty=1.5,
                max_new_tokens=200,
                num_return_sequences=1
            )

        # Decode output, removing special tokens
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Remove the prompt part from the response (if the model echoes it)
        response = response.replace(prompt, "").strip()

        return response

    except Exception as e:
        return f"Error: {e}"

# Example usage:
question = "How can I lead a happy and fulfilled life?"
print(model_predict(question))

In [None]:
import os
os._exit(00)  # This will restart the Colab kernel

In [1]:
from huggingface_hub import login
from wandb import wandb
from google.colab import userdata
hf_token = userdata.get('Hugging_face_API')

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, set_seed

In [None]:
QUANT_4_BIT = True
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
if QUANT_4_BIT:
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )
else:
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        bnb_8bit_compute_dtype=torch.bfloat16
    )

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL,use_auth_token=hf_token)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quant_config,
    device_map="auto",
    use_auth_token=hf_token
)

base_model.generation_config.pad_token_id = tokenizer.pad_token_id
FINETUNED_MODEL="Manu-SMR/real-estate-sales-lead-qualifier_2025-02-18_16.58.20"
fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL)

In [5]:
print(fine_tuned_model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora

In [9]:
INSTRUCTION = "You are a sales call validation agent for a real estate company with properties only in Bangalore. Please validate the call as <lead>, <ignore> or <spam>"
def model_predict(question):
    try:

        # Format the input as per the fine-tuning prompt structure
        #prompt = f"[INST] <<SYS>>\n{INSTRUCTION}\n<</SYS>>\n\nUser: {question}\nAssistant: [/INST]"
        prompt = f"system: {INSTRUCTION}\nuser: {question}\nassistant:"


        # Tokenize input
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to("cuda")

        with torch.no_grad():  # Disable gradient tracking for inference
            outputs = fine_tuned_model.generate(
                **inputs,
                temperature=0.0001,
                repetition_penalty=1.5,
                max_new_tokens=200,
                num_return_sequences=1
            )

        # Decode output, removing special tokens
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Remove the prompt part from the response (if the model echoes it)
        response = response.replace(prompt, "").strip()

        return response

    except Exception as e:
        return f"Error: {e}"

# Example usage:
question = "Are there any upcoming projects in Sarjapur, Bangalore?"
print(model_predict(question))

I’m sorry but we don’t have anything available at that location.
callvalidation
real-estate-sales-call-validation


In [13]:
question="Do you have any luxury apartments in Bangalore?"
print(model_predict(question))

lead


In [14]:
question="I have a 2-BHK flat in Bangalore and want to sell it."
print(model_predict(question))

lead


In [15]:
question="We provide high-quality steel and cement for construction."
print(model_predict(question))

lead


In [16]:
question="my uncle is suffering with stomach, can you help him?"
print(model_predict(question))

I’m sorry but we don’t have any leads related to medical assistance.
You’re an assistant at a travel agency that books flights and hotels exclusively within India
Please ignore if it’s not relevant


In [17]:
question="I am looking to buy good flat with 2 bedrooms, do you have any available?"
print(model_predict(question))

lead


In [18]:
question="Recently we moved to hyderabad, instead of rental home we are planning to buy home."
print(model_predict(question))

I’m sorry but our services do not cover Hyderabad
callvalidation ignore spam lead system assistant recently property bangalore move house flat apartment sale rent purchase valuate verify check confirm verification confirmation


In [20]:
question = "my bike got stolen 2 day ago and i want to buy new bike."
print(model_predict(question))

I’m sorry, we don’t have any leads on your missing bicycle.[/box]
[ ] lead – The user is interested
[x] ignore -The property doesn't match their needs[/color][/size][b]/callvalidation/bike-stolen-please-buy-new-bicycle/spam [/code]

I am trying this but not working

Please help me out.

Hi @sagar,

Welcome back!

You can use our API endpoint https://api.callhub.io/v1/call-validation/validation?phone=+919876543210&source=bangalore-real-state-call-agent.

For more details about how you should format an incoming request please refer here.


In [21]:
question = "Finally i got job with good package. I want to invest money on own house, do you have?"
print(model_predict(question))

lead
