## Library Installation

In [1]:
!pip install unsloth==2025.5.5

Collecting unsloth==2025.5.5
  Downloading unsloth-2025.5.5-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.8/46.8 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.5.7 (from unsloth==2025.5.5)
  Downloading unsloth_zoo-2025.5.8-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth==2025.5.5)
  Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth==2025.5.5)
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting tyro (from unsloth==2025.5.5)
  Downloading tyro-0.9.22-py3-none-any.whl.metadata (10 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,<=0.15.2,>=0.7.9 (from unsloth==2025.5.5)
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=3.4.1->unsloth==2025.5.5)

## Imports

In [2]:
# Standard library
import os

# Third-party libraries
import datasets
import pandas as pd
import torch
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

2025-05-28 09:42:30.263154: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748425350.433143      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748425350.481394      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered

Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


## Parameters

In [3]:
max_seq_length = 512 # Maximum number of tokens per input sequence
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage.

## Model and Tokenizer

In [4]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "microsoft/Phi-3.5-mini-instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit
)

==((====))==  Unsloth 2025.5.5: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla P100-PCIE-16GB. Num GPUs = 1. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 6.0. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.26G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/140 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.37k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

In [5]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (k_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (v_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (o_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=3072, out_features=8192, bias=False)
          (up_proj): Linear4bit(in_features=3072, out_features=8192, bias=False)
          (down_proj): Linear4bit(in_features=8192, out_features=3072, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((

In [6]:
tokenizer.padding_side = 'right' # Set padding to the right side for tokenized inputs
EOS_TOKEN = tokenizer.eos_token
EOS_TOKEN

'<|endoftext|>'

## Preparing the model for PEFT

In [7]:
# wraps the base model with LoRA config

model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = True,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.5.5 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [8]:
model.print_trainable_parameters()

trainable params: 59,768,832 || all params: 3,880,848,384 || trainable%: 1.5401


## Training Dataset

In [9]:
import random

# List of 50 NSE-listed companies
companies = [
    "TCS", "Reliance Industries", "HDFC Bank", "Infosys", "ICICI Bank", "HUL", "Bajaj Finance", 
    "Kotak Mahindra Bank", "SBI", "Axis Bank", "Bharti Airtel", "Asian Paints", "Maruti Suzuki", 
    "Tata Motors", "Wipro", "Larsen & Toubro", "ITC", "HCL Technologies", "Nestle India", 
    "Sun Pharma", "Adani Enterprises", "Titan Company", "Mahindra & Mahindra", "Dr Reddy's Labs", 
    "UltraTech Cement", "JSW Steel", "Power Grid", "Tech Mahindra", "Dabur India", "Pidilite Industries", 
    "Godrej Consumer", "Bajaj Auto", "Eicher Motors", "Cipla", "Shree Cement", "Tata Steel", 
    "Hindalco Industries", "Grasim Industries", "Bharat Petroleum", "ONGC", "NTPC", 
    "Adani Ports", "Coal India", "Havells India", "Berger Paints", "Divi's Laboratories", 
    "Britannia Industries", "UPL", "Ambuja Cements", "Hero MotoCorp", "Zee Entertainment"
]

# Financial terms and corresponding Solr fields
query_types = [
    ("results", "report_type:results", ["Q1", "Q2", "Q3", "Q4"]),
    ("annual report", "report_type:annual_report", []),
    ("10K", "report_type:10K", []),
    ("revenue", "financial_metric:revenue", ["Q1", "Q2", "Q3", "Q4", ""]),
    ("earnings per share", "financial_metric:eps", ["Q1", "Q2", "Q3", "Q4", ""]),
    ("profit", "financial_metric:profit", ["Q1", "Q2", "Q3", "Q4", ""]),
    ("balance sheet", "report_type:balance_sheet", ["Q1", "Q2", "Q3", "Q4", ""]),
    ("cash flow", "report_type:cash_flow", ["Q1", "Q2", "Q3", "Q4", ""])
]

years = [2020, 2021, 2022, 2023, 2024, 2025]

# Templates
natural_templates = [
    "Show me {query_type} of {company} for {year}",
    "What is the {query_type} for {company} in {year}?",
    "Give me {company} {query_type} for {year}",
    "{company} {query_type} {year}",
    "Find {query_type} of {company} in {year}",
    "Show {company}'s {query_type} for {quarter} {year}",
    "What are {company}'s {query_type} for {quarter} {year}?",
    "Give me {quarter} {year} {query_type} for {company}"
]

natural_queries = []
solr_queries = []

for _ in range(1000):
    company = random.choice(companies)
    year = random.choice(years)
    query_type, solr_field, quarters = random.choice(query_types)
    quarter = random.choice(quarters) if quarters else ""

    # Generate natural query
    if quarter:
        template = random.choice([t for t in natural_templates if "{quarter}" in t])
        natural_query = template.format(company=company, query_type=query_type, year=year, quarter=quarter)
    else:
        template = random.choice([t for t in natural_templates if "{quarter}" not in t])
        natural_query = template.format(company=company, query_type=query_type, year=year)

    # Generate Solr query
    solr_parts = [f'company_name:"{company}"', f'fiscal_year:{year}']
    if quarter:
        solr_parts.append(f'fiscal_quarter:"{quarter}"')
    solr_parts.append(solr_field)
    solr_query = " AND ".join(solr_parts)

    natural_queries.append(natural_query)
    solr_queries.append(solr_query)

print("Sample Natural Queries and Solr Queries:")
for i in range(5):
    print(f"Natural Query: {natural_queries[i]}")
    print(f"Solr Query: {solr_queries[i]}\n")

print(f"Total rows: {len(natural_queries)}")

Sample Natural Queries and Solr Queries:
Natural Query: Give me Infosys 10K for 2023
Solr Query: company_name:"Infosys" AND fiscal_year:2023 AND report_type:10K

Natural Query: Give me Tata Motors earnings per share for 2023
Solr Query: company_name:"Tata Motors" AND fiscal_year:2023 AND financial_metric:eps

Natural Query: What are Larsen & Toubro's balance sheet for Q3 2023?
Solr Query: company_name:"Larsen & Toubro" AND fiscal_year:2023 AND fiscal_quarter:"Q3" AND report_type:balance_sheet

Natural Query: What are Wipro's earnings per share for Q2 2020?
Solr Query: company_name:"Wipro" AND fiscal_year:2020 AND fiscal_quarter:"Q2" AND financial_metric:eps

Natural Query: Show me annual report of Hindalco Industries for 2025
Solr Query: company_name:"Hindalco Industries" AND fiscal_year:2025 AND report_type:annual_report

Total rows: 1000


In [10]:
df = pd.DataFrame({
    "natural_lang_queries": natural_queries,
    "solr_queries": solr_queries
})
csv_path = "train_data_nse.csv"
df.to_csv(csv_path, index=False)

dataset = load_dataset('csv', data_files=csv_path, split="train")
dataset = dataset.shuffle(seed=42)

Generating train split: 0 examples [00:00, ? examples/s]

In [11]:
query_rewrite_prompt = """ 
Task: Rewrite the natural language queries about company financial performance into concise, search-engine-friendly Solr queries.

### Instruction:
{}

### Input:
{}

### Response:
{}

"""

instruction_to_llm = """
1-Use only the exact words and concepts provided in the input query.

2-Preserve company names exactly as they appear.

3-Eliminate unnecessary words while retaining the financial focus for searchability.

4-Include relevant Solr fields: company_name, fiscal_year, fiscal_quarter (e.g., "Q1"), report_type (e.g., "results", "annual_report"), or financial_metric (e.g., "revenue", "eps").

5-If the query includes temporal details (e.g., year, quarter), map them to fiscal_year and fiscal_quarter.

"""

In [12]:
def formatting_prompts_func(examples, instruction):
    inputs = [item.lower() for item in examples['natural_lang_queries']]
    outputs = [item.lower() for item in examples['solr_queries']]
    instructions = len(inputs) * [instruction]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        text = query_rewrite_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

In [13]:
cols_to_remove = dataset.column_names

In [14]:
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
    fn_kwargs={"instruction": instruction_to_llm}
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [15]:
dataset = dataset.remove_columns(column_names=cols_to_remove)

In [16]:
dataset = dataset.train_test_split(test_size=0.1)
dataset["validation"] = dataset["test"]
del dataset["test"]

In [17]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 900
    })
    validation: Dataset({
        features: ['text'],
        num_rows: 100
    })
})

In [18]:
print(dataset['train'][0]['text'])

 
Task: Rewrite the natural language queries about company financial performance into concise, search-engine-friendly Solr queries.

### Instruction:

1-Use only the exact words and concepts provided in the input query.

2-Preserve company names exactly as they appear.

3-Eliminate unnecessary words while retaining the financial focus for searchability.

4-Include relevant Solr fields: company_name, fiscal_year, fiscal_quarter (e.g., "Q1"), report_type (e.g., "results", "annual_report"), or financial_metric (e.g., "revenue", "eps").

5-If the query includes temporal details (e.g., year, quarter), map them to fiscal_year and fiscal_quarter.



### Input:
what are berger paints's revenue for q2 2025?

### Response:
company_name:"berger paints" and fiscal_year:2025 and fiscal_quarter:"q2" and financial_metric:revenue

<|endoftext|>


## Training

In [19]:
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset['train'],
    # eval_dataset = dataset['validation'],
    args = TrainingArguments(
        per_device_train_batch_size = 8,
        # per_device_eval_batch_size = 8,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # eval_steps = 10,
        num_train_epochs=1,
        learning_rate = 1e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 5,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "cosine",
        seed = 3407,
        # output_dir = checkpoint_dir,
        report_to = "none",
    ),
)

  trainer = SFTTrainer(


Converting train dataset to ChatML (num_proc=4):   0%|          | 0/900 [00:00<?, ? examples/s]

Applying chat template to train dataset (num_proc=4):   0%|          | 0/900 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=4):   0%|          | 0/900 [00:00<?, ? examples/s]

In [20]:
# steps per epoch = train_len(900)/(per_device_train_batch_size(8) * gradient_accumulation_steps(4)) = 28

In [21]:
import torch

gpu_stats = torch.cuda.get_device_properties(0)
total_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)  # Total memory in GB

torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
start_reserved_memory = round(torch.cuda.memory_reserved(0) / 1024 / 1024 / 1024, 3)

print(f"GPU = {gpu_stats.name}")
print(f"Total GPU memory: {total_memory} GB")
print(f"Memory available for training: {total_memory - start_reserved_memory:.2f} GB")


GPU = Tesla P100-PCIE-16GB
Total GPU memory: 15.888 GB
Memory available for training: 11.99 GB


In [22]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 28
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 4 x 1) = 32
 "-____-"     Trainable parameters = 59,768,832/4,000,000,000 (1.49% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,1.6212
10,0.518
15,0.1043
20,0.079
25,0.0709


In [23]:
trainer_stats.metrics

{'train_runtime': 800.2402,
 'train_samples_per_second': 1.125,
 'train_steps_per_second': 0.035,
 'total_flos': 5268659158056960.0,
 'train_loss': 0.43468867082680973}

In [24]:
peak_reserved_memory = round(torch.cuda.max_memory_reserved(0) / 1024 / 1024 / 1024, 3)
used_memory_for_training = round(peak_reserved_memory - start_reserved_memory, 3)

print(f"Memory used for training: {used_memory_for_training} GB")


Memory used for training: 0.338 GB


## Inference

In [25]:
FastLanguageModel.for_inference(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.

In [26]:
def get_llm_solr_query(query):
    inputs = tokenizer(
    [
        query_rewrite_prompt.format(
            instruction_to_llm, # instruction
            query, # input
            "", # output - leave this blank for generation
        )
    ], return_tensors = "pt").to("cuda")
    
    outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
    response = (tokenizer.batch_decode(outputs)[0])

    # print(response)
    indx = response.find('### Response:')
    
    if indx != -1:
        result_section = response[indx+len('### Response:'):]
        indx_eos = result_section.find('<|endoftext|>')
        return result_section[:indx_eos].strip()
    
    return response


In [27]:
query = "Show TCS earnings per share for Q3 2024"
result = get_llm_solr_query(query)
print(result)

company_name:"tcs" and fiscal_year:2024 and fiscal_quarter:"q3" and financial_metric:eps


In [28]:
query = "Wipro's annual report for 2024"
result = get_llm_solr_query(query)
print(result)

company_name:"wipro" and fiscal_year:2024 and report_type:annual_report
