## Fine Tune on V4 Dataset

In [1]:
# installing unsloth

%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

# Install Flash Attention 2 for softcapping support
import torch
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"

In [2]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048
dtype = None
load_in_4bit = True

fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit",
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3-mini-4k-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",
    "unsloth/gemma-2-2b-bnb-4bit",
]

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-2-2b",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.1.7: Fast Gemma2 patching. Transformers: 4.47.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


## Adding LORA Adapter

In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.1.7 patched 26 layers with 26 QKV layers, 26 O layers and 26 MLP layers.


## Dataset for Fine Tuning

In [4]:
from datasets import load_dataset

In [5]:
dataset = load_dataset("manojbaniya/ift-nepali-v5", split="train")

In [6]:
dataset

Dataset({
    features: ['category', 'context', 'question', 'response', 'instruction', 'prompt'],
    num_rows: 17866
})

## Preparing data

In [7]:
EOS_TOKEN = tokenizer.eos_token

In [8]:
def formatting_prompt(examples):
  """Add EOS_TOKEN at the end of every data"""
  prompts = []

  for prompt in examples["prompt"]:
    new_prompt = prompt + EOS_TOKEN
    prompts.append(new_prompt)

  return {"new_prompts": prompts}

In [9]:
dataset = dataset.map(formatting_prompt, batched=True)

In [10]:
print(dataset[999]["new_prompts"])


        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.

        ### Instruction
        Translate the text below from English to Nepali:
        
        ### Input
        The rise of remote learning has made education more flexible and accessible to students worldwide.
        
        ### Response
        Remote shiksha ko uday le shiksha lai dherai lachila ra sansar bhari ko bidyarthiharuko lagi sajilo banaideko cha.
        <eos>


In [11]:
print(dataset[100]["new_prompts"])


        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Store ko contact details k xa?
        
        ### Input
        Hamro store ko name [RST Fashion] ho, hamro store Chitwan ma xa, Narayangarh ma. Hamro contact number 056-123456 ho.
        
        ### Response
        Hamro contact number 056-123456 ho.
        <eos>


## Training the Model

In [12]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

In [13]:
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "new_prompts",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 1, # Set this for 1 full training run.
        # max_steps = 100,
        learning_rate = 1e-5,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 100,
        optim = "adamw_8bit",
        weight_decay = 0.02,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

In [14]:
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.748 GB.
2.697 GB of memory reserved.


In [15]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 17,866 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 2,233
 "-____-"     Number of trainable parameters = 20,766,720


Step,Training Loss
1,3.5316
2,3.9213
3,3.5998
4,3.8643
5,4.0618
6,3.4164
7,3.7968
8,3.5373
9,3.5545
10,3.7017


In [16]:
trainer_stats

TrainOutput(global_step=2233, training_loss=1.151259276026629, metrics={'train_runtime': 4225.2202, 'train_samples_per_second': 4.228, 'train_steps_per_second': 0.528, 'total_flos': 2.963342510740685e+16, 'train_loss': 1.151259276026629, 'epoch': 0.9998880555244599})

## Inference

In [17]:
FastLanguageModel.for_inference(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Gemma2ForCausalLM(
      (model): Gemma2Model(
        (embed_tokens): Embedding(256000, 2304, padding_idx=0)
        (layers): ModuleList(
          (0-25): 26 x Gemma2DecoderLayer(
            (self_attn): Gemma2Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2304, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2304, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora

### Prompt Template

In [18]:
prompt_template = """
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.

        ### Instruction
        {question}

        ### Input
        {context}

        ### Response
        """

In [19]:
inputs = prompt_template.format(
    question="Nepal ko capital city kaha ho?",
    context=""
)
inputs = tokenizer([inputs], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
response = tokenizer.batch_decode(outputs)

In [20]:
print(response[0])

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Nepal ko capital city kaha ho?
        
        ### Input
        
        
        ### Response
        Nepal ko capital city Kathmandu ho.
        <eos>


## Generating Response

In [21]:
def generate_response(question, type="RAG", context=None):
  inputs = prompt_template.format(question=question, context=context)

  inputs = tokenizer([inputs], return_tensors="pt").to("cuda")
  outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
  response = tokenizer.batch_decode(outputs)
  return response[0]

In [22]:
response = generate_response(
    question="Nepal ko president ko ho?",
    type="qa"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Nepal ko president ko ho?
        
        ### Input
        None
        
        ### Response
        Nepal ko president ko ho?
        <eos>


In [23]:
response = generate_response(
    question="China ko capital kaha ho?",
    type="qa"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        China ko capital kaha ho?
        
        ### Input
        None
        
        ### Response
        China ko capital Beijing ho.
        <eos>


In [24]:
response = generate_response(
    question="China ko population kati xa?",
    type="qa"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        China ko population kati xa?
        
        ### Input
        None
        
        ### Response
        China ko population 1.4 billion ho.
        <eos>


In [25]:
response = generate_response(
    question="coding kasari sikne",
    type="qa"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        coding kasari sikne
        
        ### Input
        None
        
        ### Response
        Coding sikne le online courses, tutorials, ra coding communities ma participate garne, ya personal projects ko lagi time set garne.
        <eos>


## RAG for Ecommerce Test

In [26]:
response = generate_response(
    question="Store ko location kaha xa",
    type="RAG",
    context="Hamro store ko name All Electronics store ho, hamro store Dharan maa xa."
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Store ko location kaha xa
        
        ### Input
        Hamro store ko name All Electronics store ho, hamro store Dharan maa xa.
        
        ### Response
        Hamro store Dharan maa xa.
        <eos>


In [27]:
response = generate_response(
    question="store ko name ke ho?",
    type="RAG",
    context="Hamro store ko name All Electronics store ho ra hamro store Dharan maa xa. Hamro store ko contact number 9800000000 ho"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        store ko name ke ho?
        
        ### Input
        Hamro store ko name All Electronics store ho ra hamro store Dharan maa xa. Hamro store ko contact number 9800000000 ho
        
        ### Response
        All Electronics store
        <eos>


In [28]:
response = generate_response(
    question="store ko contact kati ho?",
    type="RAG",
    context="Hamro store ko name All Electronics store ho ra hamro store Dharan maa xa. Hamro store ko contact number 9800000000 ho"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        store ko contact kati ho?
        
        ### Input
        Hamro store ko name All Electronics store ho ra hamro store Dharan maa xa. Hamro store ko contact number 9800000000 ho
        
        ### Response
        Hamro store ko contact number 9800000000 ho.
        <eos>


In [29]:
response = generate_response(
    question="store ma k k available xa?",
    type="RAG",
    context="Hamro store ma electronics ko sabai saman xa, mobile, laptop, calculator, watch, camera haru pani xa."
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        store ma k k available xa?
        
        ### Input
        Hamro store ma electronics ko sabai saman xa, mobile, laptop, calculator, watch, camera haru pani xa.
        
        ### Response
        Hamro store ma mobile, laptop, calculator, watch, camera haru xa.
        <eos>


In [30]:
response = generate_response(
    question="store ma k k electronics saman xa?",
    type="RAG",
    context="Hamro store ma electronics ko sabai saman xa, mobile, laptop, calculator, watch, camera haru pani xa."
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        store ma k k electronics saman xa?
        
        ### Input
        Hamro store ma electronics ko sabai saman xa, mobile, laptop, calculator, watch, camera haru pani xa.
        
        ### Response
        Hamro store ma mobile, laptop, calculator, watch, camera haru xa.
        <eos>


In [31]:
response = generate_response(
    question="k baata pay garna milxa?",
    type="RAG",
    context="esewa, khalti ra mobile bank baata pay garna milxa"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        k baata pay garna milxa?
        
        ### Input
        esewa, khalti ra mobile bank baata pay garna milxa
        
        ### Response
        esewa, khalti ra mobile bank bata pay garna milxa.
        <eos>


In [32]:
response = generate_response(
    question="store ma k k painxa?",
    type="RAG",
    context="Hamro store ma musical instruments painxa jastei keyboard, guitars, haru"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        store ma k k painxa?
        
        ### Input
        Hamro store ma musical instruments painxa jastei keyboard, guitars, haru
        
        ### Response
        Hamro store ma musical instruments painxa jastei keyboard, guitars, haru.
        <eos>


In [33]:
response = generate_response(
    question="macbook ko barema vana",
    type="RAG",
    context="Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 8GB RAM, 256GB SSD."
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        macbook ko barema vana
        
        ### Input
        Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 8GB RAM, 256GB SSD.
        
        ### Response
        Hajur, MacBook Air M1 ko stock available chaina.
        <eos>


In [34]:
response = generate_response(
    question="macbook ko price kati ho",
    type="RAG",
    context="Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 8GB RAM, 256GB SSD."
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        macbook ko price kati ho
        
        ### Input
        Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 8GB RAM, 256GB SSD.
        
        ### Response
        MacBook ko price 150000 ho.
        <eos>


In [35]:
response = generate_response(
    question="Redmi ko price kati Rs.ho",
    type="RAG",
    context="Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 8GB RAM, 256GB SSD. name: Redmi Note 9 pro, price: 10000, stock: True, description: Xiaomi mobile with 128 GB storage and 90 GB RAM"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Redmi ko price kati Rs.ho
        
        ### Input
        Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 8GB RAM, 256GB SSD. name: Redmi Note 9 pro, price: 10000, stock: True, description: Xiaomi mobile with 128 GB storage and 90 GB RAM
        
        ### Response
        Redmi Note 9 pro ko price Rs. 10000 ho.
        <eos>


In [36]:
response = generate_response(
    question="Macbook Air ko RAM kati xa?",
    type="RAG",
    context="Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 16GB RAM, 256GB SSD. name: Redmi Note 9 pro, price: 10000, stock: True, description: Xiaomi mobile with 128 GB storage and 12 GB RAM"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Macbook Air ko RAM kati xa?
        
        ### Input
        Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 16GB RAM, 256GB SSD. name: Redmi Note 9 pro, price: 10000, stock: True, description: Xiaomi mobile with 128 GB storage and 12 GB RAM
        
        ### Response
        MacBook Air ko RAM 16 GB ho.
        <eos>


In [37]:
response = generate_response(
    question="Redmi Note 9 pro ko RAM kati xa?",
    type="RAG",
    context="Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 16GB RAM, 256GB SSD. name: Redmi Note 9 pro, price: 10000, stock: True, description: Xiaomi mobile with 128 GB storage and RAM: 12 GB"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Redmi Note 9 pro ko RAM kati xa?
        
        ### Input
        Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 16GB RAM, 256GB SSD. name: Redmi Note 9 pro, price: 10000, stock: True, description: Xiaomi mobile with 128 GB storage and RAM: 12 GB
        
        ### Response
        Redmi Note 9 pro ko RAM 12 GB xa.
        <eos>


In [38]:
response = generate_response(
    question="Redmi Note 9 pro ko storage kati xa?",
    type="RAG",
    context="Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 16GB RAM, 256GB SSD. name: Redmi Note 9 pro, price: 10000, stock: True, description: Xiaomi mobile with 128 GB storage and RAM: 12 GB"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Redmi Note 9 pro ko storage kati xa?
        
        ### Input
        Product Details: name: MacBook Air M1, price: 150000, stock: False, description: 13.3-inch Retina display, Apple M1 chip, 16GB RAM, 256GB SSD. name: Redmi Note 9 pro, price: 10000, stock: True, description: Xiaomi mobile with 128 GB storage and RAM: 12 GB
        
        ### Response
        Redmi Note 9 pro ko storage 128 GB xa.
        <eos>


In [39]:
response = generate_response(
    question="delivery cost kati ho?",
    type="RAG",
    context="Hamro ma delivery all over Nepal hunxa. Inside Kathmandu free delivery hunxa ra outside Kathmandu Rs. 130 delivery charge laagxa. Delivery 3 din vitra hunxa."
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        delivery cost kati ho?
        
        ### Input
        Hamro ma delivery all over Nepal hunxa. Inside Kathmandu free delivery hunxa ra outside Kathmandu Rs. 130 delivery charge laagxa. Delivery 3 din vitra hunxa.
        
        ### Response
        Inside Kathmandu free delivery hunxa ra outside Kathmandu Rs. 130 charge laagxa.
        <eos>


In [40]:
response = generate_response(
    question="kati din ma delivery hunxa",
    type="RAG",
    context="Hamro ma delivery all over Nepal hunxa. Inside Dharan free delivery hunxa ra outside Dharan Rs. 130 delivery charge laagxa. Delivery 3 din vitra hunxa."
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        kati din ma delivery hunxa
        
        ### Input
        Hamro ma delivery all over Nepal hunxa. Inside Dharan free delivery hunxa ra outside Dharan Rs. 130 delivery charge laagxa. Delivery 3 din vitra hunxa.
        
        ### Response
        Dharan ma free delivery hunxa ra outside Dharan ma Rs. 130 charge lagcha.
        <eos>


In [41]:
response = generate_response(
    question="timi ko hau",
    type="RAG",
    context="You are an AI assistant for an e-commerce store and will explain your purpose."
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        timi ko hau
        
        ### Input
        You are an AI assistant for an e-commerce store and will explain your purpose.
        
        ### Response
        ma ek e-commerce assistant hu.
        <eos>


In [47]:
response = generate_response(
    question="Translate the text from English to Roman Nepali",
    type="translate",
    context="Nepali is a beautiful country"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Translate the text from English to Roman Nepali
        
        ### Input
        Nepali is a beautiful country
        
        ### Response
        Nepali ma dherai ramro desh ho
        <eos>


In [50]:
response = generate_response(
    question="5 ota apple ma 2 ota khada kati baaki rahanxa",
    type="",
    context=""
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        5 ota apple ma 2 ota khada kati baaki rahanxa
        
        ### Input
        
        
        ### Response
        5 ota apple ma 2 ota khada 3 ota baaki rahanxa.
        <eos>


In [56]:
response = generate_response(
    question="mt everest ko height kati ho",
    type="",
    context=""
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        mt everest ko height kati ho
        
        ### Input
        
        
        ### Response
        Mt Everest ko height 8848.86 meters ho.
        <eos>


In [65]:
response = generate_response(
    question="store ko name ke ho?",
    type="",
    context="Hamro store Dharan ma xa, hamro store ko name Happy store ho contact no 9812324890 ho"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        store ko name ke ho?
        
        ### Input
        Hamro store Dharan ma xa, hamro store ko name Happy store ho contact no 9812324890 ho
        
        ### Response
        Hamro store ko name Happy store ho.
        <eos>


In [66]:
response = generate_response(
    question="store ko location ke ho ra contact kati ho",
    type="",
    context="Hamro store Dharan ma xa, hamro store ko name Happy store ho contact no 9812324890 ho"
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        store ko location ke ho ra contact kati ho
        
        ### Input
        Hamro store Dharan ma xa, hamro store ko name Happy store ho contact no 9812324890 ho
        
        ### Response
        Hamro store Dharan ma xa, contact no 9812324890 ho.
        <eos>


In [44]:
response = generate_response(
    question="timi ko hau",
    type="RAG",
    context="You are an AI assistant for an Stock Customer support and will explain your purpose."
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        timi ko hau
        
        ### Input
        You are an AI assistant for an Stock Customer support and will explain your purpose.
        
        ### Response
        ma ek Stock Customer support assistant hu.
        <eos>


In [71]:
response = generate_response(
    question="Japan ko popular cities haru list gara",
    type="RAG",
    context=""
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Japan ko popular cities haru list gara
        
        ### Input
        
        
        ### Response
        Japan ko popular cities haru list garna: Tokyo, Osaka, Kyoto, Nagoya, Hiroshima, Sapporo, Fukuoka, Okinawa, Kobe, Yokohama.
        <eos>


In [72]:
response = generate_response(
    question="japan ko capital city tokyo ho vane India ko kaha ho",
    type="RAG",
    context=""
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        japan ko capital city tokyo ho vane India ko kaha ho
        
        ### Input
        
        
        ### Response
        India ko capital city Delhi ho.
        <eos>


In [42]:
response = generate_response(
    question="Tell a short story in Nepali",
    type="instruction",
)
print(response)

<bos>
        Below is an instruction that describes a task paired with an input that provides further context. Write a response that appriately complete the request.
        
        ### Instruction
        Tell a short story in Nepali
        
        ### Input
        None
        
        ### Response
        Ek din ek ghar ma ek mandir ko jana aaye. Usle ghar ko sabai bhanda khusi rakhne ko lagi ek mandir ko jana le mandir ko jana le mandir ko jana le mandir ko jana le mandir ko jana le mandir


## save model

In [43]:
if True: model.push_to_hub_merged("manojbaniya/best_v5_2", tokenizer, save_method = "lora", token = "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")

Unsloth: Saving LoRA adapters. Please wait...


  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/83.1M [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


Saved lora model to https://huggingface.co/manojbaniya/best_v5_2
