## Installing Unsloth and Flash Attention

In [1]:
# installing unsloth

%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

# Install Flash Attention 2 for softcapping support
import torch
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"

## Import Necessary Library

In [2]:
from unsloth import FastLanguageModel
import torch

# config
max_seq_length = 2048
dtype = None
load_in_4bit = True

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


## Load Fine Tuned Large Language Gemma Model from HuggingFace Hub

In [3]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "manojbaniya/roman-nepali-gemma-1500step",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "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", # for accessing gated gemma model from huggingface hub
)

==((====))==  Unsloth 2025.3.2: Fast Gemma2 patching. Transformers: 4.48.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/6.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/46.4k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/216M [00:00<?, ?B/s]

Unsloth 2025.3.2 patched 42 layers with 42 QKV layers, 42 O layers and 42 MLP layers.


## Inference Mode

In [4]:
FastLanguageModel.for_inference(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Gemma2ForCausalLM(
      (model): Gemma2Model(
        (embed_tokens): Embedding(256000, 3584, padding_idx=0)
        (layers): ModuleList(
          (0-41): 42 x Gemma2DecoderLayer(
            (self_attn): Gemma2Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3584, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3584, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora

## Prompt Template

In [5]:
prompt_template_rag = """<start_of_turn>system
You are an AI assistant who responds to user instructions. Use the context information to answer if it is given, and respond in Roman Nepali.<end_of_turn>
<start_of_turn>user
{question}
Context: {context}<end_of_turn>
<start_of_turn>model
"""
prompt_template_instruction = """<start_of_turn>system
You are helpful AI Assistant follow the user instruction and respond in Roman Nepali language.<end_of_turn>
<start_of_turn>user
{question}<end_of_turn>
<start_of_turn>model
"""

## Response Generation

In [6]:
def generate_response(question, type="instruction", context=None):
  if type == "instruction":
    prompt_template = prompt_template_instruction
  elif type == "RAG":
    prompt_template = prompt_template_rag
  inputs = prompt_template.format(question=question, context=context)

  inputs = tokenizer([inputs], return_tensors="pt").to("cuda")
  outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True, top_k=10, do_sample=True, temperature=0.8)
  response = tokenizer.batch_decode(outputs)
  return response[0]

## Test

In [7]:
response = generate_response(
    question="Hi",
    type="instruction",
)
print(response)

<bos><start_of_turn>system
You are helpful AI Assistant follow the user instruction and respond in Roman Nepali language.<end_of_turn>
<start_of_turn>user
Hi<end_of_turn>
<start_of_turn>model
Namaste! Aba tapai ko shopping ma kasari madat garna sakchhu?<eos>


## Stream Token Generation (Streaming)

In [8]:
prompt_template_rag = """<start_of_turn>system
You are an AI assistant who responds to user instructions. Use the context information to answer if it is given, and respond in Roman Nepali.<end_of_turn>
<start_of_turn>user
{question}
Context: {context}<end_of_turn>
<start_of_turn>model
"""
prompt_template_instruction = """<start_of_turn>system
You are helpful AI Assistant follow the user instruction and respond in Roman Nepali language.<end_of_turn>
<start_of_turn>user
{question}<end_of_turn>
<start_of_turn>model
"""

In [9]:
def get_inputs(question, instruct_type="RAG", context=None):
  if instruct_type == "instruction":
    prompt = prompt_template_instruction
  elif instruct_type == "RAG":
    prompt = prompt_template_rag
  inputs = prompt.format(question=question, context=context, response=response)
  inputs = tokenizer(inputs, return_tensors="pt").to("cuda")
  return inputs

In [10]:
get_inputs("Hi", instruct_type="instruction")["input_ids"]

tensor([[     2,    106,   9020,    108,   2045,    708,  10055,  16481,  18145,
           1611,    573,   2425,  14239,    578,   8702,    575,   9783, 205184,
           5255, 235265,    107,    108,    106,   1645,    108,   2151,    107,
            108,    106,   2516,    108]], device='cuda:0')

In [11]:
def generate_tokens(question: str, context: str, instruct_type="RAG", max_new_tokens=100):
    inputs = get_inputs(question, instruct_type, context)
    is_done = False
    tokens = []

    while not is_done:
        with torch.no_grad():
            outputs = model(inputs["input_ids"])
            logits = outputs.logits[:, -1, :]
            preds = logits.softmax(dim=1)
            label = preds.argmax(dim=-1)

            # Decode and yield the token
            decoded_token = tokenizer.decode(label)
            print(decoded_token)
            # yield decoded_token  # Yield the decoded token one by one
            # Append token to the list
            tokens.append(label.item())

            # Update the input_ids for the next iteration
            inputs["input_ids"] = torch.cat([inputs["input_ids"], label.unsqueeze(-1)], dim=-1)

            # Check if generation is done
            if label == tokenizer.eos_token_id or len(tokens) >= max_new_tokens:
                is_done = True

In [12]:
generate_tokens("Hi", "instruction")

Namaste
!
 Aba
 ta
pai
 ko
 lagi
 ke
 mad
at
 gar
na
 sak
ch
hu
?
<eos>


In [58]:
def generate_tokens_sample(question: str, context: str, instruct_type="RAG", max_new_tokens=100, top_k=10):
    inputs = get_inputs(question, instruct_type, context)
    is_done = False
    tokens = []

    while not is_done:
        with torch.no_grad():
            outputs = model(inputs["input_ids"])
            logits = outputs.logits[:, -1, :]
            preds = logits.softmax(dim=-1)

            # Apply Top-k sampling
            top_k_probs, top_k_indices = torch.topk(preds, top_k, dim=-1)  # Get top 10 probabilities & indices
            sampled_index = torch.multinomial(top_k_probs, 1)  # Sample from top 10
            label = top_k_indices.gather(-1, sampled_index)  # Get actual token index

            # Decode and yield the token
            decoded_token = tokenizer.decode(label.item()) # Get the integer value before decoding
            # print(decoded_token)
            yield decoded_token  # Yield the decoded token one by one

            # Append token to the list
            tokens.append(label.item())

            # Update the input_ids for the next iteration
            # Ensure label has the correct dimensions before concatenating
            inputs["input_ids"] = torch.cat([inputs["input_ids"], label], dim=-1)

            # Check if generation is done
            if label.item() == tokenizer.eos_token_id or len(tokens) >= max_new_tokens:
                is_done = True

In [61]:
generate_tokens_sample("Hi", context="", instruct_type="instruction")

<generator object generate_tokens_sample at 0x7c90e0163b60>

## Retrieve from vectorostore

In [19]:
!pip install "pinecone[grpc]"

Collecting pinecone[grpc]
  Downloading pinecone-6.0.1-py3-none-any.whl.metadata (8.8 kB)
Collecting lz4>=3.1.3 (from pinecone[grpc])
  Downloading lz4-4.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone[grpc])
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Collecting protobuf<6.0,>=5.29 (from pinecone[grpc])
  Downloading protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Collecting protoc-gen-openapiv2<0.0.2,>=0.0.1 (from pinecone[grpc])
  Downloading protoc_gen_openapiv2-0.0.1-py3-none-any.whl.metadata (1.5 kB)
Downloading lz4-4.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pinecone_plugin_interface-0.0.7-py3-none-any.whl (6.2 kB)
Downloading protobuf-5.29.3-cp38-abi3-manylin

In [37]:
from google.colab import userdata

pinecone_api_key = userdata.get('PINECONE_API_KEY')
jina_api_key = userdata.get('JINA_API_KEY')
index_name = userdata.get('PINECONE_INDEX_NAME')

In [38]:
jina_api_key

'jina_55ac40e48af84541b9005943851a4b12UUbnwlPPgHCQzpF-VuXK16iWTFfe'

In [39]:
import requests
import json

url = 'https://api.jina.ai/v1/embeddings'

headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {jina_api_key}'
}

data = {
"model": "jina-clip-v2",
"dimensions": 1024,
"normalized": True,
"embedding_type": "float",
"input": [
{
"text": "A beautiful sunset over the beach"
},
]
}

response = requests.post(url, headers=headers, json=data)
print(response.json())


{'model': 'jina-clip-v2', 'object': 'list', 'usage': {'total_tokens': 9, 'prompt_tokens': 9}, 'data': [{'object': 'embedding', 'index': 0, 'embedding': [0.033264488, 0.03715714, -0.09023877, -0.13199632, -0.058832143, -0.02020051, -0.104511835, 0.07620163, -0.10362714, -0.17646694, 0.012592142, -0.09985245, -0.124446936, 0.011692704, 0.02601, -0.046269488, 0.16278367, 0.06747265, 0.035800613, -0.070067756, -0.025729846, 0.06275429, 0.036449388, 0.018799745, -0.04178704, 0.11512816, 0.03574163, -0.012474184, -0.042819183, 0.05358296, 0.055352345, 0.005765255, 0.1297551, -0.14166898, -0.025007347, 0.024771428, -0.077676125, 0.13541713, -0.14992613, 0.10144489, -0.030610407, -0.10757878, -0.097670205, 0.0077410713, -0.09784714, -0.023916224, 0.026201684, 0.055263877, -0.1277498, -0.028723061, -0.0531701, -0.03173102, 0.123267345, 0.031141223, 0.08068408, -0.00827926, 0.038454693, 0.01592449, -0.11070469, -0.0073798215, -0.038366225, 0.076909386, -0.039663773, 0.08115592, 0.022441734, 0.04

In [40]:
import requests
url = 'https://api.jina.ai/v1/embeddings'

def get_embeddings(texts):
  # returns embeddings given list of texts
  headers = {
      'Content-Type': 'application/json',
      'Authorization': f'Bearer {jina_api_key}'
  }
  data = {
      'input': texts,
      'model': 'jina-clip-v2'
  }
  response = requests.post(url, headers=headers, json=data)
  return response.json()

In [41]:
default_context = """
Hamro store ko name All Electronics store ho hamro ma sabai prakar ko electronics saman available xa. Hamro store ko location Dharan ho.
Store ko contact no 9844644186 ho. Email allelectronicsstore@gmail.com.

Discount Offer:
aile hamro store ma kunei pani saman ma discount offer available xaina.

Return Policy:
Saman kineko 1 week vitra return garna milxa. Tara return guideline follow garnu parne hunxa ra same condition ma hunu parne xa.Payment Methods:

Hami online payment accept garxau. Esewa, Khalti wa kunei pani mobile banking baata pay accept garxau. Hami sanga Cash on Delivery (COD) chai available xaina aile.Products Available:

1. name: 'I Phone 11 Pro' category: mobile price: Rs. 1,30,000 stock: True RAM: 16GB storage: 64GB description: "I Phone 11 Pro features a triple-camera system and a powerful A13 Bionic chip."
2. name: 'Samsung Galaxy S21 Ultra' category: mobile price: Rs. 1,40,000 stock: True RAM: 16GB storage: 128GB description: "The Galaxy S21 Ultra offers a stunning display and a versatile camera system."
3. name: 'Macbook Pro' price: Rs. 4,10,000 category: laptop stock: True description: The MacBook Pro is a high-performance laptop with a Retina display and M1 chip.
4. name: 'Redmi Note 9' Pro price: Rs. 30,000 category: mobile stock: True RAM: 8GB storage: 128GB description: "The Redmi Note 9 Pro features a quad-camera setup and a large battery."
5. name: 'Sony Bravia TV' price: Rs. 80,000 category: television stock: False  description: "The Sony Bravia TV offers stunning 4K resolution and smart TV capabilities."
6. name: 'Rolex Watch' price: Rs. 10,00,000 category: watch stock: False description: "The Rolex Watch is a luxury timepiece known for its precision and craftsmanship."
7. name: 'Nikon D3500 Camera'price: Rs. 60,000 category: camera stock: True description: "The Nikon D3500 is a beginner-friendly DSLR camera with excellent image quality."
8. name: 'Casio Calculator' price: Rs. 1500 category: calculator stock: True description: "Casio Calculator best for Engineering students but everyone can use it.Store Information:
"""

In [42]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=pinecone_api_key)

index = pc.Index(index_name)

In [44]:
# get_embeddings("store ma k k available xa")

In [50]:
def retrieve_context(question: str):
  try:
    embedding_response = get_embeddings([question])
    embedding = embedding_response["data"][0]["embedding"]
    results = index.query(
        namespace="ns1",
        vector = embedding,
        top_k = 5,
        include_metadata = True,
        include_values = False
    )
    # print(results)
    context = ""
    for ctx in results["matches"]:
      context += ctx["metadata"]["text"] +"\n"
    return context
  except Exception as e:
    return default_context

In [52]:
print(retrieve_context("kun mobile haru store ma available xa"))

{'matches': [{'id': 'doc4',
              'metadata': {'text': 'Discount Offer:\n'
                                   'aile hamro store ma kunei pani saman ma '
                                   'discount offer available xaina.'},
              'score': 0.555275798,
              'values': []},
             {'id': 'doc1',
              'metadata': {'text': 'Store Information:\n'
                                   'Hamro store ko name All Electronics store '
                                   'ho hamro ma sabai prakar ko electronics '
                                   'saman available xa. Hamro store ko '
                                   'location Dharan ho.\n'
                                   'Store ko contact no 9844644186 ho. Email '
                                   'allelectronicsstore@gmail.com.'},
              'score': 0.484402359,
              'values': []},
             {'id': 'doc2',
              'metadata': {'text': 'Payment Methods:\n'
                             

## Serving

In [53]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


In [63]:
from pyngrok import ngrok

ngrok.set_auth_token("2riLb0UtZa6Vf4YtDV7fdJExpKu_5njCcgcsSrLJnKfw8gXP6")

# Start the Flask server
public_url = ngrok.connect(6000)
print("Public URL:", public_url)

Public URL: NgrokTunnel: "https://ef11-34-125-111-148.ngrok-free.app" -> "http://localhost:6000"


## Implement RAG

In [55]:
!pip install flask_cors

Collecting flask_cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Downloading flask_cors-5.0.1-py3-none-any.whl (11 kB)
Installing collected packages: flask_cors
Successfully installed flask_cors-5.0.1


In [64]:
from flask import Flask, request, jsonify, Response
from flask_cors import CORS

app = Flask(__name__)
CORS(app)

@app.route('/')
def home():
    return "Hello, this is your server running on Google Colab!"

@app.route('/chat', methods=['POST'])
def chat():
    # Get the user's query from the request
    user_query = request.json.get('question', '')

    if not user_query:
        # Return a single JSON response if query is missing
        return jsonify({"response": "Please provide a valid query."})

    def generate_response():
        # Call the generator function and yield tokens as they are generated
        ecommerce_context = retrieve_context(user_query)
        try:
          for token in generate_tokens_sample(user_query, context=ecommerce_context, instruct_type="RAG"):
            # print(f"Sent: {token}")
            yield f"data: {token}\n\n"  # Format for server-sent events (SSE)
        except Exception as e:
          print(f"Error: {e}")
          return jsonify({"response": f"Error: {e}"})

    # Return a streaming response with the correct content type for SSE
    return Response(generate_response(), content_type='text/event-stream')


@app.route("/chat-instruct", methods=["POST"])
def chat_instruct():
  # get the user's query from the request
  user_query = request.json.get("question", "")

  if not user_query:
    # Return a single JSON response if query is missing
    return jsonify({"response": "Please provide a valid query."})

  def generate_response_instruct():
        try:
          for token in generate_tokens_sample(user_query, context="", instruct_type="instruction", max_new_tokens=200):
            # print(f"Sent: {token}")
            yield f"data: {token}\n\n"  # Format for server-sent events (SSE)
        except Exception as e:
          print(f"Error: {e}")
          return jsonify({"response": f"Error: {e}"})

  # Return a streaming response with the correct content type for SSE
  return Response(generate_response_instruct(), content_type='text/event-stream')

In [None]:
app.run(host='localhost', port=6000)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://localhost:6000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:03:12] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:03:13] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:03:42] "OPTIONS /chat HTTP/1.1" 200 -


{'matches': [{'id': 'doc1',
              'metadata': {'text': 'Store Information:\n'
                                   'Hamro store ko name All Electronics store '
                                   'ho hamro ma sabai prakar ko electronics '
                                   'saman available xa. Hamro store ko '
                                   'location Dharan ho.\n'
                                   'Store ko contact no 9844644186 ho. Email '
                                   'allelectronicsstore@gmail.com.'},
              'score': 0.356707811,
              'values': []},
             {'id': 'doc2',
              'metadata': {'text': 'Payment Methods:\n'
                                   'Hami online payment accept garxau. Esewa, '
                                   'Khalti wa kunei pani mobile banking baata '
                                   'pay accept garxau. Hami sanga Cash on '
                                   'Delivery (COD) chai available xaina aile.'},
         

INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:03:45] "POST /chat HTTP/1.1" 200 -


Haj
ur
,
 ma
 AI
 assistant
 hu
.
 Ta
pai
 s
anga
 ko
hi
 sa
hay
og
 cha
 bh
ane
 b
han
in
uh
os
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:04:51] "OPTIONS /chat HTTP/1.1" 200 -


{'matches': [{'id': 'doc8',
              'metadata': {'text': "mobile: name: 'Redmi Note 9' Pro price: "
                                   'Rs. 30,000 category: mobile stock: True '
                                   'RAM: 8GB storage: 128GB description: "The '
                                   'Redmi Note 9 Pro features a quad-camera '
                                   'setup and a large battery.'},
              'score': 0.514705658,
              'values': []},
             {'id': 'doc5',
              'metadata': {'text': 'Products Available:\n'
                                   "1. name: 'I Phone 11 Pro' category: mobile "
                                   'price: Rs. 1,30,000 stock: True RAM: 16GB '
                                   'storage: 64GB description: "I Phone 11 Pro '
                                   'features a triple-camera system and a '
                                   'powerful A13 Bionic chip."\n'
                                   "2. name: 'Samsung Ga

INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:04:55] "POST /chat HTTP/1.1" 200 -


Red
mi
 mobile
 ham
ile
 stock
 ma
 xa
,
 ra
 is
ko
 price
 Rs
.
 
3
0
,
0
0
0
 ho
.


<end_of_turn>


<start_of_turn>
model


Ta
pai
 ko
 mobile
 ko
 model
 available
 xa
 ki
 x
aina
?


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:05:47] "OPTIONS /chat HTTP/1.1" 200 -


{'matches': [{'id': 'doc8',
              'metadata': {'text': "mobile: name: 'Redmi Note 9' Pro price: "
                                   'Rs. 30,000 category: mobile stock: True '
                                   'RAM: 8GB storage: 128GB description: "The '
                                   'Redmi Note 9 Pro features a quad-camera '
                                   'setup and a large battery.'},
              'score': 0.466735244,
              'values': []},
             {'id': 'doc5',
              'metadata': {'text': 'Products Available:\n'
                                   "1. name: 'I Phone 11 Pro' category: mobile "
                                   'price: Rs. 1,30,000 stock: True RAM: 16GB '
                                   'storage: 64GB description: "I Phone 11 Pro '
                                   'features a triple-camera system and a '
                                   'powerful A13 Bionic chip."\n'
                                   "2. name: 'Samsung Ga

INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:05:50] "POST /chat HTTP/1.1" 200 -


H
ami
 s
anga
 Redmi
 Note
 
9
 Pro
 available
 xa
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:06:04] "OPTIONS /chat HTTP/1.1" 200 -


{'matches': [{'id': 'doc8',
              'metadata': {'text': "mobile: name: 'Redmi Note 9' Pro price: "
                                   'Rs. 30,000 category: mobile stock: True '
                                   'RAM: 8GB storage: 128GB description: "The '
                                   'Redmi Note 9 Pro features a quad-camera '
                                   'setup and a large battery.'},
              'score': 0.559125721,
              'values': []},
             {'id': 'doc5',
              'metadata': {'text': 'Products Available:\n'
                                   "1. name: 'I Phone 11 Pro' category: mobile "
                                   'price: Rs. 1,30,000 stock: True RAM: 16GB '
                                   'storage: 64GB description: "I Phone 11 Pro '
                                   'features a triple-camera system and a '
                                   'powerful A13 Bionic chip."\n'
                                   "2. name: 'Samsung Ga

INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:06:07] "POST /chat HTTP/1.1" 200 -


Red
mi
 Note
 
9
 ko
 price
 Rs
.
 
3
0
,
0
0
0
 ho
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:06:31] "OPTIONS /chat HTTP/1.1" 200 -


{'matches': [{'id': 'doc9',
              'metadata': {'text': "laptop: name: 'Macbook Pro' price: Rs. "
                                   '4,10,000 category: laptop stock: True '
                                   'description: The MacBook Pro is a '
                                   'high-performance laptop with a Retina '
                                   'display and M1 chip.'},
              'score': 0.559643149,
              'values': []},
             {'id': 'doc5',
              'metadata': {'text': 'Products Available:\n'
                                   "1. name: 'I Phone 11 Pro' category: mobile "
                                   'price: Rs. 1,30,000 stock: True RAM: 16GB '
                                   'storage: 64GB description: "I Phone 11 Pro '
                                   'features a triple-camera system and a '
                                   'powerful A13 Bionic chip."\n'
                                   "2. name: 'Samsung Galaxy S21 Ultra' "
 

INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:06:34] "POST /chat HTTP/1.1" 200 -


Mac
book
 Pro
 ko
 name
:
 '
Mac
book
 Pro
'
 ho
.
 Price
:
 Rs
.
 
4
,
1
0
,
0
0
0
 ho
.
 Category
:
 laptop
 ho
,
 ra
 stock
 available
 cha
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:07:25] "OPTIONS /chat HTTP/1.1" 200 -


{'matches': [{'id': 'doc9',
              'metadata': {'text': "laptop: name: 'Macbook Pro' price: Rs. "
                                   '4,10,000 category: laptop stock: True '
                                   'description: The MacBook Pro is a '
                                   'high-performance laptop with a Retina '
                                   'display and M1 chip.'},
              'score': 0.421252847,
              'values': []},
             {'id': 'doc1',
              'metadata': {'text': 'Store Information:\n'
                                   'Hamro store ko name All Electronics store '
                                   'ho hamro ma sabai prakar ko electronics '
                                   'saman available xa. Hamro store ko '
                                   'location Dharan ho.\n'
                                   'Store ko contact no 9844644186 ho. Email '
                                   'allelectronicsstore@gmail.com.'},
              'score'

INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:07:27] "POST /chat HTTP/1.1" 200 -


Mac
book
 ma
 M
1
 chip
 xa
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:07:48] "OPTIONS /chat HTTP/1.1" 200 -


{'matches': [{'id': 'doc7',
              'metadata': {'text': "mobile: name: 'Samsung Galaxy S21 Ultra' "
                                   'category: mobile price: Rs. 1,40,000 '
                                   'stock: True RAM: 16GB storage: 128GB '
                                   'description: "The Galaxy S21 Ultra offers '
                                   'a stunning display and a versatile camera '
                                   'system.'},
              'score': 0.394792467,
              'values': []},
             {'id': 'doc8',
              'metadata': {'text': "mobile: name: 'Redmi Note 9' Pro price: "
                                   'Rs. 30,000 category: mobile stock: True '
                                   'RAM: 8GB storage: 128GB description: "The '
                                   'Redmi Note 9 Pro features a quad-camera '
                                   'setup and a large battery.'},
              'score': 0.342945069,
              'values': []}

INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:07:51] "POST /chat HTTP/1.1" 200 -


Samsung
 Galaxy
 S
2
1
 Ultra
 ma
 
1
6
GB
 RAM
 ra
 
1
2
8
GB
 storage
 xa
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:08:26] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:08:27] "POST /chat-instruct HTTP/1.1" 200 -


Namaste
!
 Aba
 k
 sama
yd
ar
 ma
 mad
at
 gar
na
 mil
xa
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:08:57] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:08:58] "POST /chat-instruct HTTP/1.1" 200 -


Sher
 Bahadur
 De
uba
 ho
,
 Nepal
 ko
 
4
2
nd
 prime
 minister
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:09:13] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:09:14] "POST /chat-instruct HTTP/1.1" 200 -


Nep
ali
 ko
 popular
 person
 Sita
 Ram
 Yadav
,
 Ram
krishna
 Yadav
,
 ra
 Ram
 Bahadur
 Bom
jan
 j
asto
 manche
 har
u
 ho
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:09:33] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:09:34] "POST /chat-instruct HTTP/1.1" 200 -


India
 ko
 prime
 minister
 Narendra
 Modi
 ho
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:09:46] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:09:47] "POST /chat-instruct HTTP/1.1" 200 -


8
8
4
8
.
8
6
 meter
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:09:59] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:10:00] "POST /chat-instruct HTTP/1.1" 200 -


8
8
4
8
.
8
6
 meters
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:10:18] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:10:19] "POST /chat-instruct HTTP/1.1" 200 -


Mount
 Everest
 (
8
,
8
4
8
 m
),
 K
2
 (
8
,
6
1
1
 m
),
 Kan
chen


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:10:35] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:10:36] "POST /chat-instruct HTTP/1.1" 200 -


number
1
 =
 
1
0


number
2
 =
 
5


total
 =
 number
1
 +
 number
2


print
(
total
)
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:10:54] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:10:55] "POST /chat-instruct HTTP/1.1" 200 -


1
.
 Fresh
 fruits
 ra
 vegetables
 kh
ane
.
 
2
.
 Junk
 food
 kam
 kh
ane
 ra
 healthy
 snacks
 kha
.
 
3
.
 Healthy
 meals
 balance
 gar
ne
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:11:15] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:11:16] "POST /chat-instruct HTTP/1.1" 200 -


'
J
ind
agi
 maa
 success
 pah
uch
na
 cha
hi
yo
 ki
 na
,
 af
no
 sab
ai
 potential
 nik
al
era
 mat
ra
 hi
 af
no
 ko
 lagi
 proud
 h
una
 sak
ch
hau
.'
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:11:40] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:11:41] "POST /chat-instruct HTTP/1.1" 200 -


E
uta
 b
han
era
,
 ke
 thi
yo
?
 J
asto
 ki
 thi
yo
?
 T
yo
 thi
yo
!
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:12:01] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:12:02] "POST /chat-instruct HTTP/1.1" 200 -


al
xi
 lai
 kh
ana
 kh
ana
,
 pa
ani
 pi
una
,
 rest
 garn
u
 hos
.
 doctor
 lai
 her
ne
 ani
 medicines
 lin
u
 ram
ro
 ho
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:13:19] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:13:20] "POST /chat-instruct HTTP/1.1" 200 -


Ram
ro
 habit
 ma
 exercise
 routine
,
 healthy
 diet
,
 positive
 thinking
,
 time
 management
,
 gratitude
 practice
,
 communication
 skills
 improve
 gar
na
,
 ani
 hobbies
 pursue
 gar
na
 j
asto
 par
xa
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:13:41] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:13:42] "POST /chat-instruct HTTP/1.1" 200 -


Ta
pai
 
2
0
2
3
 ko
 latest
 model
 ma
 cha
.
 Ma
 ek
 efficient
 assistant
 ho
,
 jas
le
 aba
ile
ko
 tasks
 lai
 quickly
 handle
 gar
na
 sak
ch
hu
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:14:03] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:14:03] "POST /chat-instruct HTTP/1.1" 200 -


Google
 gem
ma
 model
 ek
 AI
 language
 model
 ho
,
 jas
ma
 natural
 language
 processing
 ra
 conversation
 management
 gar
na
 sak
ch
ha
.
 Google
 gem
ma
 lai
 language
 translation
,
 chat
bots
,
 ra
 voice
 assistants
 ma
 use
 gar
ne
 ho
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:14:29] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:14:30] "POST /chat-instruct HTTP/1.1" 200 -


deep
seek
 ai
 ko
 b
han
et
 ho
,
 jun
 machine
 learning
 ra
 deep
 learning
 technologies
 use
 gar
cha
.
 Yo
 tools
 le
 image
 analysis
,
 object
 recognition
,
 ra
 pattern
 recognition
 ma
 apply
 hud
ai
 ch
ha
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:15:08] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:15:09] "POST /chat-instruct HTTP/1.1" 200 -


AI
 b
han
eko
 ar
tifical
 intelligence
 ho
,
 jas
ma
 machines
 le
 human
 intelligence
 j
ast
ai
 tasks
 perform
 garn
u
 hun
cha
.
 Yo
 le
 learning
,
 problem
-
solving
,
 ra
 decision
-
making
 gar
cha
,
 ra
 humans
 ko
 lagi
 tasks
 lai
 simple
 bana
unuh
un
cha
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:15:54] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:15:55] "POST /chat-instruct HTTP/1.1" 200 -


ll
m
 fine
 tuning
 van
eko
 le
 specific
 domain
 lai
 train
 gar
ne
 method
 ho
.
 Yo
 le
 domain
-
specific
 datasets
 use
 gar
era
 models
 lai
 improve
 gar
cha
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:16:29] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:16:30] "POST /chat-instruct HTTP/1.1" 200 -


yo
 issue
 ko
 reason
 her
era
 documentation
 her
n
uh
os
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:16:41] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:16:42] "POST /chat-instruct HTTP/1.1" 200 -


lion
el
 messi
 ko
 messi
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:17:22] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:17:23] "POST /chat-instruct HTTP/1.1" 200 -


Ma
 indoor
 games
 k
hel
na
 man
par
ch
hu
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:18:15] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:18:16] "POST /chat-instruct HTTP/1.1" 200 -


KS
hi
tiz
 Gaj
urel
 Nepali
 singer
 ho
.
 Uni
har
uko
 songs
 har
u
 '
Mal
ai
 Tim
i
 Lai
 Bho
ley
'
 ra
 '
Ma
ile
 Tim
i
 Lai
 Bho
ley
'
 ch
han
.
<eos>


INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:19:28] "OPTIONS /chat-instruct HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2025 14:19:29] "POST /chat-instruct HTTP/1.1" 200 -


K
shi
tiz
 g
aj
urel
,
 t
yo
 time
 lai
 focus
 garn
u
,
 positive
 people
 lai
 surround
 garn
u
,
 ra
 professional
 help
 seek
 garn
u
 ho
.
<eos>
