In [1]:
import ollama
from pprint import pprint
from pydantic import BaseModel
import pandas as pd
from random import randint
import numpy as np
from tqdm.notebook import tqdm

In [2]:
# Load the product catalogue and swap NaN cells for None in one pass, so that
# missing values can be detected later with a plain `is None` check.
product_df = pd.read_excel("./datasets/startech_first_half.xlsx").replace(np.nan, None)
product_df.head(5)

Unnamed: 0,id,name,price,category,specification
0,6195dfc6-1544-450f-adb0-a7c31c47ddd6,AMD Ryzen 5 5600G Budget Desktop PC,26699,Star PC,Processor: AMD Ryzen 5 5600G Processor with Ra...
1,0ea7d2cb-9d73-4abb-9986-f9c0d44c5c5f,AMD Ryzen 5 5600G Desktop PC,29500,Star PC,Processor: AMD Ryzen 5 5600G Processor with Ra...
2,5aaf0d1a-8304-4849-8c96-ecb9d3bf840b,Intel 12th Gen Core i5-12400 Desktop PC,31200,Star PC,Processor: Intel 12th Gen Core i5-12400 Alder ...
3,9f7f0b17-1422-4288-9dd2-86298d4ba38b,AMD Ryzen 7 5700G Custom Desktop PC,32400,Star PC,Processor: AMD Ryzen 7 5700G Processor with Ra...
4,41c74a5c-5afd-4a30-88b1-412f5609622a,AMD Ryzen 5 8500G Desktop PC,37499,Star PC,Processor: AMD Ryzen 5 8500G Processor with Ra...


In [5]:
# Schema for the structured model reply: a JSON object with a single key,
# `relevant_query`, holding a list of strings. Its JSON schema is passed to
# Ollama as the response format and the reply is validated against it.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose: pydantic copies a model docstring into the generated JSON schema
# as "description", which would change the schema sent to the model.
class Query(BaseModel):
    relevant_query: list[str]  # the generated search queries

In [7]:
def format_product_details(name, price, specification):
    """Build the plain-text product description used as the model prompt.

    Args:
        name: Product name.
        price: Product price; rendered as-is, followed by " taka".
        specification: Free-text specification. None, NaN, or a blank string
            all mean "no specification".

    Returns:
        A "Name: ..." line and a "Price: ... taka" line, followed by the
        stripped specification on the next line(s) when one is present.
    """
    header = f"Name: {name}\nPrice: {price} taka"

    # NaN is the only float that is not equal to itself. Checking for it here
    # keeps the function safe if a NaN cell reaches it un-replaced.
    is_missing = specification is None or (
        isinstance(specification, float) and specification != specification
    )
    if is_missing:
        return header

    # str() tolerates non-string cells (e.g. a purely numeric specification).
    spec_text = str(specification).strip()

    # A blank specification adds nothing; skip it so the prompt does not end
    # with a dangling newline.
    return f"{header}\n{spec_text}" if spec_text else header


def generate_response(model, system_prompt, prompt):
    """Ask an Ollama model for search queries and parse the reply into a `Query`.

    Args:
        model: Name of the Ollama model to run.
        system_prompt: Instruction text passed as the system prompt.
        prompt: User prompt (in this notebook, the formatted product details).

    Returns:
        The validated `Query`, or None when the reply cannot be parsed into
        the `Query` schema (the error is printed).
    """
    response = ollama.generate(
        model=model,
        system=system_prompt,
        prompt=prompt,
        # Request a reply that follows the Query JSON schema.
        format=Query.model_json_schema(),
        options={
            "temperature": 0.65,
            # top_k is an integer option (a count of candidate tokens); the
            # previous value 6.5 was not a valid setting.
            "top_k": 6,
            "top_p": 0.79,
            "num_predict": 2048,
        },
    )

    try:
        return Query.model_validate_json(response.response)
    except Exception as e:
        # Best-effort: a malformed reply is reported and skipped so a batch
        # run can continue with the next product.
        print(f"Error: {e}")
        return None

In [21]:
# Ollama model tag used for every generation request.
model = "gemma3"

# Instruction given to the model, written as adjacent string literals (one
# per sentence) that Python joins into a single string.
system_prompt = (
    "Based on the given product specification of a tech product, generate exactly 7 relevant product search Bangla only queries that a user might enter on an e-commerce platform search bar. "
    "Must not include english queries or words at all. "
    "Out of these 7 relevant queries, 5 of them must not include directly the product, or specific product names, model numbers, brands or detailed technical attributes. "
    "Instead, use generic adjectives, context and phrases that describe the product type, specifications, key features and user intent. "
    "Try to find relevant queries that a user might search to reach to this product. "
    "Use product related synonyms occasionally in relevant queries. "
    "And the rest 2 relevant queries must include the brand or model names and the intent of the product based on given product description. "
    "Do not include any country-specific or regional terms."
)

In [26]:
# Spot-check the prompt on one randomly chosen product.
# randint() includes BOTH endpoints, so the upper bound must be the last valid
# row position; passing shape[0] itself could select an index one past the
# end of the table and raise a KeyError.
idx = randint(0, product_df.shape[0] - 1)

name = product_df["name"][idx]
price = product_df["price"][idx]
specification = product_df["specification"][idx]
product_details = format_product_details(name, price, specification)

response = generate_response(model, system_prompt, product_details)
# generate_response returns None when the model reply could not be parsed.
if response is not None:
    print(f"Product details: {product_details}")
    print(f"Relevent query: {response.relevant_query}")

Product details: Name: MSI PRO MP243X 23.8" 100Hz IPS FHD Monitor
Price: 16600 taka
Display Size: 23.8"
Display Type: LED
Panel Type: IPS
Resolution: Full HD (1920 x 1080)
Pixel pitch(MM): 0.2745(H) x 0.2745(V)
Display Surface: Anti-glare
Aspect Ratio: 16:9
Viewing Angle: 178°(H) / 178°(V)
Brightness: 300 cd/m2
Contrast Ratio: 1000:1
Refresh Rate: 100Hz
Color Support: 16.7M
Color Gamut: COLOR BIT: 8 bits (6 bits + FRC). sRGB: 119% (CIE 1976)
Response Time: 1ms (MPRT) / 4ms (GTG)
Curvature: Flat
Flicker Free: Yes
Low Blue Light: Yes
Free Sync Support: Yes
Speaker (Built In): Yes
Speaker Details: Built-in speakers allow you to listen clearly without connecting external speakers
Speaker Output: 2x 3W
Microphone (Built In): N/A
HDMI: 1x HDMI (1.4b)
Audio Jack: 1x Headphone-out
Tilt: -5° ~ 20°
Vesa Wall Mount: 75 x 75 mm
Security Locker: Yes
Color: Black
Dimension: 541.93 x 182.16 x 421.79 mm. (21.34 x 7.17 x 16.61 inch)
Weight: 2.95 kg
Type: External Adaptor (12V 2.5A)
Voltage: 100~240V, 5

In [27]:
# Accumulates one output row per generated query, keyed by the source product id.
queries = {
    "id": [],
    "relevant_query": [],
}

# Half-open range [start, end) of product rows to process in this run.
start = 0
end = 5

# Clamp the range to the table size so an `end` past the last row stops
# cleanly instead of raising a KeyError partway through the run.
for idx in tqdm(range(start, min(end, len(product_df)))):
    # Named `product_id` rather than `id` so the built-in id() is not
    # shadowed for the rest of the kernel session.
    product_id = product_df["id"][idx]
    name = product_df["name"][idx]
    price = product_df["price"][idx]
    specification = product_df["specification"][idx]
    product_details = format_product_details(name, price, specification)

    response = generate_response(model, system_prompt, product_details)

    # A None response means the reply could not be parsed; skip that product.
    if response is not None:
        relevant_query = response.relevant_query
        # Repeat the product id once per query so both columns stay aligned.
        queries["id"].extend([product_id] * len(relevant_query))
        queries["relevant_query"].extend(relevant_query)

    # Plain-text checkpoint every 100 rows, in addition to the progress bar.
    if (idx + 1) % 100 == 0:
        print(f"Done: {idx + 1}")

  0%|          | 0/5 [00:00<?, ?it/s]

In [30]:
# Turn the accumulated column lists ("id", "relevant_query") into a table
# and preview the first rows.
query_df = pd.DataFrame(queries)
query_df.head(10)

Unnamed: 0,id,relevant_query
0,6195dfc6-1544-450f-adb0-a7c31c47ddd6,নতুন কম্পিউটার দরকার
1,6195dfc6-1544-450f-adb0-a7c31c47ddd6,কম্পিউটার কিট দাম
2,6195dfc6-1544-450f-adb0-a7c31c47ddd6,গেম খেলার জন্য কম্পিউটার
3,6195dfc6-1544-450f-adb0-a7c31c47ddd6,AMD Ryzen 5 কম্পিউটার
4,6195dfc6-1544-450f-adb0-a7c31c47ddd6,কম্পিউটার কিট রিভিউ
5,6195dfc6-1544-450f-adb0-a7c31c47ddd6,5600G প্রসেসর দিয়ে কম্পিউটার
6,6195dfc6-1544-450f-adb0-a7c31c47ddd6,MaxGreen কেস এর দাম
7,0ea7d2cb-9d73-4abb-9986-f9c0d44c5c5f,নতুন কম্পিউটার দরকার
8,0ea7d2cb-9d73-4abb-9986-f9c0d44c5c5f,গেমিং এর জন্য ভালো প্রসেসর
9,0ea7d2cb-9d73-4abb-9986-f9c0d44c5c5f,কম বাজেটে ভালো কম্পিউটার


In [43]:
# Save the generated queries; the file name records the start/end row range
# that was processed.
query_df.to_excel(
    f"queries_{start}_{end}.xlsx",
    index=False,
    engine="xlsxwriter",
)