In [1]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
embedding = model.encode("Hello world")
print(len(embedding))  # Output: 768


768


In [None]:
import httpx

SOLR_URL = "http://10.10.10.55:8983/solr/ret_grocery/select"


def get_categories_nearby(payload):
    params = {"q": "*:*", "fq": [f"{{!geofilt sfield=provider_geo pt={payload['lat']},{payload['lon']} d={payload['radius_km']}}}"], "facet": "true", "facet.field": "item_category_id", "facet.sort": "count", "facet.mincount": 1, "facet.limit": -1, "rows": 0, "wt": "json"}
    with httpx.Client() as client:
        response = client.post(SOLR_URL, data=params)  # No need for custom headers
        response.raise_for_status()
        data = response.json()
    facet_list = data.get("facet_counts", {}).get("facet_fields", {}).get("item_category_id", [])
    categories = [{"item_category_id": facet_list[i], "count": facet_list[i + 1]} for i in range(0, len(facet_list), 2)]
    print(categories)


get_categories_nearby({"lat": 13.00000, "lon": 77.00000, "radius_km": 1000})

[{'item_category_id': 'Fruits and Vegetables', 'count': 38853}, {'item_category_id': 'Snacks and Namkeen', 'count': 32049}, {'item_category_id': 'Masala & Seasoning', 'count': 24381}, {'item_category_id': 'Cleaning & Household', 'count': 19714}, {'item_category_id': 'Chocolates and Biscuits', 'count': 15330}, {'item_category_id': 'Bakery, Cakes & Dairy', 'count': 15270}, {'item_category_id': 'Tea and Coffee', 'count': 9123}, {'item_category_id': 'Oil & Ghee', 'count': 4192}, {'item_category_id': 'Dals and Pulses', 'count': 4083}, {'item_category_id': 'Pasta, Soup and Noodles', 'count': 3347}, {'item_category_id': 'Atta, Flours and Sooji', 'count': 2348}, {'item_category_id': 'Sauces, Spreads and Dips', 'count': 2179}, {'item_category_id': 'Energy and Soft Drinks', 'count': 2050}, {'item_category_id': 'Indian Sweets', 'count': 1871}, {'item_category_id': 'Fruit Juices and Fruit Drinks', 'count': 1786}, {'item_category_id': 'Tinned and Processed Food', 'count': 1607}, {'item_category_id'

In [2]:
from urllib.parse import urlencode

category = "Masala & Seasoning"
fq_value = 'item_category_id:"Masala & Seasoning"'

# Use list of tuples so fq can be repeated if needed
params = [
    ("q", "*:*"),
    ("fq", fq_value),
    ("rows", "10"),
]

# Proper encoding: quote_plus is applied inside urlencode
encoded_body = urlencode(params)

response = httpx.post(
    "http://10.10.10.55:8983/solr/ret_grocery/select",  # use your core
    content=encoded_body,
    headers={"Content-Type": "application/x-www-form-urlencoded"},
)

print("Status:", response.status_code)
print("Response:", response.json())

Status: 200
Response: {'responseHeader': {'status': 0, 'QTime': 2, 'params': {'q': '*:*', 'fq': 'item_category_id:"Masala & Seasoning"', 'rows': '10'}}, 'response': {'numFound': 25383, 'start': 0, 'numFoundExact': True, 'docs': [{'id': 'ddbc8136-4a1b-596f-bf81-a150c94b08ec', 'domain': 'ONDC:RET10', 'domain_string': 'ONDC:RET10', 'bpp_id': 'shikhar-ondc.hulcd.com', 'bpp_name': 'Shikhar Store', 'bpp_uri': 'https://shikhar-ondc.hulcd.com/ondc', 'city': 'std:080', 'item_id': '300649', 'item_category_id': 'Masala & Seasoning', 'item_currency': 'INR', 'item_currency_string': 'INR', 'item_measure_quantity': 'unit', 'item_measure_value': '36.0', 'item_name': 'MTR paneer mixed masala', 'item_name_string': 'MTR paneer mixed masala', 'item_name_suggest': 'MTR paneer mixed masala', 'item_name_vector': [-0.01385087, -0.029383672, -0.0075405734, -0.033957593, -0.0018137086, 0.032057703, -0.069396526, 0.005912101, -0.010610849, 0.03586828, 0.015299755, -0.009322106, 0.025337493, 0.07490363, 0.0020420

In [7]:
import hashlib
from typing import Dict

import numpy as np
from sentence_transformers import SentenceTransformer

embeddings_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
embedding_cache: Dict[str, np.ndarray] = {}


def generate_embedding_key(text: str) -> str:
    return hashlib.sha256(text.encode("utf-8")).hexdigest()


def generate_text_embeddings(text: str) -> np.ndarray:
    key = generate_embedding_key(text)
    if key in embedding_cache:
        return embedding_cache[key]
    else:
        embeddings = embeddings_model.encode(text, convert_to_tensor=False).astype("float32")
        embedding_cache[key] = embeddings
        return embeddings

In [10]:
vector = generate_text_embeddings("Hello world")
print(vector, type(vector))  # Should print the embedding for "Hello world"
print(vector.shape)  # Should print the embedding for "Hello world"

[ 2.62496546e-02  1.33955777e-02 -4.53314465e-03 -2.17914563e-02
  5.45518920e-02 -4.96648625e-03  6.65552868e-03  3.06262653e-02
 -5.76286810e-03 -4.56201797e-03 -3.31328879e-03 -4.84962352e-02
 -1.13641042e-02  3.50774415e-02  9.30946469e-02 -8.66873562e-02
  5.10865338e-02  9.88616142e-03 -6.35693893e-02 -8.55018757e-03
  7.05440668e-03 -3.86236073e-03  2.47443132e-02  4.28849347e-02
  3.50941494e-02 -2.98482105e-02  1.02525931e-02  2.23448742e-02
  2.08899714e-02  9.49224085e-03 -3.30443121e-02 -1.22841159e-02
  5.35289198e-02  2.54292265e-02  2.02217689e-06 -3.41909938e-02
  9.60998237e-03 -1.64845288e-02  5.60952025e-03 -4.25007846e-03
 -2.28012446e-02  4.03546877e-02  3.05194664e-03  3.13726366e-02
 -1.08123878e-02 -3.55708487e-02  2.22928636e-02  1.68711727e-03
  2.07725051e-03  2.31161937e-02  6.88584289e-03 -6.83087064e-03
 -4.87612151e-02 -2.70108003e-02  1.54910972e-02  3.73169966e-02
  2.72794329e-02  2.64989696e-02 -1.69242558e-03 -2.88223885e-02
  2.56629288e-02 -4.66157

In [13]:
import asyncio
import httpx
import json
from datetime import datetime

core = "ret_grocery"
source_solr_url = f"https://retail-buyer-solr.nearshop.in/solr/{core}/select"
target_url = "https://stagingondcfs.finfotech.co.in/ss/solr-index/"

BATCH_FETCH_SIZE = 1000
BATCH_SEND_SIZE = 100

# Helper to transform Solr doc to target schema
def transform_doc(doc):
    return {
        "collection_type": "grocery",
        "id": doc.get("id", ""),
        "code": doc.get("code", ""),  # or a separate code if available
        "domain": doc.get("domain", ""),
        "bpp_id": doc.get("bpp_id", ""),
        "bpp_name": doc.get("bpp_name", ""),
        "bpp_uri": doc.get("bpp_uri", ""),
        "city": doc.get("city", ""),
        "item_id": doc.get("item_id", ""),
        "item_offers": doc.get("item_offers", []),
        "parent_item_id": doc.get("parent_item_id", ""),
        "item_category_id": doc.get("item_category_id", ""),
        "item_currency": doc.get("item_currency", "INR"),
        "item_measure_quantity": doc.get("item_measure_quantity", "unit"),
        "item_measure_value": float(doc.get("item_measure_value", 0)),
        "item_name": doc.get("item_name", ""),
        "item_short_description": doc.get("item_short_desc", ""),
        "item_long_description": doc.get("item_long_desc", ""),
        "item_selling_price": float(doc.get("item_selling_price", 1)),
        "item_mrp_price": float(doc.get("item_mrp_price", 0)),
        "item_status": doc.get("item_status", "enable"),
        "item_timestamp": doc.get("item_timestamp", datetime.utcnow().isoformat()),
        "provider_timestamp": doc.get("provider_timestamp", datetime.utcnow().isoformat()),
        "item_symbol": doc.get("item_symbol", ""),
        "provider_symbol": doc.get("provider_symbol", ""),
        "item_veg": doc.get("item_veg", "true"),
        "item_nonveg": doc.get("item_nonveg", "true"),
        "item_discount_percentage": float(doc.get("item_discount_percentage", 0)),
        "item_available_count": int(doc.get("item_available_count", 0)),
        "item_maximum_count": int(doc.get("item_maximum_count", 0)),
        "item_cancellable_status": doc.get("item_cancellable_status", "enable"),
        "item_returnable_status": doc.get("item_returnable_status", "enable"),
        "provider_name": doc.get("provider_name", ""),
        "provider_status": doc.get("provider_status", "enable"),
        "provider_geo_latitude": float(doc.get("provider_geo_latitude", 0)),
        "provider_geo_longitude": float(doc.get("provider_geo_longitude", 0)),
        "provider_id": doc.get("provider_id", ""),
        "provider_location_id": doc.get("provider_location_id", ""),
        "provider_location_city": doc.get("provider_location_city", ""),
        "provider_location_area_code": doc.get("provider_location_area_code", ""),
        "provider_location_street": doc.get("provider_location_street", ""),
        "provider_min_order_value": float(doc.get("provider_min_order_value", 0)),
        "provider_start_time_day": int(doc.get("provider_start_time_day", 2359)),
        "provider_end_time_day": int(doc.get("provider_end_time_day", 2359)),
        "provider_days": doc.get("provider_days", [0]),
        "provider_service_location_distance": float(doc.get("provider_service_location_distance", 0)),
        "provider_service_type": int(doc.get("provider_service_type", 10)),
    }

async def fetch_documents(start: int) -> list:
    params = {
        "q": "*:*",
        "start": start,
        "rows": BATCH_FETCH_SIZE,
        "wt": "json"
    }
    async with httpx.AsyncClient(verify=False) as client:
        response = await client.get(source_solr_url, params=params)
        response.raise_for_status()
        return response.json()["response"]["docs"]

async def send_batch(batch_docs: list):
    headers = {"Content-Type": "application/json"}
    payload = {
        "collection_type": "grocery",
        "documents": batch_docs
    }
    async with httpx.AsyncClient(verify=False) as client:
        response = await client.post(target_url, json=payload, headers=headers)
        if response.status_code != 200:
            print(f"❌ Error {response.status_code}: {response.text}")
        else:
            print(f"✅ Sent batch of {len(batch_docs)} docs")

async def send_in_batches(docs: list):
    tasks = []
    for i in range(0, len(docs), BATCH_SEND_SIZE):
        raw_batch = docs[i:i + BATCH_SEND_SIZE]
        transformed_batch = [transform_doc(doc) for doc in raw_batch]
        tasks.append(send_batch(transformed_batch))
    await asyncio.gather(*tasks)

async def transfer_all_documents():
    start = 213800
    total = 213800

    while True:
        print(f"📦 Fetching docs from offset {start}")
        docs = await fetch_documents(start)
        if not docs:
            print("✅ Done! No more documents.")
            break

        await send_in_batches(docs)
        total += len(docs)
        start += BATCH_FETCH_SIZE

    print(f"🎉 All done! Transferred {total} documents.")

# 👉 Run this in Jupyter:
await transfer_all_documents()


📦 Fetching docs from offset 213800


  "item_timestamp": doc.get("item_timestamp", datetime.utcnow().isoformat()),
  "provider_timestamp": doc.get("provider_timestamp", datetime.utcnow().isoformat()),


❌ Error 422: {"detail":[{"type":"missing","loc":["body","id"],"msg":"Field required","input":{"collection_type":"grocery","documents":[{"collection_type":"grocery","id":"a3d770f2-ca27-5e14-b74a-d7b0be6f5671","code":"shikhar-ondc.hulcd.com_ONDC:RET10_HUL-431326F-P1119_74575_Snacks and Namkeen","domain":"ONDC:RET10","bpp_id":"shikhar-ondc.hulcd.com","bpp_name":"Shikhar Store","bpp_uri":"https://shikhar-ondc.hulcd.com/ondc","city":"std:080","item_id":"74575","item_offers":[],"parent_item_id":"","item_category_id":"Snacks and Namkeen","item_currency":"INR","item_measure_quantity":"unit","item_measure_value":36.0,"item_name":"Kurkure Namkeen Green Chutney Style, 36 g","item_short_description":"These crunchy snacks are flavored with a spicy green chutney flavor in a North Indian Rajasthani style. About the brand-Kurkure is a crunchy, new-age Pack namkeen snack brand which symbolizes light-hearted fun. Embodying Pack the spirit of India, Kurkure has found a home in millions of hearts and mind

In [12]:
await transfer_all_documents()

📦 Fetching documents from offset: 213700
docs [{'id': 'c700c480-3a8d-5ef9-bb05-eb20e21611ea', 'domain': 'ONDC:RET10', 'domain_string': 'ONDC:RET10', 'bpp_id': 'shikhar-ondc.hulcd.com', 'bpp_name': 'Shikhar Store', 'bpp_uri': 'https://shikhar-ondc.hulcd.com/ondc', 'city': 'std:080', 'item_id': '309011', 'item_category_id': 'Indian Sweets', 'item_currency': 'INR', 'item_currency_string': 'INR', 'item_measure_quantity': 'unit', 'item_measure_value': '60.0', 'item_name': 'Himalaya Pure herbs Guduchi Immunity wellness Giloy', 'item_name_string': 'Himalaya Pure herbs Guduchi Immunity wellness Giloy', 'item_name_suggest': 'Himalaya Pure herbs Guduchi Immunity wellness Giloy', 'item_name_vector': [0.029035144, 0.037335675, -0.015740542, 0.014341438, 0.01927631, 0.016229572, 0.005952152, 0.069126755, 0.08228248, -0.01025797, -0.013859927, 0.03350095, -0.010421248, 0.041818902, 0.0022136592, -0.01758705, -0.014327215, 0.014604188, -0.006518713, -0.029046763, -0.040185444, 0.019341609, -0.0556485