In [1]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

# Local Qdrant example – adjust for Cloud (host, api_key, etc.)
client = QdrantClient(host="localhost", port=6333)

# `recreate_collection` is deprecated in qdrant-client. Do the same
# drop-then-create sequence explicitly so this cell still starts from an
# empty collection every time it is run.
# WARNING: this deletes any points already stored in "yang_sensors".
if client.collection_exists(collection_name="yang_sensors"):
    client.delete_collection(collection_name="yang_sensors")

client.create_collection(
    collection_name="yang_sensors",
    vectors_config=VectorParams(
        size=1536,                 # dimension of your embedding model
        distance=Distance.COSINE,  # cosine works well for text
    ),
)


  client.recreate_collection(


True

In [3]:
from openai import OpenAI
from qdrant_client.models import PointStruct

# OpenAI client used by get_embedding() to request embeddings.
client_oa = OpenAI()  # uses OPENAI_API_KEY from env

# Model name passed to the embeddings endpoint by get_embedding().
# NOTE(review): the vector length this model returns must match the `size`
# configured on the Qdrant collection (1536) — confirm if the model changes.
EMBEDDING_MODEL = "text-embedding-3-small"

def get_embedding(text: str) -> list[float]:
    """Return the embedding vector for ``text``.

    Sends ``text`` to the embeddings endpoint of ``client_oa`` using
    ``EMBEDDING_MODEL`` and returns the ``embedding`` attribute of the
    first item in the response's ``data`` list.
    """
    response = client_oa.embeddings.create(input=text, model=EMBEDDING_MODEL)
    first_item = response.data[0]
    return first_item.embedding


In [1]:
# Cell 2: load YANG catalog
import json

# JSON Lines catalog: one JSON document per non-blank line.
CATALOG_PATH = "sensor_catalog.jsonl"

all_rows = []

with open(CATALOG_PATH, "r", encoding="utf-8") as catalog_file:
    # Trim surrounding whitespace, drop lines that end up empty, then parse
    # each remaining line as its own JSON document.
    non_blank_lines = filter(None, map(str.strip, catalog_file))
    all_rows.extend(map(json.loads, non_blank_lines))

len(all_rows)


54063

In [6]:
from qdrant_client.models import PointStruct

# Catalog fields copied into the point payload under the same key.
payload_fields = (
    "module",
    "path",
    "protocol_tag",
    "category",
    "kind",
    "leaf_count",
    "description",
    "leaf_names",
)

points = []

# Only the first 10 catalog rows are embedded and indexed here.
for position, record in enumerate(all_rows[:10]):
    # Embed first, so the embedding request happens before any payload lookup.
    embedding = get_embedding(record["search_text"])

    payload = {"yang_id": record["id"]}  # e.g. "yang-0"
    payload.update((field, record[field]) for field in payload_fields)

    # The row's position in the slice is the point id (an unsigned integer).
    points.append(PointStruct(id=position, vector=embedding, payload=payload))

client.upsert(collection_name="yang_sensors", points=points)


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [None]:
def make_query_text(intent: dict) -> str:
    """Render an intent dict as the compact text that is embedded for search.

    Reads three optional keys from ``intent``; any other keys are ignored:

    * ``protocol`` - protocol name.
    * ``focus``    - an iterable of focus terms, or a single string.
    * ``goal``     - free-text description of the goal.

    A missing key or an explicit ``None`` renders as an empty value, so the
    literal text "None" never ends up in the query. A bare-string ``focus``
    is treated as one term instead of being joined character by character.

    Returns:
        Three newline-separated lines: ``Protocol: ...``, ``Focus: ...``
        and ``Goal: ...``.
    """
    protocol = intent.get("protocol")
    goal = intent.get("goal")

    focus = intent.get("focus") or []
    if isinstance(focus, str):
        # ", ".join("state") would produce "s, t, a, t, e".
        focus = [focus]

    protocol_text = "" if protocol is None else str(protocol)
    focus_text = ", ".join(str(term) for term in focus)
    goal_text = "" if goal is None else str(goal)

    parts = [
        f"Protocol: {protocol_text}",
        f"Focus: {focus_text}",
        f"Goal: {goal_text}",
    ]
    return "\n".join(parts)

# Example intent describing what should be monitored.
intent = dict(
    protocol="tunnel",
    focus=["state", "stats"],
    goal="monitor GRE tunnel health and keepalives",
    sampling_interval_ms=30_000,
)

# Render the intent as text, then embed that text to get the query vector.
query_text = make_query_text(intent)
query_vector = get_embedding(query_text)


In [None]:
from qdrant_client.models import Filter, FieldCondition, MatchValue

# Keep only points whose `protocol_tag` payload field equals the intent's protocol.
protocol_condition = FieldCondition(
    key="protocol_tag",
    match=MatchValue(value=intent["protocol"]),
)

query_filter = Filter(must=[protocol_condition])


In [21]:
# `client.search` is deprecated in qdrant-client; `query_points` is its
# replacement. It returns a response object whose `.points` attribute holds
# the scored points, so `hits` stays a list of scored points as before.
response = client.query_points(
    collection_name="yang_sensors",
    query=query_vector,
    query_filter=query_filter,
    limit=10,
)
hits = response.points

# One line per hit: point id, catalog id, and YANG path.
for hit in hits:
    print(hit.id, hit.payload["yang_id"], hit.payload["path"])


0 yang-0 Cisco-IOS-XR-tunnel-ip-ma-oper:tunnel-ip-ma
3 yang-3 Cisco-IOS-XR-tunnel-ip-ma-oper:tunnel-ip-ma/database
1 yang-1 Cisco-IOS-XR-tunnel-ip-ma-oper:tunnel-ip-ma/gsp-node-db-summary
4 yang-4 Cisco-IOS-XR-tunnel-ip-ma-oper:tunnel-ip-ma/database/transport-vrf-datas
5 yang-5 Cisco-IOS-XR-tunnel-ip-ma-oper:tunnel-ip-ma/database/transport-vrf-datas/transport-vrf-data
2 yang-2 Cisco-IOS-XR-tunnel-ip-ma-oper:tunnel-ip-ma/gsp-node-db-summary/gspdb-array
9 yang-9 Cisco-IOS-XR-tunnel-ip-ma-oper:tunnel-ip-ma/database/tunnel-ids
6 yang-6 Cisco-IOS-XR-tunnel-ip-ma-oper:tunnel-ip-ma/database/transport-vrf-datas/transport-vrf-data/idb-array
7 yang-7 Cisco-IOS-XR-tunnel-ip-ma-oper:tunnel-ip-ma/database/transport-vrf-datas/transport-vrf-data/idb-array/source-address
8 yang-8 Cisco-IOS-XR-tunnel-ip-ma-oper:tunnel-ip-ma/database/transport-vrf-datas/transport-vrf-data/idb-array/destination-address


  hits = client.search(
