In [1]:
# Cell 1: imports & config

import os
import glob
import json
from dataclasses import dataclass
from typing import List, Dict, Any

import numpy as np
from openai import OpenAI

# Uses your OPENAI_API_KEY env var
client = OpenAI()

EMBEDDING_MODEL = "text-embedding-3-small"
CHAT_MODEL = "gpt-4.1-mini"   # or gpt-4.1, etc.


In [2]:
# Cell 2: load & chunk YANG files

YANG_ROOT = "yang/vendor/cisco/xr/701"  # adapt if your path is different

@dataclass
class Chunk:
    id: int
    file_path: str
    chunk_index: int
    text: str

def load_yang_files(root: str) -> List[str]:
    pattern = os.path.join(root, "**", "*.yang")
    files = glob.glob(pattern, recursive=True)
    return files

def chunk_text(text: str, max_chars: int = 1000) -> List[str]:
    # naive fixed-size char chunking
    return [text[i:i+max_chars] for i in range(0, len(text), max_chars)]

def build_chunks(root: str) -> List[Chunk]:
    chunks: List[Chunk] = []
    files = load_yang_files(root)
    cid = 0
    for f in files:
        try:
            with open(f, "r", encoding="utf-8", errors="ignore") as fh:
                content = fh.read()
        except Exception as e:
            print(f"Skipping {f}: {e}")
            continue

        pieces = chunk_text(content)
        for i, piece in enumerate(pieces):
            # add a tiny header so the model knows where this comes from
            text = f"FILE: {os.path.basename(f)}\nCHUNK: {i}\n{piece}"
            chunks.append(Chunk(id=cid, file_path=f, chunk_index=i, text=text))
            cid += 1
    print(f"Loaded {len(files)} files, created {len(chunks)} chunks.")
    return chunks

chunks = build_chunks(YANG_ROOT)


Loaded 1229 files, created 16578 chunks.


In [4]:
chunks[0]

Chunk(id=0, file_path='yang/vendor/cisco/xr/701/openconfig-local-routing.yang', chunk_index=0, text='FILE: openconfig-local-routing.yang\nCHUNK: 0\nmodule openconfig-local-routing {\n  yang-version 1;\n  namespace "http://openconfig.net/yang/local-routing";\n  prefix oc-loc-rt;\n\n  import openconfig-inet-types {\n    prefix inet;\n  }\n  import openconfig-policy-types {\n    prefix oc-pt;\n  }\n  import openconfig-extensions {\n    prefix oc-ext;\n  }\n  import openconfig-interfaces {\n    prefix oc-if;\n  }\n\n  organization\n    "OpenConfig working group";\n  contact\n    "OpenConfig working group\n     www.openconfig.net";\n  description\n    "This module describes configuration and operational state data\n     for routes that are locally generated, i.e., not created by\n     dynamic routing protocols.  These include static routes, locally\n     created aggregate routes for reducing the number of constituent\n     routes that must be advertised, summary routes for IGPs, etc.\n     

In [6]:
# Cell 3: embed all chunks

def embed_texts(texts: List[str]) -> np.ndarray:
    resp = client.embeddings.create(
        model=EMBEDDING_MODEL,
        input=texts
    )
    return np.array([d.embedding for d in resp.data], dtype=np.float32)

chunk_embeddings = embed_texts([c.text for c in chunks[:100]])
chunk_embeddings.shape


(100, 1536)

In [11]:
# Cell 4: simple cosine-similarity retrieval

def embed_query(query: str) -> np.ndarray:
    resp = client.embeddings.create(
        model=EMBEDDING_MODEL,
        input=[query]
    )
    return np.array(resp.data[0].embedding, dtype=np.float32)

def retrieve_chunks(query: str, top_k: int = 8) -> List[Chunk]:
    q_vec = embed_query(query)
    # cosine similarity = dot(a,b) / (|a||b|); we can normalize once
    norms = np.linalg.norm(chunk_embeddings, axis=1, keepdims=True)
    normed = chunk_embeddings / (norms + 1e-10)
    q_norm = q_vec / (np.linalg.norm(q_vec) + 1e-10)

    scores = normed @ q_norm
    top_idx = np.argsort(-scores)[:top_k]

    results = [chunks[i] for i in top_idx]
    # print("Top scores:", [float(scores[i]) for i in top_idx])
    return results


In [None]:
# Cell 5: query → context → config generator

def build_context_text(retrieved: List[Chunk]) -> str:
    # join all chunk texts
    parts = []
    for c in retrieved:
        parts.append(
            f"---\nSource file: {os.path.basename(c.file_path)} | chunk {c.chunk_index}\n{c.text}"
        )
    return "\n".join(parts)

default_system_prompt = """You are a Cisco IOS XR network engineer.
Using ONLY the information in the CONTEXT, generate a telemetry configuration.
Output valid IOS XR CLI configuration blocks, nothing else.
If you are unsure, make the best reasonable guess but stay consistent with IOS XR syntax.""" 

def generate_telemetry_config(user_query: str, top_k: int = 8, system_prompt =default_system_prompt ) -> str:
    retrieved = retrieve_chunks(user_query, top_k=top_k)
    context = build_context_text(retrieved)
    
    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": (
                f"CONTEXT:\n{context}\n\n"
                f"USER REQUEST:\n{user_query}\n\n"
                "Return only the telemetry model-driven configuration."
            ),
        },
    ]

    resp = client.chat.completions.create(
        model=CHAT_MODEL,
        messages=messages,
        temperature=0.2,
    )

    return resp.choices[0].message.content.strip()


In [None]:
# Cell 6: example query

query = (
    "Generate telemetry configuration for Cisco IOS XR about BGP. "
    "Use gRPC with no TLS, telemetry server 192.0.2.0 port 57500. "
    "Choose relevant BGP sensor paths."
)

config = generate_telemetry_config(query, top_k=10)
print(config)


[{'role': 'system', 'content': 'You are a Cisco IOS XR network engineer.\nUsing ONLY the information in the CONTEXT, generate a telemetry configuration.\nOutput valid IOS XR CLI configuration blocks, nothing else.\nIf you are unsure, make the best reasonable guess but stay consistent with IOS XR syntax.'}, {'role': 'user', 'content': 'CONTEXT:\n---\nSource file: Cisco-IOS-XR-tunnel-ip-ma-oper.yang | chunk 6\nFILE: Cisco-IOS-XR-tunnel-ip-ma-oper.yang\nCHUNK: 6\nrs";\n      uses SUMMARY;\n    }\n    container endpoints {\n      description\n        "GRE MA DB endpoints";\n      list endpoint {\n        description\n          "GRE MA DB endpoints";\n        leaf tunnel-id {\n          type dt1:Tunl-id-range;\n          description\n            "Tunnel ID";\n        }\n        leaf transport-ip-address {\n          type inet:ip-address-no-zone;\n          description\n            "Transport IP Address";\n        }\n        leaf overlay-ip-address {\n          type inet:ip-address-no-zone;\

Issues:

subscription is at the top, with sensor-path directly under it → wrong hierarchy.

No sensor-group / destination-group separation.

Uses stream grpc, transport grpc, destination-ip, destination-port, no tls → this smells like a mashup of IOS XE / NX-OS syntax, not XR.

No sample-interval or destination-group-id.

Extra exit, end that you probably don’t want the model to output.

So: semantically okay, CLI invalid for XR.

In [15]:
system_prompt = """
You are a Cisco IOS XR network engineer.

Always output telemetry model-driven configuration for IOS XR 7.x
using EXACTLY this structure (adapt names as needed):

telemetry model-driven
 sensor-group <SENSOR_GROUP_NAME>
  sensor-path <PATH_1>
  sensor-path <PATH_2>
 !
 destination-group DG-GRPC
  address-family ipv4
   destination <DEST_IP>
    port <DEST_PORT>
    encoding self-describing-gpb
    protocol grpc no-tls
 !
 subscription <SUBSCRIPTION_NAME>
  sensor-group-id <SENSOR_GROUP_NAME> sample-interval <INTERVAL_MS>
  destination-group-id DG-GRPC
 !

Rules:
- Do NOT use 'stream', 'transport', 'destination-ip', 'destination-port', or 'no tls' commands.
- Put ALL sensor-path lines inside a sensor-group.
- Put ALL destination settings inside a destination-group as shown.
- Use only IOS XR syntax.
- Use the CONTEXT below only to choose relevant sensor-paths.
- Output only configuration, no explanations.
"""


In [None]:
# Example query

query = (
    "Generate telemetry configuration for Cisco IOS XR about BGP. "
    "Use gRPC with no TLS, telemetry server 192.0.2.0 port 57500. "
    "Choose relevant BGP sensor paths."
)

config = generate_telemetry_config(query, top_k=10, system_prompt= system_prompt)
print(config)


telemetry model-driven
 sensor-group BGP-SG
  sensor-path Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/instance/instance-active/default-vrf/neighbors/neighbor
  sensor-path Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/instance/instance-active/default-vrf/afs/af
  sensor-path Cisco-IOS-XR-ipv4-bgp-oper:bgp/instances/instance/instance-active/default-vrf/rib/ipv4-unicast/attributes
 !
 destination-group DG-GRPC
  address-family ipv4
   destination 192.0.2.0
    port 57500
    encoding self-describing-gpb
    protocol grpc no-tls
 !
 subscription BGP-SUB
  sensor-group-id BGP-SG sample-interval 10000
  destination-group-id DG-GRPC
 !
