# Azure ML deployment (one notebook)
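This notebook generates the scoring code (`score.py` plus a sample `demands.xlsx`) and deploys it as a new deployment under an existing Azure ML managed Online Endpoint, using the Azure ML Python SDK v2 (`azure-ai-ml`).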

In [None]:
%pip install -U azure-ai-ml azure-identity openai pandas numpy openpyxl python-dotenv requests pyyaml


## CONFIG

In [None]:
# --- Azure ML workspace coordinates -----------------------------------------
SUBSCRIPTION_ID = "REPLACE_ME"
RESOURCE_GROUP = "REPLACE_ME"
WORKSPACE_NAME = "REPLACE_ME"

# --- Target endpoint / deployment -------------------------------------------
# ENDPOINT_NAME must already exist; NEW_DEPLOYMENT is created (or updated)
# inside it by the deploy cell.
ENDPOINT_NAME = "REPLACE_ME"
NEW_DEPLOYMENT = "llm"

# Compute backing the deployment.
INSTANCE_TYPE = "Standard_DS3_v2"
INSTANCE_COUNT = 1

# --- Azure OpenAI connection -------------------------------------------------
# Values come from: Azure Portal -> Azure OpenAI resource -> Keys and Endpoint.
AZURE_OPENAI_ENDPOINT = "https://<your-resource>.openai.azure.com/"
AZURE_OPENAI_API_KEY = "REPLACE_ME"
# If this API version is rejected, try 2024-10-01-preview.
AZURE_OPENAI_API_VERSION = "2024-12-01-preview"

# Deployment names as listed under Azure OpenAI Studio -> Deployments.
AZURE_OPENAI_CHAT_DEPLOYMENT = "gpt-5-mini"
AZURE_OPENAI_EMBED_DEPLOYMENT = "text-embedding-3-small"

# --- Labelling knobs (forwarded to score.py as environment variables) --------
TOP_K_RAG = 8                # demands retrieved per chunk before the LLM step
MAX_LABELS_PER_CHUNK = 3     # cap on labels kept for one chunk
MIN_KEEP_PROBA = 0.30        # predictions below this probability are dropped
MAX_TEXT_CHARS = 4000        # chunk text is truncated to this many characters

# --- Local staging area -------------------------------------------------------
# The notebook writes score.py and demands.xlsx here; the folder is uploaded
# as the deployment's code.
CODE_DIR = "./_deploy_code"
SCORING_SCRIPT = "score.py"

# Set to True to route 100% of endpoint traffic to NEW_DEPLOYMENT after deploy.
SWITCH_TRAFFIC_TO_NEW = False


## Generate `score.py` + sample `demands.xlsx` into `CODE_DIR`

In [None]:
import os
import pandas as pd
import pathlib

# Make sure the staging folder exists before anything is written into it.
os.makedirs(CODE_DIR, exist_ok=True)

demands_path = os.path.join(CODE_DIR, "demands.xlsx")

if os.path.exists(demands_path):
    # A catalogue is already in place: leave it untouched.
    print("Using existing demands.xlsx:", demands_path)
else:
    # No catalogue yet: write a four-row sample with the three columns that
    # score.py requires (demand_id, demand, description).
    sample_columns = ["demand_id", "demand", "description"]
    sample_rows = [
        (
            "motor-earthing",
            "CST - Motor - earthing",
            "Explicit requirement to connect motor/frame to protective earth (PE) / grounding.",
        ),
        (
            "motor-ip",
            "CST - Motor - IP",
            "Ingress protection class requirement (e.g., IP55, IP66) for motor/enclosure.",
        ),
        (
            "motor-iso9001",
            "CST - Motor - ISO 9001 certificate",
            "Requirement to provide ISO 9001 certification or certificate/documentation.",
        ),
        (
            "pump-material",
            "CST - Pump - material",
            "Requirement for pump material (e.g., AISI 316L) and restrictions for wetted parts/housing.",
        ),
    ]
    df = pd.DataFrame(sample_rows, columns=sample_columns)
    df.to_excel(demands_path, index=False)
    print("Created sample demands.xlsx:", demands_path)

# Source of the scoring script, kept as a raw string so that escape sequences
# such as "\n" reach score.py verbatim. The next statements of this cell write
# it into CODE_DIR.
score_py = r'''"""Azure ML online-endpoint scoring script for demand labelling.

``init()`` loads the demand catalogue (``demands.xlsx`` next to this file) and
embeds it once with Azure OpenAI. ``run()`` labels every chunk of a document:
it retrieves the closest demands by cosine similarity and asks a chat model to
choose among them.
"""
import os
import json
import logging
import time
from typing import Any, Dict, List, Optional, Set

import numpy as np
import pandas as pd
from openai import AzureOpenAI

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Populated by init(); run() degrades to empty predictions while any is unset.
client: Optional[AzureOpenAI] = None
DEMANDS: List[Dict[str, str]] = []
DEMAND_EMB: Optional[np.ndarray] = None

AZURE_CHAT_DEPLOYMENT = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT", "").strip()
AZURE_EMBED_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT", "").strip()

TOP_K_RAG = int(os.getenv("TOP_K_RAG", "8"))
MAX_LABELS_PER_CHUNK = int(os.getenv("MAX_LABELS_PER_CHUNK", "3"))
MIN_KEEP_PROBA = float(os.getenv("MIN_KEEP_PROBA", "0.30"))
MAX_TEXT_CHARS = int(os.getenv("MAX_TEXT_CHARS", "4000"))


def _l2_normalize(x: np.ndarray) -> np.ndarray:
    """Scale each row of a 2-D array to unit L2 norm (epsilon avoids 0/0)."""
    denom = (np.linalg.norm(x, axis=1, keepdims=True) + 1e-12)
    return x / denom


def _safe_float(v: Any, default: float = 0.0) -> float:
    """Convert ``v`` to float, returning ``default`` when it cannot be converted."""
    try:
        return float(v)
    except Exception:
        return default


def _safe_json_loads(s: str) -> Optional[dict]:
    """Parse a JSON object from model output.

    Returns the parsed dict, or None when ``s`` is not a string, is not valid
    JSON, or decodes to something other than an object. If the text as a whole
    is not valid JSON (for example because the model wrapped it in a Markdown
    code fence), the span from the first "{" to the last "}" is tried instead.
    """
    if not isinstance(s, str):
        return None
    text = s.strip()
    try:
        parsed = json.loads(text)
    except ValueError:
        start, end = text.find("{"), text.rfind("}")
        if not 0 <= start < end:
            return None
        try:
            parsed = json.loads(text[start:end + 1])
        except ValueError:
            return None
    # Callers use .get(); a list or scalar would crash them.
    return parsed if isinstance(parsed, dict) else None


def init():
    """Create the Azure OpenAI client and embed the demand catalogue.

    Reads connection settings from environment variables and the catalogue
    from ``demands.xlsx`` located next to this script. On any configuration
    problem an error is logged and the function returns early, which leaves
    the module globals unset so that run() answers with empty predictions.
    """
    global client, DEMANDS, DEMAND_EMB, AZURE_CHAT_DEPLOYMENT, AZURE_EMBED_DEPLOYMENT
    logger.info("INIT: starting...")

    endpoint = (os.getenv("AZURE_OPENAI_ENDPOINT") or "").strip()
    api_key = (os.getenv("AZURE_OPENAI_API_KEY") or "").strip()
    api_version = (os.getenv("AZURE_OPENAI_API_VERSION") or "2024-12-01-preview").strip()

    AZURE_CHAT_DEPLOYMENT = (os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT") or "").strip()
    AZURE_EMBED_DEPLOYMENT = (os.getenv("AZURE_OPENAI_EMBED_DEPLOYMENT") or "").strip()

    if not endpoint or not api_key:
        logger.error("Missing AZURE_OPENAI_ENDPOINT or AZURE_OPENAI_API_KEY.")
        return
    if not AZURE_CHAT_DEPLOYMENT or not AZURE_EMBED_DEPLOYMENT:
        logger.error("Missing AZURE_OPENAI_CHAT_DEPLOYMENT or AZURE_OPENAI_EMBED_DEPLOYMENT.")
        return

    client = AzureOpenAI(azure_endpoint=endpoint, api_key=api_key, api_version=api_version)

    base_dir = os.path.dirname(os.path.abspath(__file__))
    demands_path = os.path.join(base_dir, "demands.xlsx")
    if not os.path.exists(demands_path):
        logger.error(f"demands.xlsx not found at: {demands_path}")
        return

    df = pd.read_excel(demands_path)
    required_cols = {"demand_id", "demand", "description"}
    if not required_cols.issubset(df.columns):
        logger.error(f"demands.xlsx missing columns. Required: {required_cols}, got: {set(df.columns)}")
        return

    DEMANDS = []
    embed_inputs: List[str] = []
    for _, row in df.iterrows():
        did = str(row["demand_id"]).strip()
        name = str(row["demand"]).strip()
        desc = str(row["description"]).strip()
        # Empty Excel cells arrive as NaN and stringify to "nan".
        if not did or did.lower() == "nan":
            continue
        if not name or name.lower() == "nan":
            continue
        if desc.lower() == "nan":
            # A missing description must not be embedded as the word "nan".
            desc = ""
        DEMANDS.append({"id": did, "name": name, "description": desc})
        embed_inputs.append(f"Name: {name}\nClarification: {desc}")

    if not DEMANDS:
        logger.error("No demands loaded from Excel (DEMANDS is empty).")
        return

    t0 = time.time()
    emb = client.embeddings.create(model=AZURE_EMBED_DEPLOYMENT, input=embed_inputs)
    DEMAND_EMB = _l2_normalize(np.array([e.embedding for e in emb.data], dtype=np.float32))
    logger.info(f"INIT: loaded {len(DEMANDS)} demands. Embedding time: {time.time() - t0:.2f}s")


def _retrieve_candidates(chunk_text: str, k: int) -> List[Dict[str, str]]:
    """Return up to ``k`` demands ranked by cosine similarity to ``chunk_text``.

    Returns an empty list when the service is not initialised or ``k`` is not
    positive.
    """
    if client is None or DEMAND_EMB is None or not DEMANDS or k <= 0:
        return []
    emb = client.embeddings.create(model=AZURE_EMBED_DEPLOYMENT, input=[chunk_text])
    q = _l2_normalize(np.array([emb.data[0].embedding], dtype=np.float32))
    # Both sides are unit-normalised, so the dot product is cosine similarity.
    sims = DEMAND_EMB @ q[0]
    idx = np.argsort(-sims)[:k]
    return [DEMANDS[i] for i in idx]


def _llm_classify_chunk(chunk_id: str, chunk_text: str, candidates: List[Dict[str, str]]) -> Dict[str, Any]:
    """Ask the chat model which candidate demands the chunk satisfies.

    Always returns a dict with "chunkId", "demandIds" and "explanation" keys
    when the model is not called or its reply cannot be parsed; otherwise
    returns the JSON object produced by the model.
    """
    if client is None:
        return {"chunkId": chunk_id, "demandIds": [], "explanation": "AzureOpenAI client not initialized."}
    if not candidates:
        return {"chunkId": chunk_id, "demandIds": [], "explanation": "No candidates from retrieval."}

    demands_context = "\n".join(
        [f"- id: {d['id']}\n  name: {d['name']}\n  clarification: {d['description']}" for d in candidates]
    )

    # The label cap in the prompt follows the configured MAX_LABELS_PER_CHUNK
    # so that the instruction and the post-filter in run() agree.
    system = (
        "You label customer requirements in product documentation. "
        "You MUST only choose from the provided demands. "
        "Match ONLY when the chunk clearly satisfies the demand's clarification. "
        f"Return at most {MAX_LABELS_PER_CHUNK} demands. "
        "If nothing matches, return an empty demandIds array. "
        "Probabilities must be in [0.0, 1.0]. "
        "Return JSON only."
    )

    user = f"""Demands (id, name, clarification):
{demands_context}

ChunkId: {chunk_id}
Chunk text:
{chunk_text}

Return JSON in this format exactly:
{{
  "chunkId": "{chunk_id}",
  "demandIds": [{{"id":"<one of provided ids>","probability":0.85}}],
  "explanation": "brief reason"
}}""".strip()

    resp = client.chat.completions.create(
        model=AZURE_CHAT_DEPLOYMENT,
        messages=[{"role": "system", "content": system}, {"role": "user", "content": user}],
    )
    content = resp.choices[0].message.content or ""
    parsed = _safe_json_loads(content)
    return parsed or {"chunkId": chunk_id, "demandIds": [], "explanation": "LLM returned non-JSON."}


def _select_predictions(llm_out: Dict[str, Any], allowed_ids: Set[str], limit: int) -> List[Dict[str, Any]]:
    """Turn raw model output into at most ``limit`` predictions, best first.

    Entries are dropped when they are not objects, name an id outside
    ``allowed_ids``, or fall below MIN_KEEP_PROBA (NaN never passes). A
    repeated id keeps its highest probability. Probabilities are clamped to
    [0.0, 1.0].
    """
    raw_items = llm_out.get("demandIds")
    if not isinstance(raw_items, list) or limit <= 0:
        return []

    best: Dict[str, float] = {}
    for item in raw_items:
        if not isinstance(item, dict):
            continue
        did = str(item.get("id", "")).strip()
        proba = _safe_float(item.get("probability", 0.0))
        if did not in allowed_ids or not proba >= MIN_KEEP_PROBA:
            continue
        proba = min(max(proba, 0.0), 1.0)
        if proba > best.get(did, -1.0):
            best[did] = proba

    # Rank before truncating so the cap keeps the strongest labels.
    ranked = sorted(best.items(), key=lambda pair: pair[1], reverse=True)
    return [{"label": did, "proba": proba} for did, proba in ranked[:limit]]


def _empty_chunk_result() -> Dict[str, Any]:
    """Fields written onto a chunk that receives no predictions."""
    return {"relevantProba": 0.0, "cdLogregPredictions": [], "cdTransformerPredictions": []}


def run(raw_data: Any) -> Dict[str, Any]:
    """Label every chunk of the submitted document.

    Args:
        raw_data: JSON text (str/bytes) or an already-decoded dict holding
            "document" and "num_preds". Chunks are read from
            document["contentDomain"]["byId"] and their "text" field.

    Returns:
        {"predictions": document}: the input document, updated in place. Each
        chunk gains "relevantProba", "cdLogregPredictions" (always empty) and
        "cdTransformerPredictions"; the document gains a sorted
        "documentDemandPredictions" list of every label that was kept.

    Raises:
        ValueError: when the request does not have the expected structure.
    """
    if isinstance(raw_data, (str, bytes, bytearray)):
        request = json.loads(raw_data)
    else:
        request = raw_data
    if not isinstance(request, dict):
        raise ValueError("Request must be a JSON object/dict.")
    if "document" not in request or "num_preds" not in request:
        raise ValueError("Invalid input: expected 'document' and 'num_preds'.")

    document = request["document"]
    if not isinstance(document, dict):
        raise ValueError("'document' must be a JSON object/dict.")
    try:
        num_pred = int(request["num_preds"])
    except (TypeError, ValueError) as exc:
        raise ValueError("'num_preds' must be an integer.") from exc

    content_domain = document.get("contentDomain", {})
    by_id = content_domain.get("byId", {}) if isinstance(content_domain, dict) else None
    if not isinstance(by_id, dict):
        raise ValueError("document.contentDomain.byId must be an object/dict.")

    # Effective per-chunk cap: both limits apply, and neither may go negative.
    limit = max(min(MAX_LABELS_PER_CHUNK, num_pred), 0)
    ready = client is not None and DEMAND_EMB is not None and bool(DEMANDS)
    document_demand_predictions = set()

    for chunk_id, content in by_id.items():
        if not isinstance(content, dict):
            logger.warning(f"Skipping chunk {chunk_id}: content is not an object.")
            continue

        text = str(content.get("text", "") or "")[:MAX_TEXT_CHARS]

        # The embeddings API rejects empty input, so blank chunks are answered
        # locally; the same empty answer is used while init() has not succeeded.
        if not ready or not text.strip():
            content.update(_empty_chunk_result())
            continue

        candidates = _retrieve_candidates(text, TOP_K_RAG)
        llm_out = _llm_classify_chunk(chunk_id, text, candidates)

        # Only ids that were actually offered to the model are accepted.
        preds = _select_predictions(llm_out, {d["id"] for d in candidates}, limit)
        document_demand_predictions.update(p["label"] for p in preds)

        relevant_proba = preds[0]["proba"] if preds else 0.0
        content.update({"relevantProba": relevant_proba, "cdLogregPredictions": [], "cdTransformerPredictions": preds})

    # Sorted so identical requests produce identical responses.
    document["documentDemandPredictions"] = sorted(document_demand_predictions)
    return {"predictions": document}
'''
# Write the scoring script under the configured SCORING_SCRIPT name, so the
# file on disk always matches the name the deployment's CodeConfiguration
# points at (a hard-coded "score.py" would drift if the config were changed).
score_path = os.path.join(CODE_DIR, SCORING_SCRIPT)
pathlib.Path(score_path).write_text(score_py, encoding="utf-8")
print("Wrote", score_path)


## Deploy to existing Online Endpoint

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Environment, ManagedOnlineDeployment, CodeConfiguration, OnlineRequestSettings

import os  # imported here so this cell does not depend on an earlier cell's import

# Fail fast while the CONFIG cell still holds template placeholders; otherwise
# the problem only surfaces after cloud calls, or later at scoring time.
_unset = [
    name
    for name, value in {
        "SUBSCRIPTION_ID": SUBSCRIPTION_ID,
        "RESOURCE_GROUP": RESOURCE_GROUP,
        "WORKSPACE_NAME": WORKSPACE_NAME,
        "ENDPOINT_NAME": ENDPOINT_NAME,
        "AZURE_OPENAI_ENDPOINT": AZURE_OPENAI_ENDPOINT,
        "AZURE_OPENAI_API_KEY": AZURE_OPENAI_API_KEY,
    }.items()
    if "REPLACE_ME" in value or "<your-resource>" in value
]
if _unset:
    raise ValueError("Fill in the CONFIG cell before deploying; still placeholders: " + ", ".join(_unset))

# Interactive browser login stays enabled as a fallback credential.
credential = DefaultAzureCredential(exclude_interactive_browser_credential=False)

ml_client = MLClient(
    credential=credential,
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WORKSPACE_NAME,
)

# The endpoint must already exist; this lookup raises if it does not.
endpoint = ml_client.online_endpoints.get(ENDPOINT_NAME)
print("Endpoint:", endpoint.name, "Traffic:", endpoint.traffic)

# The Environment below references a conda file that no other cell creates.
# Generate a minimal one when it is missing; an existing file is never touched.
CONDA_FILE = "conda_env.yml"
if os.path.exists(CONDA_FILE):
    print("Using existing", CONDA_FILE)
else:
    with open(CONDA_FILE, "w", encoding="utf-8") as conda_fh:
        conda_fh.write(
            "name: openai-rag-demand-labeler\n"
            "channels:\n"
            "  - conda-forge\n"
            "dependencies:\n"
            "  - python=3.10\n"
            "  - pip\n"
            "  - pip:\n"
            "      - azureml-inference-server-http\n"
            "      - openai>=1.0\n"
            "      - pandas\n"
            "      - numpy\n"
            "      - openpyxl\n"
        )
    print("Created", CONDA_FILE)

ENV_NAME = "openai-rag-demand-labeler-env"
env = Environment(
    name=ENV_NAME,
    description="PoC env for Azure OpenAI + pandas/openpyxl",
    image="mcr.microsoft.com/azureml/minimal-ubuntu20.04-py310-cpu-inference:latest",
    conda_file=CONDA_FILE,
)
env = ml_client.environments.create_or_update(env)
print("Environment:", env.name, env.version)

# NOTE(review): the Azure OpenAI key is passed as a plain environment variable,
# so it is visible in the deployment definition. Acceptable for a PoC only.
deployment = ManagedOnlineDeployment(
    name=NEW_DEPLOYMENT,
    endpoint_name=ENDPOINT_NAME,
    # Pin the exact version registered above rather than whatever "@latest"
    # resolves to when the deployment is created.
    environment=f"azureml:{env.name}:{env.version}",
    code_configuration=CodeConfiguration(code=CODE_DIR, scoring_script=SCORING_SCRIPT),
    instance_type=INSTANCE_TYPE,
    instance_count=INSTANCE_COUNT,
    # Environment variables must be strings, hence str() on the numeric knobs.
    environment_variables={
        "AZURE_OPENAI_ENDPOINT": AZURE_OPENAI_ENDPOINT,
        "AZURE_OPENAI_API_KEY": AZURE_OPENAI_API_KEY,
        "AZURE_OPENAI_API_VERSION": AZURE_OPENAI_API_VERSION,
        "AZURE_OPENAI_CHAT_DEPLOYMENT": AZURE_OPENAI_CHAT_DEPLOYMENT,
        "AZURE_OPENAI_EMBED_DEPLOYMENT": AZURE_OPENAI_EMBED_DEPLOYMENT,
        "TOP_K_RAG": str(TOP_K_RAG),
        "MAX_LABELS_PER_CHUNK": str(MAX_LABELS_PER_CHUNK),
        "MIN_KEEP_PROBA": str(MIN_KEEP_PROBA),
        "MAX_TEXT_CHARS": str(MAX_TEXT_CHARS),
    },
    # Each chunk costs one embedding call and one chat call, so requests for
    # multi-chunk documents get a generous 180 s timeout.
    request_settings=OnlineRequestSettings(request_timeout_ms=180000),
)

# Blocks until the long-running create/update operation finishes.
dep = ml_client.online_deployments.begin_create_or_update(deployment).result()
print("Deployment:", dep.name, dep.provisioning_state)

print("Deployment logs (tail):")
print(ml_client.online_deployments.get_logs(name=NEW_DEPLOYMENT, endpoint_name=ENDPOINT_NAME, lines=200))

if SWITCH_TRAFFIC_TO_NEW:
    # Replaces the whole traffic map: every other deployment drops to 0%.
    ep = ml_client.online_endpoints.get(ENDPOINT_NAME)
    ep.traffic = {NEW_DEPLOYMENT: 100}
    ml_client.online_endpoints.begin_create_or_update(ep).result()
    print("Traffic updated.")

print("Final traffic:", ml_client.online_endpoints.get(ENDPOINT_NAME).traffic)
