In [None]:
from pathlib import Path
from openai import OpenAI, AsyncOpenAI
from dotenv import dotenv_values

In [44]:
# Parse the .env file a single time and read every setting from the same
# snapshot, instead of re-opening and re-parsing the file for each key.
# A missing key still raises KeyError, exactly as direct indexing did before.
env_values = dotenv_values(".env")

api_key = env_values["API_KEY"]  # credential passed to the client as `api_key`
api_base = env_values["API_BASE_URL"]  # endpoint root passed as `base_url`
model_name = env_values["MODEL_NAME"]  # default model used by `qwen_chat`
client_secret = env_values["PROJECT_SERVICE_TOKEN"]  # sent in a request header

In [45]:
# Keyword arguments for the OpenAI-compatible client: credentials, endpoint,
# and the extra header that carries the project service token.
config = dict(
    api_key=api_key,
    base_url=api_base,
    default_headers={"Grace-Client-Secret": client_secret},
)

In [46]:
# Async client built from `config` (api_key, base_url, default_headers).
# `qwen_chat` looks this module-level `client` up at call time.
client = AsyncOpenAI(**config)

In [47]:
from typing import Optional


async def qwen_chat(
    prompt: str,
    system_prompt: Optional[str] = None,
    model: Optional[str] = None,
    max_tokens: int = 512,
    temperature: float = 0.0,
) -> str:
    """
    Send a single-turn chat request through the module-level client.

    Parameters
    ----------
    prompt : str
        Text sent as the ``user`` message.
    system_prompt : str, optional
        When truthy, sent as a ``system`` message placed before the user
        message; otherwise no system message is included.
    model : str, optional
        Model to query. Falls back to the module-level ``model_name`` when
        falsy.
    max_tokens : int, default=512
        Forwarded unchanged as ``max_tokens``.
    temperature : float, default=0.0
        Forwarded unchanged as ``temperature``.

    Returns
    -------
    str
        ``content`` of the first choice's message, or ``""`` when that
        content is empty or ``None``.

    Raises
    ------
    RuntimeError
        For any ``Exception`` raised while preparing or sending the request.
        This includes the internal ``ValueError`` raised when no model name is
        available; the original exception is kept as ``__cause__``.

    Examples
    --------
    >>> # await qwen_chat("What is the capital of France?")  # doctest: +SKIP
    >>> # await qwen_chat("...", system_prompt=rag_system_prompt)  # doctest: +SKIP
    """
    try:
        # `model_name` is only looked up when no explicit model was given.
        target_model = model if model else model_name
        if not target_model:
            raise ValueError(
                "Model name is not configured. Set MODEL_NAME in .env or pass `model`."
            )

        # Start from the user turn, then put the system turn in front of it.
        conversation = [{"role": "user", "content": prompt}]
        if system_prompt:
            conversation.insert(0, {"role": "system", "content": system_prompt})

        completion = await client.chat.completions.create(
            model=target_model,
            messages=conversation,
            max_tokens=max_tokens,
            temperature=temperature,
        )

        text = completion.choices[0].message.content
        if text:
            return text
        return ""
    except Exception as err:
        # Every failure surfaces as one exception type with the cause chained.
        raise RuntimeError(f"Qwen chat request failed: {err}") from err

In [48]:
# System prompt from main.py RAG service.
# Passed to `qwen_chat` via `system_prompt`, which forwards the text unmodified
# as the "system" message. Because of the triple-quoted layout the string
# begins and ends with a newline character.
rag_system_prompt = """
You act as a proctor AI embedded in a Retrieval-Augmented Generation (RAG) framework designed to assist examinees during exams. Your responsibilities include:
- Confirming that each question pertains directly to the exam or the specific documents retrieved for that question.
- When questions are confirmed exam-related, provide comprehensive, transparent, and courteous answers strictly based on the retrieved document content, integrating relevant details precisely.
- If the question lacks supporting context or is unclear, inform the user politely: "The system cannot answer this question based on current information. Please wait for a real proctor to respond in chat."
- For questions unrelated to the exam or beyond the document scope, reply with: "I have no information about the topic you asked. Kindly direct your queries to the proctor in the chat."

Always maintain professionalism and avoid any information or assumptions outside the retrieved content.

Remember, your role is to ensure that examinees receive reliable, context-aware assistance while clearly signalling when human intervention is necessary.
"""

In [49]:
from typing import Final


def _normalize_openai_base_url(base_url: str) -> str:
    """
    Normalize provider base URL for OpenAI-compatible clients by appending '/v1'.

    Parameters
    ----------
    base_url : str
        Provider base URL from .env (may or may not already include '/v1').

    Returns
    -------
    str
        Normalized base URL ending with '/v1' and no trailing slash duplication.
    """
    base = base_url.rstrip("/")
    return base if base.endswith("/v1") else base + "/v1"


# Recreate the client with a normalized base URL.
# `api_base` was already bound when the .env values were loaded and is re-bound
# here, so it must not carry a `Final` annotation: a Final name may be assigned
# only once and type checkers reject the re-declaration. The normalizer returns
# an already-normalized URL unchanged, so re-running this cell is harmless.
api_base = _normalize_openai_base_url(api_base)
config["base_url"] = api_base  # keep the shared config in sync with the client
client = AsyncOpenAI(**config)
print(f"Using normalized OpenAI base_url: {api_base}")

Using normalized OpenAI base_url: https://models.grace-qc.prometric.com/model/candidatemonitor/fastapi/ppllm/model/v1


In [50]:
# Retry simple call after base_url normalization
try:
    reply = await qwen_chat(
        "What is the capital of France?", system_prompt=rag_system_prompt
    )
    print("Model reply:\n", reply)
except Exception as e:
    print("Call failed:", e)

Model reply:
 The capital of France is Paris.
