In [5]:
# Copyright (c) 2026 Microsoft Corporation.
# Licensed under the MIT License.

## Basic completion example

This example demonstrates basic usage of the LLM library with Azure OpenAI. It loads the API configuration from environment variables, creates a `ModelConfig` for Azure OpenAI, and sends a simple question to the model. The code handles both streaming and non-streaming responses: streaming responses are printed chunk by chunk in real time, while non-streaming responses are printed all at once. It also shows the `gather_completion_response` utility function, a simpler alternative that handles both response types and returns the complete text.

In [6]:
import os
from collections.abc import Iterator

from dotenv import load_dotenv
from graphrag_llm.completion import LLMCompletion, create_completion
from graphrag_llm.config import AuthMethod, ModelConfig
from graphrag_llm.types import LLMCompletionChunk, LLMCompletionResponse
from graphrag_llm.utils import (
    gather_completion_response,
)

# Load the GRAPHRAG_* settings from a local .env file (if any) into the environment.
load_dotenv()

api_key = os.getenv("GRAPHRAG_API_KEY")
api_base = os.getenv("GRAPHRAG_API_BASE")
# One GRAPHRAG_MODEL value is used for both the model name and the Azure
# deployment name.
model_name = os.getenv("GRAPHRAG_MODEL", "gpt-4o")

model_config = ModelConfig(
    model_provider="azure",
    model=model_name,
    azure_deployment_name=model_name,
    api_base=api_base,
    api_version=os.getenv("GRAPHRAG_API_VERSION", "2025-04-01-preview"),
    api_key=api_key,
    # Use the API key when one is configured; otherwise fall back to Azure
    # managed identity.
    auth_method=AuthMethod.ApiKey if api_key else AuthMethod.AzureManagedIdentity,
)
llm_completion: LLMCompletion = create_completion(model_config)

response: LLMCompletionResponse | Iterator[LLMCompletionChunk] = (
    llm_completion.completion(
        messages="What is the capital of France?",
    )
)

if isinstance(response, Iterator):
    print("Streaming response:")
    # A streaming response is a one-shot iterator: once the loop below has
    # consumed it, nothing is left for a second reader. Keep every chunk while
    # printing it so the same response can be replayed further down.
    streamed_chunks: list[LLMCompletionChunk] = []
    for chunk in response:
        streamed_chunks.append(chunk)
        print(chunk.choices[0].delta.content or "", end="", flush=True)
    print()  # end the streamed line so later output starts on a fresh line
    # Rebind to a fresh iterator over the buffered chunks; the original
    # iterator is exhausted at this point.
    response = iter(streamed_chunks)
else:
    # Non-streaming response
    print("Not streaming response:")
    print(response.choices[0].message.content)

# Alternatively, you can use the utility function to gather the full response.
# If all you care about is the text of the first choice, it replaces the
# branching above: it accepts either response type and returns the complete
# text. For a streaming response it must be given an unconsumed iterator,
# which is why the chunks were buffered and replayed above.
response_text = gather_completion_response(response)
print(response_text)

Not streaming response:
The capital of France is **Paris**.
The capital of France is **Paris**.
