# Provision knowledge source and knowledge base 
This notebook configures the knowledge source and knowledge base required by the **Fault Diagnosis Agent**.

## Overview
- **Knowledge source**: specifies the content used for agentic retrieval. It either encapsulates a search index that is populated by an external data source, or it is a direct connection to a remote source, such as Bing or SharePoint, that is queried directly.
- **Knowledge base**: a top-level object that orchestrates agentic retrieval. It defines which knowledge sources to query and the default behavior for retrieval operations

## 1. Import dependencies and load environment
Import all necessary libraries for the provisioning

In [1]:
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    AzureBlobKnowledgeSource,
    AzureBlobKnowledgeSourceParameters,
    AzureOpenAIVectorizerParameters,
    KnowledgeBase,
    KnowledgeBaseAzureOpenAIModel,
    KnowledgeRetrievalLowReasoningEffort,
    KnowledgeRetrievalOutputMode,
    KnowledgeSourceAzureOpenAIVectorizer,
    KnowledgeSourceContentExtractionMode,
    KnowledgeSourceIngestionParameters,
    KnowledgeSourceReference,
)
from azure.search.documents.knowledgebases import KnowledgeBaseRetrievalClient
from azure.search.documents.knowledgebases.models import (
    KnowledgeBaseMessage,
    KnowledgeBaseMessageTextContent,
    KnowledgeBaseRetrievalRequest,
    SearchIndexKnowledgeSourceParams,
)
from dotenv import load_dotenv

# Read settings from a local .env file. override=True lets values from the
# file replace variables that are already set in the process environment.
load_dotenv(override=True)

# Fixed resource names that the later cells refer to.
knowledge_source_name = "machine-wiki-blob-ks"
knowledge_base_name = "machine-kb"

# Every lookup below yields None when the variable is not set.

# Azure AI Search service
search_endpoint = os.getenv("SEARCH_SERVICE_ENDPOINT")
search_key = os.getenv("SEARCH_ADMIN_KEY")

# Azure Blob Storage holding the source documents
storage_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")

# Azure OpenAI resource and model deployments
openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
openai_key = os.getenv("AZURE_OPENAI_KEY")
model_deployment_name = os.getenv("MODEL_DEPLOYMENT_NAME")
embedding_model_deployment_name = os.getenv("EMBEDDING_MODEL_DEPLOYMENT_NAME")

# Azure AI project that will receive the connection
project_resource_id = os.getenv("AZURE_AI_PROJECT_RESOURCE_ID")



## 2. Create knowledge source
Creates a Blob Storage knowledge source

In [None]:

# Client used to manage knowledge sources on the search service; it
# authenticates with the search key loaded in the configuration cell.
index_client = SearchIndexClient(
    endpoint=search_endpoint,
    credential=AzureKeyCredential(search_key),
)

# Azure OpenAI settings for the chat-completion model used during ingestion.
# model_name is given the deployment name (assumes the deployment is named
# after the model it serves - TODO confirm).
chat_model_params = AzureOpenAIVectorizerParameters(
    resource_url=openai_endpoint,
    deployment_name=model_deployment_name,
    api_key=openai_key,
    model_name=model_deployment_name,
)

# Azure OpenAI settings for the embedding model (same naming assumption).
embedding_model_params = AzureOpenAIVectorizerParameters(
    resource_url=openai_endpoint,
    deployment_name=embedding_model_deployment_name,
    api_key=openai_key,
    model_name=embedding_model_deployment_name,
)

# Ingestion options: both models, minimal content extraction, and no
# identity, schedule, or permission options.
ingestion_parameters = KnowledgeSourceIngestionParameters(
    identity=None,
    disable_image_verbalization=False,
    chat_completion_model=KnowledgeBaseAzureOpenAIModel(
        azure_open_ai_parameters=chat_model_params,
    ),
    embedding_model=KnowledgeSourceAzureOpenAIVectorizer(
        azure_open_ai_parameters=embedding_model_params,
    ),
    content_extraction_mode=KnowledgeSourceContentExtractionMode.MINIMAL,
    ingestion_schedule=None,
    ingestion_permission_options=None,
)

# Where the documents live: the whole "machine-wiki" container (no folder
# filter) in the configured storage account.
blob_parameters = AzureBlobKnowledgeSourceParameters(
    connection_string=storage_connection_string,
    container_name="machine-wiki",
    folder_path=None,
    is_adls_gen2=False,
    ingestion_parameters=ingestion_parameters,
)

knowledge_source = AzureBlobKnowledgeSource(
    name=knowledge_source_name,
    description="This knowledge source pulls from a blob storage container.",
    encryption_key=None,
    azure_blob_parameters=blob_parameters,
)

# Create the knowledge source, or update it if one with this name exists.
index_client.create_or_update_knowledge_source(knowledge_source)
print(f"✅ Knowledge source '{knowledge_source.name}' created or updated successfully.")

## 3. Examine the knowledge source
The knowledge source we created is powered by **Azure Search**. Navigate to [Azure Portal](https://portal.azure.com) and select the search service.

1. Select _Agentic retrieval_/_Knowledge sources_ and review the **machine-wiki-blob-ks** knowledge source.
2. Select _Search management_/_Indexes_ and review the **machine-wiki-blob-ks-index** (note that it might take a few moments for the indexer to complete and the document count to be updated)
3. Select _Search management_/_Skillset_ and review the **machine-wiki-blob-ks-skillset**

## 4. Create the Machine Data knowledge base
Reference the previously created knowledge source



In [None]:

# Client used to manage knowledge bases on the search service.
index_client = SearchIndexClient(
    endpoint=search_endpoint,
    credential=AzureKeyCredential(search_key),
)

# Azure OpenAI chat model the knowledge base uses. model_name is given the
# deployment name (assumes the deployment is named after the model it
# serves - TODO confirm).
aoai_params = AzureOpenAIVectorizerParameters(
    resource_url=openai_endpoint,
    api_key=openai_key,
    deployment_name=model_deployment_name,
    model_name=model_deployment_name,
)

knowledge_base = KnowledgeBase(
    name=knowledge_base_name,
    description="This knowledge base handles questions about common issues with manufacturing machines",
    retrieval_instructions=f"Use the {knowledge_source_name} to query potential root causes for problems by machine type",
    answer_instructions="Provide a single sentence for the likely cause of the issue based on the retrieved documents.",
    output_mode=KnowledgeRetrievalOutputMode.ANSWER_SYNTHESIS,
    knowledge_sources=[
        KnowledgeSourceReference(name=knowledge_source_name),
    ],
    models=[
        KnowledgeBaseAzureOpenAIModel(azure_open_ai_parameters=aoai_params),
    ],
    encryption_key=None,
    # Pass an instance of the reasoning-effort model, not the class object:
    # the request body is built from model instances.
    retrieval_reasoning_effort=KnowledgeRetrievalLowReasoningEffort(),
)

# Create the knowledge base, or update it if one with this name exists.
index_client.create_or_update_knowledge_base(knowledge_base)
print(f"✅ Knowledge base '{knowledge_base.name}' created or updated successfully.")

## 5. Test the knowledge base
Try accessing the knowledge base with a sample request

In [None]:

# Client bound to a single knowledge base, used to run retrieval requests.
kb_client = KnowledgeBaseRetrievalClient(
    endpoint=search_endpoint,
    knowledge_base_name=knowledge_base_name,
    credential=AzureKeyCredential(search_key),
)

request = KnowledgeBaseRetrievalRequest(
    messages=[
        # This is the question an end user would ask, so it is sent with the
        # "user" role rather than as an assistant turn.
        KnowledgeBaseMessage(
            role="user",
            content=[
                KnowledgeBaseMessageTextContent(
                    text="What can be the potential issue if curing_temperature is above 178°C",
                ),
            ],
        ),
    ],
    knowledge_source_params=[
        # NOTE(review): the knowledge source created earlier in this notebook
        # is a Blob knowledge source; confirm that the search-index params
        # type is the right one to pair with it.
        SearchIndexKnowledgeSourceParams(
            knowledge_source_name=knowledge_source_name,
            include_references=True,
            include_reference_source_data=True,
            always_query_source=False,
        ),
    ],
    include_activity=True,
)

result = kb_client.retrieve(request)

# Print the text of the first content item of the first response message,
# without raising if the service returned nothing.
if result.response and result.response[0].content:
    print(result.response[0].content[0].text)
else:
    print("⚠️ The knowledge base returned no answer.")

## 6. Create project connection
Make the knowledge base available as an MCP server via a project connection

In [None]:
import requests
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# Provide connection details
credential = DefaultAzureCredential()
project_connection_name = "machine-wiki-connection"

# This endpoint enables the MCP connection between the agent and knowledge base.
# A trailing "/" on the search endpoint is stripped so the URL has no "//".
mcp_endpoint = (
    f"{search_endpoint.rstrip('/')}/knowledgebases/{knowledge_base_name}/mcp"
    "?api-version=2025-11-01-preview"
)

# Get bearer token for authentication against the Azure management API
bearer_token_provider = get_bearer_token_provider(
    credential, "https://management.azure.com/.default"
)
headers = {
    "Authorization": f"Bearer {bearer_token_provider()}",
}

# Create project connection (PUT to the connection's resource URL)
response = requests.put(
    f"https://management.azure.com{project_resource_id}/connections/{project_connection_name}?api-version=2025-10-01-preview",
    headers=headers,
    json={
        # Send the actual connection name so the body matches the resource
        # addressed in the URL, not the literal variable name.
        "name": project_connection_name,
        "type": "Microsoft.MachineLearningServices/workspaces/connections",
        "properties": {
            "authType": "ProjectManagedIdentity",
            "category": "RemoteTool",
            "target": mcp_endpoint,
            "isSharedToAll": True,
            "audience": "https://search.azure.com/",
            "metadata": {"ApiType": "Azure"},
        },
    },
    # Without a timeout, requests can wait indefinitely on a stalled call.
    timeout=60,
)

# Raise on any 4xx/5xx status so a failed call is not reported as success.
response.raise_for_status()
print(f"Connection '{project_connection_name}' created or updated successfully.")