In [10]:
import os
import json
from dotenv import load_dotenv


from azure.identity import (
    DefaultAzureCredential,
    get_bearer_token_provider,
    AzurePowerShellCredential,
    AzureCliCredential )
from azure.core.credentials import AzureKeyCredential

In [35]:
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    VectorSearch,
    VectorSearchProfile,
    HnswAlgorithmConfiguration,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    SemanticSearch,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,

)
from azure.search.documents.indexes import SearchIndexClient


from azure.search.documents.models import VectorizableTextQuery

In [12]:
load_dotenv(dotenv_path=".env", override=True) # Take environment variables from .env.

print(os.environ.get("VERSION"))

0.1.0-agentic-retrieval


In [13]:
azure_openai_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
azure_openai_gpt_deployment = os.environ.get("AZURE_OPENAI_GPT_DEPLOYMENT")
azure_openai_gpt_model = os.environ.get("AZURE_OPENAI_GPT_MODEL")

azure_ai_search_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
index_name = os.environ.get("AZURE_SEARCH_INDEX")
credential = AzureKeyCredential(key=os.environ.get("AZURE_SEARCH_ADMIN_KEY"))


agent_name = "agenticretriever"

In [None]:
import sys
sys.path.append("..")


In [33]:
from helpers.credhlpr import check_credential
print(check_credential(DefaultAzureCredential))
print(check_credential(AzureCliCredential))

✓ DefaultAzureCredential authentication succeeded
✓ AzureCliCredential authentication succeeded


In [None]:
from  helpers.searchhlpr import get_search_client, get_search_index_client

search_index_client = get_search_index_client()
search_client = get_search_client()

print(type(search_client))

index_name:  multimodal-rag
{'document_count': 623, 'storage_size': 27295599, 'vector_index_size': 7699308}
index_name:  multimodal-rag
{'document_count': 623, 'storage_size': 27295599, 'vector_index_size': 7699308}
<class 'azure.search.documents._search_client.SearchClient'>


In [45]:
search_index = search_index_client.get_index(name=index_name)

all_fields = [field.name for field in search_index.fields]
print(f"Fields in index '{index_name}': {all_fields}")

vector_fields = [field.name for field in search_index.fields if field.vector_search_profile_name is not None]
print(f"Vector fields in index '{index_name}': {vector_fields}")

# select_fields = [field.name for field in search_index.fields if field.searchable and field.name not in vector_fields]
select_fields = [field.name for field in search_index.fields if field.name not in vector_fields]
print(f"Searchable fields in index '{index_name}': {select_fields}")

semantic_configuration_name = search_index.semantic_search.default_configuration_name
print(f"Semantic configuration name: {semantic_configuration_name}")



Fields in index 'multimodal-rag': ['content_id', 'text_document_id', 'document_title', 'image_document_id', 'content_text', 'content_embedding', 'content_path', 'locationMetadata']
Vector fields in index 'multimodal-rag': ['content_embedding']
Searchable fields in index 'multimodal-rag': ['content_id', 'text_document_id', 'document_title', 'image_document_id', 'content_text', 'content_path', 'locationMetadata']
Semantic configuration name: multimodal-rag-semantic-configuration


In [114]:
query = "What does a project manager do"
topk = 5

vector_queries = [VectorizableTextQuery(
        text = query,
        k_nearest_neighbors=topk,
        fields=vector,
        exhaustive=True,
    ) for vector in vector_fields]


search_results = search_client.search(
        search_text = query,
        vector_queries=vector_queries,
        select=select_fields,
        top=topk,
        include_total_count=True,
        query_type="semantic",
        semantic_configuration_name=semantic_configuration_name,
        query_answer="extractive",
        query_answer_count=5,
        query_caption="extractive",
        query_caption_highlight_enabled=True,
    )



In [115]:

# for r in search_results:
#     print(json.dumps(r, indent=2))

for result in search_results:
    print("========================================")
    # print(result.keys())
    print("Document title: ", result["document_title"])
    print("Content path: ", result["content_path"])
    print("Content text: ", result["content_text"][:20], " ... ")
    print("Score: ", result["@search.score"])
    # print(result["@search.captions"])
    # print(result)

    for caption in result["@search.captions"]:
        # print("Caption: ", caption)
        print("Text: ", caption.text[:50], " ... ")
        print("Highlights: ", caption.highlights[:100], " ... ")
        # print(type(caption))



Document title:  role_library.pdf
Content path:  None
Content text:  that performance obj  ... 
Score:  0.029206350445747375
Text:  ... Oversee and manage all research and developmen  ... 
Highlights:  <em>... Oversee and manage all research and development projects   - Lead and motivate the research   ... 
Document title:  Enterprise AI Planning.pdf
Content path:  None
Content text:  In most organization  ... 
Score:  0.014084506779909134
Text:  In most organizations there are peopl... The accou  ... 
Highlights:  In most organizations there are peopl... The accountabilities of these individuals are generally:     ... 
Document title:  role_library.pdf
Content path:  None
Content text:  Job Title: Manager o  ... 
Score:  0.01149425283074379
Text:  ...e special pr as needed   • Build and maintain r  ... 
Highlights:  <em>...e special pr </em>as needed   • Build and maintain relationships with external stakeholders    ... 
Document title:  Enterprise AI Planning.pdf
Content path:  None


In [116]:
# index_client = SearchIndexClient(endpoint=endpoint, credential=credential)
index_client = SearchIndexClient(endpoint=azure_ai_search_endpoint, credential=credential)
print(f"Index '{index_name}' created or updated successfully")


Index 'multimodal-rag' created or updated successfully


In [117]:
indexesitor = index_client.list_indexes()

[i.name for i in indexesitor]


['multimodal-rag', 'sharepoint-1748400144034-index']

In [118]:
index_client.get_index("multimodal-rag")

<azure.search.documents.indexes.models._index.SearchIndex at 0x275870787d0>

In [119]:
print(azure_openai_endpoint)
print(azure_openai_gpt_deployment)
print(azure_openai_gpt_model)

https://openaiau.openai.azure.com/
gpt-4o-global
gpt-4o


In [122]:
from azure.search.documents.indexes.models import KnowledgeAgent, KnowledgeAgentAzureOpenAIModel, KnowledgeAgentTargetIndex, KnowledgeAgentRequestLimits, AzureOpenAIVectorizerParameters

agent = KnowledgeAgent(
    name=agent_name,
    models=[
        KnowledgeAgentAzureOpenAIModel(
            azure_open_ai_parameters=AzureOpenAIVectorizerParameters(
                resource_url=azure_openai_endpoint,
                deployment_name=azure_openai_gpt_deployment,
                model_name=azure_openai_gpt_model
            )
        )
    ],
    target_indexes=[
        KnowledgeAgentTargetIndex(
            index_name=index_name,
            default_reranker_threshold=2.5
        )
    ],
)

index_client.create_or_update_agent(agent)
print(f"Knowledge agent '{agent_name}' created or updated successfully")

Knowledge agent 'agenticretriever' created or updated successfully


In [123]:
for a in index_client.list_agents():
    print(a.name)

agenticretriever


In [None]:
from azure.search.documents.agent import KnowledgeAgentRetrievalClient
from azure.search.documents.agent.models import KnowledgeAgentRetrievalRequest, KnowledgeAgentMessage, KnowledgeAgentMessageTextContent, KnowledgeAgentIndexParams

agent_client = KnowledgeAgentRetrievalClient(
    endpoint=azure_ai_search_endpoint,
    agent_name=agent_name,
    credential=credential)


In [125]:
instructions = """
A Q&A agent that can answer questions about the Earth at night.
Sources have a JSON format with a ref_id that must be cited in the answer.
If you do not have the answer, respond with "I don't know".
"""

messages = [
    {
        "role": "system",
        "content": instructions
    }
]


# messages.append({
#     "role": "user",
#     "content": """
#     Why do suburban belts display larger December brightening than urban cores even though absolute light levels are higher downtown?
#     Why is the Phoenix nighttime street grid is so sharply visible from space, whereas large stretches of the interstate between midwestern cities remain comparatively dim?
#     """
# })

messages.append({
    "role": "user",
    "content": """
    What is a project manager? and what is a CEO? and between the two, who is more important in a company?
    """
})


In [126]:
agent_client

<KnowledgeAgentRetrievalClient [endpoint='https://magaisearchswedencentral.search.windows.net', agent='agenticretriever']>

In [127]:

retrieval_result = agent_client.retrieve(
    retrieval_request=KnowledgeAgentRetrievalRequest(
        messages=[KnowledgeAgentMessage(role=msg["role"], content=[KnowledgeAgentMessageTextContent(text=msg["content"])]) for msg in messages if msg["role"] != "system"],
        target_index_params=[KnowledgeAgentIndexParams(index_name=index_name, reranker_threshold=2.5)]
    )
)



In [128]:

messages.append({
    "role": "assistant",
    "content": retrieval_result.response[0].content[0].text
})

In [129]:
retrieval_result

<azure.search.documents.agent._generated.models._models_py3.KnowledgeAgentRetrievalResponse at 0x27586a280d0>

In [130]:
print(retrieval_result.as_dict().keys())

dict_keys(['response', 'activity', 'references'])


In [131]:
retrival_response = retrieval_result.as_dict().get("response")

retrival_activity = retrieval_result.as_dict().get("activity")

retrival_references = retrieval_result.as_dict().get("references")

In [132]:
retrival_activity

[{'id': 0,
  'type': 'ModelQueryPlanning',
  'input_tokens': 1240,
  'output_tokens': 506},
 {'id': 1,
  'type': 'AzureSearchQuery',
  'target_index': 'multimodal-rag',
  'query': {'search': 'What is the role of a project manager in a company?'},
  'query_time': '2025-06-04T01:03:28.534Z',
  'count': 1,
  'elapsed_ms': 1801},
 {'id': 2,
  'type': 'AzureSearchQuery',
  'target_index': 'multimodal-rag',
  'query': {'search': 'What is the role of a CEO in a company?'},
  'query_time': '2025-06-04T01:03:30.190Z',
  'count': 3,
  'elapsed_ms': 1645},
 {'id': 3,
  'type': 'AzureSearchQuery',
  'target_index': 'multimodal-rag',
  'query': {'search': 'How does the importance of a project manager compare to that of a CEO in a company?'},
  'query_time': '2025-06-04T01:03:31.792Z',
  'count': 0,
  'elapsed_ms': 1602},
 {'id': 4, 'type': 'AzureSearchSemanticRanker', 'input_tokens': 78938}]

In [133]:
retrival_references

[{'type': 'AzureSearchDoc',
  'id': '1',
  'activity_source': 2,
  'doc_key': '2e222e310abb_aHR0cHM6Ly9nb2R6aWxsYXN0b3JhZ2UuYmxvYi5jb3JlLndpbmRvd3MubmV0L2ludGVncmF0ZWR2ZWN0b3ItZG9jcy9yb2xlX2xpYnJhcnkucGRm0_pages_0'},
 {'type': 'AzureSearchDoc',
  'id': '2',
  'activity_source': 2,
  'doc_key': '2e222e310abb_aHR0cHM6Ly9nb2R6aWxsYXN0b3JhZ2UuYmxvYi5jb3JlLndpbmRvd3MubmV0L2ludGVncmF0ZWR2ZWN0b3ItZG9jcy9yb2xlX2xpYnJhcnkucGRm0_pages_1'},
 {'type': 'AzureSearchDoc',
  'id': '0',
  'activity_source': 1,
  'doc_key': '2e222e310abb_aHR0cHM6Ly9nb2R6aWxsYXN0b3JhZ2UuYmxvYi5jb3JlLndpbmRvd3MubmV0L2ludGVncmF0ZWR2ZWN0b3ItZG9jcy9yb2xlX2xpYnJhcnkucGRm0_pages_7'},
 {'type': 'AzureSearchDoc',
  'id': '3',
  'activity_source': 2,
  'doc_key': '2e222e310abb_aHR0cHM6Ly9nb2R6aWxsYXN0b3JhZ2UuYmxvYi5jb3JlLndpbmRvd3MubmV0L2ludGVncmF0ZWR2ZWN0b3ItZG9jcy9yb2xlX2xpYnJhcnkucGRm0_pages_2'}]

In [141]:
retrival_references[0]["doc_key"]

'2e222e310abb_aHR0cHM6Ly9nb2R6aWxsYXN0b3JhZ2UuYmxvYi5jb3JlLndpbmRvd3MubmV0L2ludGVncmF0ZWR2ZWN0b3ItZG9jcy9yb2xlX2xpYnJhcnkucGRm0_pages_0'

In [146]:
doc_result  = search_client.get_document(retrival_references[0]["doc_key"])

for k, v in doc_result.items():
    if k in select_fields:
        print(f"> {k}: {v}")  # Print first 100 characters of the field value


> content_id: 2e222e310abb_aHR0cHM6Ly9nb2R6aWxsYXN0b3JhZ2UuYmxvYi5jb3JlLndpbmRvd3MubmV0L2ludGVncmF0ZWR2ZWN0b3ItZG9jcy9yb2xlX2xpYnJhcnkucGRm0_pages_0
> text_document_id: aHR0cHM6Ly9nb2R6aWxsYXN0b3JhZ2UuYmxvYi5jb3JlLndpbmRvd3MubmV0L2ludGVncmF0ZWR2ZWN0b3ItZG9jcy9yb2xlX2xpYnJhcnkucGRm0
> document_title: role_library.pdf
> image_document_id: None
> content_text: Roles Descriptions at 

Contoso Electronics  

 

 
 

 

 

  





This document contains information generated using a language model (Azure OpenAI). The 

information contained in this document is only for demonstration purposes and does not 

reflect the opinions or beliefs of Microsoft. Microsoft makes no representations or 

warranties of any kind, express or implied, about the completeness, accuracy, reliability, 

suitability or availability with respect to the information contained in this document.  

All rights reserved to Microsoft 

  



Contoso Electronics Role Library 
Last Updated: 2023-03-05 

Chief Executive Offi

In [147]:
for r in retrival_response:
    print(r.keys())
    print(r.get('role'))
    print(r.get("content"))


dict_keys(['role', 'content'])
assistant
[{'type': 'text', 'text': '[{"ref_id":0,"title":"role_library.pdf","content":"efficiency, reduce costs, and maximize profitability.  \\n\\n \\n\\nResponsibilities: \\n\\n• Lead and manage the operations team to ensure effective delivery of products and \\n\\nservices to customers.  \\n\\n• Develop and implement strategies for improving operational practices and processes.  \\n\\n• Analyze financial data to identify areas of potential cost savings and profitability.  \\n\\n• Oversee the development and implementation of operational plans and budgets.  \\n\\n• Monitor operational performance to ensure compliance with established standards.  \\n\\n• Establish and maintain strong working relationships with internal and external \\n\\nstakeholders.  \\n\\n• Lead the development of operational policies and procedures.  \\n\\n• Develop short- and long-term goals and objectives to ensure organizational success.  \\n\\n• Identify and implement process im

In [148]:
import json

# Extract the text content from the retrieval result
json_text = retrieval_result.response[0].content[0].text

# Parse the JSON array
try:
    parsed_results = json.loads(json_text)
    print(f"Successfully parsed JSON array with {len(parsed_results)} items")

    # Display the first result
    if parsed_results:
        for i, result in enumerate(parsed_results):
            print(f"\n****************Result {i + 1}:")
            for key, value in result.items():
                # Truncate long values for display
                if isinstance(value, str) and len(value) > 100:
                    value = value[:5] + "..."
                print(f"{key}: {value}")

except json.JSONDecodeError as e:
    print(f"Error parsing JSON: {e}")
    print("Raw text:")
    print(json_text)

Successfully parsed JSON array with 4 items

****************Result 1:
ref_id: 0
title: role_library.pdf
content: effic...

****************Result 2:
ref_id: 1
title: role_library.pdf
content: Roles...

****************Result 3:
ref_id: 2
title: role_library.pdf
content: • Pro...

****************Result 4:
ref_id: 3
title: role_library.pdf
content: manag...


In [149]:
parsed_results

[{'ref_id': 0,
  'title': 'role_library.pdf',
  'content': 'efficiency, reduce costs, and maximize profitability.  \n\n \n\nResponsibilities: \n\n• Lead and manage the operations team to ensure effective delivery of products and \n\nservices to customers.  \n\n• Develop and implement strategies for improving operational practices and processes.  \n\n• Analyze financial data to identify areas of potential cost savings and profitability.  \n\n• Oversee the development and implementation of operational plans and budgets.  \n\n• Monitor operational performance to ensure compliance with established standards.  \n\n• Establish and maintain strong working relationships with internal and external \n\nstakeholders.  \n\n• Lead the development of operational policies and procedures.  \n\n• Develop short- and long-term goals and objectives to ensure organizational success.  \n\n• Identify and implement process improvements to increase efficiency.  \n\n• Ensure safety standards and legal regulatio