### Demo for Azure AI indexing (more write-up to follow)

More details on how to set up service principal authentication (application id & tenant id & [service principal] object ID) and RBAC authorisation (ML Contributor, OpenAI contributor) + AI foundry (AI service, OpenAI models, AI Search)

In [None]:
# Custom RAG with Azure AI Foundry SDK - https://learn.microsoft.com/en-gb/azure/ai-studio/tutorials/copilot-sdk-create-resources?tabs=windows
# (Tutorial: Part 1 - Set up project and development environment to build a custom knowledge retrieval (RAG) app with the Azure AI Foundry SDK)

# Link AI services to a project - https://learn.microsoft.com/en-us/azure/ai-studio/ai-services/how-to/connect-azure-openai


# Tutorial with portal set up and local consumption of index - https://learn.microsoft.com/en-gb/azure/ai-studio/concepts/retrieval-augmented-generation

# On ai-inference https://learn.microsoft.com/en-us/python/api/overview/azure/ai-inference-readme?view=azure-python-preview

In [None]:
# service principal - https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?view=azureml-api-2&tabs=sdk

# https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?view=azureml-api-2&tabs=sdk

# configure a managed identity for ai search - https://learn.microsoft.com/en-us/azure/search/search-howto-managed-identities-data-sources?tabs=portal-sys%2Cportal-user

In [None]:
import pandas as pd

In [6]:
from dotenv import load_dotenv
import os
from openai import AzureOpenAI
env_path = os.path.join(".venv/.env")
load_dotenv(dotenv_path=env_path)

True

In [3]:
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import ConnectionType
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient

In [7]:
# create a project client using environment variables loaded from the .env file
project = AIProjectClient.from_connection_string(
    conn_str=os.environ["AIPROJECT_CONNECTION_STRING"], credential=DefaultAzureCredential()
)

In [12]:
# create a vector embeddings client that will be used to generate vector embeddings
embeddings = project.inference.get_embeddings_client()

In [13]:
# use the project client to get the default search connection
search_connection = project.connections.get_default(
    connection_type=ConnectionType.AZURE_AI_SEARCH, include_credentials=True
)

In [14]:
# Create a search index client using the search connection
# This client will be used to create and delete search indexes
index_client = SearchIndexClient(
    endpoint=search_connection.endpoint_url, credential=AzureKeyCredential(key=search_connection.key)
)

In [16]:
# the index should then appear in the AI search resource, e.g.,
# https://portal.azure.com/#@fegamma.com/resource/subscriptions/1c2256ae-339e-4689-ab4c-b5023dc00d9d/resourceGroups/rg-aistudio-dev-001/providers/Microsoft.Search/searchServices/nlp-exploration-search-free/indexes

from azure.search.documents.indexes.models import SearchIndex, SimpleField, SearchableField, SearchFieldDataType

# Define the index schema
index_name = "mockup-index"
index_schema = SearchIndex(
    name=index_name,
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),  # Primary key
        SearchableField(name="content", type=SearchFieldDataType.String, analyzer_name="en.microsoft"),  # Searchable field
        SimpleField(name="category", type=SearchFieldDataType.String, filterable=True),  # Filterable field
    ],
)

# Create the index
try:
    index_client.create_index(index=index_schema)
    print(f"Index '{index_name}' created successfully.")
except Exception as e:
    print(f"Error creating index: {e}")


Index 'mockup-index' created successfully.


In [17]:
# Create a SearchClient for the newly created index
search_client = SearchClient(
    endpoint=search_connection.endpoint_url,
    index_name=index_name,
    credential=AzureKeyCredential(key=search_connection.key),
)

# Define some mock data
mock_data = [
    {"id": "1", "content": "The quick brown fox jumps over the lazy dog.", "category": "animal"},
    {"id": "2", "content": "Azure AI offers powerful AI capabilities.", "category": "technology"},
    {"id": "3", "content": "This is a mock-up document for testing.", "category": "example"},
]

# Upload the documents
try:
    result = search_client.upload_documents(documents=mock_data)
    print(f"Upload results: {result}")
except Exception as e:
    print(f"Error uploading documents: {e}")


Upload results: [<azure.search.documents._generated.models._models_py3.IndexingResult object at 0x0000016262010620>, <azure.search.documents._generated.models._models_py3.IndexingResult object at 0x0000016262010680>, <azure.search.documents._generated.models._models_py3.IndexingResult object at 0x0000016262010650>]


In [18]:
# Perform a search query
query = "AI"
try:
    results = search_client.search(query)
    print(f"Search results for query '{query}':")
    for result in results:
        print(result)
except Exception as e:
    print(f"Error searching index: {e}")


Search results for query 'AI':
{'id': '2', 'content': 'Azure AI offers powerful AI capabilities.', 'category': 'technology', '@search.score': 1.3921447, '@search.reranker_score': None, '@search.highlights': None, '@search.captions': None}
