# Azure AI Search Index Creation

In [None]:
# Install the required Azure packages and python-dotenv for reading .env files
%pip install azure-core azure-search-documents python-dotenv

In [26]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex, SimpleField, SearchableField, SearchFieldDataType, SearchField, HnswAlgorithmConfiguration,
    VectorSearch, VectorSearchProfile, SemanticField, SemanticConfiguration, SemanticPrioritizedFields, SemanticSearch
)
from dotenv import load_dotenv
load_dotenv()

import os

# Connection settings are read from the environment (load_dotenv() above has
# already merged any .env file into it); the index name is fixed here.
endpoint = os.getenv("AZURE_SEARCH_ENDPOINT")
key = os.getenv("AZURE_SEARCH_KEY")
index_name = "rag-index"

# Fail fast with an actionable message: os.getenv returns None for an unset
# variable, which would otherwise only surface as an error raised from inside
# the Azure SDK constructors below.
missing_settings = [
    env_name
    for env_name, env_value in (
        ("AZURE_SEARCH_ENDPOINT", endpoint),
        ("AZURE_SEARCH_KEY", key),
    )
    if not env_value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_settings)
        + ". Set them in the environment or in a .env file."
    )

# Index-management client authenticated with the admin API key.
client = SearchIndexClient(endpoint=endpoint, credential=AzureKeyCredential(key))

# Index schema. Every field type is spelled with SearchFieldDataType so the
# scalar fields and the vector field use the same notation
# (SearchFieldDataType.String is the "Edm.String" type).
fields = [
    # Document key.
    SimpleField(name="id", type=SearchFieldDataType.String, key=True),
    # Full-text searchable body of the document.
    SearchableField(name="content", type=SearchFieldDataType.String),
    # Filterable, but not full-text searchable.
    SimpleField(name="category", type=SearchFieldDataType.String, filterable=True),
    SimpleField(name="source", type=SearchFieldDataType.String),
    # Embedding vector for "content".
    # NOTE(review): assumes 1536-dimensional embeddings - confirm against the
    # embedding model actually used to populate this field.
    SearchField(
        name="contentVector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=1536,
        vector_search_profile_name="userHnswProfile",
    ),
]

# Vector search setup: one HNSW algorithm configuration ("userHnsw"), made
# available to fields through the "userHnswProfile" profile.
hnsw_algorithm = HnswAlgorithmConfiguration(
    name="userHnsw",
    parameters={
        "m": 8,
        "efConstruction": 800,
        "efSearch": 800,
        "metric": "cosine",
    },
)
hnsw_profile = VectorSearchProfile(
    name="userHnswProfile",
    algorithm_configuration_name="userHnsw",
)
vector_search = VectorSearch(
    algorithms=[hnsw_algorithm],
    profiles=[hnsw_profile],
)




# Assemble the index definition: schema, vector search settings and a single
# semantic configuration named "default".
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=SemanticSearch(
        configurations=[
            SemanticConfiguration(
                name="default",
                prioritized_fields=SemanticPrioritizedFields(
                    # "content" presumably holds natural-language text, so it
                    # is registered as a content field rather than as a
                    # keywords field, which is meant for keyword/tag lists.
                    content_fields=[
                        SemanticField(field_name="content")
                    ]
                ),
            )
        ]
    ),
)

# Create the index if it does not exist yet, otherwise update it in place.
client.create_or_update_index(index)
print(f"Index '{index_name}' created or updated.")

# # Optional example: add the Embedding and searchContent fields to the existing index
# additional_fields = [
#     SearchField(name="Embedding", type=SearchFieldDataType.Collection(SearchFieldDataType.Single), retrievable=True, searchable=True, vector_search_dimensions=1536, vector_search_profile_name="userHnswProfile"),
#     SearchField(name="searchContent", type=SearchFieldDataType.String, searchable=True)
# ]

# # Retrieve the existing index
# existing_index = client.get_index(index_name)
# existing_index.fields.extend(additional_fields)

# # Update the index with the new fields
# result = client.create_or_update_index(existing_index)
# print(f'Index {result.name} updated with additional fields')


Index 'rag-index' created or updated.
