### Preparation
- Create an Azure AI Services **multi-service account** in the same region as the Azure AI Search service.
- In the Azure OpenAI resource's access control (IAM), assign the **Cognitive Services OpenAI Contributor** role to the Azure AI Search service.

### Import Packages

In [None]:
import os
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
load_dotenv(".env")

from azure.identity import DefaultAzureCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes import SearchIndexerClient
from azure.search.documents.indexes.models import (
    SearchField,
    SearchFieldDataType,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    SearchIndex,
    SearchIndexerDataContainer,
    SearchIndexerDataSourceConnection,
    SplitSkill,
    InputFieldMappingEntry,
    OutputFieldMappingEntry,
    AzureOpenAIEmbeddingSkill,
    EntityRecognitionSkill,
    KeyPhraseExtractionSkill,
    SearchIndexerIndexProjection,
    SearchIndexerIndexProjectionSelector,
    SearchIndexerIndexProjectionsParameters,
    IndexProjectionMode,
    SearchIndexerSkillset,
    CognitiveServicesAccountKey,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
    SearchIndexer
)
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizableTextQuery, QueryType

# --- Azure AI Search -------------------------------------------------------
# Every value below is read with os.environ[...], so a missing variable
# raises KeyError right here instead of failing later in another cell.
service_endpoint = os.environ["SEARCH_ENDPOINT"]
index_name = os.environ["SEARCH_INDEX_NAME_3"]
key = os.environ["SEARCH_KEY"]
credential = AzureKeyCredential(key)

# --- Azure OpenAI ----------------------------------------------------------
# `embedding_model` is passed later as both the deployment name and the
# model name of the vectorizer and the embedding skill.
aoai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
aoai_key = os.environ["AZURE_OPENAI_API_KEY"]
embedding_model = os.environ["AZURE_OPENAI_EMBEDDING_MODEL"]

# --- Blob storage that holds the source documents --------------------------
connection_string = os.environ["STORAGE_CONN_STR"]
container_name = os.environ["CONTAINER_NAME"]

# --- Azure AI Services multi-service account -------------------------------
ai_multiservice_key = os.environ["AZURE_AI_MULTISERVICE_KEY"]
ai_multiservice_account_name = os.environ["AZURE_AI_MULTISERVICE_ACCOUNT"]

### CREATE AN INDEX

In [None]:
# Define the search index (schema, vector search settings, semantic
# configuration) and create or update it on the search service.
index_client = SearchIndexClient(endpoint=service_endpoint, credential=credential)

# Schema: "chunk_id" is the key field; "text_vector" stores a 3072-dimension
# vector and is tied to the "myHnswProfile" vector search profile.
fields = [
    SearchField(
        name="parent_id",
        type=SearchFieldDataType.String,
    ),
    SearchField(
        name="title",
        type=SearchFieldDataType.String,
        searchable=True,
        filterable=True,
        facetable=True,
        analyzer_name="ja.microsoft",
    ),
    SearchField(
        name="chunk_id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
        analyzer_name="keyword",
    ),
    SearchField(
        name="chunk",
        type=SearchFieldDataType.String,
        sortable=False,
        filterable=False,
        facetable=False,
        analyzer_name="ja.microsoft",
    ),
    SearchField(
        name="key_phrases",
        type=SearchFieldDataType.String,
        searchable=True,
        filterable=True,
        facetable=True,
        analyzer_name="ja.microsoft",
    ),
    SearchField(
        name="text_vector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        vector_search_dimensions=3072,
        vector_search_profile_name="myHnswProfile",
    ),
]

# Vector search: one HNSW algorithm, one profile that uses it, and an Azure
# OpenAI vectorizer attached to that profile.
# NOTE(review): no api_key is passed to the vectorizer; presumably it relies on
# the role assignment described under "Preparation" - confirm.
vector_search = VectorSearch(
    algorithms=[HnswAlgorithmConfiguration(name="myHnsw")],
    profiles=[
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm_configuration_name="myHnsw",
            vectorizer_name="myOpenAI",
        ),
    ],
    vectorizers=[
        AzureOpenAIVectorizer(
            vectorizer_name="myOpenAI",
            kind="azureOpenAI",
            parameters=AzureOpenAIVectorizerParameters(
                resource_url=aoai_endpoint,
                deployment_name=embedding_model,
                model_name=embedding_model,
            ),
        ),
    ],
)

# Semantic configuration named "default": title, keyword and content fields.
semantic_config = SemanticConfiguration(
    name="default",
    prioritized_fields=SemanticPrioritizedFields(
        title_field=SemanticField(field_name="title"),
        keywords_fields=[SemanticField(field_name="key_phrases")],
        content_fields=[SemanticField(field_name="chunk")],
    ),
)
semantic_search = SemanticSearch(configurations=[semantic_config])

# Assemble the index definition and push it to the service.
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
)
result = index_client.create_or_update_index(index)
print(f"{result.name} created")

### CREATE A DATASOURCE

In [None]:
# Register the blob container as a data source connection named "prius-ds".
indexer_client = SearchIndexerClient(endpoint=service_endpoint, credential=credential)

container = SearchIndexerDataContainer(name=container_name)
data_source_connection = SearchIndexerDataSourceConnection(
    name="prius-ds",
    type="azureblob",
    container=container,
    connection_string=connection_string,
)

# The returned object is kept: the indexer cell reads `data_source.name`.
data_source = indexer_client.create_or_update_data_source_connection(
    data_source_connection
)

print(f"Data source '{data_source.name}' created or updated")

### CREATE A SKILLSET

In [None]:
# Build the skillset: split each document into pages, embed every page,
# extract key phrases from every page, and project the pages into the index.
skillset_name = "prius-skillset"

# 1) Chunking: /document/content -> /document/pages
split_skill = SplitSkill(
    description="Split skill to chunk documents",
    context="/document",
    text_split_mode="pages",
    maximum_page_length=4000,
    page_overlap_length=800,
    inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
    outputs=[OutputFieldMappingEntry(name="textItems", target_name="pages")],
)

# 2) Embeddings: one vector per page, stored as /document/pages/*/text_vector.
#    `dimensions` matches the 3072 declared on the index's text_vector field.
embedding_skill = AzureOpenAIEmbeddingSkill(
    description="Skill to generate embeddings via Azure OpenAI",
    context="/document/pages/*",
    resource_url=aoai_endpoint,
    api_key=aoai_key,
    deployment_name=embedding_model,
    model_name=embedding_model,
    dimensions=3072,
    inputs=[InputFieldMappingEntry(name="text", source="/document/pages/*")],
    outputs=[OutputFieldMappingEntry(name="embedding", target_name="text_vector")],
)

# 3) Key phrases per page, stored as /document/pages/*/key_phrases.
keyphrase_skill = KeyPhraseExtractionSkill(
    description="Skill to extract Key Phrases",
    context="/document/pages/*",
    default_language_code="ja",
    inputs=[InputFieldMappingEntry(name="text", source="/document/pages/*")],
    outputs=[OutputFieldMappingEntry(name="keyPhrases", target_name="key_phrases")],
)

# Index projection: each page becomes its own search document in `index_name`,
# linked to its source document through "parent_id". Parent documents
# themselves are not indexed (SKIP_INDEXING_PARENT_DOCUMENTS).
page_mappings = [
    InputFieldMappingEntry(name="chunk", source="/document/pages/*"),
    InputFieldMappingEntry(name="text_vector", source="/document/pages/*/text_vector"),
    InputFieldMappingEntry(name="key_phrases", source="/document/pages/*/key_phrases"),
    InputFieldMappingEntry(name="title", source="/document/metadata_storage_name"),
]
index_projections = SearchIndexerIndexProjection(
    selectors=[
        SearchIndexerIndexProjectionSelector(
            target_index_name=index_name,
            parent_key_field_name="parent_id",
            source_context="/document/pages/*",
            mappings=page_mappings,
        ),
    ],
    parameters=SearchIndexerIndexProjectionsParameters(
        projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
    ),
)

# Attach the AI Services multi-service key to the skillset.
cognitive_services_account = CognitiveServicesAccountKey(key=ai_multiservice_key)

skills = [split_skill, embedding_skill, keyphrase_skill]

skillset = SearchIndexerSkillset(
    name=skillset_name,
    description="Skillset to chunk documents and generating embeddings",
    skills=skills,
    index_projection=index_projections,
    cognitive_services_account=cognitive_services_account,
)

# Push the skillset definition to the search service.
client = SearchIndexerClient(endpoint=service_endpoint, credential=credential)
client.create_or_update_skillset(skillset)
print(f"{skillset.name} created")


### CREATE AN INDEXER

In [None]:
# Create the indexer that connects the data source, the skillset and the
# target index.
indexer_name = "prius-indexer"

# Indexer configuration: request the "private" execution environment.
# For storage accounts with public access, use `indexer_parameters = None`
# instead.
indexer_parameters = {
    "configuration": {
        "executionEnvironment": "private",
    },
}

indexer = SearchIndexer(
    name=indexer_name,
    description="Indexer to index documents and generate embeddings",
    data_source_name=data_source.name,
    skillset_name=skillset_name,
    target_index_name=index_name,
    parameters=indexer_parameters,
)

# Create (or update) the indexer. Any failure is reported by printing the
# exception rather than raising, so the cell itself never fails.
try:
    indexer_client = SearchIndexerClient(endpoint=service_endpoint, credential=credential)
    indexer_result = indexer_client.create_or_update_indexer(indexer)

    print(f" {indexer_name} is created and running. Give the indexer a few minutes before running a query.")
except Exception as e:
    print(f"An error occurred: {e}")

### Check results

In [None]:
# Smoke-test the index: the same query string is sent both as `search_text`
# and as a VectorizableTextQuery against "text_vector" (the service converts
# the text to a vector), and only the top hit's chunk is requested.
query = "PCS警告灯が点滅または点灯する場合の対処法"

search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=credential,
)
vector_query = VectorizableTextQuery(
    text=query,
    fields="text_vector",
    k_nearest_neighbors=50,
)

results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["chunk"],
    top=1,
)

# Print the score and chunk text of each returned document.
for result in results:
    print(f"Score: {result['@search.score']}")
    print(f"Chunk: {result['chunk']}")

In [None]:
# Run the query as text + vector with semantic ranking (configuration
# "default"), keep the top 3 hits, and concatenate them into one
# <DOCUMENT>-delimited string.
query = "PCS警告灯が点滅または点灯する場合の対処法"

search_client = SearchClient(endpoint=service_endpoint, credential=credential, index_name=index_name)
vector_query = VectorizableTextQuery(text=query, k_nearest_neighbors=50, fields="text_vector")

# list(...) materializes the paged results so they can be iterated twice below.
results = list(
    search_client.search(
        search_text=query,
        vector_queries=[vector_query],
        select=["title", "chunk"],
        query_type=QueryType.SEMANTIC,
        semantic_configuration_name="default",
        # One list entry per field name (previously a single "chunk, key_phrases"
        # string inside the list).
        search_fields=["chunk", "key_phrases"],
        # NOTE(review): extractive captions/answers (query_caption / query_answer)
        # were left disabled in the original cell.
        top=3,
    )
)

# Print each title as plain text. The original wrapped the value in a set
# literal, which printed "{'<title>'}" instead of the title itself.
for doc in results:
    print(f"Document title: {doc['title']}")

# Build the combined context in one pass with str.join instead of repeated +=.
concatenated_documents = "".join(
    f"<DOCUMENT>\nTitle: {doc['title']}\ncontent: {doc['chunk']}\n</DOCUMENT>\n"
    for doc in results
)

print(concatenated_documents)