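- > ! pip install openai
- > ! pip install azure-search-documents
- > **Note:** the cells below use preview-era SDK names (`SemanticSettings`, `PrioritizedFields`, `VectorSearchAlgorithmConfiguration`, and the `vector=` / `top_k=` / `vector_fields=` search arguments). These appear to exist only in the 11.4.0 beta releases of `azure-search-documents`, so an unpinned install may pull a newer release where the imports fail. Pin the install to the preview build you tested with (TODO: confirm the exact version with `pip show` below).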

In [None]:
pip show azure-search-documents

## Load the Azure AI Search client

In [None]:
import os
import ai_search
from azure.search.documents.indexes import SearchIndexClient  

# Connection settings come from the environment; a missing variable raises
# KeyError right here rather than failing later inside the SDK.
AI_SEARCH_API_ENDPOINT = os.environ['AI_SEARCH_API_ENDPOINT']
AI_SEARCH_API_KEY = os.environ['AI_SEARCH_API_KEY']
AI_SEARCH_INDEX_NAME = os.environ['AI_SEARCH_INDEX_NAME']

# Build the credential through the project-local ai_search helper, then the
# client that the notebook uses for index-level calls (create / delete / get).
credential = ai_search.getAISearchCredential(AI_SEARCH_API_KEY)
index_client = SearchIndexClient(
    credential=credential,
    endpoint=AI_SEARCH_API_ENDPOINT,
)

# Clean slate: remove whatever index an earlier run may have left behind
index_client.delete_index(AI_SEARCH_INDEX_NAME)

## Create my search index

In [49]:
from azure.search.documents import SearchClient  
from azure.search.documents.models import Vector  
from azure.search.documents.indexes.models import (  
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchIndex,  
    SemanticConfiguration,  
    PrioritizedFields,  
    SemanticField,  
    SearchField,  
    SemanticSettings,  
    VectorSearch,  
    VectorSearchAlgorithmConfiguration,  
)  

def _vector_field(field_name):
    """Build a searchable 1536-dimension vector field tied to "my-vector-config"."""
    return SearchField(
        name=field_name,
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=1536,
        vector_search_configuration="my-vector-config",
    )


# Index schema: a string key, three text fields, and one vector per text field
# that gets embedded (title and content).
fields = [
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    SearchableField(name="title", type=SearchFieldDataType.String),
    SearchableField(name="content", type=SearchFieldDataType.String),
    SearchableField(
        name="category",
        type=SearchFieldDataType.String,
        filterable=True,
        facetable=True,
        sortable=True,
    ),
    _vector_field("titleVector"),
    _vector_field("contentVector"),
]

# Suggester over the title field; the autocomplete query refers to it by name
suggester = [{'name': 'sg', 'source_fields': ['title']}]

# Vector index configuration: Hierarchical Navigable Small World (HNSW), an
# Approximate Nearest Neighbor (ANN) algorithm, with cosine similarity.
hnsw_config = VectorSearchAlgorithmConfiguration(
    name="my-vector-config",
    kind="hnsw",
    hnsw_parameters={
        "m": 4,
        "efConstruction": 400,
        "efSearch": 500,
        "metric": "cosine"
    },
)
vector_search = VectorSearch(algorithm_configurations=[hnsw_config])

# Semantic ranking configuration: which fields act as title, keywords and content
prioritized = PrioritizedFields(
    title_field=SemanticField(field_name="title"),
    prioritized_keywords_fields=[SemanticField(field_name="category")],
    prioritized_content_fields=[SemanticField(field_name="content")],
)
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=prioritized,
)
semantic_settings = SemanticSettings(configurations=[semantic_config])

# Assemble the index definition and push it to the service
index = SearchIndex(
    name=AI_SEARCH_INDEX_NAME,
    fields=fields,
    suggesters=suggester,
    vector_search=vector_search,
    semantic_settings=semantic_settings,
)

result = index_client.create_or_update_index(index)
print(f' {result.name} created')

 azure_docs_index created


## Load the AOAI client

In [None]:
import os
import aoai

# Azure OpenAI connection settings, read from the environment
AOAI_API_ENDPOINT = os.environ['OPENAI_API_ENDPOINT']
AOAI_API_KEY = os.environ['OPENAI_API_KEY']
AOAI_API_VERSION = os.environ['OPENAI_API_VERSION']
AOAI_API_ENGINE = os.environ['OPENAI_API_ENGINE']
# The embedding engine is hard-coded here, not taken from the environment
AOAI_API_EMBEDDING_ENGINE = 'text-embedding-3-small'

# The project-local aoai helper returns a (status, client) pair
status, client = aoai.setupOpenai(
    aoai_version=AOAI_API_VERSION,
    aoai_api_key=AOAI_API_KEY,
    aoai_endpoint=AOAI_API_ENDPOINT,
)
print("AOAI setup succeeded" if status == True else "AOAI setup failed")

## Create embeddings
Read your data, generate OpenAI embeddings, and export the result in a format that can be inserted into your Azure AI Search index:

In [50]:
import json
from tenacity import retry, wait_random_exponential, stop_after_attempt  

# Generate document embeddings with the model named by AOAI_API_EMBEDDING_ENGINE

# Read the text-sample.json
with open('./data/text-sample.json', 'r', encoding='utf-8') as file:
    input_data = json.load(file)


@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def generate_embeddings(text):
    """Return the embedding for ``text``.

    Used for the title and content fields of each document and, later in the
    notebook, for query text. The call is delegated to the project-local
    ``aoai.generate_embedding`` helper using the notebook-level ``client`` and
    ``AOAI_API_EMBEDDING_ENGINE``. The ``@retry`` decorator re-invokes the
    function when it raises, with a randomized exponential wait between
    attempts, and stops after six attempts.
    """
    return aoai.generate_embedding(
        the_client=client,
        the_model=AOAI_API_EMBEDDING_ENGINE,
        the_text=text,
    )


# Generate embeddings for title and content fields
for item in input_data:
    title = item['title']
    content = item['content']
    title_embeddings = generate_embeddings(title)
    content_embeddings = generate_embeddings(content)
    item['titleVector'] = title_embeddings
    item['contentVector'] = content_embeddings

# Output embeddings to docVectors.json file. The output directory is created
# first so the write does not fail with FileNotFoundError on a fresh checkout
# (os is imported in an earlier cell).
os.makedirs('./output', exist_ok=True)
with open("./output/docVectors.json", "w", encoding="utf-8") as f:
    json.dump(input_data, f)

## Insert text and embeddings into Azure AI Search vector store
Add texts and metadata from the JSON data to the vector store:

In [51]:
# Upload the embedded documents to the index
with open('./output/docVectors.json', 'r', encoding='utf-8') as file:
    documents = json.load(file)
search_client = SearchClient(endpoint=AI_SEARCH_API_ENDPOINT, index_name=AI_SEARCH_INDEX_NAME, credential=credential)
result = search_client.upload_documents(documents)

# upload_documents returns one IndexingResult per document, so report the
# number the service actually accepted instead of assuming the whole batch
# succeeded, and list any rejected keys.
failed = [outcome for outcome in result if not outcome.succeeded]
print(f"Uploaded {len(result) - len(failed)} documents")
for outcome in failed:
    print(f"Failed to upload document {outcome.key}: {outcome.error_message}")

Uploaded 108 documents


## Search client queries
#### Create an instance of SearchClient first

In [52]:
# Client for document-level queries against the index
search_client = SearchClient(
    endpoint=AI_SEARCH_API_ENDPOINT,
    index_name=AI_SEARCH_INDEX_NAME,
    credential=credential,
)

## Run your first query

In [53]:
# Keyword query using the simple query syntax, with the total hit count requested
query = "IoT*"

results = search_client.search(
    search_text=query,
    query_type='simple',
    include_total_count=True,
)
print('Total Documents Matching Query:', results.get_count())
for doc in results:
    print(f"Title: {doc['title']}")
    print(f"Score: {doc['@search.score']}")
    print(f"Content: {doc['content']}")
    print(f"Category: {doc['category']}\n")

Total Documents Matching Query: 12
Title: Azure IoT Edge
Score: 2.0
Content: Azure IoT Edge is a fully managed service that enables you to run cloud intelligence directly on your IoT devices. It provides features like automatic scaling, device management, and integration with Azure Machine Learning. IoT Edge supports various programming languages, such as C#, Java, and Python. You can use Azure IoT Edge to build edge computing solutions, optimize your operations, and improve your decision-making. It also integrates with other Azure services, such as Azure IoT Hub and Azure Functions.
Category: Internet of Things

Title: Azure IoT Hub
Score: 2.0
Content: Azure IoT Hub is a managed service that enables you to connect, monitor, and manage billions of IoT devices. It provides secure and reliable communication between your IoT devices and your backend solution. IoT Hub supports multiple communication protocols, including MQTT, AMQP, and HTTPS. It offers device-to-cloud and cloud-to-device m

## Add a filter

In [54]:
# Same keyword query as before, restricted to one category with a filter
query = "IoT*"
category_filter = "category eq 'Analytics'"

results = search_client.search(
    search_text=query,
    query_type='simple',
    filter=category_filter,
    include_total_count=True,
)
print('Total Documents Matching Query:', results.get_count())
for doc in results:
    print(f"Title: {doc['title']}")
    print(f"Score: {doc['@search.score']}")
    print(f"Content: {doc['content']}")
    print(f"Category: {doc['category']}\n")

Total Documents Matching Query: 3
Title: Azure Stream Analytics
Score: 1.0
Content: Azure Stream Analytics is a real-time data stream processing service that enables you to analyze and process high volumes of fast-moving data. It supports various data sources, such as Azure Event Hubs, Azure IoT Hub, and Azure Blob Storage. Stream Analytics provides features like windowing, time-based aggregations, and user-defined functions. You can use Stream Analytics to build real-time dashboards, detect anomalies, and generate alerts. It also integrates with other Azure services, such as Azure Functions and Azure Machine Learning.
Category: Analytics

Title: Azure Data Explorer
Score: 1.0
Content: Azure Data Explorer is a fast, fully managed data analytics service for real-time analysis on large volumes of data. It provides features like ingestion, querying, and visualization. Data Explorer supports various data sources, such as Azure Event Hubs, Azure IoT Hub, and Azure Blob Storage. You can use 

## Scope a query to specific searchable fields

In [55]:
# Filtered keyword query where only the title field is searched
query = "IoT*"
category_filter = "category eq 'Analytics'"
searched_fields = ['title']

results = search_client.search(
    search_text=query,
    query_type='simple',
    search_fields=searched_fields,
    filter=category_filter,
    include_total_count=True,
)
print('Total Documents Matching Query:', results.get_count())
for doc in results:
    print(f"Title: {doc['title']}")
    print(f"Score: {doc['@search.score']}")
    print(f"Content: {doc['content']}")
    print(f"Category: {doc['category']}\n")

Total Documents Matching Query: 0


## Return facets

In [56]:
# Match-all query that also requests facet buckets for the category field
results = search_client.search(search_text="*", facets=["category"])

# get_facets() is keyed by field name; print each bucket of the category facet
facets = results.get_facets()
category_buckets = facets["category"]
for bucket in category_buckets:
    print(f"{bucket}")

{'value': 'Analytics', 'count': 14}
{'value': 'Management + Governance', 'count': 12}
{'value': 'Networking', 'count': 12}
{'value': 'Databases', 'count': 10}
{'value': 'Storage', 'count': 10}
{'value': 'Compute', 'count': 8}
{'value': 'AI + Machine Learning', 'count': 7}
{'value': 'Security', 'count': 7}
{'value': 'Internet of Things', 'count': 6}
{'value': 'Integration', 'count': 4}


## Lookup a specific document

In [57]:
# Lookup a specific document directly by its key (no search query involved).
# The key is passed as a string because the index declares `id` as a String field.
result = search_client.get_document(key="3")

print ('Details for document 3:')
print(f"Title: {result['title']}")
print(f"Content: {result['content']}")
print(f"Category: {result['category']}\n")

Details for document 3:
Title: Azure Cognitive Services
Content: Azure Cognitive Services are a set of AI services that enable you to build intelligent applications with powerful algorithms using just a few lines of code. These services cover a wide range of capabilities, including vision, speech, language, knowledge, and search. They are designed to be easy to use and integrate into your applications. Cognitive Services are fully managed, scalable, and continuously improved by Microsoft. It allows developers to create AI-powered solutions without deep expertise in machine learning.
Category: AI + Machine Learning



## Autocomplete a query

In [59]:
# Complete a partial term using the "sg" suggester defined on the index
search_suggestion = 'machine'
results = search_client.autocomplete(
    suggester_name="sg",
    mode='twoTerms',
    search_text=search_suggestion,
)

print("Autocomplete for:", search_suggestion)
for completion in results:
    print(completion['text'])

Autocomplete for: machine
machine learning
machines


## Perform a Semantic Hybrid Search

In [34]:
# Semantic hybrid search: keyword text + content-vector similarity, ranked with
# the "my-semantic-config" semantic configuration, with extractive answers and captions
query = "what is the name of Azure service to store files?"
query_vector = generate_embeddings(query)

results = search_client.search(
    search_text=query,
    vector=query_vector,
    top_k=3,
    vector_fields="contentVector",
    select=["title", "content", "category"],
    query_type="semantic",
    query_language="en-us",
    semantic_configuration_name='my-semantic-config',
    query_caption="extractive",
    query_answer="extractive",
    top=3,
)

# Answers are read before iterating over the documents; prefer the
# highlighted form and fall back to plain text when there is none
semantic_answers = results.get_answers()
for answer in semantic_answers:
    print(f"Semantic Answer: {answer.highlights or answer.text}")
    print(f"Semantic Answer Score: {answer.score}\n")

for doc in results:
    print(f"Title: {doc['title']}")
    print(f"Content: {doc['content']}")
    print(f"Category: {doc['category']}")

    captions = doc["@search.captions"]
    if not captions:
        continue
    # Only the first caption is shown, highlighted when available
    top_caption = captions[0]
    print(f"Caption: {top_caption.highlights or top_caption.text}\n")


Semantic Answer: <em>Azure File Storage </em>is a fully managed, scalable, and secure file sharing service that enables you to store and access your files over the Server Message Block (SMB) protocol. It provides features like snapshots, shared access signatures, and integration with Azure Backup. File Storage supports various platforms, such as Windows, Linux, and macOS..
Semantic Answer Score: 0.9850000143051147

Title: Azure File Storage
Content: Azure File Storage is a fully managed, scalable, and secure file sharing service that enables you to store and access your files over the Server Message Block (SMB) protocol. It provides features like snapshots, shared access signatures, and integration with Azure Backup. File Storage supports various platforms, such as Windows, Linux, and macOS. You can use Azure File Storage to build file sharing solutions, lift and shift your applications to the cloud, and simplify your data management. It also integrates with other Azure services, such 

## Perform a vector similarity search

In [None]:
# Pure vector search: no keyword text, only similarity on the content vector
query = "tools for software development"

search_client = SearchClient(
    AI_SEARCH_API_ENDPOINT,
    AI_SEARCH_INDEX_NAME,
    credential=credential,
)

query_vector = generate_embeddings(query)
results = search_client.search(
    search_text=None,
    vector=query_vector,
    top_k=3,
    vector_fields="contentVector",
    select=["title", "content", "category"],
)

for doc in results:
    print(f"Title: {doc['title']}")
    print(f"Score: {doc['@search.score']}")
    print(f"Content: {doc['content']}")
    print(f"Category: {doc['category']}\n")


In [None]:
# Pure vector search with a non-English query
# (Dutch for 'tools for software development')
query = "tools voor softwareontwikkeling"

search_client = SearchClient(
    AI_SEARCH_API_ENDPOINT,
    AI_SEARCH_INDEX_NAME,
    credential=credential,
)

query_vector = generate_embeddings(query)
results = search_client.search(
    search_text=None,
    vector=query_vector,
    top_k=3,
    vector_fields="contentVector",
    select=["title", "content", "category"],
)

for doc in results:
    print(f"Title: {doc['title']}")
    print(f"Score: {doc['@search.score']}")
    print(f"Content: {doc['content']}")
    print(f"Category: {doc['category']}\n")


## Perform a Cross-Field Vector Search

In [None]:
# Cross-field vector search: one query vector compared against both the
# title and the content vector fields
query = "tools for software development"

search_client = SearchClient(
    AI_SEARCH_API_ENDPOINT,
    AI_SEARCH_INDEX_NAME,
    credential=credential,
)

query_vector = generate_embeddings(query)
results = search_client.search(
    search_text=None,
    vector=query_vector,
    top_k=3,
    vector_fields="titleVector, contentVector",
    select=["title", "content", "category"],
)

for doc in results:
    print(f"Title: {doc['title']}")
    print(f"Score: {doc['@search.score']}")
    print(f"Content: {doc['content']}")
    print(f"Category: {doc['category']}\n")


## Perform a Pure Vector Search with a filter

In [None]:
# Pure vector search combined with a category filter
query = "tools for software development"
category_filter = "category eq 'Developer Tools'"

search_client = SearchClient(
    AI_SEARCH_API_ENDPOINT,
    AI_SEARCH_INDEX_NAME,
    credential=credential,
)

query_vector = generate_embeddings(query)
results = search_client.search(
    search_text=None,
    vector=query_vector,
    top_k=3,
    vector_fields="contentVector",
    filter=category_filter,
    select=["title", "content", "category"],
)

for doc in results:
    print(f"Title: {doc['title']}")
    print(f"Score: {doc['@search.score']}")
    print(f"Content: {doc['content']}")
    print(f"Category: {doc['category']}\n")


## Perform a Hybrid Search

In [None]:
# Hybrid search: the same query is sent as keyword text and as a content vector
query = "scalable storage solution"

search_client = SearchClient(
    AI_SEARCH_API_ENDPOINT,
    AI_SEARCH_INDEX_NAME,
    credential=credential,
)

query_vector = generate_embeddings(query)
results = search_client.search(
    search_text=query,
    vector=query_vector,
    top_k=3,
    vector_fields="contentVector",
    select=["title", "content", "category"],
    top=3,
)

for doc in results:
    print(f"Title: {doc['title']}")
    print(f"Score: {doc['@search.score']}")
    print(f"Content: {doc['content']}")
    print(f"Category: {doc['category']}\n")


## Delete the Index

In [47]:
# Tear down the demo index; any failure is printed rather than raised
try:
    result = index_client.delete_index(AI_SEARCH_INDEX_NAME)
except Exception as ex:
    print(ex)
else:
    print('Index', AI_SEARCH_INDEX_NAME, 'Deleted')

Index azure_docs_index Deleted


## Confirm Index is deleted

In [48]:
# Try to fetch the index again; after deletion the service's error is what
# gets printed, otherwise the index definition is printed
try:
    result = index_client.get_index(AI_SEARCH_INDEX_NAME)
except Exception as ex:
    print(ex)
else:
    print(result)

() No index with the name 'azure_docs_index' was found in the service 'demo-ai-search-basic'.
Code: 
Message: No index with the name 'azure_docs_index' was found in the service 'demo-ai-search-basic'.
