In [2]:
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
import os
import json

In [3]:

load_dotenv(override=True) # take environment variables from .env.

# Variables not used here do not need to be updated in your .env file
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
# credential = AzureKeyCredential(os.environ["AZURE_SEARCH_ADMIN_KEY"]) if len(os.environ["AZURE_SEARCH_ADMIN_KEY"]) > 0 else DefaultAzureCredential()
credential = DefaultAzureCredential()
index_name = os.environ["AZURE_SEARCH_INDEX"]
blob_connection_string = os.environ["BLOB_CONNECTION_STRING"]
blob_container_name = os.environ["BLOB_CONTAINER_NAME"]
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_openai_key = os.environ["AZURE_OPENAI_KEY"] if len(os.environ["AZURE_OPENAI_KEY"]) > 0 else None
azure_openai_embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]

In [4]:
import azure.search as azsearch
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import AnalyzeTextOptions, LexicalAnalyzerName

search_index_client = SearchIndexClient(endpoint, credential=credential)
print(search_index_client.get_index_statistics(index_name))

search_client = search_index_client.get_search_client(index_name)



{'document_count': 663, 'storage_size': 19386539, 'vector_index_size': 4071272}


In [5]:

search_results = search_client.search(
    search_text="eye",
    include_total_count=True,
    select=['title', 'parent_id', 'chunk_id', 'chunk'])


print(search_results.get_count())
print(search_results.get_facets())
print(search_results.get_coverage())

3
None
None


In [6]:
for r in search_results:
    print(json.dumps(r, indent=2))

{
  "chunk": "means building over time a unified, modular, elastic, \nscalable, secure and well-governed Data Platform that can support a wealth of use cases spanning across the entire \nanalytical spectrum (descriptive, diagnostic, predictive and prescriptive analytics).   \n \nHaving the target Modern Data Platform in place from inception is almost always an utopia and this shouldn\u2019t \ndiscourage enterprises but it should actually be an eye opener as often is the use case that drives the realization that \nthe data estate needs to be modernized in order to be able to innovate with AI/ML.  The good news is that you don\u2019t \nhave to spend years to build the target desired Data Platform before embarking on the AI journey because typically a \nsmall scale reproduction of it, is all it takes to get started. Cloud technologies are important enablers to accelerate \ntime to value especially when you adopt Platform as a Service (PaaS) capabilities to ingest data from your on-premise

In [42]:
results = search_results.by_page()

In [43]:
r = results.next()

In [44]:
print(r)

<list_iterator object at 0x0000021A88F71C90>


In [None]:
# from azure.search.documents import SearchClient

# search_client = SearchClient(endpoint, index_name, credential=credential)
# for r in search_client.search(search_text="eye"):
#     print(r)


In [None]:

analyse_request = AnalyzeTextOptions(
    text = "to rapidly turn pilots or Minimal \nViable Products (MVP) in production deployments and leverage quick and tangible wins to build momentum for pilots",
    analyzer_name = LexicalAnalyzerName.EN_LUCENE
)

analyze_result = search_index_client.analyze_text(index_name, analyse_request)

for token in analyze_result.tokens:
    print(token.token, token.start_offset, token.end_offset, token.position)

rapidli 3 10 1
turn 11 15 2
pilot 16 22 3
minim 26 33 5
viabl 35 41 6
product 42 50 7
mvp 52 55 8
product 60 70 10
deploy 71 82 11
leverag 87 95 13
quick 96 101 14
tangibl 106 114 16
win 115 119 17
build 123 128 19
momentum 129 137 20
pilot 142 148 22


In [None]:
search_index = search_index_client.get_index(index_name)

search_index.as_dict()


print('\nFields >>>')
for f in search_index.fields:
    print(f"{f.name} ({f.type})")


search_index_vector_search = search_index.vector_search

print('\nvector algorithms >>>')
for algo in search_index_vector_search.algorithms:
    print(f"{algo.name} ")
print('\nvector vectorizers >>>')
for vec in search_index_vector_search.vectorizers:
    print(f"{vec.name} ")
    print(vec)


print(search_index.additional_properties)


Fields >>>
parent_id (Edm.String)
title (Edm.String)
chunk_id (Edm.String)
chunk (Edm.String)
vector (Collection(Edm.Single))

vector algorithms >>>
myHnsw 
myExhaustiveKnn 

vector vectorizers >>>
myOpenAI 
{'additional_properties': {'customWebApiParameters': None}, 'name': 'myOpenAI', 'kind': 'azureOpenAI', 'azure_open_ai_parameters': <azure.search.documents.indexes._generated.models._models_py3.AzureOpenAIParameters object at 0x0000021A8AD70350>}
{}
