# Create AI Search - Index for Avatar Demo

## Setup

In [None]:
# Install the required libraries
%pip install azure-search-documents==11.4.0b6 openai==0.28.1 tenacity requests matplotlib

In [None]:
import os
import json  
import pandas as pd

import pyodbc
import requests
import inspect

import openai  
from tenacity import retry, wait_random_exponential, stop_after_attempt  
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient  
from azure.search.documents.indexes import SearchIndexClient  
from azure.search.documents.models import Vector  
from azure.search.documents.indexes.models import (  
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchIndex,  
    SemanticConfiguration,  
    PrioritizedFields,  
    SemanticField,  
    SearchField,  
    SemanticSettings,  
    VectorSearch,  
    VectorSearchAlgorithmConfiguration,  
)  

You need to have the following settings for your Azure resources defined in the `local.settings.json` file in the __api__ subfolder to populate the demo content for the outdoor app:

In [None]:
# Read the settings file; every value used below sits under its top-level "Values" key
with open('./api/local.settings.json', 'r') as settings_file:
    data = json.load(settings_file)

settings = data["Values"]

# Azure Cognitive Search: endpoint, admin/query key and the name of the index to work with
service_endpoint = settings["AZURE_SEARCH_ENDPOINT"]
key = settings["AZURE_SEARCH_API_KEY"]
index_name = settings["AZURE_SEARCH_INDEX"]

# Blob SAS URL for Azure Storage Account (used to fetch the product images)
blob_sas_url = settings["BLOB_SAS_URL"]

# Azure OpenAI: configure the openai module globally for the Azure flavour of the API
openai.api_type = "azure"
openai.api_key = settings["AZURE_OPENAI_API_KEY"]
openai.api_base = settings["AZURE_OPENAI_ENDPOINT"]
openai.api_version = settings["AZURE_OPENAI_API_VERSION"]
AOAI_embeddings_deployment = settings["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"]

# Azure SQL Database connection details
sql_db_server = settings["SQL_DB_SERVER"]
sql_db_user = settings["SQL_DB_USER"]
sql_db_password = settings["SQL_DB_PASSWORD"]
sql_db_name = settings["SQL_DB_NAME"]

## Create Cognitive Search Index
First, we create a new index in the Cognitive Search service that you have deployed manually and populate it with the demo data.

In [None]:
# Load the product catalog; `id` is forced to text so it is not parsed as a number
df = pd.read_csv('data/mb_products_cs_index.csv', dtype={'id': str})
display(df.head())

# One dictionary per CSV row, keyed by column name
input_data = df.to_dict(orient='records')

# Key-based credential for the search service clients created later
credential = AzureKeyCredential(key)

In [None]:
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def generate_embeddings(text):
    """Return the embedding vector for ``text``.

    The vector is requested from the Azure OpenAI embeddings deployment named
    by ``AOAI_embeddings_deployment``. The function is used both for the
    document fields and for query text.

    Failed calls are retried with randomized exponential backoff (waits capped
    at 20 seconds); the retry gives up after six attempts.
    """
    result = openai.Embedding.create(
        engine=AOAI_embeddings_deployment,
        input=text,
    )
    # A single input yields a single entry in the response's "data" list
    return result['data'][0]['embedding']

In [None]:
# Attach an embedding vector for the tagline and the description of every product
for product in input_data:
    source_texts = (product['tagline'], product['description'])
    # Both embeddings are computed before either vector is stored on the product
    product['tagline_vector'], product['description_vector'] = [
        generate_embeddings(text) for text in source_texts
    ]

In [None]:
# Persist the products, including their embedding vectors, as a JSON file
with open("./data/product-catalog-vectors.json", "w") as vectors_file:
    json.dump(input_data, fp=vectors_file)

In [None]:
# Delete ACS index if it exists
from azure.core.exceptions import ResourceNotFoundError

index_client = SearchIndexClient(endpoint=service_endpoint, credential=credential)

try:
    # get_index raises ResourceNotFoundError when the service has no index of that name
    index_client.get_index(index_name)
except ResourceNotFoundError:
    print('Index does not exist. No need to delete it.')
else:
    # Only "not found" is treated as "nothing to delete". Any other failure
    # (wrong key, unreachable endpoint, a failing delete) now surfaces instead
    # of being reported as a missing index.
    print('Deleting existing index...')
    index_client.delete_index(index_name)

In [None]:
# Define the index schema and create (or update) the index on the search service
index_client = SearchIndexClient(endpoint=service_endpoint, credential=credential)

# Name that links the vector fields to the algorithm configuration defined further down
vector_config_name = "my-vector-config"
# Number of components stored per vector.
# NOTE(review): presumably the output size of the configured embeddings deployment - confirm
vector_dimensions = 1536
vector_type = SearchFieldDataType.Collection(SearchFieldDataType.Single)

fields = [
    # Document key
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    # Full-text searchable product texts
    SearchableField(name="name", type=SearchFieldDataType.String),
    SearchableField(name="tagline", type=SearchFieldDataType.String),
    SearchableField(name="description", type=SearchFieldDataType.String),
    # Prices
    SimpleField(name="original_price", type=SearchFieldDataType.Double),
    SimpleField(name="special_offer", type=SearchFieldDataType.Double),
    # Category is filterable so that queries can restrict results to one category
    SearchableField(name="category", type=SearchFieldDataType.String, filterable=True),
    # Vector fields holding the tagline and description embeddings
    SearchField(
        name="tagline_vector",
        type=vector_type,
        searchable=True,
        vector_search_dimensions=vector_dimensions,
        vector_search_configuration=vector_config_name,
    ),
    SearchField(
        name="description_vector",
        type=vector_type,
        searchable=True,
        vector_search_dimensions=vector_dimensions,
        vector_search_configuration=vector_config_name,
    ),
    # File name of the product image
    SimpleField(name="product_image_file", type=SearchFieldDataType.String),
]

# HNSW algorithm configuration referenced by the vector fields above
hnsw_config = VectorSearchAlgorithmConfiguration(
    name=vector_config_name,
    kind="hnsw",
    hnsw_parameters=dict(m=4, efConstruction=400, efSearch=500, metric="cosine"),
)
vector_search = VectorSearch(algorithm_configurations=[hnsw_config])

# Semantic configuration: which fields act as title, keywords and content
prioritized_fields = PrioritizedFields(
    title_field=SemanticField(field_name="tagline"),
    prioritized_keywords_fields=[SemanticField(field_name="category")],
    prioritized_content_fields=[SemanticField(field_name="description")],
)
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=prioritized_fields,
)
semantic_settings = SemanticSettings(configurations=[semantic_config])

# Assemble the index definition and send it to the service
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_settings=semantic_settings,
)
result = index_client.create_or_update_index(index)
print(f' {result.name} created')

In [None]:
# Upload documents to the index
with open("./data/product-catalog-vectors.json", 'r') as file:
    documents = json.load(file)
search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=credential)
result = search_client.upload_documents(documents)

# upload_documents returns one indexing result per document. A document can be
# rejected even though the request as a whole went through, so report the number
# that actually succeeded instead of the number that was sent.
failed = [doc_result for doc_result in result if not doc_result.succeeded]
print(f"Uploaded {len(result) - len(failed)} of {len(documents)} documents")
for doc_result in failed:
    print(f"  Failed to upload document {doc_result.key}: {doc_result.error_message}")

## Perform Test Queries
We now run a few test queries against the Cognitive Search index. If they succeed, the matching outdoor product information and images are displayed.

In [None]:
# Fields that the test queries ask the service to return for each hit
fields_of_interest = [
    "id",
    "name",
    "tagline",
    "description",
    "original_price",
    "special_offer",
    "category",
    "product_image_file",
]

# Client bound to the demo index, used by the test queries
search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=credential)

In [None]:
import requests
from matplotlib import pyplot as plt
from io import BytesIO

def display_image_from_blob(image_file, timeout=30):
    """Download an image from blob storage and display it with matplotlib.

    The image URL is derived from the module-level ``blob_sas_url`` by
    inserting ``image_file`` as the last path segment, in front of the query
    string of the SAS URL.

    Args:
        image_file: Name of the image blob, relative to the location that
            ``blob_sas_url`` points at.
        timeout: Seconds to wait for the storage service. Without a timeout a
            stalled download would block the notebook indefinitely.

    Prints a failure message instead of showing an image when the HTTP status
    of the response is not 200.
    """
    # Split once at the first "?"; `sas_token` is empty when the URL carries no
    # query string, in which case no "?" is appended.
    base_url, _, sas_token = blob_sas_url.partition("?")
    # Drop a trailing "/" so the joined URL never contains "//"
    image_url = f"{base_url.rstrip('/')}/{image_file}"
    if sas_token:
        image_url += f"?{sas_token}"

    # Get the image content
    response = requests.get(image_url, timeout=timeout)

    # Bail out early when the request was not successful
    if response.status_code != 200:
        print(f"Failed to retrieve image. HTTP Status code: {response.status_code}")
        return

    # Decode the downloaded bytes and display the image
    img = plt.imread(BytesIO(response.content))
    plt.imshow(img)
    plt.axis('off')  # No axes for this plot
    plt.show()

def print_results(results):
    """Print the main fields of every search hit and display its product image.

    Args:
        results: Iterable of hits. Each hit must support key lookup for
            ``@search.score`` and for the product fields printed below.
    """
    for hit in results:
        print(f"Score: {hit['@search.score']}")
        print(f"Name: {hit['name']}")
        print(f"Category: {hit['category']}")
        print(f"Tagline: {hit['tagline']}")
        # Only the first 50 characters of the description are shown
        description_preview = hit['description'][:50]
        print(f"Description: {description_preview}")
        print(f"Original price: {hit['original_price']}")
        print(f"Special offer: {hit['special_offer']}")
        image_file = hit['product_image_file']
        print(f"Image file: {image_file}\n")
        display_image_from_blob(image_file)


# Pure vector search over the description embeddings, filtered to a single category
query = "Trek"
query_vector = generate_embeddings(query)

results = search_client.search(
    search_text=None,  # no keyword search, vector similarity only
    vector=query_vector,
    top_k=3,
    vector_fields="description_vector",
    filter="category eq 'outdoor'",
    select=fields_of_interest,
)

print_results(results)