# Azure Cognitive Search Vector Search Code Sample with Cognitive Services Florence Vision API for Images
This code demonstrates how to use Azure Cognitive Search with the Cognitive Services Florence Vision API and the Azure Python SDK.
## Prerequisites
To run the code, install the following packages. Please use the latest pre-release version `pip install azure-search-documents --pre`.

In [None]:
! pip install azure-search-documents --pre
! pip install openai
! pip install python-dotenv
! pip install ipython  

## Import required libraries and environment variables

In [28]:
# Import libraries  
import os  
import json  
import requests  
from dotenv import load_dotenv  
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient  
from azure.search.documents.indexes import SearchIndexClient, SearchIndexerClient  
from azure.search.documents.models import Vector  
from azure.search.documents.indexes.models import (  
    SearchIndex,  
    SearchField,
    SearchFieldDataType,  
    SimpleField,
    FieldMapping,
    SearchableField,  
    SearchIndex,  
    VectorSearch,  
    HnswVectorSearchAlgorithmConfiguration,  
    SearchIndexerDataContainer,  
    SearchIndexer,  
    SearchIndexerDataSourceConnection,  
    InputFieldMappingEntry,  
    OutputFieldMappingEntry,  
    SearchIndexerSkillset,
    CorsOptions,
    IndexingParameters,
    IndexerStatus,
    SearchIndexerDataContainer, SearchIndex, SearchIndexer, SimpleField, SearchFieldDataType,
    EntityRecognitionSkill, InputFieldMappingEntry, OutputFieldMappingEntry, SearchIndexerSkillset,
    CorsOptions, IndexingSchedule, SearchableField, IndexingParameters, SearchIndexerDataSourceConnection
)  
from azure.search.documents.indexes.models import WebApiSkill  
from azure.storage.blob import BlobServiceClient  
from azure.search.documents.indexes import SearchIndexerClient  
from azure.search.documents.indexes.models import (  
    SearchIndexerDataContainer,  
    SearchIndexerDataSourceConnection,  
)  
from IPython.display import Image, display 

  
# Load variables from a .env file into the process environment
load_dotenv()

# Azure Cognitive Search: service endpoint, target index and admin key
service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
index_name = os.environ.get("AZURE_SEARCH_INDEX_NAME")
key = os.environ.get("AZURE_SEARCH_ADMIN_KEY")

# Cognitive Services (Florence Vision API) and the custom skill Azure Function
cogSvcsEndpoint = os.environ.get("COGNITIVE_SERVICES_ENDPOINT")
cogSvcsApiKey = os.environ.get("COGNITIVE_SERVICES_API_KEY")
customSkill_endpoint = os.environ.get("FUNCTION_CUSTOM_SKILL_ENDPOINT")

# Blob Storage account and the container that holds the images
blob_connection_string = os.environ.get("BLOB_CONNECTION_STRING")
container_name = os.environ.get("BLOB_CONTAINER_NAME")

# Credential object built from the search admin key
credential = AzureKeyCredential(key)

# Connect to Blob Storage

Retrieve your images from Blob Storage.

In [40]:
# Connect to Blob Storage and open the container that holds the images
blob_service_client = BlobServiceClient.from_connection_string(blob_connection_string)
container_client = blob_service_client.get_container_client(container_name)
blobs = container_client.list_blobs()

# Peek at the first blob as a quick sanity check. Passing a default to next()
# avoids an unhandled StopIteration when the container has no blobs.
first_blob = next(blobs, None)
if first_blob is None:
    blob_url = None
    print(f"Container '{container_name}' is empty; upload images before running the indexer")
else:
    blob_url = container_client.get_blob_client(first_blob).url
    print(f"URL of the first blob: {blob_url}")

URL of the first blob: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/1012.png


# Connect your Blob Storage to a data source in Cognitive Search

In [41]:
# Register the blob container as a data source on the search service
ds_client = SearchIndexerClient(
    endpoint=service_endpoint,
    credential=AzureKeyCredential(key),
)

# The container to read from, plus the storage connection used to reach it
container = SearchIndexerDataContainer(name=container_name)
data_source_connection = SearchIndexerDataSourceConnection(
    container=container,
    connection_string=blob_connection_string,
    type="azureblob",
    name=f"{index_name}-blob",
)

# The returned object is reused later for its name when the indexer is defined
data_source = ds_client.create_or_update_data_source_connection(data_source_connection)
print(f"Data source '{data_source.name}' created or updated")

Data source 'python-vector-images-demo-blob' created or updated


# Create a skillset

Create a custom skill Azure Function which calls the Florence Vision API to generate image embeddings. See GetImageEmbeddings for details on the custom skill.

In [42]:
# Skillset containing one custom Web API skill that returns an image vector
skillset_name = f"{index_name}-skillset"
skill_uri = customSkill_endpoint

skill = WebApiSkill(
    uri=skill_uri,
    # Each pair is (skill input name, source path in the enriched document)
    inputs=[
        InputFieldMappingEntry(name=input_name, source=source_path)
        for input_name, source_path in (
            ("imageUrl", "/document/metadata_storage_path"),
            ("recordId", "/document/metadata_storage_name"),
        )
    ],
    # The skill's "vector" output is exposed to the indexer as "imageVector"
    outputs=[OutputFieldMappingEntry(name="vector", target_name="imageVector")],
)

skillset = SearchIndexerSkillset(
    name=skillset_name,
    description="Skillset to extract image vector",
    skills=[skill],
)

client = SearchIndexerClient(
    endpoint=service_endpoint,
    credential=AzureKeyCredential(key),
)
client.create_or_update_skillset(skillset)
print(f' {skillset.name} created')


 python-vector-images-demo-skillset created


## Create an index
Create your search index schema and vector search configuration:

In [43]:
# Client used to create or update the search index
index_client = SearchIndexClient(service_endpoint, credential)

# Index schema: a string key, the image URL, a searchable title and a
# 1024-dimension vector field bound to the "my-vector-config" configuration
fields = [
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    SimpleField(name="imageUrl", type=SearchFieldDataType.String, retrievable=True),
    SearchableField(
        name="title",
        type=SearchFieldDataType.String,
        searchable=True,
        retrievable=True,
    ),
    SearchField(
        name="imageVector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=1024,
        vector_search_configuration="my-vector-config",
    ),
]

# HNSW configuration that the vector field above refers to by name
vector_search = VectorSearch(
    algorithm_configurations=[
        HnswVectorSearchAlgorithmConfiguration(
            name="my-vector-config",
            kind="hnsw",
            parameters=dict(m=4, efConstruction=400, efSearch=1000, metric="cosine"),
        )
    ]
)

# Create (or update) the index on the service and report its name
index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)
result = index_client.create_or_update_index(index)
print(f' {result.name} created')


 python-vector-images-demo created


## Create an indexer

Create or update an indexer to process images and populate the search index.

In [44]:
# Indexer that connects the data source, the skillset and the target index
indexer_name = f"{index_name}-indexer"
indexer = SearchIndexer(
    name=indexer_name,
    description="Indexer to process images",
    skillset_name=skillset_name,
    target_index_name=index_name,
    data_source_name=data_source.name,
    # Blob metadata copied directly into index fields: (source, target)
    field_mappings=[
        FieldMapping(source_field_name=source, target_field_name=target)
        for source, target in (
            ("metadata_storage_path", "imageUrl"),
            ("metadata_storage_name", "title"),
        )
    ],
    # Skill output copied into the vector field of the index
    output_field_mappings=[
        FieldMapping(source_field_name="/document/imageVector", target_field_name="imageVector")
    ],
)

indexer_client = SearchIndexerClient(
    endpoint=service_endpoint,
    credential=AzureKeyCredential(key),
)
indexer_result = indexer_client.create_or_update_indexer(indexer)

# Start a run right away
indexer_client.run_indexer(indexer_name)
print(f' {indexer_name} created')

 python-vector-images-demo-indexer created


## Perform a Vector search by vectorizing your text query

Perform a vector search to find the most relevant images based on the text query.

In [47]:
def generate_embeddings(text, cogSvcsEndpoint, cogSvcsApiKey, timeout=30):
    """Vectorize a text query with the Florence Vision ``vectorizeText`` endpoint.

    Args:
        text: Query text to embed.
        cogSvcsEndpoint: Base URL of the Cognitive Services resource. A
            trailing slash is tolerated.
        cogSvcsApiKey: Key sent in the ``Ocp-Apim-Subscription-Key`` header.
        timeout: Seconds to wait for the service before ``requests`` raises a
            timeout error, so a stalled connection cannot hang the notebook.

    Returns:
        The ``vector`` field of the JSON response when the service answers
        with HTTP 200; otherwise ``None``, after printing the status code and
        the response body.
    """
    # Strip trailing slashes so an endpoint written as "https://host/" does
    # not produce a "//computervision" path.
    base_url = cogSvcsEndpoint.rstrip("/")
    url = f"{base_url}/computervision/retrieval:vectorizeText"

    params = {
        "api-version": "2023-02-01-preview"
    }

    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": cogSvcsApiKey
    }

    data = {
        "text": text
    }

    response = requests.post(url, params=params, headers=headers, json=data, timeout=timeout)

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    return response.json()["vector"]

  
# Text query to vectorize
query = "winter clothes"

# Initialize the SearchClient
search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))

# generate_embeddings returns None when the vectorize call fails; stop here with
# a clear message instead of sending a vector without a value to the search service.
query_embedding = generate_embeddings(query, cogSvcsEndpoint, cogSvcsApiKey)
if query_embedding is None:
    raise RuntimeError("Could not vectorize the query; see the error printed above")
vector = Vector(value=query_embedding, k=3, fields="imageVector")

# Perform vector search (no keyword text, vector query only)
results = search_client.search(
    search_text=None,
    vectors=[vector],
    select=["title", "imageUrl"]
)

# Print the search results and render each matching image
for result in results:
    print(f"Title: {result['title']}")
    print(f"Image URL: {result['imageUrl']}")
    display(Image(url=result['imageUrl']))
    print("\n")


Title: Picture505.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture505.jpg




Title: Picture621.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture621.jpg




Title: Picture623.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture623.jpg






Perform a vector search to find the most relevant images based on the image query.

In [48]:
def generate_embeddings(text, cogSvcsEndpoint, cogSvcsApiKey, timeout=30):
    """Vectorize an image with the Florence Vision ``vectorizeImage`` endpoint.

    NOTE: this definition reuses the name of the text variant defined earlier
    in the notebook, so once this cell has run, ``generate_embeddings`` calls
    the image endpoint.

    Args:
        text: URL of the image to embed. Despite the parameter name, the
            value is sent as the ``url`` field of the request body.
        cogSvcsEndpoint: Base URL of the Cognitive Services resource. A
            trailing slash is tolerated.
        cogSvcsApiKey: Key sent in the ``Ocp-Apim-Subscription-Key`` header.
        timeout: Seconds to wait for the service before ``requests`` raises a
            timeout error, so a stalled connection cannot hang the notebook.

    Returns:
        The ``vector`` field of the JSON response when the service answers
        with HTTP 200; otherwise ``None``, after printing the status code and
        the response body.
    """
    # Strip trailing slashes so an endpoint written as "https://host/" does
    # not produce a "//computervision" path.
    base_url = cogSvcsEndpoint.rstrip("/")
    url = f"{base_url}/computervision/retrieval:vectorizeImage"

    params = {
        "api-version": "2023-02-01-preview"
    }

    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": cogSvcsApiKey
    }

    data = {
        "url": text
    }

    response = requests.post(url, params=params, headers=headers, json=data, timeout=timeout)

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    return response.json()["vector"]

  
# Image URL to vectorize for the query (for context, this is a photo of a lady in a red hat)
query = "https://images.unsplash.com/photo-1593476087123-36d1de271f08?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=387&q=80"

# Initialize the SearchClient
search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))

# generate_embeddings returns None when the vectorize call fails; stop here with
# a clear message instead of sending a vector without a value to the search service.
query_embedding = generate_embeddings(query, cogSvcsEndpoint, cogSvcsApiKey)
if query_embedding is None:
    raise RuntimeError("Could not vectorize the query image; see the error printed above")
vector = Vector(value=query_embedding, k=3, fields="imageVector")

# Perform vector search (no keyword text, vector query only)
results = search_client.search(
    search_text=None,
    vectors=[vector],
    select=["title", "imageUrl"]
)

# Print the search results and render each matching image
for result in results:
    print(f"Title: {result['title']}")
    print(f"Image URL: {result['imageUrl']}")
    display(Image(url=result['imageUrl']))
    print("\n")


Title: Picture290.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture290.jpg




Title: Picture285.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture285.jpg




Title: Picture296.jpg
Image URL: https://fsunavalastorage.blob.core.windows.net/vector-sandbox/Picture296.jpg




