### For this lab, you need to create a Cosmos DB container with vector indexing and search features enabled.
(read more: https://learn.microsoft.com/en-us/python/api/overview/azure/cosmos-readme?view=azure-python)




### Set up steps

1- Enable vector indexing and search (Vector Search for NoSQL API) and full-text search (Preview Features for Full Text Search) in Azure Cosmos DB for NoSQL via the Features page of your Azure Cosmos DB account:
<img src="./imgs/cosmos_policy.png" alt="description" width="900" height="300"/>

2- Define vector embedding policy, indexing policy and full text policy which will then be used for creating the container:

In [31]:
# Vector embedding policy: declares the item paths that hold embeddings.
# Both paths share the same settings (float32 components, cosine distance,
# 1536 dimensions), so the entries are generated from the list of paths.
vector_embedding_policy = {
    "vectorEmbeddings": [
        {
            "path": vector_path,
            "dataType": "float32",
            "distanceFunction": "cosine",
            "dimensions": 1536,
        }
        for vector_path in ("/vector1", "/vector2")
    ]
}

In [32]:
# Full-text policy: the item paths that hold searchable text and the language
# of each. Every path here uses the same language as the container default.
full_text_policy = {
    "defaultLanguage": "en-US",
    "fullTextPaths": [
        {"path": text_path, "language": "en-US"}
        for text_path in ("/description", "/name")
    ],
}

In [33]:
# Indexing policy for the container: index every path by default, carve the
# embedding arrays out of the default index, and add the dedicated vector and
# full-text indexes.
indexing_policy = {
    "indexingMode": "consistent",
    "automatic": True,
    "includedPaths": [
        {"path": "/*"},
    ],
    "excludedPaths": [
        {"path": "/_etag/?"},
        # Every path declared in the vector embedding policy is excluded from
        # the default index. Leaving an embedding array under "/*" makes each
        # insert index all of its components, which raises RU charge and
        # latency without helping vector queries.
        {"path": "/vector1/*"},
        {"path": "/vector2/*"},
    ],
    # NOTE(review): only /vector1 gets a vector index; /vector2 has none here.
    # Presumably intentional (e.g. to compare indexed vs. unindexed search) -
    # confirm before adding one.
    "vectorIndexes": [
        {"path": "/vector1", "type": "diskANN"},
    ],
    "fullTextIndexes": [
        {"path": "/description"},
        {"path": "/name"},
    ],
}

3- Create the container "reviews"

In [34]:
from azure.cosmos import CosmosClient, PartitionKey
import os
from dotenv import load_dotenv
# Read connection settings from a local .env file into the environment.
load_dotenv()

databaseName = os.getenv("COSMOS_DATABASE_NAME")
containerName = "reviews"
client = CosmosClient.from_connection_string(os.getenv("COSMOS_CONNECTION_STRING"))
database = client.get_database_client(databaseName)

# Create the container partitioned on /category with the three policies
# defined earlier. The *_if_not_exists variant is used so this cell can be
# re-run: plain create_container raises when the container already exists.
# NOTE: if the container already exists it is returned as-is; its policies are
# not compared with or updated to the ones passed here.
database.create_container_if_not_exists(
    id=containerName,
    partition_key=PartitionKey(path="/category"),
    indexing_policy=indexing_policy,
    vector_embedding_policy=vector_embedding_policy,
    full_text_policy=full_text_policy,
)

<ContainerProxy [dbs/Contoso/colls/reviews]>

4- Populate with data + vectors

In [None]:
from azure.cosmos.aio import CosmosClient
import os
import json

# connect to embedding service
from semantic_kernel.connectors.ai.open_ai import AzureTextEmbedding
import os
# NOTE(review): api_key is not passed to the embedding client below, which
# reads AZURE_OPENAI_KEY instead - confirm whether it is still needed.
api_key = os.getenv("OPENAI_API_KEY")

# Embedding client for the "text-embedding-ada-002" deployment; the key and
# both endpoint settings are taken from environment variables.
embedding_service = AzureTextEmbedding(
    deployment_name="text-embedding-ada-002",
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    endpoint=os.getenv("AZURE_OPENAI_EMBED_ENDPOINT"),
    base_url=os.getenv("AZURE_OPENAI_BASE_EMBED_URL"),
)

# Load product reviews from JSON file.
# The encoding is given explicitly: without it open() uses the platform
# default (e.g. cp1252 on Windows), which can mis-decode non-ASCII text in a
# UTF-8 JSON file.
with open('./src/sample_products.json', 'r', encoding='utf-8') as f:
    reviews = json.load(f)

# Target database (from the environment) and container for the ingestion below.
databaseName = os.getenv("COSMOS_DATABASE_NAME")
containerName = "reviews"
async def create_products(reviews):
    """Embed each review's description and upsert the review into Cosmos DB.

    Each item in ``reviews`` is modified in place before it is written: it is
    given a sequential string ``id`` starting at "1", and the embedding of its
    ``description`` is stored under both ``vector1`` and ``vector2``. Items are
    processed one at a time, and a summary line is printed at the end.
    """
    connection_string = os.getenv("COSMOS_CONNECTION_STRING")
    # The async context manager initializes the client and closes it on exit.
    async with CosmosClient.from_connection_string(connection_string) as cosmos:
        target = cosmos.get_database_client(databaseName).get_container_client(containerName)
        for position, review in enumerate(reviews, start=1):
            review['id'] = str(position)
            vectors = await embedding_service.generate_embeddings([review["description"]])
            # Convert to a plain list so the item can be serialized for upsert.
            as_list = vectors[0].tolist()
            review['vector1'] = as_list
            review['vector2'] = as_list
            await target.upsert_item(review)

    print(f"Inserted {len(reviews)} records into the container '{containerName}' in database '{databaseName}'.")

In [36]:
# Run the ingestion coroutine over the loaded reviews. Top-level `await` works
# in a notebook cell; a plain script would need
# asyncio.run(create_products(reviews)) instead.
await create_products(reviews)

Inserted 2000 records into the container 'reviews' in database 'Contoso'.


### Examples

#### Full text search

In [37]:
from azure.cosmos import CosmosClient
import os
from dotenv import load_dotenv
# Load connection settings from .env, then open a synchronous client and
# resolve the "reviews" container used by the query examples below.
load_dotenv()

databaseName = os.getenv("COSMOS_DATABASE_NAME")
containerName = "reviews"

client = CosmosClient.from_connection_string(
    os.getenv("COSMOS_CONNECTION_STRING")
)
database = client.get_database_client(databaseName)
container = database.get_container_client(containerName)

In [21]:
# Full-text filter: up to 10 items whose c.review text contains 'microsoft'.
# The function is spelled FullTextContainsAll as in the Cosmos DB reference;
# system function names are matched case-sensitively, so other casings risk
# being rejected as unknown functions.
# NOTE(review): c.review is not one of the paths in the full-text policy
# (/description, /name) - confirm this is the intended property.
# query_items returns an iterable; iterate it (e.g. list(ex1)) to fetch results.
ex1 = container.query_items(
    query="SELECT TOP 10 * FROM c WHERE FullTextContainsAll(c.review, 'microsoft')",
    enable_cross_partition_query=True,
)

In [43]:
question = "tablet with heating issues or a camera with great image quality"
test_embedding = (await embedding_service.generate_embeddings([question]))[0]


#### Vector Search

In [47]:
# Convert the query embedding to a plain list (test_embedding must expose
# .tolist(), e.g. a numpy array) so that its text form is a bracketed array
# that can be placed directly into the query text.
embedding_list = test_embedding.tolist()

# Top 5 items ordered by VectorDistance between c.vector1 and the query
# embedding; the same expression is also projected as `score`.
query2 = (
    "\n"
    f"SELECT TOP 5 c.review, c.product_id, VectorDistance(c.vector1, {embedding_list}) AS score\n"
    "FROM c\n"
    f"ORDER BY VectorDistance(c.vector1, {embedding_list})\n"
)


In [48]:
# Submit the vector search across all partitions. The result is an iterable;
# iterate it (e.g. list(b2)) to fetch the matching items.
b2 = container.query_items(query=query2, enable_cross_partition_query=True)