### Setup

In [4]:
import openai, json, os, requests, time, csv, uuid
from openai import AzureOpenAI

from tenacity import retry, wait_random_exponential, stop_after_attempt  
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from dotenv import load_dotenv
from cosmosdb_mongodb import insert_one_if_not_exists, create_index
from urllib.parse import quote
from pymongo import MongoClient

# Load settings from a local .env file (if any) into the process environment.
load_dotenv()

os.environ["OPENAI_API_TYPE"] = "azure"

# Mirror the Azure-specific settings under the additional variable names below.
# Maps target variable -> source variable read from the environment.
_env_aliases = {
    "OPENAI_API_VERSION": "AZURE_OPENAI_API_VERSION",
    "azure_endpoint": "AZURE_OPENAI_ENDPOINT",
    "OPENAI_API_KEY": "AZURE_OPENAI_API_KEY",
    "OPENAI_EMBEDDINGS_MODEL_NAME": "AZURE_OPENAI_EMBEDDING_MODEL",
}

# os.environ only accepts strings: assigning the None that os.getenv returns for
# an unset variable fails with "TypeError: str expected, not NoneType", which
# does not say which setting is missing. Check up front and name the culprits.
_missing_env = [src for src in _env_aliases.values() if os.getenv(src) is None]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

for _target, _source in _env_aliases.items():
    os.environ[_target] = os.environ[_source]



### Helper functions

In [5]:
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(10))
def generate_embeddings(openai_client, text):
    """
    Request an embedding for `text` and return it.

    The call is retried with randomized exponential backoff (bounded by
    min=1 / max=20) and abandoned after 10 attempts.

    Args:
        openai_client: client object exposing `embeddings.create(...)`
        text: input passed to the embeddings endpoint

    Returns:
        The `embedding` attribute of the first item in the response data.
    """
    # The model (deployment) name is read from the environment on every call.
    model_name = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL")
    result = openai_client.embeddings.create(input=text, model=model_name)

    first_item = result.data[0]
    return first_item.embedding

In [6]:
def init_cosmos():
    """
    Initialize the CosmosDB client, database, and collections

    All connection settings are read from the COSMOSDB_MONGODB_* environment
    variables. Both the username and the password are percent-encoded before
    being placed in the connection URI, so credentials containing reserved
    characters (such as '@', ':' or '/') do not corrupt the URI.

    Returns:
        database: CosmosDB database
        products_collection: CosmosDB collection for products
        customers_collection: CosmosDB collection for customers

    Raises:
        ValueError: if any required environment variable is not set
    """
    required_vars = {
        "host": "COSMOSDB_MONGODB_HOST",
        "username": "COSMOSDB_MONGODB_USERNAME",
        "password": "COSMOSDB_MONGODB_PASSWORD",
        "database": "COSMOSDB_MONGODB_DATABASE",
        "products": "COSMOSDB_MONGODB_PRODUCTS",
        "customers": "COSMOSDB_MONGODB_CUSTOMERS",
    }
    settings = {key: os.getenv(var) for key, var in required_vars.items()}

    # Fail early with the names of the missing settings instead of letting a
    # None value surface later as an unrelated error.
    missing = [required_vars[key] for key, value in settings.items() if value is None]
    if missing:
        raise ValueError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    host = settings["host"]

    # Encode the credentials (safe='' so that '/' is escaped as well)
    encoded_username = quote(settings["username"], safe='')
    encoded_password = quote(settings["password"], safe='')

    # in case of problems with SSL certificates, you might want to try adding
    # '&tlsAllowInvalidCertificates=true' to the query string below
    connection_string = (
        f'mongodb+srv://{encoded_username}:{encoded_password}@{host}'
        '/?tls=true&authMechanism=SCRAM-SHA-256&retrywrites=false&maxIdleTimeMS=120000'
    )

    client = MongoClient(connection_string)

    database = client[settings["database"]]
    products_collection = database[settings["products"]]
    customers_collection = database[settings["customers"]]

    return database, products_collection, customers_collection

In [7]:
def add_doc(openai_client, collection, doc):
    """
    Add document to Azure Cosmos DB for MongoDB vCore collection

    The document is serialized to JSON and stored under "textContent", the
    embedding of that text is stored under "vectorContent", and the enriched
    document is handed to insert_one_if_not_exists. `doc` is modified in place.

    This is best-effort: any exception is reported on stdout (with the document
    id and exception type) instead of being raised, so one bad document does
    not abort a bulk load.

    Args:
        openai_client: client passed through to generate_embeddings
        collection: target collection passed to insert_one_if_not_exists
        doc: dict describing the document

    Returns:
        bool: True when every step completed, False when an exception was caught
    """
    # Fields written by this function. They are left out of the serialized text
    # so that processing the same dict again does not nest the previous text
    # and embedding vector inside the new "textContent".
    derived_fields = ("textContent", "vectorContent")
    try:
        source_fields = {
            key: value for key, value in doc.items() if key not in derived_fields
        }
        text_content = json.dumps(source_fields)
        doc["textContent"] = text_content
        doc["vectorContent"] = generate_embeddings(openai_client, text_content)
        insert_one_if_not_exists(collection, doc)
        # .get: a document without an "id" must not be reported as a failure
        # after it has already been handed to the insert helper.
        print(doc.get("id"))
        return True
    except Exception as e:
        doc_id = doc.get("id") if isinstance(doc, dict) else None
        print(f"Failed to add document {doc_id!r}: {type(e).__name__}: {e}")
        return False

### Populate Cosmos DB for MongoDB from JSON files

In [8]:
# Connect to Cosmos DB and keep handles to the database and both collections
(
    database,
    products_collection,
    customers_collection,
) = init_cosmos()

In [9]:
# Insert products
# JSON text is UTF-8 by specification; pass the encoding explicitly so the file
# is not decoded with the platform's locale default.
with open('product.json', encoding='utf-8') as file:
    products = json.load(file)

openai_client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)

# Embed and insert the products on a small pool of worker threads
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [
        executor.submit(add_doc, openai_client, products_collection, product)
        for product in products
    ]
    # Future.result() re-raises anything that escaped a worker; without this
    # call such an error would be dropped together with the future.
    for future in futures:
        future.result()

# count products
c = products_collection.count_documents({})
print(f"There are {c} products in the collection")

0A7E57DA-C73F-467F-954F-17B7AFD6227E
14174164-F6C0-47FC-83FB-604C6A63408D
1A176FDB-D9A8-4888-BDD9-CE4F12E97AAE
027D0B9A-F9D9-4C96-8213-C8546C4AAE71
08225A9E-F2B3-4FA3-AB08-8C70ADD6C3C2
201D0D79-81AD-43D2-AD6E-F09EEE6AC2D7
290B4594-95BE-47C5-863A-4EFAAFC0AED7
24BE4267-85D8-4C1A-B184-C08709495752
2C981511-AC73-4A65-9DA3-A0577E386394
29663491-D2E9-47B4-83AE-D9459B6B5B67
3F105575-8677-42F9-8E1F-76E4B450F136
3FE1A99E-DE14-4D11-B635-F5D39258A0B9
44873725-7B3B-4B28-804D-963D2D62E761
47C70E1E-E500-41B3-8615-DCCB963D9E35
4B0848F8-7BF5-4DB9-84A7-C4D69F2E3E8E
5089E32E-8A60-4117-AA98-5EF8AB9A61D1
52FAD88C-567E-469D-A35E-574EA3BF147F
5308BAE7-B0CB-4883-9A93-192CB10DC94F
4E4B38CB-0D82-43E5-89AF-20270CD28A04
5996B5E0-6EC7-4CB7-A924-7B5A053AE980
5B5E90B8-FEA2-4D6C-B728-EC586656FA6D
5BFADECD-2240-4480-9485-1256D1D60EA8
6E3AA511-67DF-4EAD-8F0C-4C9F91F7D335
6FB5B2D5-5725-4998-9B6C-2FF2B7A3E3E0
71BDFE67-6499-4A8E-9CCA-9E9AF7D92A7A
7BAA49C9-21B5-4EEF-9F6B-BCD6DA7C2239
7EA0EEEB-824E-42E9-B787-019219CE4466
8

In [None]:
# Insert customers
# The data is kept in its own `customers` variable so that the `products` list
# loaded by the products cell is not overwritten with customer records.
# JSON text is UTF-8 by specification; pass the encoding explicitly so the file
# is not decoded with the platform's locale default.
with open('customer.json', encoding='utf-8') as file:
    customers = json.load(file)

openai_client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)

# Embed and insert the customers on a small pool of worker threads
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [
        executor.submit(add_doc, openai_client, customers_collection, customer)
        for customer in customers
    ]
    # Future.result() re-raises anything that escaped a worker; without this
    # call such an error would be dropped together with the future.
    for future in futures:
        future.result()

# count customers
c = customers_collection.count_documents({})
print(f"There are {c} customers in the collection")