## Azure OpenAI <a class="anchor" id="azureopenai"></a>

Finally, let's set up our Azure OpenAI resource. Currently, access to this service is granted only by application. You can apply for access to Azure OpenAI by completing the form at https://aka.ms/oai/access. Once you have access, complete the following steps:

- Create an Azure OpenAI resource following this quickstart: https://learn.microsoft.com/azure/ai-services/openai/how-to/create-resource?pivots=web-portal
- Deploy a `completions` and `embeddings` model 
    - For more information on `completions`, go here: https://learn.microsoft.com/azure/ai-services/openai/how-to/completions
    - For more information on `embeddings`, go here: https://learn.microsoft.com/azure/ai-services/openai/how-to/embeddings
- Copy the endpoint, the key, and the deployment names of the embeddings and completions models into the config.json file.

## Create an Azure Cosmos DB for MongoDB vCore resource<a class="anchor" id="cosmosdb"></a>
Let's start by creating an Azure Cosmos DB for MongoDB vCore Resource following this quick start guide: https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/quickstart-portal

Then copy the connection details (server, user, pwd) into the config.json file.

# Preliminaries <a class="anchor" id="preliminaries"></a>
First, let's start by installing the packages that we'll need later. 

In [None]:
! pip install numpy
! pip install openai==1.2.3
! pip install pymongo
! pip install python-dotenv
! pip install azure-core
! pip install azure-cosmos
! pip install tenacity
! pip install gradio

In [None]:
import json
import datetime
import time

from azure.core.exceptions import AzureError
from azure.core.credentials import AzureKeyCredential

import openai
from dotenv import load_dotenv
from tenacity import retry, wait_random_exponential, stop_after_attempt

from dotenv import dotenv_values
from openai import AzureOpenAI

# Azure OpenAI connection settings. The empty strings are placeholders to be
# filled in; loading the values from a .env file instead keeps secrets out of
# the notebook.
openai.api_type = ""
openai.api_key = ""
openai.api_base = ""
openai.api_version = ""

# Build the Azure OpenAI client from the key, API version and endpoint set above.
client = AzureOpenAI(
    api_key=openai.api_key,
    api_version=openai.api_version,
    azure_endpoint = openai.api_base
)



# Load data and create embeddings <a class="anchor" id="loaddata"></a>
Here we'll load a sample dataset containing descriptions of Azure services. Then we'll use Azure OpenAI to create vector embeddings from this data.

In [None]:
import openai

def generate_embeddings(text, model="text-embedding"):
    """Return the embedding vector for ``text`` using the module-level ``client``.

    Args:
        text: The input passed to the embeddings endpoint.
        model: Name of the embeddings deployment to call. Defaults to
            ``"text-embedding"``.

    Returns:
        The ``embedding`` of the first item in the response, or ``None`` when
        the request (or reading the response) fails. Failures are printed,
        not raised, so callers must check for ``None``.
    """
    try:
        response = client.embeddings.create(input=text, model=model)
        return response.data[0].embedding
    except Exception as e:
        # Best-effort by design: report the actual error text and signal the
        # failure with None instead of propagating the exception.
        print(f"An error occurred: {e}")
        return None

# Smoke test: embed a sample string to confirm the client configuration works.
embeddings = generate_embeddings("K.Shreekar Patra")

# generate_embeddings returns None on failure, so only print a real vector.
if embeddings is not None:
    print(embeddings)


In [None]:
# Generate embedding data. Change the record fields below to match your data.

import json

# Load the records (a JSON list of objects) from file.
data_file_path = "import your json format of your CSV"
with open(data_file_path, "r", encoding="utf-8") as data_file:
    data = json.load(data_file)

total_items = len(data)
for n, item in enumerate(data, start=1):  # Iterate directly over the list of records
    # Text that gets embedded for this record. NAME and DEPT are optional:
    # they are appended only when present and non-empty.
    product_string = f"EMP ID: {item['EMP ID']}" + \
                    (f", NAME: {item['NAME']}" if item.get('NAME') else "") + \
                    (f", DEPT: {item['DEPT']}" if item.get('DEPT') else "")

    # generate_embeddings returns None on failure; in that case the record's
    # contentVector is stored as null in the output file.
    title_embeddings = generate_embeddings(product_string)
    item['contentVector'] = title_embeddings
    # '\r' keeps the progress counter on a single console line.
    print(f"Creating embeddings for item: {n}/{total_items}", end='\r')

# Save embeddings to output_embeddings.json file
output_file_path = "path/output_embeddings.json"
with open(output_file_path, "w", encoding="utf-8") as f:
    json.dump(data, f, indent=1)

print("\nEmbeddings generation completed. Output saved to:", output_file_path)


In [None]:
import csv
import json
import time
import requests
from openai import OpenAI

def _coerce_csv_value(value):
    """Map the CSV text 'true'/'false'/'' to True/False/None; return anything else unchanged."""
    if value == 'true':
        return True
    if value == 'false':
        return False
    if value == '':
        return None  # Empty cells become JSON null
    return value


def csv_to_json_complete(input_csv_file, output_json_file, max_rows=300):
    """Convert a CSV file into a JSON file holding a list of row objects.

    Each CSV row becomes one object keyed by the header names, with an extra
    ``id`` field numbered from 1 in file order. The strings ``'true'`` and
    ``'false'`` are converted to booleans and empty strings to ``None``.

    Args:
        input_csv_file: Path of the UTF-8 CSV file to read (first row is the header).
        output_json_file: Path of the JSON file to write (overwritten).
        max_rows: Maximum number of data rows to convert. Defaults to 300;
            pass ``None`` to convert every row.

    Returns:
        None. The result is written to ``output_json_file``.
    """
    json_list = []

    with open(input_csv_file, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)

        # enumerate supplies the auto-incremented id, so the row cap below
        # and the per-row ids always stay in step.
        for auto_id, row in enumerate(reader, start=1):
            if max_rows is not None and auto_id > max_rows:
                break

            record = {key: _coerce_csv_value(value) for key, value in row.items()}
            record['id'] = auto_id
            json_list.append(record)

    with open(output_json_file, 'w', encoding='utf-8') as jsonfile:
        json.dump(json_list, jsonfile, indent=4)

# Example usage. Both paths are placeholders; the call on the last line is
# commented out, so uncomment it to run the conversion.
input_csv_file = r"path\employee.csv"  # Replace with your actual input CSV file path
output_json_file = r"path\output_embeddings.json"  # Replace with your actual output JSON file path
# csv_to_json_complete(input_csv_file, output_json_file)

# Connect and setup Cosmos DB for MongoDB vCore

### Create database, collection, vector index

In [None]:
# Create the database handle, the collection (if missing) and the vector index.
from pymongo import MongoClient
from urllib.parse import quote_plus

# Credentials (placeholders to be replaced with real values)
username = "your data base name"
password = "your data base password"

# Percent-encode the credentials so special characters are URL-safe.
# NOTE(review): the escaped values are not interpolated into mongo_conn in
# this cell; use them when filling in the real connection string.
escaped_username = quote_plus(username)
escaped_password = quote_plus(password)

# Connection string (placeholder) and the client built from it
mongo_conn = "YOUR CONNECTION STRING"
mongo_client = MongoClient(mongo_conn)

# Names of the database and collection used throughout the notebook
DATABASE_NAME = "Smart-ai"
COLLECTION_NAME = "mongodb"

# Indexing the client returns a handle to the named database
db = mongo_client[DATABASE_NAME]

# Reuse the collection when it is already there, otherwise create it
if COLLECTION_NAME in db.list_collection_names():
    collection = db[COLLECTION_NAME]
    print(f"Using existing collection: '{COLLECTION_NAME}'.")
else:
    collection = db.create_collection(COLLECTION_NAME)
    print(f"Created collection '{COLLECTION_NAME}'.")

# Vector index over the `contentVector` field: IVF, cosine similarity,
# 1536 dimensions, a single list.
db.command({
    'createIndexes': COLLECTION_NAME,
    'indexes': [
        {
            'name': 'vectorSearchIndex',
            'key': {"contentVector": "cosmosSearch"},
            'cosmosSearchOptions': {
                'kind': 'vector-ivf',
                'numLists': 1,
                'similarity': 'COS',
                'dimensions': 1536,
            },
        },
    ],
})

print("Vector index created successfully.")

## Upload data to the collection
A simple `insert_many()` to insert our data in JSON format into the newly created DB and collection.

In [None]:
# Load the records (with their embeddings) and upload them in one batch.
# The context manager closes the file even if JSON parsing fails.
with open(file="PATH/output_embeddings.json", mode="r", encoding="utf-8") as data_file:
    data = json.load(data_file)

if data:
    collection.insert_many(data)
else:
    # insert_many rejects an empty document list, so skip the call.
    print("No documents to insert.")

# Vector Search in Cosmos DB for MongoDB vCore

In [None]:
# Simple function to assist with vector search
def vector_search(query, num_results=3):
    """Run a vector similarity search for ``query`` against ``collection``.

    The query text is embedded with ``generate_embeddings`` and the resulting
    vector is matched against the ``contentVector`` field.

    Args:
        query: Text to search for.
        num_results: Value passed as ``k`` to the search stage. Defaults to 3.

    Returns:
        The result of ``collection.aggregate``; each item carries a
        ``similarityScore`` and the matched record under ``document``.

    Raises:
        ValueError: If no embedding could be generated for ``query``.
    """
    query_embedding = generate_embeddings(query)
    if query_embedding is None:
        # generate_embeddings signals failure with None; stop here rather
        # than sending a null vector to the database.
        raise ValueError("Could not generate an embedding for the query.")

    pipeline = [
        {
            '$search': {
                "cosmosSearch": {
                    "vector": query_embedding,
                    "numLists": 1,
                    "path": "contentVector",
                    "k": num_results
                },
                "returnStoredSource": True }},
        # Expose the search score and keep the full record under 'document'.
        {'$project': { 'similarityScore': { '$meta': 'searchScore' }, 'document' : '$$ROOT' } }
    ]
    return collection.aggregate(pipeline)

## Perform vector search query

In [None]:
query = "List out the employees, who all are working in IT department"
results = vector_search(query, 5)
for result in results:
    # Field names match the keys used when the records were embedded
    # ('EMP ID', 'NAME', 'DEPT'). NAME and DEPT may be absent or null.
    document = result['document']
    print(f"Similarity Score: {result['similarityScore']}")
    print(f"EMP ID: {document['EMP ID']}")
    print(f"Name: {document.get('NAME')}")
    print(f"Dept: {document.get('DEPT')}\n")

In [None]:
print('Complete') 