# MongoDB VCore RAG


### Installing Dependecies

In [None]:
%pip install langchain==0.3.4
%pip install langchain-openai==0.2.3
%pip install pymongo==4.10.1


In [1]:
from dotenv import load_dotenv
import os
load_dotenv()

True

### Initialize OpenAI Client


In [2]:
import openai
openai.api_type = "azure"
openai.base_url = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_version = os.getenv("AZURE_OPENAI_API_VERSION")
openai.api_key = os.getenv("AZURE_OPENAI_API_KEY")


### Initialize MongoDB Client

In [3]:
from urllib.parse import quote_plus
from pymongo import MongoClient
mongo_connection_string = os.getenv("AZURE_COSMOS_CONNECTION_STRING")
mongo_username = quote_plus(os.getenv("AZURE_COSMOS_USERNAME"))
mongo_password = quote_plus(os.getenv("AZURE_COSMOS_PASSWORD"))


collection_name = os.getenv("AZURE_COSMOS_COLLECTION_NAME", "collectionName")
database_name = os.getenv("AZURE_COSMOS_DATABASE_NAME", "DatabaseName")

mongo_connection_string = mongo_connection_string.replace("<user>", mongo_username).replace(
    "<password>", mongo_password
)

# Initialize the MongoClient
mongo_client = MongoClient(mongo_connection_string)

# Create the database if it doesn't exist
db = mongo_client[database_name]

# Create the collection if it doesn't exist
collection = db[collection_name]

  mongo_client = MongoClient(mongo_connection_string)


### Load Json Data

In [4]:
import json
from langchain.docstore.document import Document

source_file_name = "./food_items.json"

def read_data(file_path) -> list[Document]:
    with open(file_path) as file:
        json_data = json.load(file)
        
    documents = []
    absolute_path = os.path.abspath(file_path)
    for idx, item in enumerate(json_data):
         documents.append(
            Document(page_content=json.dumps(item), metadata={"source": absolute_path, "seq_num": idx + 1})
        )
    
    return documents



In [5]:
json_data = read_data("./food_items.json") 

In [6]:
print(json_data[0])

page_content='{"category": "Smoothies", "name": "Ashunti`Way Smoothie", "description": "Fruit n greens, mango bananas, tropical fruit blend, dragon fruit mix, mango, bananas, pineapples, apples, and spinach. Special green with strawberry bananas juice blend . Our fruity tasty smoothies are blended to perfection.", "price": "5.49 USD"}' metadata={'source': 'c:\\Users\\HP VICTUS\\Downloads\\graphRag\\mongoDB_Vcore_RAG\\food_items.json', 'seq_num': 1}


### Initialize Embeddings Client

In [7]:
from langchain_openai import AzureOpenAIEmbeddings

openai_embeddings_model = os.getenv("AZURE_OPENAI_EMBEDDINGS_MODEL_NAME")
openai_embeddings_deployment = os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME")

azure_openai_embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    model = openai_embeddings_model,
    azure_deployment = openai_embeddings_deployment
)

### Generate and Save Embeddings to MongoDB

In [8]:
from langchain_community.vectorstores.azure_cosmos_db import AzureCosmosDBVectorSearch

index_name = os.getenv("AZURE_COSMOS_INDEX_NAME", "indexName")

vector_store: AzureCosmosDBVectorSearch = AzureCosmosDBVectorSearch.from_documents(
    json_data,
    azure_openai_embeddings,
    collection=collection,
    index_name=index_name
)

In [9]:
from langchain_community.vectorstores.azure_cosmos_db import AzureCosmosDBVectorSearch

# Run this to connect to the vector store
vector_store: AzureCosmosDBVectorSearch = AzureCosmosDBVectorSearch.from_connection_string(
    connection_string=mongo_connection_string,
    namespace=f"{database_name}.{collection_name}",
    embedding=azure_openai_embeddings,
)

  client: MongoClient = MongoClient(connection_string, appname=appname)


### Create an IVF Vector Index

In [10]:
from langchain_community.vectorstores.azure_cosmos_db import (
    CosmosDBSimilarityType,
    CosmosDBVectorSearchType,
)

# Read more about these variables in detail here. https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search
num_lists = 100
dimensions = 1536
similarity_algorithm = CosmosDBSimilarityType.COS
kind = CosmosDBVectorSearchType.VECTOR_IVF
m = 16
ef_construction = 64

# Create the collection and the index
vector_store.create_index(num_lists, dimensions, similarity_algorithm, kind, m, ef_construction)

{'raw': {'defaultShard': {'numIndexesBefore': 1,
   'numIndexesAfter': 2,
   'createdCollectionAutomatically': False,
   'ok': 1}},
 'ok': 1}

### Test Vector Similarity on a query

In [21]:
query = "Beef Bacon"
docs = vector_store.similarity_search(query)
print(docs[0].page_content)

{"category": "Sandwiches", "name": "Bacon Turkey Bravo Sandwich", "description": "Whole (1010 Cal.), Half (500 Cal.) Oven-roasted turkey breast raised without antibiotics, Applewood-smoked bacon, smoked Gouda, emerald greens, vine-ripened tomatoes, signature sauce , salt and pepper on Tomato Basil Bread. Allergens: Contains Wheat, Milk, Egg", "price": "8.79 USD"}


## Bringing RAG to Life!!!!

### Defining RAG Workflow with help of a python function


In [27]:
from openai import AzureOpenAI

azureOpenAIClient = AzureOpenAI(
  azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"), 
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
  api_version="2024-05-01-preview"
)

GPT_model = os.getenv("AZURE_OPENAI_GPT_MODEL_NAME")

In [48]:
def RAG(query: str) -> str:
    docs = vector_store.similarity_search(query)
    print(docs)
    context = docs[0].page_content
    
    print(context)
    
    system_prompt = (
        "you are a cooking assistant meant to answer the user query \n"
        "you will be given context to the prompt based upon which you have to answer the user query \n"
        "the context of the prompt has been taken from an index created in Azure MongoDB vCore API \n"
        "the schema for the context is as follows: \n"
        "category: string (the category to which the dish belongs) \n"
        "name: string (the name of the dish) \n"
        "description: string (the description about the dish) \n"
        "price: string (the price of the dish in USD) \n"
        "---------------------------------------------- \n"
        "strictly answer the user query from the context provided \n"
    )
    
    user_prompt = (
        f"user query: {query} \n"
        "answer the user query from the context provided \n"
        "context: " + context
    )
    
    response = azureOpenAIClient.chat.completions.create(
        model="gpt-4",  # model = "deployment_name".
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )
    
    return str(response.choices[0].message.content)
    
    
    

In [51]:
response = RAG("is capuccino available and at what price?")
print(response)

[Document(metadata={'_id': ObjectId('6729aae70cd813d1bdcb6ac7'), 'vectorContent': [-0.001678338274359703, -0.003278614254668355, -0.014669058844447136, -0.032420746982097626, 0.000700899341609329, -0.004408025648444891, 0.011221032589673996, -0.016569126397371292, -0.012257433496415615, -0.024594590067863464, -0.0014939785469323397, 0.038851745426654816, -0.033138252794742584, -0.029470989480614662, -0.015466290526092052, 0.0213126540184021, 0.025843586772680283, -0.015692172572016716, 0.015984490513801575, -0.00442131282761693, -0.005035845097154379, 0.02497991919517517, -0.009181449189782143, 0.007241519168019295, -0.015147397294640541, 0.010589891113340855, -0.0084772277623415, -0.025019779801368713, 0.0036340467631816864, -0.00920802354812622, 0.023903656750917435, -0.011898679658770561, -0.0374167300760746, -0.0061253951862454414, -0.007108646910637617, -0.0008528715698048472, 0.0010430444963276386, -0.000835432147141546, 0.021007047966122627, -0.012529821135103703, 0.035078182816