# Install Required Libraries

In [8]:
%pip install langchain
%pip install langchain_openai
%pip install pymongo

# Load Environment Variables 

In [None]:
# Create .env file if it doesn't exist
%cp -n .env.example .env

In [13]:
# load the environment variables from .env file
import os

from dotenv import load_dotenv

load_dotenv()

True

# Initialize OpenAI Client

Save the `api_type`, `api_base`, `api_version`, and `api_key` as global variables to avoid the need to supply them later in code.

In [14]:
import openai

openai.api_type = os.getenv("OPENAI_API_TYPE", "azure")
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT", "https://<YOUR-OPENAI-DEPLOYMENT-NAME>.openai.azure.com/")
openai.api_version = os.getenv("OPENAI_API_VERSION", "2023-09-15-preview")
openai.api_key = os.getenv("OPENAI_API_KEY", "<YOUR-DEPLOYMENT-KEY>")

# Intialize the MongoDB Client

In [1]:
from pymongo import MongoClient

# Connection string
mongo_connection_string = os.getenv("AZURE_COSMOS_CONNECTION_STRING", "<YOUR-COSMOS-DB-CONNECTION-STRING>")
mongo_client = MongoClient(mongo_connection_string)

# Database name
db_name = os.getenv("AZURE_COSMOS_DATABASE_NAME", "DatabaseName")
db = mongo_client[db_name]

  mongo_client = MongoClient(mongo_connection_string)


# Load JSON Data

In [39]:
from langchain.docstore.document import Document
from pathlib import Path
import json

SOURCE_FILE_NAME = "./data/results.json"

def loadJSONFile(file_path):
    docs = []
    # Load JSON file
    with open(file_path) as file:
        data = json.load(file)
    
    # Process each item in the JSON data
    for index in range(len(data)):
        docs.append(Document(page_content=json.dumps(data[index]), metadata={'source': (Path(__name__).resolve().parent / SOURCE_FILE_NAME).__str__(), 'seq_num': index+1}))

    return docs

json_data = loadJSONFile(SOURCE_FILE_NAME)

In [40]:
# Display a sample from the data
print(json_data[1])

page_content='{"category": "Smoothies", "name": "J Wit Da Weezy Smoothie", "description": "Kale, pineapples, peaches, blueberries, and cran blackberries. Our fruity tasty smoothies are blended to perfection.", "price": "6.49 USD"}' metadata={'source': 'C:\\Users\\Khelan Modi\\OneDrive - Microsoft\\Desktop\\Build demo\\build-24-langchain-vcore\\data\\results.json', 'seq_num': 11}


# Initialize the Embeddings Client

In [7]:
from langchain_openai import AzureOpenAIEmbeddings

openai_embeddings_model = os.getenv("AZURE_OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002")
openai_embeddings_deployment = os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME", "text-embedding")

azure_openai_embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    model=openai_embeddings_model,
    azure_deployment=openai_embeddings_deployment,
)

# Generate and Save Embeddings to MongoDB

In [8]:
from langchain.vectorstores.azure_cosmos_db import AzureCosmosDBVectorSearch

collection_name = os.getenv("AZURE_COSMOS_COLLECTION_NAME", "collectionName")
index_name = os.getenv("AZURE_COSMOS_INDEX_NAME", "indexName")

collection = db[collection_name]

# Create embeddings from the data, save to the database and return a connection to MongoDB vCore
vector_store: AzureCosmosDBVectorSearch = AzureCosmosDBVectorSearch.from_documents(
    json_data[0:100],
    azure_openai_embeddings,
    collection=collection,
    index_name=index_name,
)

# Create Vector Index (HNSW)

In [9]:
from langchain_community.vectorstores.azure_cosmos_db import (
    CosmosDBSimilarityType,
    CosmosDBVectorSearchType,
)

# Read more about these variables in detail here. https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search
num_lists = 100
dimensions = 1536
similarity_algorithm = CosmosDBSimilarityType.COS
kind = CosmosDBVectorSearchType.VECTOR_HNSW
m = 16
ef_construction = 64

# Create the collection and the index
vector_store.create_index(
    num_lists, dimensions, similarity_algorithm, kind, m, ef_construction
)

{'raw': {'defaultShard': {'numIndexesBefore': 1,
   'numIndexesAfter': 2,
   'createdCollectionAutomatically': False,
   'ok': 1}},
 'ok': 1}

## Test Vector Search Flow

In [10]:
query = "Beef Bacon"
docs = vector_store.similarity_search(query)
print(docs[0].page_content)

{"category": "Sandwiches", "name": "Bacon Turkey Bravo Sandwich", "description": "Whole (1010 Cal.), Half (500 Cal.) Oven-roasted turkey breast raised without antibiotics, Applewood-smoked bacon, smoked Gouda, emerald greens, vine-ripened tomatoes, signature sauce , salt and pepper on Tomato Basil Bread. Allergens: Contains Wheat, Milk, Egg", "price": "8.79 USD"}


# Initialize the Chat Client

In [11]:
from langchain_openai import AzureChatOpenAI

openai_chat_model = os.getenv("AZURE_OPENAI_CHAT_MODEL_NAME", "gpt-35-turbo")
openai_chat_deployment= os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "chat-gpt")

azure_openai_chat: AzureChatOpenAI = AzureChatOpenAI(
    model=openai_chat_model,
    azure_deployment=openai_chat_deployment,
)

In [12]:
# Test the chat flow
chat_response = azure_openai_chat.invoke("Tell me a joke")
print(chat_response.content)

Why did the tomato turn red? Because it saw the salad dressing!


# Create RAG Function

In [13]:
order_prompt="""Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

In [14]:
from langchain.prompts import PromptTemplate

ORDER_PROMPT_TEMPLATE = PromptTemplate.from_template(order_prompt)

In [15]:
from langchain.chains import ConversationalRetrievalChain

retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5, 'score_threshold': 0.2})

rag_chain = ConversationalRetrievalChain.from_llm(
    llm=azure_openai_chat,
    retriever=retriever,
    condense_question_prompt=ORDER_PROMPT_TEMPLATE,
    return_source_documents=False,
    verbose=False
)

## Test RAG Flow

In [16]:
question = "recommend me a strawberry smoothi"
chat_history = []
response = rag_chain.invoke({"question": question, "chat_history": chat_history})
print(response['answer'])

I'm sorry, there are a few smoothies with strawberries in the description. Which one would you like more information about? 
- Jimmy Jam Smoothie
- Aw Shuckie Shuckie Now Smoothie
- Ashunti`Way Smoothie


In [17]:
chat_history.append((question, response['answer']))
question = "What did I just ask you about?"
response = rag_chain.invoke({"question": question, "chat_history": chat_history})

In [18]:
print(response['answer'])

There are two smoothies that include strawberries in their description: the Jimmy Jam Smoothie and the Aw Shuckie Shuckie Now Smoothie.


# Test with Gradio

In [None]:
%pip install ipywidgets gradio

In [20]:
import gradio as gr


def setup_gradio_interface(chain):    
    with gr.Blocks() as demo_interface:
        chatbot = gr.Chatbot(label="Food Ordering System")
        chat_history = gr.State([])
        lc_chat_history = gr.State([])
        msg = gr.Textbox(label="Your question")
        gr.ClearButton([msg, chatbot])
  
        def fetch_response(message, chat_history, lc_chat_history):
            response = chain.invoke({"question": message, "chat_history": lc_chat_history})
            lc_chat_history.append((message, response['answer']))
            chat_history.append([message, response["answer"]])
            return "", chat_history, lc_chat_history

        msg.submit(fetch_response, inputs=[msg, chatbot, lc_chat_history], outputs=[msg, chatbot, lc_chat_history])
    
    return demo_interface

In [None]:
food_ordering_demo = setup_gradio_interface(rag_chain)
food_ordering_demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




# Inline Embeddings Generation

In [42]:
from langchain.docstore.document import Document
from pymongo import MongoClient
import os
import json
import urllib

SOURCE_FILE_NAME = "./data/results.json"

def loadJSONFile(file_path):
    docs = []
    # Load JSON file
    with open(file_path) as file:
        data = json.load(file)
    
    # Process each item in the JSON data
    for item in data:
        docs.append(Document(page_content=json.dumps(item), metadata={}))

    return docs

json_data = loadJSONFile(SOURCE_FILE_NAME)

mongo_connection_string = "mongodb+srv://clusteradmin:"+ urllib.parse.quote(os.getenv(PASSWORD)) +"@0qzfpdvavvmtaz6e0tz41xotrxxa7npr.global.pgmongo-dev.cosmos.windows-int.net/?tls=true&authMechanism=SCRAM-SHA-256&retrywrites=false&maxIdleTimeMS=120000"
#mongo_connection_string = os.getenv("AZURE_COSMOS_CONNECTION_STRING_AUTO_EMBEDDING", "conn_string")
mongo_client = MongoClient(mongo_connection_string)

db_name = os.getenv("AZURE_COSMOS_DATABASE_NAME", "DatabaseName")
db = mongo_client[db_name]

collection_name = os.getenv("AZURE_COSMOS_COLLECTION_NAME", "collectionName")
index_name = os.getenv("AZURE_COSMOS_INDEX_NAME", "indexName")

collection = db[collection_name]

# Insert data
docs = [json.loads(item.page_content) for item in json_data[0:20]]
for doc in docs:
    collection.insert_one(doc)

# Inline generate embeddings
collection.update_many({}, {"$generateEmbeddings": {"description": "embeddings"}})

# Create HNSW index
createIndexCommand = {
    "createIndexes": collection_name,
    "indexes": [
        {
            "key": {"embeddings": "cosmosSearch"},
            "name": "hnsw_index",
            "cosmosSearchOptions": {
                "kind": "vector-hnsw",
                "m": 4,
                "efConstruction": 16,
                "similarity": "COS",
                "dimensions": 1536
            }
        }
    ]
}
db.command(createIndexCommand)


InvalidURI: Username and password must be escaped according to RFC 3986, use urllib.parse.quote_plus

In [41]:
from langchain.document_loaders.json_loader import JSONLoader
from pymongo import MongoClient
import json
import os
from dotenv import load_dotenv

load_dotenv()

SOURCE_FILE_NAME = "./data/results.json"

loader = JSONLoader(file_path=SOURCE_FILE_NAME, jq_schema=".[]", text_content=False)
json_data = loader.load_and_split()

#mongo_connection_string = "mongodb+srv://clusteradmin:"+ urllib.parse.quote("P@ssw.rd") +"@0qzfpdvavvmtaz6e0tz41xotrxxa7npr.global.pgmongo-dev.cosmos.windows-int.net/?tls=true&authMechanism=SCRAM-SHA-256&retrywrites=false&maxIdleTimeMS=120000"
mongo_connection_string = os.getenv("AZURE_COSMOS_CONNECTION_STRING_AUTO_EMBEDDING", "Conn_String")
mongo_client = MongoClient(mongo_connection_string)

db_name = os.getenv("AZURE_COSMOS_DATABASE_NAME", "DatabaseName")
db = mongo_client[db_name]

collection_name = os.getenv("AZURE_COSMOS_COLLECTION_NAME", "collectionName")
collection_name = collection_name + "_for_inline"

index_name = os.getenv("AZURE_COSMOS_INDEX_NAME", "indexName")

collection = db[collection_name]

# insert data
docs = [ json.loads(item.page_content) for item in json_data[0:10]]
for doc in docs:
    collection.insert_one(doc)

# inline generate embeddings
collection.update_many({}, {"$generateEmbeddings":{"description":"embeddings"}})

# create hnsw index
createIndexCommand = {
    "createIndexes": collection_name, 
    "indexes": [ 
        { 
            "key": { "embeddings": "cosmosSearch" }, 
            "name": "hnsw_index", 
            "cosmosSearchOptions": { "kind": "vector-hnsw", "m": 4, "efConstruction": 16, "similarity": "COS", "dimensions": 1536 } 
        } 
    ] 
} 
db.command(createIndexCommand)

ImportError: jq package not found, please install it with `pip install jq`

In [10]:
search_pipeline = [ 
    { "$search": { "cosmosSearch": { "query": docs[0]["description"], "k": 5, "path": "embeddings", "efSearch": 100 }}} , 
    { "$project": { "similarityScore": { "$meta": "searchScore" }, "_id":0, "name":1, "description":1 } }
]

results = collection.aggregate(search_pipeline)

for result in results:
    print(f"[Score: {result['similarityScore']:.3f}] {result['name']}: {result['description']}")

[Score: 1.000] Ashunti`Way Smoothie: Fruit n greens, mango bananas, tropical fruit blend, dragon fruit mix, mango, bananas, pineapples, apples, and spinach. Special green with strawberry bananas juice blend . Our fruity tasty smoothies are blended to perfection.
[Score: 0.986] Dayton 500 Smoothie: Tropical fruit blend, dragon fruit mix, mango, bananas, pineapples, apples. Special green juice blend. Our fruity tasty smoothies are blended to perfection.
[Score: 0.973] Tongue Teaser Smoothie: Tropical fruit blend, dragon fruit, pineapples, bananas, mango, apples, spinach, ginger powder. Special green blend, pineapple and ginger smoothies. Our fruity tasty smoothies are blended to perfection.
[Score: 0.967] Tejay Impact Smoothie: Tropical fruit blend, dragon fruit mix, mango, bananas, pineapples, apples, and spinach. Special blue juice blend smoothies.
[Score: 0.961] Jimmy Jam Smoothie: Berries n kale, strawberries, bananas, blueberries kale, tropical fruit blend, and dragon fruit. Our fru