# Build an End-to-End LangChain Application with Azure OpenAI

In [31]:
import os
from dotenv import load_dotenv
import openai
import langchain
import matplotlib as plt
from langchain import PromptTemplate
from langchain.llms import OpenAI

from langchain import OpenAI, SQLDatabase
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.agents import create_sql_agent
from langchain.embeddings import OpenAIEmbeddings
from langchain.utilities import SerpAPIWrapper
from langchain.agents import initialize_agent, AgentType, Tool
from langchain.llms import OpenAI
from langchain.utilities import SerpAPIWrapper
from langchain.chains import LLMMathChain, LLMChain, RetrievalQA, SequentialChain, TransformChain
from langchain.vectorstores.azuresearch import AzureSearch
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from pydantic import BaseModel, Field, validator
from langchain.output_parsers import PydanticOutputParser

In [32]:
# Load variables from a local .env file (python-dotenv) before reading them.
load_dotenv()

# --- Azure OpenAI settings ---
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_NAME = os.environ.get("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME")
OPENAI_API_VERSION = os.environ.get("OPENAI_API_VERSION")
OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("OPENAI_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_EMBEDDING_MODEL_NAME = os.environ.get("OPENAI_EMBEDDING_MODEL_NAME")
OPENAI_DEPLOYMENT_VERSION = os.environ.get("OPENAI_DEPLOYMENT_VERSION")
OPENAI_EMBEDDING_VERSION = os.environ.get("OPENAI_EMBEDDING_VERSION")

# --- SQL database credentials ---
database_user = os.environ.get("DATABASE_USERNAME")
database_password = os.environ.get("DATABASE_PASSWORD")
database_server = os.environ.get("DATABASE_SERVER")
database_db = os.environ.get("DATABASE_DB")

# --- SerpAPI search wrapper ---
# NOTE(review): the key is read from "SERPER_API_KEY" although the wrapper is
# SerpAPI -- confirm the variable name matches what the .env file defines.
search = SerpAPIWrapper(serpapi_api_key=os.environ.get("SERPER_API_KEY"))

# --- Azure Cognitive Search (vector store) endpoint and key ---
vector_store_address = os.environ.get("VECTOR_STORE_ADDRESS")
vector_store_password = os.environ.get("VECTOR_STORE_PASSWORD")

# --- Configure the openai SDK for Azure ---
openai.api_type = "azure"
openai.api_version = OPENAI_DEPLOYMENT_VERSION
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_key = OPENAI_API_KEY

In [33]:
# LLM shared by the chains and agents in the rest of the notebook.
# `engine` is not a declared field of the wrapper, so LangChain moves it into
# model_kwargs and emits the warning visible in this cell's output.
llm = OpenAI(
    temperature=0,
    engine="text-davinci-003",
)
# Smoke test: the bare call is the cell's last expression, so its result is displayed.
llm("give me instructions on how to make a cake")

                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


'\n\n1. Preheat oven to 350°F (177°C). Grease and flour a 9x13 inch cake pan.\n\n2. In a large bowl, mix together the cake mix, eggs, oil, and water. Beat with an electric mixer on low speed for 30 seconds, then on medium speed for 2 minutes.\n\n3. Pour the batter into the prepared pan and spread evenly.\n\n4. Bake for 25 to 30 minutes, or until a toothpick inserted into the center of the cake comes out clean.\n\n5. Allow the cake to cool in the pan for 10 minutes, then turn out onto a wire rack to cool completely.\n\n6. Frost the cooled cake with your favorite frosting. Enjoy!'

## 1. Querying Transactional datastores

In [34]:
from urllib.parse import quote_plus

# Fail early and name the missing settings, rather than building a URL that
# literally contains "None" and surfacing an opaque driver/connection error.
_db_settings = {
    "DATABASE_USERNAME": database_user,
    "DATABASE_PASSWORD": database_password,
    "DATABASE_SERVER": database_server,
    "DATABASE_DB": database_db,
}
_missing_db_settings = [name for name, value in _db_settings.items() if not value]
if _missing_db_settings:
    raise ValueError(
        "Missing database settings in the environment: " + ", ".join(_missing_db_settings)
    )

# Percent-encode the credentials: characters such as "@", ":" or "/" in a user
# name or password would otherwise be read as URL delimiters and corrupt the
# connection string. Values without special characters are left unchanged.
connection_string = (
    f"mssql+pymssql://{quote_plus(database_user)}:{quote_plus(database_password)}"
    f"@{database_server}.database.windows.net:1433/{database_db}"
)
db = SQLDatabase.from_uri(connection_string)

# NOTE(review): `reduce_k_below_max_tokens` is passed through as in the original;
# confirm that SQLDatabaseToolkit actually honours this option.
toolkit = SQLDatabaseToolkit(db=db, llm=llm, reduce_k_below_max_tokens=True)

# Agent that answers natural-language questions by querying `db` through the toolkit.
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    verbose=True
)

## 2. Populate Vector DB with Embeddings

In [35]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# NOTE: word_tokenize and the stopwords corpus need the corresponding NLTK data
# packages to be downloaded beforehand.

sentence = "Hello, my name is John and I am a doctor."
print(f'Tokenized using Split: {sentence.split()}')
print(f'Tokenized sentence: {word_tokenize(sentence)}')

# Load the stop-word list once and keep it as a set: the original re-read the
# corpus list and scanned it linearly for every single token.
english_stop_words = set(stopwords.words("english"))

# The membership test is case-sensitive, so capitalised tokens such as "I" are
# kept (see the recorded output below).
without_stop_words = [word for word in word_tokenize(sentence) if word not in english_stop_words]
new_sentence = " ".join(without_stop_words)
print(f'New sentence: {new_sentence}')
print(f'Tokenized sentence: {word_tokenize(new_sentence)}')


Tokenized using Split: ['Hello,', 'my', 'name', 'is', 'John', 'and', 'I', 'am', 'a', 'doctor.']
Tokenized sentence: ['Hello', ',', 'my', 'name', 'is', 'John', 'and', 'I', 'am', 'a', 'doctor', '.']
New sentence: Hello , name John I doctor .
Tokenized sentence: ['Hello', ',', 'name', 'John', 'I', 'doctor', '.']


In [36]:
# Embed the stop-word-free sentence with one call to the embeddings endpoint.
response = openai.Embedding.create(
    engine="text-embedding-ada-002",
    input=new_sentence,
)
# Take the vector of the first (and here only) entry in the response.
embeddings = response["data"][0]["embedding"]
# Print just the first five components instead of the whole vector.
print(embeddings[:5])

[-0.0039870780892670155, 0.02236991934478283, -0.015776541084051132, -0.02851404808461666, 0.006586771458387375]


In [37]:
# Compare two sentence pairs by the cosine similarity of their embeddings.
from openai.embeddings_utils import get_embedding, cosine_similarity

# Pair expected to score high: both sentences are about ice cream.
text1 = "I love eating ice cream."
text2 = "My favorite dessert is ice cream."
# Pair expected to score low: unrelated topics.
text3 = "The ocean waves crashed against the shore."
text4 = "Apples are a delicious and healthy snack."

engine = "text-similarity-davinci-001"
# Embed the four sentences in order; the names match the sentence numbers.
response1, response2, response3, response4 = [
    get_embedding(text, engine) for text in (text1, text2, text3, text4)
]

print(cosine_similarity(response1, response2))
print(cosine_similarity(response3, response4))

0.9577565531153822
0.668192394370417


In [38]:
# Embed the PDF documents under ./data and add them to the Azure Search index.
model = "text-embedding-ada-002"
# Switch the API version seen through the environment to the embedding one.
os.environ["OPENAI_API_VERSION"] = os.environ.get("OPENAI_EMBEDDING_VERSION")
index_name: str = "langchain-vector-demo"
# NOTE(review): chunk_size=1 presumably sends one text per embedding request --
# confirm this is still required by the Azure deployment.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(chunk_size=1, deployment=model)
vector_store: AzureSearch = AzureSearch(
    index_name=index_name,
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
    embedding_function=embeddings.embed_query,
)

# Load every PDF in ./data, then split the documents into chunks of up to
# 1000 characters with no overlap.
loader = DirectoryLoader("./data", glob="*.pdf", show_progress=True)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
docs = text_splitter.split_documents(documents)

# Add the chunks to the index; the return value of add_documents is kept.
list_of_docs = vector_store.add_documents(documents=docs)


100%|██████████| 3/3 [00:03<00:00,  1.26s/it]


In [39]:
# Ask the index for the three chunks most similar to the question.
# NOTE: this rebinds `docs`, which previously held the split document chunks.
docs = vector_store.similarity_search(
    search_type="similarity",
    k=3,
    query="What are some good locations in goa to visit?",
)
# Show the text of the top-ranked chunk only.
print(docs[0].page_content)

SINQUERIM :With its magnificent 17th century fort which has now been converted into a prison, Sinquerim is one of the finest beaches in Goa, offering international class facilities for water-skiing, parasailing, fishing, scuba-diving and wind-surfing.

Home to the Taj Hotel Group, which dominates the headland around the historic Fort Aguada, Sinquerim is located some 13 kms from Panaji. The uninterrupted stretch of firm sand stretches all the way north to Baga, offering visitors a temptingly long walk along the beach.

CANDOLIM: Candolim is the first beach that can be approached from the city of Panaji and is like a gateway to the other more famous beaches. Though individual accommodation is available here, there are only a few hotels with restaurants attached. One highlight of Candolim is the parasailing and water skiing facility, besides other water sports.


## 3. LangChain (Prompt Templates, Chaining, Agents)

In [40]:
# Schema for the trip details that the LLM's JSON answer is parsed into.
# The capitalised field names are also used as chain output keys and as
# placeholders in the itinerary prompt, so they must stay in sync with both.
class QueryInput(BaseModel):
    Source: str  # where the trip starts
    Destination: str  # where the trip ends
    Pax: int  # number of people travelling
    Date: str  # date of travel, kept as text (no date parsing is done here)
    
# Parser that converts the LLM's JSON text into a QueryInput instance.
parser = PydanticOutputParser(pydantic_object=QueryInput)


def parse_output(inputs: dict) -> dict:
    """Convert the raw JSON answer of the extraction step into separate fields.

    Args:
        inputs: Mapping that holds the LLM's JSON answer under "json_string".

    Returns:
        A dict with the keys "Source", "Destination", "Pax" and "Date", taken
        from the parsed QueryInput.
    """
    trip = parser.parse(inputs["json_string"])
    return {field: getattr(trip, field) for field in ("Source", "Destination", "Pax", "Date")}

def _extract_trip_details(user_query, llm):
   """Extract Source, Destination, Pax and Date from a free-text request.

   Runs an LLM extraction prompt followed by `parse_output` and returns the
   SequentialChain result, which is indexable by those four keys.
   """
   prompt = """
   Extract source, destination, number of people as 'Pax' and date of travel as 'Date' from below provided text.
   Provide information as json
   text: {input}
   """

   extraction_prompt = PromptTemplate(
       input_variables=["input"],
       template=prompt
   )

   # Step 1: the LLM answers with JSON text stored under "json_string".
   parse_chain = LLMChain(llm=llm, prompt=extraction_prompt, output_key="json_string")

   # Step 2: parse_output turns that JSON text into the four separate fields.
   transform_chain = TransformChain(
       input_variables=["json_string"],
       output_variables=["Source", "Destination", "Pax", "Date"],
       transform=parse_output
   )

   overall_chain = SequentialChain(
       chains=[parse_chain, transform_chain],
       output_variables=["Source", "Destination", 'Pax', 'Date'],
       input_variables=["input"],
       verbose=True
   )

   return overall_chain(user_query)


def _build_trip_tools(llm, vector_store, sql_agent):
   """Build the Search, Calculator, SQL and vector_store tools for the agent."""
   # Web search (SerpAPI), described to the agent as the weather/current-events tool.
   search = SerpAPIWrapper(serpapi_api_key = os.getenv("SERPER_API_KEY"))

   # Arithmetic via the LLM math chain.
   llm_math = LLMMathChain.from_llm(llm=llm)

   # Places to visit, answered from the documents in the vector store.
   places_to_visit = RetrievalQA.from_chain_type(
       llm=llm, chain_type="stuff", retriever=vector_store.as_retriever()
   )

   return [
       Tool(
           name = "Search",
           func = search.run,
           description = '''useful for when you need to answer questions about current events,
               getting weather forecast information like temperature and humidity
               do not use this for searching flight information
               do not use this for searching information about places to visit
               '''
       ),
       Tool(
           name = 'Calculator',
           func = llm_math.run,
           description = 'Useful for when you need to answer questions about math.'
       ),
       Tool(
           name = 'SQL',
           func = sql_agent.run,
           description='''Useful when you need to search flights using source, destination, pax only. 
           Use to find the cheapest flight. Use to find the total price for pax passengers.
           do not use this for searching information about places to visit           
           '''
       ),
       Tool(
           name = 'vector_store',
           func = places_to_visit.run,
           description = 'Useful when you need to find places to visit in a city'
       )
   ]


def run(user_query, llm, vector_store, sql_agent=None):
   """Produce a trip itinerary for a free-text travel request.

   The request is first reduced to Source / Destination / Pax / Date by an LLM
   extraction chain; those values fill an itinerary prompt that a tool-using
   agent (web search, calculator, SQL flight lookup, vector-store retrieval)
   then answers.

   Args:
       user_query: Free-text description of the trip.
       llm: LLM used by the chains, the tools and the agent.
       vector_store: Vector store whose retriever answers "places to visit".
       sql_agent: Object whose `.run` answers flight questions. Defaults to the
           notebook-level `agent_executor`, which is what the original function
           always used.

   Returns:
       The value returned by the agent's `run` for the itinerary prompt.
   """
   if sql_agent is None:
      # Backward-compatible default: fall back to the notebook-level SQL agent.
      sql_agent = agent_executor

   # Prompt handed to the agent once the trip details are known.
   itinerary_template = """You are an expert trip adviser. \
       Give me a complete itinerary for the trip with the total price using cheapest flight, places to visit in the destination city.
       Find 2 flights from {Source} to {Destination}. Find the cheapest flight. Find the total price for {Pax} passengers. \
       Find the weather in {Destination} on {Date} in degrees celsius only. \
       Respond with complete itinerary in a list format for the trip with the total price using cheapest flight, weather in degrees celsius and top 5 places to visit in the destination city."""

   itinerary_prompt = PromptTemplate(
       input_variables=["Source", "Destination", 'Pax', 'Date'],
       template=itinerary_template
   )

   agent = initialize_agent(
       _build_trip_tools(llm, vector_store, sql_agent),
       llm,
       agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
       verbose = True
   )

   inputs = _extract_trip_details(user_query, llm)

   final_prompt = itinerary_prompt.format(
       Source=inputs["Source"],
       Destination=inputs["Destination"],
       Pax=inputs["Pax"],
       Date=inputs["Date"]
   )

   print(f'final prompt: {final_prompt}')

   final_output = agent.run(final_prompt)

   return final_output
    

## 4. Run Console

In [43]:
# Free-text request; the traveller count is only implied ("Two of my friends and I").
user_query = "Two of my friends and I are travelling from Hyderabad to goa on 29th July, 2023."
final_output = run(user_query=user_query, llm=llm, vector_store=vector_store)



[1m> Entering new  chain...[0m

[1m> Finished chain.[0m
final prompt: You are an expert trip adviser.        Give me a complete itinerary for the trip with the total price using cheapest flight, places to visit in the destination city.
       Find 2 flights from Hyderabad to Goa. Fine the cheapest flight. Find the total price for 3 passengers.        Find the weather in Goa on 29th July, 2023 in degrees celsius only.        Respond with complete itinerary in a list format for the trip with the total price using cheapest flight, weather in degrees celsius and top 5 places to visit in the destination city.


[1m> Entering new  chain...[0m
[32;1m[1;3m
Action:
```
{
  "action": "SQL",
  "action_input": "Find 2 flights from Hyderabad to Goa. Find the cheapest flight. Find the total price for 3 passengers."
}
```
[0m

[1m> Entering new  chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m
Observation: [38;5;200m[1;3mflights[0m
Thought:[32;1m[1;3m I should 

In [44]:
# Bare expression: the notebook displays the itinerary returned by run().
final_output

'The cheapest two flights from Hyderabad to Goa are: Flight ID: 1, Price: 5678, Departure: 0700, Arrival: 0800, Number of Stops: 0, Date: 29-07-2023 and Flight ID: 2, Price: 4653, Departure: 0600, Arrival: 0830, Number of Stops: 0, Date: 29-07-2023. The total price for 3 passengers is 13959. The weather in Goa in July is very hot. The average temperatures are between 77°F and 82°F, drinking water regularly is advisable. The top 5 places to visit in Goa are Agonda Beach, Goa Cavelossim Beach, Mandrem Beach, Anjuna Beach/Market, and Baga Beach.'