### Conversational Chain

In [2]:
# Installing Dependencies
%pip install langchain langchain-openai google-generativeai sentence-transformers langchain_community langchain-community langchain-experimental InstructorEmbedding python-dotenv==1.0.0 openai tiktoken==0.4.0 faiss-cpu==1.7.4 protobuf

Note: you may need to restart the kernel to use updated packages.
Collecting tiktoken==0.4.0
  Downloading tiktoken-0.4.0-cp310-cp310-win_amd64.whl.metadata (5.4 kB)
INFO: pip is looking at multiple versions of langchain-openai to determine which version is compatible with other requirements. This could take a while.
Collecting langchain-openai
  Downloading langchain_openai-0.0.6-py3-none-any.whl.metadata (2.5 kB)
  Downloading langchain_openai-0.0.4-py3-none-any.whl.metadata (2.5 kB)
  Downloading langchain_openai-0.0.3-py3-none-any.whl.metadata (2.5 kB)
  Downloading langchain_openai-0.0.2.post1-py3-none-any.whl.metadata (2.4 kB)
  Downloading langchain_openai-0.0.2-py3-none-any.whl.metadata (570 bytes)

The conflict is caused by:
    The user requested tiktoken==0.4.0
    langchain-openai 0.0.5 depends on tiktoken<0.6.0 and >=0.5.2
    The user requested tiktoken==0.4.0
    langchain-openai 0.0.6 depends on tiktoken<1 and >=0.5.2
    The user requested tiktoken==0.4.0
    langchain

ERROR: Cannot install langchain-openai==0.0.2, langchain-openai==0.0.2.post1, langchain-openai==0.0.3, langchain-openai==0.0.4, langchain-openai==0.0.5, langchain-openai==0.0.6 and tiktoken==0.4.0 because these package versions have conflicting dependencies.
ERROR: ResolutionImpossible: for help visit https://pip.pypa.io/en/latest/topics/dependency-resolution/#dealing-with-dependency-conflicts


In [1]:
# Open-AI LLM
from dotenv import load_dotenv
import os
load_dotenv()

OPENAI_API_KEYS=os.getenv('OPENAI_API_KEYS')

from langchain_openai import OpenAI
llm = OpenAI(openai_api_key=OPENAI_API_KEYS)
llm

OpenAI(client=<openai.resources.completions.Completions object at 0x000001FC290838E0>, async_client=<openai.resources.completions.AsyncCompletions object at 0x000001FC290AD120>, openai_api_key='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', openai_proxy='')

In [2]:
llm("who are you")

  warn_deprecated(


'\n\nMy name is OpenAI, a powerful AI designed to assist and interact with users. How can I help you?'

In [2]:
# Loading Company's data as CSV
# Note: It can be loaded and updated in any format according to need

from langchain.document_loaders.csv_loader import CSVLoader

loader = CSVLoader(file_path="Transformed-Slots.csv")
data = loader.load()
data

[Document(page_content='Time Slots: Eash Hoskote\n8:00:00: Available\n8:15:00: Booked\n8:30:00: Available\n8:45:00: Available\n9:00:00: Booked\n9:15:00: Not Available\n9:30:00: Booked\n9:45:00: Not Available\n10:00:00: Not Available\n10:15:00: Not Available\n10:30:00: Not Available\n10:45:00: Available\n11:00:00: Not Available\n11:15:00: Available\n11:30:00: Not Available\n11:45:00: Booked\n12:00:00: Not Available\n12:15:00: Not Available\n12:30:00: Not Available\n12:45:00: Booked\n13:00:00: Booked\n13:15:00: Available\n13:30:00: Booked\n13:45:00: Not Available\n14:00:00: Available\n14:15:00: Available\n14:30:00: Booked\n14:45:00: Not Available\n15:00:00: Not Available\n15:15:00: Booked\n15:30:00: Booked\n15:45:00: Not Available\n16:00:00: Not Available\n16:15:00: Booked\n16:30:00: Available\n16:45:00: Not Available\n17:00:00: Not Available\n17:15:00: Not Available\n17:30:00: Not Available\n17:45:00: Not Available\n18:00:00: Available\n18:15:00: Booked\n18:30:00: Not Available\n18:45:0

In [3]:
from langchain_community.document_loaders import JSONLoader

loader = JSONLoader(
    file_path='./slots.json',
    jq_schema='.doctors',
    text_content=False)

data = loader.load()


ImportError: jq package not found, please install it with `pip install jq`

In [3]:
# Creating Embedding using Langchain's InstructEmbeddings
# Doc: https://python.langchain.com/docs/integrations/text_embedding/instruct_embeddings
# Note: Most voted embedding on LangChain leaderboard should be used

from langchain_community.embeddings import HuggingFaceInstructEmbeddings
instructor_embeddings = HuggingFaceInstructEmbeddings()

  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer


  return self.fget.__get__(instance, owner)()


max_seq_length  512


In [4]:
# Storing Embedding in Vector Database (FAISS) 
# Vectorisation
# Note: Chroma Vector Database should be efficient

from langchain.vectorstores import FAISS

vector_db = FAISS.from_documents(documents=data, embedding=instructor_embeddings)

In [5]:
# Saving and loading vector database

vector_db.save_local("./transformed-slots_vector")


In [6]:
new_db = FAISS.load_local("./slots_vector", instructor_embeddings)

In [11]:
# Retriever

retriever1 = new_db.as_retriever(search_kwargs={"k":3})

In [27]:
# Creating Conversational chain with memory, retriever and LLM
# Note: Different methods to pass chat-history in the main prompt are demonstrated here: 
# https://colab.research.google.com/drive/1q3K5Rq_l58p9cHmP-yIApCnYE-eAtzDG?usp=sharing#scrollTo=X6_ptbbCvReJ

from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.conversation.memory import ConversationBufferMemory, ConversationKGMemory, ConversationSummaryBufferMemory
from langchain.prompts import PromptTemplate

prompt_template = """Given the following context, question and chat_history, generate an answer based on this context and chat history.
    In the source document, there are rows indicating different Doctors and columns indicating time slots in 24 Hour Format. /n
    If in that particular time slot have 'Available' value, it means that doctor is available in that time slot. /n
    If in that particular time slot have 'Booked' value, it means that doctor is not available in that time slot. /n
    And if in that particular time slot have 'Not Available' value, it means that doctor is not available in that time slot. /n
    If you do not get valid answer, do not make up my your own and return 'i dont know'.
    /n
    CONTEXT: {context}
    /n
    CHAT_HISTORY: {chat_history}
    /n
    QUESTION: {question}"""

prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "chat_history", "question"]
        )

memory = ConversationSummaryBufferMemory(
        llm=llm,
        memory_key='chat_history', input_key="question", output_key="answer")

conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type = "stuff",
        verbose=True,
        retriever=retriever1,
        memory=memory,
        return_source_documents=True,
        get_chat_history=lambda h : h,
        combine_docs_chain_kwargs={'prompt': prompt},
    )

In [28]:
conversation_chain("what doctors available at 8am")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following context, question and chat_history, generate an answer based on this context and chat history.
    In the source document, there are rows indicating different Doctors and columns indicating time slots in 24 Hour Format. /n
    If in that particular time slot have 'Available' value, it means that doctor is available in that time slot. /n
    If in that particular time slot have 'Booked' value, it means that doctor is not available in that time slot. /n
    And if in that particular time slot have 'Not Available' value, it means that doctor is not available in that time slot. /n
    If you do not get valid answer, do not make up my your own and return 'i dont know'.
    /n
    CONTEXT: : 
Eash Hoskote: Available
Sarbari Gupta: Available
Dr. LakshmiKant N: Available
Dr. KVS Mahesh: NA
1: 2
Available: Booked

: 
Eash Hoskote: Available
S

{'question': 'what doctors available at 8am',
 'chat_history': '',
 'answer': '\n\n\nThe only doctor that is available at 8am is Dr. KVS Mahesh.',
 'source_documents': [Document(page_content=': \nEash Hoskote: Available\nSarbari Gupta: Available\nDr. LakshmiKant N: Available\nDr. KVS Mahesh: NA\n1: 2\nAvailable: Booked', metadata={'source': 'slots.csv', 'row': 0}),
  Document(page_content=': \nEash Hoskote: Available\nSarbari Gupta: Booked\nDr. LakshmiKant N: Available\nDr. KVS Mahesh: Available\n1: \nAvailable: ', metadata={'source': 'slots.csv', 'row': 43}),
  Document(page_content=': \nEash Hoskote: Available\nSarbari Gupta: Available\nDr. LakshmiKant N: NA\nDr. KVS Mahesh: Available\n1: \nAvailable: ', metadata={'source': 'slots.csv', 'row': 40})]}

In [27]:
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI

In [28]:
import pandas as pd
from langchain_openai import OpenAI

df = pd.read_csv("slots.csv")

In [39]:
model = ChatOpenAI(openai_api_key = os.getenv('OPENAI_API_KEYS'), temperature=0, model="gpt-3.5-turbo-0613")

In [60]:
agent = create_pandas_dataframe_agent(
    model,
    df,
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    
)

In [56]:
from langchain.prompts.chat import ChatPromptTemplate

final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", 
         """
          You are a helpful AI assistant expert in quering from csv to find answers. Return answer in descriptive form not in form of array of data or something.
          /n
          In the source document, there are rows indicating different Doctors and columns indicating time slots. /n
            If in that particular time slot have 'Available' value, it means that doctor is available in that time slot. /n
            If in that particular time slot have 'Booked' value, it means that doctor is not available in that time slot. /n
            And if in that particular time slot have 'NA' value, it means that doctor is not available in that time slot. /n
            If you do not get valid answer, do not make up my your own and return 'i dont know'.
         """
         ),
        ("user", "{question}\n ai: "),
    ]
)

In [63]:
result = agent.run(
        "What doctors are 'Available' at 8am"
  )
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df[df['8:00 AM'] == 'Available'].columns.tolist()"}`


[0m[36;1m[1;3mKeyError: '8:00 AM'[0m[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': 'df.columns.tolist()'}`
responded: It seems that there is an error because the column name '8:00 AM' does not exist in the dataframe. Let me check the column names to see if there is a similar column.

[0m[36;1m[1;3m['Unnamed: 0', 'Eash Hoskote', 'Sarbari Gupta', 'Dr. LakshmiKant N', 'Dr. KVS Mahesh', 'Unnamed: 5', 'Unnamed: 6', '1', 'Available', 'Unnamed: 9'][0m[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df[df['Unnamed: 0'] == '8:00 AM']['Available'].tolist()"}`
responded: The available column names in the dataframe are: 'Unnamed: 0', 'Eash Hoskote', 'Sarbari Gupta', 'Dr. LakshmiKant N', 'Dr. KVS Mahesh', 'Unnamed: 5', 'Unnamed: 6', '1', 'Available', and 'Unnamed: 9'. 

Let me check which doctors are 'Available

"According to the dataframe, the doctor who is 'Available' at 8am is 'Booked'."

In [64]:
final_prompt.format(
        question="what is the deatils of the product purchased by 'John Doe'"
  )

agent("What doctors are 'Available' at 8am")  



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df[df['8:00 AM'] == 'Available'].columns.tolist()"}`


[0m[36;1m[1;3mKeyError: '8:00 AM'[0m[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': 'df.columns.tolist()'}`
responded: It seems that there is an error because the column name '8:00 AM' does not exist in the dataframe. Let me check the column names to see if there is a similar column.

[0m[36;1m[1;3m['Unnamed: 0', 'Eash Hoskote', 'Sarbari Gupta', 'Dr. LakshmiKant N', 'Dr. KVS Mahesh', 'Unnamed: 5', 'Unnamed: 6', '1', 'Available', 'Unnamed: 9'][0m[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df[df['Available'] == 'Available'].columns.tolist()"}`
responded: The available column names in the dataframe are: 'Unnamed: 0', 'Eash Hoskote', 'Sarbari Gupta', 'Dr. LakshmiKant N', 'Dr. KVS Mahesh', 'Unnamed: 5', 'Unnamed: 6', '1', 'Available', and 'Unnamed: 9'. 

Let me check which doctors are 'Available' at

{'input': "What doctors are 'Available' at 8am",
 'output': "Based on the available column names, it seems that all doctors are 'Available' at 8am."}